ExaFMM 1
Fast-multipole Method for exascale systems
/*
Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef partition_h
#define partition_h
#include "mympi.h"
#include "serialfmm.h"

//! Handles all the partitioning of domains
template<Equation equation>
class Partition : public MyMPI, public SerialFMM<equation> {
private:
  int numCells1D;  //!< Number of cells in one dimension (leaf level)

protected:
  int LEVEL;  //!< Level of the MPI process binary tree
  std::vector<vect> XMIN;  //!< Minimum position vector of bodies
  std::vector<vect> XMAX;  //!< Maximum position vector of bodies
  int nprocs[64][2];  //!< Number of processes in the two split groups
  int offset[64][2];  //!< Offset of body in the two split groups
  int color[64][3];  //!< Color of hypercube communicators
  int key[64][3];  //!< Key of hypercube communicators
  MPI_Comm MPI_COMM[64][3];  //!< Hypercube communicators

public:
  using Kernel<equation>::printNow;  //!< Switch to print timings
  using Kernel<equation>::startTimer;  //!< Start timer for given event
  using Kernel<equation>::stopTimer;  //!< Stop timer for given event
  using Kernel<equation>::sortBodies;  //!< Sort bodies according to cell index
  using Kernel<equation>::X0;  //!< Center of root cell
  using Kernel<equation>::R0;  //!< Radius of root cell
  using TreeStructure<equation>::buffer;  //!< Buffer for MPI communication & sorting
  using TreeStructure<equation>::getLevel;  //!< Get level from cell index
  using BottomUp<equation>::getMaxLevel;  //!< Max level for bottom up tree build

private:
  //! Split domain according to iSplit
  void splitDomain(bigint iSplit, int l, int d) {
    real X = (iSplit + 1) * (XMAX[l][d] - XMIN[l][d]) / numCells1D + XMIN[l][d];  // Coordinate corresponding to iSplit
    XMAX[l+1] = XMAX[l];  // Set XMAX for next subdivision
    XMIN[l+1] = XMIN[l];  // Set XMIN for next subdivision
    if( color[l+1][0] % 2 == 0 ) {  // If on left side
      XMAX[l+1][d] = X;  // Set max to X
    } else {  // If on right side
      XMIN[l+1][d] = X;  // Set min to X
    }  // Endif for sides
  }

  //! Get global bucket data for parallel nth_element
  template<typename T>
  int getBucket(T &data, int numData, int lOffset, Bigints &send, Bigints &recv, MPI_Comm MPI_COMM0) {
    int maxBucket = send.size();  // Maximum number of buckets
    int numBucket;  // Number of buckets
    int numSample = std::min(maxBucket/MPISIZES,numData);  // Number of local samples
    MPI_Datatype MPI_TYPE = getType(data[0].ICELL);  // Get MPI data type
    int *rcnt = new int [MPISIZES];  // MPI recv count
    int *rdsp = new int [MPISIZES];  // MPI recv displacement
    for( int i=0; i!=numSample; ++i ) {  // Loop over local samples
      int stride = numData/numSample;  // Sampling stride
      send[i] = data[lOffset + i * stride].ICELL;  // Put sampled data in send buffer
    }  // End loop over samples
    MPI_Gather(&numSample,1,MPI_INT,  // Gather size of sample data to rank 0
               rcnt,      1,MPI_INT,
               0,MPI_COMM0);
    if( MPIRANKS == 0 ) {  // Only rank 0 operates on gathered info
      numBucket = 0;  // Initialize number of buckets
      for( int irank=0; irank!=MPISIZES; ++irank ) {  // Loop over processes
        rdsp[irank] = numBucket;  // Calculate recv displacement
        numBucket += rcnt[irank];  // Accumulate recv count to get number of buckets
      }  // End loop over processes
      recv.resize(numBucket);  // Resize recv so that end() is valid
    }  // Endif for rank 0
    MPI_Gatherv(&send[0],numSample,MPI_TYPE,  // Gather sample data to rank 0
                &recv[0],rcnt,rdsp,MPI_TYPE,
                0,MPI_COMM0);
    if( MPIRANKS == 0 ) {  // Only rank 0 operates on gathered info
      std::sort(recv.begin(),recv.end());  // Sort the bucket data
      numBucket = std::unique(recv.begin(),recv.end()) - recv.begin();  // Remove duplicate bucket data
      recv.resize(numBucket);  // Resize recv again
    }  // Endif for rank 0
    MPI_Bcast(&numBucket,1,MPI_INT,0,MPI_COMM0);  // Broadcast number of buckets
    MPI_Bcast(&recv[0],numBucket,MPI_TYPE,0,MPI_COMM0);  // Broadcast bucket data
    delete[] rcnt;  // Delete recv count
    delete[] rdsp;  // Delete recv displacement
    return numBucket;  // Return number of buckets
  }

protected:
  //! Split the MPI communicator into N-D hypercube
  void bisectionGetComm(int l) {
    for( int i=1; i>=0; --i ) {  // Loop over 1 and 0
      int pSplit = (nprocs[l][0] + i) / 2;  // Splitting criteria
      if( MPIRANK - offset[l][0] < pSplit ) {  // If on left side
        nprocs[l+1][i] = pSplit;  // Group size is the splitting criteria
        offset[l+1][i] = offset[l][0];  // Offset is the same as previous
        color[l+1][i] = color[l][0] * 2;  // Color is twice the previous
      } else {  // If on right side
        nprocs[l+1][i] = nprocs[l][0] - pSplit;  // Group size is what remains from the left side
        offset[l+1][i] = offset[l][0] + pSplit;  // Offset is incremented by splitting criteria
        color[l+1][i] = color[l][0] * 2 + 1;  // Color is twice the previous plus one
      }  // Endif for sides
      key[l+1][i] = MPIRANK - offset[l+1][i];  // Key is determined from offset
    }  // End loop over 1 and 0
    key[l+1][2] = color[l+1][1] % 2;  // The third type of key is determined from color[1]
    color[l+1][2] = key[l+1][1] + color[l][0] * (1 << (LEVEL - l - 1));  // The corresponding color is determined from key[1]
    for( int i=0; i!=3; ++i ) {  // Loop over the three types of colors and keys
      MPI_Comm_split(MPI_COMM_WORLD,color[l+1][i],key[l+1][i],&MPI_COMM[l+1][i]);  // Split the MPI communicator
    }  // End loop over three types
#ifdef DEBUG
    print("level : ",0);  // Print identifier
    print(l+1,0);  // Print current level
    print("\n",0);  // New line
    print("key : \n",0);  // Print identifier
    print(key[l+1],0,3);  // Print key
    print("color : \n",0);  // Print identifier
    print(color[l+1],0,3);  // Print color
#endif
  }

  //! One-to-one MPI_Alltoallv
  void bisectionAlltoall(Bodies &bodies, int nthLocal, int numLocal, int &newSize, int l) {
    startTimer("Bi Alltoall ");  // Start timer
    const int bytes = sizeof(bodies[0]);  // Byte size of body structure
    int scnt[2] = {nthLocal, numLocal - nthLocal};  // Set send count to right and left size
    int rcnt[2] = {0, 0};  // Initialize recv count
    MPI_Alltoall(scnt,1,MPI_INT,rcnt,1,MPI_INT,MPI_COMM[l+1][2]);  // Communicate the send count to get recv count
    int sdsp[2] = {0, scnt[0]};  // Send displacement
    int rdsp[2] = {0, rcnt[0]};  // Recv displacement
    newSize = rcnt[0] + rcnt[1];  // Get new size from recv count
    if( color[l+1][0] != color[l+1][1] ) newSize = numLocal;  // Unless it's a leftover process of an odd group
    buffer.resize(newSize);  // Resize recv buffer

    for( int i=0; i!=2; ++i ) {  // Loop over 0 and 1
      scnt[i] *= bytes;  // Multiply send count by byte size of data
      sdsp[i] *= bytes;  // Multiply send displacement by byte size of data
      rcnt[i] *= bytes;  // Multiply recv count by byte size of data
      rdsp[i] *= bytes;  // Multiply recv displacement by byte size of data
    }  // End loop over 0 and 1
    MPI_Alltoallv(&bodies[0],scnt,sdsp,MPI_BYTE,  // Communicate bodies
                  &buffer[0],rcnt,rdsp,MPI_BYTE,  // Using same buffer as sort buffer
                  MPI_COMM[l+1][2]);  // MPI_COMM[2] is for the one-to-one pair
    if( color[l+1][0] == color[l+1][1] ) bodies = buffer;  // Don't update if leftover process
    buffer.resize(bodies.size());  // Resize sort buffer
    stopTimer("Bi Alltoall ",printNow);  // Stop timer
    sortBodies(bodies,buffer);  // Sort bodies in ascending order
  }

  //! Scattering from leftover processes
  void bisectionScatter(Bodies &bodies, int nthLocal, int &newSize, int l) {
    startTimer("Bi Scatter ");  // Start timer
    const int bytes = sizeof(bodies[0]);  // Byte size of body structure
    int numScatter = nprocs[l+1][1] - 1;  // Number of processes to scatter to
    int oldSize = newSize;  // Size of recv buffer before communication
    int *scnt = new int [nprocs[l+1][1]];  // Send count
    int *sdsp = new int [nprocs[l+1][1]];  // Send displacement
    int rcnt;  // Recv count
    if( key[l+1][1] == numScatter ) {  // If this is the leftover proc to scatter from
      sdsp[0] = 0;  // Initialize send displacement
      for( int i=0; i!=numScatter; ++i ) {  // Loop over processes to send to
        int begin = 0, end = nthLocal;  // Set begin and end of range to send
        splitRange(begin,end,i,numScatter);  // Split range into numScatter and get i-th range
        scnt[i] = end - begin;  // Set send count based on range
        sdsp[i+1] = sdsp[i] + scnt[i];  // Set send displacement based on send count
      }  // End loop over processes to send to
      scnt[numScatter] = 0;  // Send count to self should be 0
      oldSize = 0;  // Reset oldSize to account for sent data
      newSize -= sdsp[numScatter];  // Set newSize to account for sent data
      buffer.erase(buffer.begin(),buffer.begin()+sdsp[numScatter]);  // Erase from recv buffer the part that was sent
    }  // Endif for leftover proc
    MPI_Scatter(scnt,1,MPI_INT,&rcnt,1,MPI_INT,numScatter,MPI_COMM[l+1][1]);  // Scatter the send count to get recv count
    if( key[l+1][1] != numScatter ) {  // If it's one of the receiving procs
      newSize += rcnt;  // Increment newSize by the recv count
      buffer.resize(newSize);  // Resize the recv buffer
    }  // Endif for receiving procs
    for( int i=0; i!=nprocs[l+1][1]; ++i ) {  // Loop over group of processes
      scnt[i] *= bytes;  // Multiply send count by byte size of data
      sdsp[i] *= bytes;  // Multiply send displacement by byte size of data
    }  // End loop over group of processes
    rcnt *= bytes;  // Multiply recv count by byte size of data
    MPI_Scatterv(&bodies[0],      scnt,sdsp,MPI_BYTE,  // Communicate bodies via MPI_Scatterv
                 &buffer[oldSize],rcnt,     MPI_BYTE,  // Offset recv buffer by oldSize
                 numScatter,MPI_COMM[l+1][1]);  // MPI_COMM[1] is used for scatter
    bodies = buffer;  // Copy recv buffer to bodies
    if( key[l+1][1] != numScatter ) sortBodies(bodies,buffer);  // Sort bodies in ascending order
    delete[] scnt;  // Delete send count
    delete[] sdsp;  // Delete send displacement
    stopTimer("Bi Scatter ",printNow);  // Stop timer
  }

  //! Gathering to leftover processes
  void bisectionGather(Bodies &bodies, int nthLocal, int numLocal, int &newSize, int l) {
    startTimer("Bi Gather ");  // Start timer
    const int bytes = sizeof(bodies[0]);  // Byte size of body structure
    int numGather = nprocs[l+1][0] - 1;  // Number of processes to gather to
    int oldSize = newSize;  // Size of recv buffer before communication
    int scnt = 0;  // Send count (stays 0 on the gathering proc)
    int *rcnt = new int [nprocs[l+1][0]];  // Recv count
    int *rdsp = new int [nprocs[l+1][0]];  // Recv displacement
    if( key[l+1][0] != 0 ) {  // If this is not the leftover proc
      int begin = 0, end = numLocal - nthLocal;  // Set begin and end of range to send
      splitRange(begin,end,key[l+1][0]-1,nprocs[l+1][0]);  // Split range into nprocs[0]
      scnt = end - begin;  // Set send count based on range
      newSize -= scnt;  // Set newSize to account for sent data
      buffer.erase(buffer.begin(),buffer.begin()+scnt);  // Erase from recv buffer the part that was sent
    }  // Endif for leftover proc
    MPI_Barrier(MPI_COMM[l+1][0]);  // Sync processes
    MPI_Gather(&scnt,1,MPI_INT,rcnt,1,MPI_INT,0,MPI_COMM[l+1][0]);  // Gather the send count to get recv count
    if( key[l+1][0] == 0 ) {  // If this is the leftover proc
      rdsp[0] = 0;  // Initialize the recv displacement
      for( int i=0; i!=numGather; ++i ) {  // Loop over processes to gather from
        rdsp[i+1] = rdsp[i] + rcnt[i];  // Set recv displacement based on recv count
      }  // End loop over processes
      newSize += rdsp[numGather] + rcnt[numGather];  // Increment newSize by the recv count
      buffer.resize(newSize);  // Resize recv buffer
    }  // Endif for leftover proc
    scnt *= bytes;  // Multiply send count by byte size of data
    for( int i=0; i!=nprocs[l+1][0]; ++i ) {  // Loop over group of processes
      rcnt[i] *= bytes;  // Multiply recv count by byte size of data
      rdsp[i] *= bytes;  // Multiply recv displacement by byte size of data
    }  // End loop over group of processes
    MPI_Gatherv(&bodies[0],      scnt,     MPI_BYTE,  // Communicate bodies via MPI_Gatherv
                &buffer[oldSize],rcnt,rdsp,MPI_BYTE,  // Offset recv buffer by oldSize
                0,MPI_COMM[l+1][0]);  // MPI_COMM[0] is used for gather
    bodies = buffer;  // Copy recv buffer to bodies
    delete[] rcnt;  // Delete recv count
    delete[] rdsp;  // Delete recv displacement
    if( key[l+1][0] == 0 ) sortBodies(bodies,buffer);  // Sort bodies in ascending order
    stopTimer("Bi Gather ",printNow);  // Stop timer
  }

public:
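  /*
    Note on the constructor below: LEVEL is the depth of the process binary
    tree, effectively ceil(log2(MPISIZE)), e.g. MPISIZE = 2 gives LEVEL = 1,
    MPISIZE = 6 or 8 gives LEVEL = 3, and the serial case MPISIZE = 1 is
    special-cased to LEVEL = 0. For each level l the constructor calls
    bisectionGetComm(l), which splits MPI_COMM_WORLD into the left/right
    groups and the pairwise communicator stored in MPI_COMM[l+1][0..2].
  */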
  //! Constructor
  Partition() : SerialFMM<equation>() {
    LEVEL = int(log(MPISIZE) / M_LN2 - 1e-5) + 1;  // Level of the process binary tree
    if( MPISIZE == 1 ) LEVEL = 0;  // Level is 0 for a serial execution
    XMIN.resize(LEVEL+1);  // Minimum position vector at each level
    XMAX.resize(LEVEL+1);  // Maximum position vector at each level
    startTimer("Split comm ");  // Start timer
    nprocs[0][0] = nprocs[0][1] = MPISIZE;  // Initialize number of processes in groups
    offset[0][0] = offset[0][1] = 0;  // Initialize offset of body in groups
    color[0][0] = color[0][1] = color[0][2] = 0;  // Initialize color of communicators
    key[0][0] = key[0][1] = key[0][2] = 0;  // Initialize key of communicators
    for( int l=0; l!=LEVEL; ++l ) {  // Loop over levels of N-D hypercube communication
      bisectionGetComm(l);  // Split the MPI communicator for that level
    }  // End loop over levels of N-D hypercube communication
    stopTimer("Split comm ",printNow);  // Stop timer
  }
  //! Destructor
  ~Partition() {}

  //! Set bounds of domain to be partitioned
  void setGlobDomain(Bodies &bodies, vect x0=0, real r0=M_PI) {
    numCells1D = 1 << getMaxLevel(bodies);  // Set number of leaf-level cells in one dimension
    B_iter B = bodies.begin();  // Reset body iterator
    XMIN[0] = XMAX[0] = B->X;  // Initialize xmin,xmax
    MPI_Datatype MPI_TYPE = getType(XMIN[0][0]);  // Get MPI data type
    for( B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      for( int d=0; d!=3; ++d ) {  // Loop over each dimension
        if     (B->X[d] < XMIN[0][d]) XMIN[0][d] = B->X[d];  // Determine xmin
        else if(B->X[d] > XMAX[0][d]) XMAX[0][d] = B->X[d];  // Determine xmax
      }  // End loop over each dimension
    }  // End loop over bodies
    vect X;  // Recv buffer
    MPI_Allreduce(XMAX[0],X,3,MPI_TYPE,MPI_MAX,MPI_COMM_WORLD);  // Reduce global maximum
    XMAX[0] = X;  // Get data from buffer
    MPI_Allreduce(XMIN[0],X,3,MPI_TYPE,MPI_MIN,MPI_COMM_WORLD);  // Reduce global minimum
    XMIN[0] = X;  // Get data from buffer
    for( int d=0; d!=3; ++d ) {  // Loop over each dimension
      X0[d] = (XMAX[0][d] + XMIN[0][d]) / 2;  // Calculate center of domain
      X0[d] = int(X0[d]+.5);  // Shift center to nearest integer
      R0 = std::max(XMAX[0][d] - X0[d], R0);  // Calculate max distance from center
      R0 = std::max(X0[d] - XMIN[0][d], R0);  // Calculate max distance from center
    }  // End loop over each dimension
    if( IMAGES != 0 ) {  // If periodic boundary condition
      if( X0[0]-R0 < x0[0]-r0 || x0[0]+r0 < X0[0]+R0  // Check for outliers in x direction
       || X0[1]-R0 < x0[1]-r0 || x0[1]+r0 < X0[1]+R0  // Check for outliers in y direction
       || X0[2]-R0 < x0[2]-r0 || x0[2]+r0 < X0[2]+R0 ) {  // Check for outliers in z direction
        std::cout << "Error: Particles located outside periodic domain @ rank "
                  << MPIRANK << std::endl;  // Print error message
      }  // End if for outlier checking
      X0 = x0;  // Center is x0 (default [0, 0, 0])
      R0 = r0;  // Radius is r0 (default M_PI)
    } else {  // If not periodic boundary condition
      R0 += 1e-5;  // Add some leeway to root radius
    }  // Endif for periodic boundary condition
    XMAX[0] = X0 + R0;  // Reposition global maximum
    XMIN[0] = X0 - R0;  // Reposition global minimum
  }

  //! Turn positions into indices of bins
  void binBodies(Bodies &bodies, int d) {
    for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      B->ICELL = bigint((B->X[d] - XMIN[0][d])  // Bin body positions into integers
                 / (XMAX[0][d] - XMIN[0][d]) * numCells1D);
    }  // End loop over bodies
  }

  //! Split bodies according to iSplit
  int splitBodies(Bodies &bodies, bigint iSplit) {
    int nth = 0;  // Initialize splitting index
    while( nth < int(bodies.size()) && bodies[nth].ICELL <= iSplit ) nth++;  // Determine end index of left side
    return nth;  // Return end index of left side
  }

  //! Send bodies to next rank (round robin)
  void shiftBodies(Bodies &bodies) {
    int newSize;  // New number of bodies
    int oldSize = bodies.size();  // Current number of bodies
    const int bytes = sizeof(bodies[0]);  // Byte size of body structure
    const int isend = (MPIRANK + 1          ) % MPISIZE;  // Send to next rank (wrap around)
    const int irecv = (MPIRANK - 1 + MPISIZE) % MPISIZE;  // Receive from previous rank (wrap around)
    MPI_Request sreq,rreq;  // Send, recv request handles

    MPI_Isend(&oldSize,1,MPI_INT,irecv,0,MPI_COMM_WORLD,&sreq);  // Send current number of bodies
    MPI_Irecv(&newSize,1,MPI_INT,isend,0,MPI_COMM_WORLD,&rreq);  // Receive new number of bodies
    MPI_Wait(&sreq,MPI_STATUS_IGNORE);  // Wait for send to complete
    MPI_Wait(&rreq,MPI_STATUS_IGNORE);  // Wait for recv to complete

    buffer.resize(newSize);  // Resize buffer to new number of bodies
    MPI_Isend(&bodies[0],oldSize*bytes,MPI_BYTE,irecv,  // Send bodies to next rank
              1,MPI_COMM_WORLD,&sreq);
    MPI_Irecv(&buffer[0],newSize*bytes,MPI_BYTE,isend,  // Receive bodies from previous rank
              1,MPI_COMM_WORLD,&rreq);
    MPI_Wait(&sreq,MPI_STATUS_IGNORE);  // Wait for send to complete
    MPI_Wait(&rreq,MPI_STATUS_IGNORE);  // Wait for recv to complete
    bodies = buffer;  // Copy bodies from buffer
  }

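  /*
    Note on the parallel selection below: nth_element() finds the global n-th
    smallest ICELL value without gathering all bodies to one rank. Each rank
    samples its locally sorted ICELL values (getBucket), rank 0 merges and
    deduplicates the samples into candidate bucket boundaries, an MPI_Reduce
    counts how many elements fall into each bucket, and the bucket containing
    the global n-th element is selected. The local search window (lOffset,
    numData) is then narrowed to that bucket and the process repeats until a
    single candidate remains; bisection() uses the result as iSplit.
  */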
  //! Parallel global nth_element on distributed memory
  template<typename T, typename T2>
  T2 nth_element(T &data, T2 n, MPI_Comm MPI_COMM0=0) {
    if( MPI_COMM0 == 0 ) {  // If MPI_COMM is not specified
      MPI_Comm_split(MPI_COMM_WORLD,0,MPIRANK,&MPI_COMM0);  // Create an artificial MPI_COMM
    }
    MPI_Comm_size(MPI_COMM0,&MPISIZES);  // Get number of MPI processes for split comm
    MPI_Comm_rank(MPI_COMM0,&MPIRANKS);  // Get index of current MPI process for split comm
    int numData = data.size();  // Total size of data to perform nth_element
    int maxBucket = 1000;  // Maximum number of buckets
    int numBucket;  // Number of buckets
    int lOffset = 0;  // Local offset of region being considered
    MPI_Datatype MPI_TYPE = getType(n);  // Get MPI data type
    int *rcnt = new int [MPISIZES];  // MPI recv count
    Bigints send(maxBucket);  // MPI send buffer for data
    Bigints recv(maxBucket);  // MPI recv buffer for data
    T2 gOffset = 0;  // Global offset of region being considered
    T2 *isend = new T2 [maxBucket];  // MPI send buffer for index
    T2 *irecv = new T2 [maxBucket];  // MPI recv buffer for index
    T2 *iredu = new T2 [maxBucket];  // Local scan of index buffer
    numBucket = getBucket(data,numData,lOffset,send,recv,MPI_COMM0);  // Get global bucket data

    while( numBucket > 1 ) {  // While there are multiple candidate buckets
      int ic=0, nth=0;  // Initialize counters
      for( int i=0; i!=maxBucket; ++i ) isend[i] = 0;  // Initialize bucket counter
      for( int i=0; i!=numData; ++i ) {  // Loop over range of data
        while( data[lOffset + i].ICELL > recv[ic] && ic < numBucket-1 ) ++ic;  // Set counter to current bucket
        isend[ic]++;  // Increment bucket counter
      }  // End loop over data
      MPI_Reduce(isend,irecv,numBucket,MPI_TYPE,  // Reduce bucket counter
                 MPI_SUM,0,MPI_COMM0);
      if( MPIRANKS == 0 ) {  // Only rank 0 operates on reduced data
        iredu[0] = 0;  // Initialize global scan index
        for( int i=0; i!=numBucket-1; ++i ) {  // Loop over buckets
          iredu[i+1] = iredu[i] + irecv[i];  // Increment global scan index
        }  // End loop over buckets
        nth = 0;  // Initialize index for bucket containing nth element
        while( n - gOffset > iredu[nth] && nth < numBucket ) ++nth;  // Find index for bucket containing nth element
        --nth;  // Account for overshoot (do while?)
        if( nth == -1 ) nth = 0;  // If nth is -1 don't split
        gOffset += iredu[nth];  // Increment global offset of region being considered
      }  // Endif for rank 0
      MPI_Bcast(&nth,1,MPI_INT,0,MPI_COMM0);  // Broadcast index for bucket containing nth element
      MPI_Bcast(&gOffset,1,MPI_TYPE,0,MPI_COMM0);  // Broadcast global offset
      iredu[0] = 0;  // Initialize local scan index
      for( int i=0; i!=numBucket-1; ++i ) {  // Loop over buckets
        iredu[i+1] = iredu[i] + isend[i];  // Increment local scan index
      }  // End loop over buckets
      if( nth == numBucket-1 ) {  // If nth is last bucket
        numData = numData-iredu[nth];  // Region of interest is that bucket
      } else {  // If nth is not the last bucket
        numData = iredu[nth+1]-iredu[nth];  // Region of interest is that bucket
      }  // Endif for last bucket
      lOffset += iredu[nth];  // Increment local offset to that bucket
      numBucket = getBucket(data,numData,lOffset,send,recv,MPI_COMM0);  // Get global bucket data
    }  // End while loop
    delete[] rcnt;  // Delete recv count
    delete[] isend;  // Delete send buffer for index
    delete[] irecv;  // Delete recv buffer for index
    delete[] iredu;  // Delete local scan of index buffer
    return recv[0]-1;  // Return nth element
  }

  //! Partitioning by recursive bisection
  void bisection(Bodies &bodies) {
    startTimer("Bin bodies ");  // Start timer
    MPI_Datatype MPI_TYPE = getType(bodies[0].ICELL);  // Get MPI data type
    int newSize;  // New size of recv buffer
    bigint numLocal = bodies.size();  // Local data size
    bigint numGlobal;  // Global data size
    MPI_Allreduce(&numLocal,&numGlobal,1,MPI_TYPE,MPI_SUM,MPI_COMM_WORLD);  // Reduce global data size
    bigint nthGlobal = (numGlobal * (nprocs[0][0] / 2)) / nprocs[0][0];  // Split at nth global element
    binBodies(bodies,2);  // Bin bodies into leaf level cells
    buffer.resize(numLocal);  // Resize sort buffer
    stopTimer("Bin bodies ",printNow);  // Stop timer
    sortBodies(bodies,buffer);  // Sort bodies in ascending order
    startTimer("Split bodies ");  // Start timer
    bigint iSplit = nth_element(bodies,nthGlobal);  // Get cell index of nth global element
    int nthLocal = splitBodies(bodies,iSplit);  // Split bodies based on iSplit
    stopTimer("Split bodies ",printNow);  // Stop timer
    for( int l=0; l!=LEVEL; ++l ) {  // Loop over levels of N-D hypercube communication
      splitDomain(iSplit,l,2-l%3);  // Split the domain according to iSplit
      bisectionAlltoall(bodies,nthLocal,numLocal,newSize,l);  // Communicate bodies by one-to-one MPI_Alltoallv
      if( nprocs[l][0] % 2 == 1 && nprocs[l][0] != 1 && nprocs[l+1][0] <= nprocs[l+1][1] )  // If scatter is necessary
        bisectionScatter(bodies,nthLocal,newSize,l);  // Communicate bodies by scattering from leftover proc
      if( nprocs[l][0] % 2 == 1 && nprocs[l][0] != 1 && nprocs[l+1][0] >= nprocs[l+1][1] )  // If gather is necessary
        bisectionGather(bodies,nthLocal,numLocal,newSize,l);  // Communicate bodies by gathering to leftover proc
#ifdef DEBUG
      for( int j=0; j!=MPISIZE; ++j ) {  // Loop over ranks
        MPI_Barrier(MPI_COMM_WORLD);  // Sync processes
        usleep(WAIT);  // Wait for WAIT microseconds
        if( MPIRANK == j ) {  // If it's my turn to print
          std::cout << "MPIRANK : " << j << std::endl;  // Print rank
          for( int i=0; i!=9; ++i ) std::cout << bodies[numLocal/10*i].I << " ";  // Print sampled body indices
          std::cout << std::endl;  // New line
        }  // Endif for my turn
      }  // End loop over ranks
#endif
      startTimer("Bin bodies ");  // Start timer
      numLocal = newSize;  // Update local data size
      MPI_Allreduce(&numLocal,&numGlobal,1,MPI_TYPE,MPI_SUM,MPI_COMM[l+1][0]);  // Reduce global data size
      nthGlobal = (numGlobal * (nprocs[l+1][0] / 2)) / nprocs[l+1][0];  // Split at nth global element
      binBodies(bodies,2-(l+1)%3);  // Bin bodies into leaf level cells
      buffer.resize(numLocal);  // Resize sort buffer
      stopTimer("Bin bodies ",printNow);  // Stop timer
      sortBodies(bodies,buffer);  // Sort bodies in ascending order
      startTimer("Split bodies ");  // Start timer
      iSplit = nth_element(bodies,nthGlobal,MPI_COMM[l+1][0]);  // Get cell index of nth global element
      nthLocal = splitBodies(bodies,iSplit);  // Split bodies based on iSplit
      stopTimer("Split bodies ",printNow);  // Stop timer
    }  // End loop over levels of N-D hypercube communication
  }

  //! Partition by recursive octsection
  void octsection(Bodies &bodies) {
    startTimer("Partition ");  // Start timer
    int byte = sizeof(bodies[0]);  // Byte size of body structure
    int level = int(log(MPISIZE-1) / M_LN2 / 3) + 1;  // Level of local root cell
    if( MPISIZE == 1 ) level = 0;  // For serial execution local root cell is root cell
    BottomUp<equation>::setIndex(bodies,level);  // Set index of bodies for that level
    buffer.resize(bodies.size());  // Resize sort buffer
    stopTimer("Partition ");  // Stop timer
    sortBodies(bodies,buffer);  // Sort bodies in ascending order
    startTimer("Partition ");  // Start timer
    int *scnt = new int [MPISIZE];  // Send count
    int *sdsp = new int [MPISIZE];  // Send displacement
    int *rcnt = new int [MPISIZE];  // Recv count
    int *rdsp = new int [MPISIZE];  // Recv displacement
    for( int i=0; i!=MPISIZE; ++i ) {  // Loop over ranks
      scnt[i] = 0;  // Initialize send counts
    }  // End loop over ranks
    for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      int index = B->ICELL - ((1 << 3*level) - 1) / 7;  // Get levelwise index
      int irank = index / (int(pow(8,level)) / MPISIZE);  // Get rank which the cell belongs to
      scnt[irank]++;  // Fill send count bucket
    }  // End loop over bodies
    MPI_Alltoall(scnt,1,MPI_INT,rcnt,1,MPI_INT,MPI_COMM_WORLD);  // Communicate send count to get recv count
    sdsp[0] = rdsp[0] = 0;  // Initialize send/recv displacements
    for( int irank=0; irank!=MPISIZE-1; ++irank ) {  // Loop over ranks
      sdsp[irank+1] = sdsp[irank] + scnt[irank];  // Set send displacement based on send count
      rdsp[irank+1] = rdsp[irank] + rcnt[irank];  // Set recv displacement based on recv count
    }  // End loop over ranks
    buffer.resize(rdsp[MPISIZE-1]+rcnt[MPISIZE-1]);  // Resize recv buffer
    for( int irank=0; irank!=MPISIZE; ++irank ) {  // Loop over ranks
      scnt[irank] *= byte;  // Multiply send count by byte size of data
      sdsp[irank] *= byte;  // Multiply send displacement by byte size of data
      rcnt[irank] *= byte;  // Multiply recv count by byte size of data
      rdsp[irank] *= byte;  // Multiply recv displacement by byte size of data
    }  // End loop over ranks
    MPI_Alltoallv(&bodies[0],scnt,sdsp,MPI_BYTE,&buffer[0],rcnt,rdsp,MPI_BYTE,MPI_COMM_WORLD);  // Communicate bodies
    bodies = buffer;  // Copy recv buffer to bodies
    for( int l=0; l!=LEVEL; ++l ) {  // Loop over levels of N-D hypercube
      int d = 2 - l % 3;  // Dimension of subdivision
      XMIN[l+1] = XMIN[l];  // Set XMIN for next subdivision
      XMAX[l+1] = XMAX[l];  // Set XMAX for next subdivision
      if( (MPIRANK >> (LEVEL - l - 1)) % 2 ) {  // If on right side
        XMIN[l+1][d] = (XMAX[l][d]+XMIN[l][d]) / 2;  // Set XMIN to midpoint
      } else {  // If on left side
        XMAX[l+1][d] = (XMAX[l][d]+XMIN[l][d]) / 2;  // Set XMAX to midpoint
      }  // Endif for side
    }  // End loop over levels
    delete[] scnt;  // Delete send count
    delete[] sdsp;  // Delete send displacement
    delete[] rcnt;  // Delete recv count
    delete[] rdsp;  // Delete recv displacement
    stopTimer("Partition ",printNow);  // Stop timer
  }

  //! Send bodies back to where they came from
  void unpartition(Bodies &bodies) {
    startTimer("Unpartition ");  // Start timer
    int byte = sizeof(bodies[0]);  // Byte size of body structure
    int *scnt = new int [MPISIZE];  // Send count
    int *sdsp = new int [MPISIZE];  // Send displacement
    int *rcnt = new int [MPISIZE];  // Recv count
    int *rdsp = new int [MPISIZE];  // Recv displacement
    for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      B->ICELL = B->IPROC;  // Copy process rank to cell index for sorting
    }  // End loop over bodies
    buffer.resize(bodies.size());  // Resize sort buffer
    stopTimer("Unpartition ");  // Stop timer
    sortBodies(bodies,buffer);  // Sort bodies in ascending order
    startTimer("Unpartition ");  // Start timer
    for( int i=0; i!=MPISIZE; ++i ) {  // Loop over ranks
      scnt[i] = 0;  // Initialize send counts
    }  // End loop over ranks
    for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      int irank = B->IPROC;  // Get rank which the body belongs to
      scnt[irank]++;  // Fill send count bucket
    }  // End loop over bodies
    MPI_Alltoall(scnt,1,MPI_INT,rcnt,1,MPI_INT,MPI_COMM_WORLD);  // Communicate send count to get recv count
    sdsp[0] = rdsp[0] = 0;  // Initialize send/recv displacements
    for( int irank=0; irank!=MPISIZE-1; ++irank ) {  // Loop over ranks
      sdsp[irank+1] = sdsp[irank] + scnt[irank];  // Set send displacement based on send count
      rdsp[irank+1] = rdsp[irank] + rcnt[irank];  // Set recv displacement based on recv count
    }  // End loop over ranks
    buffer.resize(rdsp[MPISIZE-1]+rcnt[MPISIZE-1]);  // Resize recv buffer
    for( int irank=0; irank!=MPISIZE; ++irank ) {  // Loop over ranks
      scnt[irank] *= byte;  // Multiply send count by byte size of data
      sdsp[irank] *= byte;  // Multiply send displacement by byte size of data
      rcnt[irank] *= byte;  // Multiply recv count by byte size of data
      rdsp[irank] *= byte;  // Multiply recv displacement by byte size of data
    }  // End loop over ranks
    MPI_Alltoallv(&bodies[0],scnt,sdsp,MPI_BYTE,&buffer[0],rcnt,rdsp,MPI_BYTE,MPI_COMM_WORLD);  // Communicate bodies
    bodies = buffer;  // Copy recv buffer to bodies
    for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {  // Loop over bodies
      assert(B->IPROC == MPIRANK);  // Make sure bodies are in the right rank
    }  // End loop over bodies
    delete[] scnt;  // Delete send count
    delete[] sdsp;  // Delete send displacement
    delete[] rcnt;  // Delete recv count
    delete[] rdsp;  // Delete recv displacement
    stopTimer("Unpartition ",printNow);  // Stop timer
  }
};

#endif
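The header above only defines the partitioner. As a rough illustration of how its public interface fits together, the sketch below shows one plausible call sequence; Laplace is assumed to be an Equation instantiation provided elsewhere in exafmm, the Bodies container is assumed to be filled (with positions and IPROC set to the owning rank) by the caller, and MPI startup/shutdown is assumed to be handled by the MyMPI base class. It is an illustration of the interface, not code from this file.

#include "partition.h"

int main() {
  Bodies bodies(1000000);      // assumed: positions and IPROC filled in by the caller
  Partition<Laplace> FMM;      // assumed: Laplace is an Equation provided by exafmm
  FMM.setGlobDomain(bodies);   // compute global bounds -> X0, R0, XMIN[0], XMAX[0]
  FMM.octsection(bodies);      // or FMM.bisection(bodies) for the recursive-bisection variant
  // ... build the local tree and evaluate the FMM on the partitioned bodies ...
  FMM.unpartition(bodies);     // send bodies back to the ranks recorded in IPROC
  return 0;
}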