ExaFMM 1
Fast-multipole Method for exascale systems
|
00001 /* 00002 Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba 00003 00004 Permission is hereby granted, free of charge, to any person obtaining a copy 00005 of this software and associated documentation files (the "Software"), to deal 00006 in the Software without restriction, including without limitation the rights 00007 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00008 copies of the Software, and to permit persons to whom the Software is 00009 furnished to do so, subject to the following conditions: 00010 00011 The above copyright notice and this permission notice shall be included in 00012 all copies or substantial portions of the Software. 00013 00014 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00015 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00016 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00017 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00018 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00019 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00020 THE SOFTWARE. 00021 */ 00022 #ifndef parallelfmm_h 00023 #define parallelfmm_h 00024 #include "partition.h" 00025 00026 //! Handles all the communication of local essential trees 00027 template<Equation equation> 00028 class ParallelFMM : public Partition<equation> { 00029 private: 00030 std::vector<int> sendBodyCnt; //!< Vector of body send counts 00031 std::vector<int> sendBodyDsp; //!< Vector of body send displacements 00032 std::vector<int> recvBodyCnt; //!< Vector of body recv counts 00033 std::vector<int> recvBodyDsp; //!< Vector of body recv displacements 00034 std::vector<int> sendBodyRanks; //!< Vector of ranks to send bodies to 00035 std::vector<int> sendBodyCellCnt; //!< Vector of send counts for cells of bodies 00036 std::vector<C_iter> sendBodyCells; //!< Vector of cell iterators for cells of bodies to send 00037 std::vector<int> sendCellCnt; //!< Vector of cell send counts 00038 std::vector<int> sendCellDsp; //!< Vector of cell send displacements 00039 std::vector<int> recvCellCnt; //!< Vector of cell recv counts 00040 std::vector<int> recvCellDsp; //!< Vector of cell recv displacements 00041 std::vector<vect> xminAll; //!< Buffer for gathering XMIN 00042 std::vector<vect> xmaxAll; //!< Buffer for gathering XMAX 00043 00044 JBodies sendBodies; //!< Send buffer for bodies 00045 JBodies recvBodies; //!< Recv buffer for bodies 00046 JCells sendCells; //!< Send buffer for cells 00047 JCells recvCells; //!< Recv buffer for cells 00048 00049 public: 00050 using Kernel<equation>::printNow; //!< Switch to print timings 00051 using Kernel<equation>::startTimer; //!< Start timer for given event 00052 using Kernel<equation>::stopTimer; //!< Stop timer for given event 00053 using Kernel<equation>::sortBodies; //!< Sort bodies according to cell index 00054 using Kernel<equation>::sortCells; //!< Sort cells according to cell index 00055 using Kernel<equation>::R0; //!< Radius of root cell 00056 using TreeStructure<equation>::buffer; //!< Buffer for MPI communication & sorting 00057 using TreeStructure<equation>::getLevel; //!< Get level from cell index 00058 using TreeStructure<equation>::getCenter; //!< Get cell center and radius from cell index 00059 using TreeStructure<equation>::bodies2twigs; //!< Group bodies into twig cells 00060 using TreeStructure<equation>::twigs2cells; //!< Link twigs bottomup to create all cells in tree 00061 using Partition<equation>::isPowerOfTwo; //!< If n is power of two return true 00062 using Partition<equation>::splitRange; //!< Split range and return partial range 00063 using Partition<equation>::print; //!< Print in MPI 00064 using Partition<equation>::LEVEL; //!< Level of the MPI process binary tree 00065 using Partition<equation>::XMIN; //!< Minimum position vector of bodies 00066 using Partition<equation>::XMAX; //!< Maximum position vector of bodies 00067 using Partition<equation>::nprocs; //!< Number of processes in the two split groups 00068 using Partition<equation>::color; //!< Color for hypercube communicators 00069 using Partition<equation>::key; //!< Key for hypercube communicators 00070 using Partition<equation>::MPI_COMM; //!< Hypercube communicators 00071 00072 private: 00073 //! Gather bounds of other domain 00074 void gatherBounds() { 00075 xminAll.resize(MPISIZE); // Resize buffer for gathering xmin 00076 xmaxAll.resize(MPISIZE); // Resize buffer for gathering xmax 00077 sendBodyCnt.resize(MPISIZE); // Resize vector of body send counts 00078 sendBodyDsp.resize(MPISIZE); // Resize vector of body send displacements 00079 recvBodyCnt.resize(MPISIZE); // Resize vector of body recv counts 00080 recvBodyDsp.resize(MPISIZE); // Resize vector of body recv displacements 00081 sendCellCnt.resize(MPISIZE); // Resize vector of cell send counts 00082 sendCellDsp.resize(MPISIZE); // Resize vector of cell send displacements 00083 recvCellCnt.resize(MPISIZE); // Resize vector of cell recv counts 00084 recvCellDsp.resize(MPISIZE); // Resize vector of cell recv displacements 00085 MPI_Datatype MPI_TYPE = getType(XMIN[LEVEL][0]); // Get MPI data type 00086 MPI_Allgather(&XMIN[LEVEL][0],3,MPI_TYPE, // Gather XMIN 00087 &xminAll[0][0],3,MPI_TYPE,MPI_COMM_WORLD); 00088 MPI_Allgather(&XMAX[LEVEL][0],3,MPI_TYPE, // Gather XMAX 00089 &xmaxAll[0][0],3,MPI_TYPE,MPI_COMM_WORLD); 00090 } 00091 00092 //! Get neighbor ranks to send to 00093 void getSendRank(Cells &cells) { 00094 sendBodyRanks.clear(); // Clear send ranks 00095 sendBodyCellCnt.clear(); // Clear send counts 00096 sendBodyCells.clear(); // Clear send body cells 00097 int oldsize = 0; // Per rank offset of the number of cells to send 00098 for( int irank=0; irank!=MPISIZE; ++irank ) { // Loop over ranks 00099 int ic = 0; // Initialize neighbor dimension counter 00100 for( int d=0; d!=3; ++d ) { // Loop over dimensions 00101 if(xminAll[irank][d] < 2 * XMAX[LEVEL][d] - XMIN[LEVEL][d] &&// If the two domains are touching or overlapping 00102 xmaxAll[irank][d] > 2 * XMIN[LEVEL][d] - XMAX[LEVEL][d]) {// in all dimensions, they are neighboring domains 00103 ic++; // Increment neighbor dimension counter 00104 } // Endif for overlapping domains 00105 } // End loop over dimensions 00106 ic = 3; 00107 if( ic == 3 && irank != MPIRANK ) { // If ranks are neighbors in all dimensions 00108 for( C_iter C=cells.begin(); C!=cells.end(); ++C ) { // Loop over cells 00109 if( C->NCHILD == 0 ) { // If cell is a twig 00110 bool send = false; // Initialize logical for sending 00111 if( IMAGES == 0 ) { // If free boundary condition 00112 Xperiodic = 0; // Set periodic coordinate offset 00113 real R = getDistance(C,xminAll[irank],xmaxAll[irank]);// Get distance to other domain 00114 send |= CLET * C->R > THETA * R - EPS2; // If the cell seems close enough for P2P 00115 } else { // If periodic boundary condition 00116 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00117 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00118 for( int iz=-1; iz<=1; ++iz ) { // Loop over z periodic direction 00119 Xperiodic[0] = ix * 2 * R0; // Coordinate offset for x periodic direction 00120 Xperiodic[1] = iy * 2 * R0; // Coordinate offset for y periodic direction 00121 Xperiodic[2] = iz * 2 * R0; // Coordinate offset for z periodic direction 00122 real R = getDistance(C,xminAll[irank],xmaxAll[irank]);// Get distance to other domain 00123 send |= CLET * C->R > THETA * R - EPS2; // If the cell seems close enough for P2P 00124 } // End loop over z periodic direction 00125 } // End loop over y periodic direction 00126 } // End loop over x periodic direction 00127 } // Endif for periodic boundary condition 00128 if( send ) { // If the cell seems close enough for P2P 00129 sendBodyCells.push_back(C); // Add cell iterator to scells 00130 } // Endif for cell distance 00131 } // Endif for twigs 00132 } // End loop over cells 00133 sendBodyRanks.push_back(irank); // Add current rank to sendBodyRanks 00134 sendBodyCellCnt.push_back(sendBodyCells.size()-oldsize);// Add current cell count to sendBodyCellCnt 00135 oldsize = sendBodyCells.size(); // Set new offset for cell count 00136 } // Endif for neighbor ranks 00137 } // End loop over ranks 00138 } 00139 00140 //! Get size of data to send 00141 void getSendCount(bool comm=true) { 00142 int ic = 0, ssize = 0; // Initialize counter and offset for scells 00143 sendBodyCnt.assign(MPISIZE,0); // Initialize send count 00144 sendBodyDsp.assign(MPISIZE,0); // Initialize send displacement 00145 for( int i=0; i!=int(sendBodyRanks.size()); ++i ) { // Loop over ranks to send to & recv from 00146 int irank = sendBodyRanks[i]; // Rank to send to & recv from 00147 for( int c=0; c!=sendBodyCellCnt[i]; ++c,++ic ) { // Loop over cells to send to that rank 00148 C_iter C = sendBodyCells[ic]; // Set cell iterator 00149 for( B_iter B=C->LEAF; B!=C->LEAF+C->NDLEAF; ++B ) { // Loop over bodies in that cell 00150 JBody body; // Set compact body type for sending 00151 body.ICELL = B->ICELL; // Set cell index of compact body type 00152 body.X = B->X; // Set position of compact body type 00153 body.SRC = B->SRC; // Set source values of compact body type 00154 sendBodies.push_back(body); // Push it into the send buffer 00155 } // End loop over bodies 00156 } // End loop over cells 00157 sendBodyCnt[irank] = sendBodies.size()-ssize; // Set send count of current rank 00158 sendBodyDsp[irank] = ssize; // Set send displacement of current rank 00159 ssize += sendBodyCnt[irank]; // Increment offset for vector scells 00160 } // End loop over ranks 00161 if( comm ) { // If communication is necessary 00162 MPI_Alltoall(&sendBodyCnt[0],1,MPI_INT,&recvBodyCnt[0],1,MPI_INT,MPI_COMM_WORLD);// Communicate the send counts 00163 int rsize = 0; // Initialize total recv count 00164 for( int i=0; i!=MPISIZE; ++i ) { // Loop over ranks to recv from 00165 recvBodyDsp[i] = rsize; // Set recv displacements 00166 rsize += recvBodyCnt[i]; // Accumulate recv counts 00167 } // End loop over ranks to recv from 00168 recvBodies.resize(rsize); // Resize recv buffer 00169 } 00170 } 00171 00172 //! Communicate cells by one-to-one MPI_Alltoallv 00173 void commBodiesAlltoall() { 00174 assert(isPowerOfTwo(MPISIZE)); // Make sure the number of processes is a power of two 00175 int bytes = sizeof(sendBodies[0]); // Byte size of jbody structure 00176 int *scntd = new int [MPISIZE]; // Permuted send count 00177 int *rcntd = new int [MPISIZE]; // Permuted recv count 00178 int *rdspd = new int [MPISIZE]; // Permuted recv displacement 00179 int *irev = new int [MPISIZE]; // Map original to compressed index 00180 JBodies sendBuffer = sendBodies; // Send buffer 00181 JBodies recvBuffer; // Recv buffer 00182 for( int l=0; l!=LEVEL; ++l ) { // Loop over levels of N-D hypercube communication 00183 int npart = 1 << (LEVEL - l - 1); // Size of partition block 00184 int scnt2[2], sdsp2[2], rcnt2[2], rdsp2[2]; // Send/recv counts/displacements per level 00185 int ic = 0; // Initialize counter 00186 for( int i=0; i!=2; ++i ) { // Loop over the two blocks 00187 scnt2[i] = 0; // Initialize send count per level 00188 for( int irank=0; irank!=MPISIZE/2; ++irank ) { // Loop over ranks in each block 00189 int idata = (irank / npart) * 2 * npart + irank % npart + i * npart;// Original index 00190 int isend = i * MPISIZE / 2 + irank; // Compressed index 00191 irev[idata] = isend; // Map original to compressed index 00192 scntd[isend] = sendBodyCnt[idata]; // Permuted send count 00193 scnt2[i] += sendBodyCnt[idata] * bytes; // Send count per block 00194 for( int id=sendBodyDsp[idata]; id!=sendBodyDsp[idata]+sendBodyCnt[idata]; ++id,++ic ) {// Loop over bodies 00195 sendBuffer[ic] = sendBodies[id]; // Fill send buffer 00196 } // End loop over bodies 00197 } // End loop over ranks in each block 00198 } // End loop over blocks 00199 MPI_Alltoall(scntd,MPISIZE/2,MPI_INT,rcntd,MPISIZE/2,MPI_INT,MPI_COMM[l+1][2]);// Comm permuted count 00200 MPI_Alltoall(scnt2,1,MPI_INT,rcnt2,1,MPI_INT,MPI_COMM[l+1][2]);// Comm send count per block 00201 sdsp2[0] = 0; sdsp2[1] = scnt2[0]; // Set send displacement 00202 rdsp2[0] = 0; rdsp2[1] = rcnt2[0]; // Set recv displacement 00203 int rsize = (rdsp2[1] + rcnt2[1]) / bytes; // Size of recieved bodies 00204 sendBodies.resize(rsize); // Resize send bodies 00205 sendBuffer.resize(rsize); // Resize send buffer 00206 recvBuffer.resize(rsize); // Resize recv buffer 00207 MPI_Alltoallv(&sendBuffer[0],scnt2,sdsp2,MPI_BYTE, // Communicate cells 00208 &recvBuffer[0],rcnt2,rdsp2,MPI_BYTE,MPI_COMM[l+1][2]);// MPI_COMM[2] is for the one-to-one pair 00209 rdspd[0] = 0; // Initialize permuted recv displacement 00210 for( int irank=0; irank!=MPISIZE-1; ++irank ) { // Loop over ranks 00211 rdspd[irank+1] = rdspd[irank] + rcntd[irank]; // Set permuted recv displacement 00212 } // End loop over ranks 00213 ic = 0; // Initiaize counter 00214 for( int i=0; i!=2; ++i ) { // Loop over the two blocks 00215 for( int irank=0; irank!=MPISIZE/2; ++irank ) { // Loop over ranks in each block 00216 int idata = (irank / npart) * 2 * npart + irank % npart + i * npart;// Original index 00217 int irecv = i * MPISIZE / 2 + irank; // Compressed index 00218 recvBodyCnt[idata] = rcntd[irecv]; // Set recv cound 00219 idata = irev[irecv]; // Set premuted index 00220 for( int id=rdspd[idata]; id!=rdspd[idata]+rcntd[idata]; ++id,++ic ) {// Loop over bodies 00221 sendBodies[ic] = recvBuffer[id]; // Get data from recv buffer 00222 } // End loop over bodies 00223 } // End loop over ranks in each block 00224 } // End loop over blocks 00225 recvBodyDsp[0] = 0; // Initialize recv displacement 00226 for( int irank=0; irank!=MPISIZE-1; ++irank ) { // Loop over ranks 00227 recvBodyDsp[irank+1] = recvBodyDsp[irank] + recvBodyCnt[irank];// Set recv displacement 00228 } // End loop over ranks 00229 for( int irank=0; irank!=MPISIZE; ++irank ) { // Loop over ranks 00230 sendBodyCnt[irank] = recvBodyCnt[irank]; // Get next send count 00231 sendBodyDsp[irank] = recvBodyDsp[irank]; // Get next send displacement 00232 } // End loop over ranks 00233 } // End loop over levels of N-D hypercube communication 00234 recvBodies = sendBodies; // Copy send bodies to recv bodies 00235 delete[] scntd; // Delete permuted send count 00236 delete[] rcntd; // Delete permuted recv count 00237 delete[] rdspd; // Delete permuted recv displacement 00238 delete[] irev; // Delete map from original to compressed index 00239 } 00240 00241 //! Get boundries of domains on other processes 00242 void getOtherDomain(vect &xmin, vect &xmax, int l) { 00243 startTimer("Get domain "); // Start timer 00244 MPI_Datatype MPI_TYPE = getType(XMIN[l][0]); // Get MPI data type 00245 vect send[2],recv[2]; // Send and recv buffer 00246 MPI_Request req; // MPI requests 00247 send[0] = send[1] = XMIN[l]; // Set XMIN into send buffer 00248 recv[0] = recv[1] = 0; // Initialize recv buffer 00249 MPI_Alltoall(send,3,MPI_TYPE,recv,3,MPI_TYPE,MPI_COMM[l][2]);// Communicate XMIN 00250 xmin = recv[1-key[l][2]]; // Copy xmin from recv buffer 00251 send[0] = send[1] = XMAX[l]; // Set XMAX into send buffer 00252 recv[0] = recv[1] = 0; // Initialize recv buffer 00253 MPI_Alltoall(send,3,MPI_TYPE,recv,3,MPI_TYPE,MPI_COMM[l][2]);// Communicate XMAX 00254 xmax = recv[1-key[l][2]]; // Copy xmax from recv buffer 00255 if( nprocs[l-1][0] % 2 == 1 && nprocs[l][0] >= nprocs[l][1] ) {// If right half of odd group 00256 int isend = (key[l][0] + 1 ) % nprocs[l][0]; // Send to next rank (wrapped) 00257 int irecv = (key[l][0] - 1 + nprocs[l][0]) % nprocs[l][0];// Recv from previous rank (wrapped) 00258 send[0] = xmin; // Set xmin in send buffer 00259 MPI_Isend(send,3,MPI_TYPE,isend,0,MPI_COMM[l][0],&req); // Send to next rank 00260 MPI_Irecv(recv,3,MPI_TYPE,irecv,0,MPI_COMM[l][0],&req); // Recv from previous rank 00261 MPI_Wait(&req,MPI_STATUS_IGNORE); // Wait for recv to finish 00262 if( color[l][0] != color[l][1] ) xmin = recv[0]; // Update only if leftover process of odd group 00263 send[0] = xmax; // Set xmax in send buffer 00264 MPI_Isend(send,3,MPI_TYPE,isend,0,MPI_COMM[l][0],&req); // Send to next rank 00265 MPI_Irecv(recv,3,MPI_TYPE,irecv,0,MPI_COMM[l][0],&req); // Recv from previous rank 00266 MPI_Wait(&req,MPI_STATUS_IGNORE); // Wait for recv to finish 00267 if( color[l][0] != color[l][1] ) xmax = recv[0]; // Update only if leftover process of odd group 00268 } // Endif for right half of odd group 00269 if( nprocs[l-1][0] == 1 ) { // If previous group had one process 00270 xmin = XMIN[l]; // Use own XMIN value for xmin 00271 xmax = XMAX[l]; // Use own XMAX value for xmax 00272 } // Endif for isolated process 00273 stopTimer("Get domain ",printNow); // Stop timer 00274 } 00275 00276 //! Get disatnce to other domain 00277 real getDistance(C_iter C, vect xmin, vect xmax) { 00278 vect dist; // Distance vector 00279 for( int d=0; d!=3; ++d ) { // Loop over dimensions 00280 dist[d] = (C->X[d] + Xperiodic[d] > xmax[d])* // Calculate the distance between cell C and 00281 (C->X[d] + Xperiodic[d] - xmax[d])+ // the nearest point in domain [xmin,xmax]^3 00282 (C->X[d] + Xperiodic[d] < xmin[d])* // Take the differnece from xmin or xmax 00283 (C->X[d] + Xperiodic[d] - xmin[d]); // or 0 if between xmin and xmax 00284 } // End loop over dimensions 00285 real R = std::sqrt(norm(dist)); // Scalar distance 00286 return R; 00287 } 00288 00289 //! Determine which cells to send 00290 void getLET(C_iter C0, C_iter C, vect xmin, vect xmax) { 00291 int level = int(log(MPISIZE-1) / M_LN2 / 3) + 1; // Level of local root cell 00292 if( MPISIZE == 1 ) level = 0; // Account for serial case 00293 for( int i=0; i!=C->NCHILD; i++ ) { // Loop over child cells 00294 C_iter CC = C0+C->CHILD+i; // Iterator for child cell 00295 bool divide = false; // Initialize logical for dividing 00296 if( IMAGES == 0 ) { // If free boundary condition 00297 Xperiodic = 0; // Set periodic coordinate offset 00298 real R = getDistance(CC,xmin,xmax); // Get distance to other domain 00299 divide |= CLET * CC->R > THETA * R - EPS2; // If the cell seems too close and not twig 00300 } else { // If periodic boundary condition 00301 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00302 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00303 for( int iz=-1; iz<=1; ++iz ) { // Loop over z periodic direction 00304 Xperiodic[0] = ix * 2 * R0; // Coordinate offset for x periodic direction 00305 Xperiodic[1] = iy * 2 * R0; // Coordinate offset for y periodic direction 00306 Xperiodic[2] = iz * 2 * R0; // Coordinate offset for z periodic direction 00307 real R = getDistance(CC,xmin,xmax); // Get distance to other domain 00308 divide |= CLET * CC->R > THETA * R - EPS2; // If the cell seems too close and not twig 00309 } // End loop over z periodic direction 00310 } // End loop over y periodic direction 00311 } // End loop over x periodic direction 00312 } // Endif for periodic boundary condition 00313 divide |= R0 / (1 << level) + 1e-5 < CC->R; // If the cell is larger than the local root cell 00314 if( divide && CC->NCHILD != 0 ) { // If the cell seems too close and not twig 00315 getLET(C0,CC,xmin,xmax); // Traverse the tree further 00316 } else { // If the cell is far or a twig 00317 assert( R0 / (1 << level) + 1e-5 > CC->R ); // Can't send cells that are larger than local root 00318 JCell cell; // Set compact cell type for sending 00319 cell.ICELL = CC->ICELL; // Set index of compact cell type 00320 cell.M = CC->M; // Set Multipoles of compact cell type 00321 sendCells.push_back(cell); // Push cell into send buffer vector 00322 } // Endif for interaction 00323 } // End loop over child cells 00324 if( C->ICELL == 0 && C->NCHILD == 0 ) { // If the root cell has no children 00325 JCell cell; // Set compact cell type for sending 00326 cell.ICELL = C->ICELL; // Set index of compact cell type 00327 cell.M = C->M; // Set Multipoles of compact cell type 00328 sendCells.push_back(cell); // Push cell into send buffer vector 00329 } // Endif for root cells children 00330 } 00331 00332 //! Communicate cells by one-to-one MPI_Alltoallv 00333 void commCellsAlltoall(int l) { 00334 const int bytes = sizeof(sendCells[0]); // Byte size of JCell structure 00335 int rcnt[2], scnt[2] = {0, 0}; // Recv count, send count 00336 scnt[1-key[l+1][2]] = sendCells.size()*bytes; // Set send count to size of send buffer * bytes 00337 MPI_Alltoall(scnt,1,MPI_INT,rcnt,1,MPI_INT,MPI_COMM[l+1][2]);// Communicate the send count to get recv count 00338 int sdsp[2] = {0, scnt[0]}; // Send displacement 00339 int rdsp[2] = {0, rcnt[0]}; // Recv displacement 00340 if( color[l+1][0] != color[l+1][1] ) { // If leftover process of odd group 00341 rcnt[1-key[l+1][2]] = 0; // It won't have a pair for this communication 00342 } // Endif for leftover process of odd group 00343 recvCells.resize(rcnt[1-key[l+1][2]]/bytes); // Resize recv buffer 00344 MPI_Alltoallv(&sendCells[0],scnt,sdsp,MPI_BYTE, // Communicate cells 00345 &recvCells[0],rcnt,rdsp,MPI_BYTE,MPI_COMM[l+1][2]);// MPI_COMM[2] is for the one-to-one pair 00346 } 00347 00348 //! Communicate cells by scattering from leftover processes 00349 void commCellsScatter(int l) { 00350 const int bytes = sizeof(sendCells[0]); // Byte size of JCell structure 00351 int numScatter = nprocs[l+1][1] - 1; // Number of processes to scatter to 00352 int oldSize = recvCells.size(); // Size of recv buffer before communication 00353 int *scnt = new int [nprocs[l+1][1]]; // Send count 00354 int *sdsp = new int [nprocs[l+1][1]]; // Send displacement 00355 int rcnt; // Recv count 00356 if( key[l+1][1] == numScatter ) { // If this is the leftover proc to scatter from 00357 sdsp[0] = 0; // Initialize send displacement 00358 for(int i=0; i!=numScatter; ++i ) { // Loop over processes to send to 00359 int begin = 0, end = sendCells.size(); // Set begin and end of range to send 00360 splitRange(begin,end,i,numScatter); // Split range into numScatter and get i-th range 00361 scnt[i] = end - begin; // Set send count based on range 00362 sdsp[i+1] = sdsp[i] + scnt[i]; // Set send displacement based on send count 00363 } // End loop over processes to send to 00364 scnt[numScatter] = 0; // Send count to self should be 0 00365 } // Endif for leftover proc 00366 MPI_Scatter(scnt,1,MPI_INT,&rcnt,1,MPI_INT,numScatter,MPI_COMM[l+1][1]);// Scatter the send count to get recv count 00367 00368 recvCells.resize(oldSize+rcnt); // Resize recv buffer based on recv count 00369 for(int i=0; i!= nprocs[l+1][1]; ++i ) { // Loop over group of processes 00370 scnt[i] *= bytes; // Multiply send count by byte size of data 00371 sdsp[i] *= bytes; // Multiply send displacement by byte size of data 00372 } // End loop over group of processes 00373 rcnt *= bytes; // Multiply recv count by byte size of data 00374 MPI_Scatterv(&sendCells[0], scnt,sdsp,MPI_BYTE, // Communicate cells via MPI_Scatterv 00375 &recvCells[oldSize],rcnt, MPI_BYTE, // Offset recv buffer by oldSize 00376 numScatter,MPI_COMM[l+1][1]); 00377 delete[] scnt; // Delete send count 00378 delete[] sdsp; // Delete send displacement 00379 } 00380 00381 //! Turn recv bodies to twigs 00382 void rbodies2twigs(Bodies &bodies, Cells &twigs) { 00383 startTimer("Recv bodies "); // Start timer 00384 for( JB_iter JB=recvBodies.begin(); JB!=recvBodies.end(); ++JB ) {// Loop over recv bodies 00385 Body body; // Body structure 00386 body.IBODY = 0; // Initialize body index 00387 body.IPROC = 0; // Initialize proc index 00388 body.TRG = 0; // Initialize target values 00389 body.ICELL = JB->ICELL; // Set index of cell 00390 body.X = JB->X; // Set position of body 00391 body.SRC = JB->SRC; // Set source values of body 00392 bodies.push_back(body); // Push body into bodies vector 00393 } // End loop over recv bodies 00394 buffer.resize(bodies.size()); // Resize sort buffer 00395 stopTimer("Recv bodies ",printNow); // Stop timer 00396 sortBodies(bodies,buffer,false); // Sort bodies in descending order 00397 bodies2twigs(bodies,twigs); // Turn bodies to twigs 00398 } 00399 00400 //! Turn cells to twigs 00401 void cells2twigs(Cells &cells, Cells &twigs, bool last) { 00402 while( !cells.empty() ) { // While cell vector is not empty 00403 if( cells.back().NCHILD == 0 ) { // If cell has no child 00404 if( cells.back().NDLEAF == 0 || !last ) { // If cell has no leaf or is not last iteration 00405 cells.back().NDLEAF = 0; // Set number of leafs to 0 00406 twigs.push_back(cells.back()); // Push cell into twig vector 00407 } // Endif for no leaf 00408 } // Endif for no child 00409 cells.pop_back(); // Pop last element from cell vector 00410 } // End while for cell vector 00411 } 00412 00413 //! Turn send buffer to twigs 00414 void send2twigs(Bodies &bodies, Cells &twigs, int offTwigs) { 00415 for( JC_iter JC=sendCells.begin(); JC!=sendCells.begin()+offTwigs; ++JC ) {// Loop over send buffer 00416 Cell cell; // Cell structure 00417 cell.ICELL = JC->ICELL; // Set index of cell 00418 cell.M = JC->M; // Set multipole of cell 00419 cell.NDLEAF = cell.NCHILD = 0; // Set number of leafs and children 00420 cell.LEAF = bodies.end(); // Set pointer to first leaf 00421 getCenter(cell); // Set center and radius 00422 twigs.push_back(cell); // Push cell into twig vector 00423 } // End loop over send buffer 00424 sendCells.clear(); // Clear send buffer 00425 } 00426 00427 //! Turn recv buffer to twigs 00428 void recv2twigs(Bodies &bodies, Cells &twigs) { 00429 for( JC_iter JC=recvCells.begin(); JC!=recvCells.end(); ++JC ) {// Loop over recv buffer 00430 Cell cell; // Cell structure 00431 cell.ICELL = JC->ICELL; // Set index of cell 00432 cell.M = JC->M; // Set multipole of cell 00433 cell.NDLEAF = cell.NCHILD = 0; // Set number of leafs and children 00434 cell.LEAF = bodies.end(); // Set pointer to first leaf 00435 getCenter(cell); // Set center and radius 00436 twigs.push_back(cell); // Push cell into twig vector 00437 } // End loop over recv buffer 00438 } 00439 00440 //! Zip two groups of twigs that overlap 00441 void zipTwigs(Cells &twigs, Cells &cells, Cells &sticks, bool last) { 00442 startTimer("Sort resize "); // Start timer 00443 Cells cbuffer = twigs; // Sort buffer for cells 00444 stopTimer("Sort resize ",printNow); // Stop timer 00445 sortCells(twigs,cbuffer); // Sort twigs in ascending order 00446 startTimer("Ziptwigs "); // Start timer 00447 bigint index = -1; // Initialize index counter 00448 while( !twigs.empty() ) { // While twig vector is not empty 00449 if( twigs.back().ICELL != index ) { // If twig's index is different from previous 00450 cells.push_back(twigs.back()); // Push twig into cell vector 00451 index = twigs.back().ICELL; // Update index counter 00452 } else if ( twigs.back().NDLEAF == 0 || !last ) { // Elseif twig-twig collision 00453 cells.back().M += twigs.back().M; // Accumulate the multipole 00454 } else if ( cells.back().NDLEAF == 0 ) { // Elseif twig-body collision 00455 Mset M; // Multipole for temporary storage 00456 M = cells.back().M; // Save multipoles from cells 00457 cells.back() = twigs.back(); // Copy twigs to cells 00458 cells.back().M = M; // Copy back multipoles to cells 00459 twigs.back().M = M - twigs.back().M; // Take the difference of the two 00460 if( std::abs(twigs.back().M[0]/M[0]) > EPS ) { // If the difference is non-zero 00461 sticks.push_back(twigs.back()); // Save this difference in the sticks vector 00462 } // Endif for non-zero difference 00463 } else { // Else body-body collision (don't do anything) 00464 } // Endif for collision type 00465 twigs.pop_back(); // Pop last element from twig vector 00466 } // End while for twig vector 00467 stopTimer("Ziptwigs ",printNow); // Stop timer 00468 sortCells(cells,cbuffer); // Sort cells in ascending order 00469 startTimer("Ziptwigs "); // Start timer 00470 twigs = cells; // Copy cells to twigs 00471 cells.clear(); // Clear cells 00472 stopTimer("Ziptwigs ",printNow); // Stop timer 00473 } 00474 00475 //! Re-index bodies 00476 void reindexBodies(Bodies &bodies, Cells &twigs, Cells &cells ,Cells &sticks) { 00477 startTimer("Reindex "); // Start timer 00478 while( !twigs.empty() ) { // While twig vector is not empty 00479 if( twigs.back().NDLEAF == 0 ) { // If twig has no leafs 00480 cells.push_back(twigs.back()); // Push twig into cell vector 00481 } // Endif for no leafs 00482 twigs.pop_back(); // Pop last element from twig vector 00483 } // End while for twig vector 00484 // BottomUp::setIndex(bodies,-1,0,0,true); // Set index of bodies 00485 buffer.resize(bodies.size()); // Resize sort buffer 00486 stopTimer("Reindex ",printNow); // Stop timer 00487 // sortBodies(bodies,buffer,false); // Sort bodies in descending order 00488 // BottomUp::grow(bodies); // Grow tree structure 00489 sortBodies(bodies,buffer,false); // Sort bodies in descending order 00490 bodies2twigs(bodies,twigs); // Turn bodies to twigs 00491 startTimer("Reindex "); // Start timer 00492 for( C_iter C=twigs.begin(); C!=twigs.end(); ++C ) { // Loop over cells 00493 if( sticks.size() > 0 ) { // If stick vector is not empty 00494 if( C->ICELL == sticks.back().ICELL ) { // If twig's index is equal to stick's index 00495 C->M += sticks.back().M; // Accumulate multipole 00496 sticks.pop_back(); // Pop last element from stick vector 00497 } // Endif for twig's index 00498 } // Endif for stick vector 00499 } // End loop over cells 00500 cells.insert(cells.begin(),twigs.begin(),twigs.end()); // Add twigs to the end of cell vector 00501 cells.insert(cells.begin(),sticks.begin(),sticks.end()); // Add remaining sticks to the end of cell vector 00502 sticks.clear(); // Clear sticks 00503 Cells cbuffer = cells; // Sort buffer for cells 00504 stopTimer("Reindex ",printNow); // Stop timer 00505 sortCells(cells,cbuffer); // Sort cells in ascending order 00506 startTimer("Reindex "); // Start timer 00507 twigs = cells; // Copy cells to twigs 00508 cells.clear(); // Clear cells 00509 stopTimer("Reindex ",printNow); // Stop timer 00510 } 00511 00512 //! Turn sticks to send buffer 00513 void sticks2send(Cells &sticks, int &offTwigs) { 00514 while( !sticks.empty() ) { // While stick vector is not empty 00515 JCell cell; // Cell structure 00516 cell.ICELL = sticks.back().ICELL; // Set index of cell 00517 cell.M = sticks.back().M; // Set multipole of cell 00518 sendCells.push_back(cell); // Push cell into send buffer 00519 sticks.pop_back(); // Pop last element of stick vector 00520 } // End while for stick vector 00521 offTwigs = sendCells.size(); // Keep track of current send buffer size 00522 } 00523 00524 //! Validate number of send cells 00525 void checkNumCells(int l) { // Only works with octsection 00526 int maxLevel = int(log(MPISIZE-1) / M_LN2 / 3) + 1; 00527 if( MPISIZE == 1 ) maxLevel = 0; 00528 int octant0 = -1; 00529 int numCells = 0; 00530 for( JC_iter JC=sendCells.begin(); JC!=sendCells.end(); ++JC ) { 00531 int level = getLevel(JC->ICELL); 00532 int index = JC->ICELL - ((1 << 3*level) - 1) / 7; 00533 int octant = index / (1 << 3 * (level - maxLevel)); 00534 if( octant != octant0 ) { 00535 octant0 = octant; 00536 numCells++; 00537 } 00538 } 00539 int numCellsExpect = (1 << (3 * maxLevel - 1)) / (1 << l); // Isn't true for far domains 00540 if( numCellsExpect != numCells && MPIRANK == 0) std::cout << numCells << " " << numCellsExpect << std::endl; 00541 } 00542 00543 //! Check total charge 00544 void checkSumMass(Cells &cells) { 00545 real localMass = 0; 00546 for( C_iter C=cells.begin(); C!=cells.end(); ++C ) { 00547 if( C->NCHILD == 0 ) { 00548 localMass += std::abs(C->M[0]); 00549 } 00550 } 00551 real globalMass; 00552 MPI_Allreduce(&localMass,&globalMass,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); 00553 print("localMass : ",0); 00554 print(localMass); 00555 print("globalMass : ",0); 00556 print(globalMass,0); 00557 print("\n",0); 00558 } 00559 00560 public: 00561 //! Constructor 00562 ParallelFMM() : Partition<equation>() {} 00563 //! Destructor 00564 ~ParallelFMM() {} 00565 00566 //! Set bodies to communicate 00567 void setCommBodies(Cells &cells) { 00568 startTimer("Gather bounds"); // Start timer 00569 gatherBounds(); // Gather bounds of other domain 00570 stopTimer("Gather bounds",printNow); // Stop timer 00571 startTimer("Get send rank"); // Start timer 00572 getSendRank(cells); // Get neighbor ranks to send to 00573 stopTimer("Get send rank",printNow); // Stop timer 00574 } 00575 00576 //! Update bodies using the previous send count 00577 void updateBodies(bool comm=true) { 00578 startTimer("Get send cnt "); // Start timer 00579 getSendCount(comm); // Get size of data to send 00580 stopTimer("Get send cnt ",printNow); // Stop timer 00581 startTimer("Alltoall B "); // Start timer 00582 #if 1 00583 int bytes = sizeof(sendBodies[0]); // Byte size of jbody structure 00584 for( int i=0; i!=MPISIZE; ++i ) { // Loop over ranks 00585 sendBodyCnt[i] *= bytes; // Multiply by bytes 00586 sendBodyDsp[i] *= bytes; // Multiply by bytes 00587 recvBodyCnt[i] *= bytes; // Multiply by bytes 00588 recvBodyDsp[i] *= bytes; // Multiply by bytes 00589 } // End loop over ranks 00590 MPI_Alltoallv(&sendBodies[0],&sendBodyCnt[0],&sendBodyDsp[0],MPI_BYTE, 00591 &recvBodies[0],&recvBodyCnt[0],&recvBodyDsp[0],MPI_BYTE,MPI_COMM_WORLD); 00592 for( int i=0; i!=MPISIZE; ++i ) { // Loop over ranks 00593 sendBodyCnt[i] /= bytes; // Divide by bytes 00594 sendBodyDsp[i] /= bytes; // Divide by bytes 00595 recvBodyCnt[i] /= bytes; // Divide by bytes 00596 recvBodyDsp[i] /= bytes; // Divide by bytes 00597 } // End loop over ranks 00598 #else 00599 commBodiesAlltoall(); 00600 #endif 00601 sendBodies.clear(); // Clear send buffer for bodies 00602 stopTimer("Alltoall B ",printNow); // Stop timer 00603 } 00604 00605 //! Communicate bodies in the local essential tree 00606 void commBodies(Cells &cells) { 00607 setCommBodies(cells); // Set bodies to communicate 00608 updateBodies(); // Update bodies with alltoall 00609 } 00610 00611 //! Convert recvBodies to cells 00612 void bodies2cells(Bodies &bodies, Cells &cells) { 00613 Cells twigs,sticks; // Twigs and sticks are special types of cells 00614 rbodies2twigs(bodies,twigs); // Put recv bodies into twig vector 00615 twigs2cells(twigs,cells,sticks); // Turn twigs to cells 00616 } 00617 00618 //! Communicate cells in the local essential tree 00619 void commCells(Bodies &bodies, Cells &cells) { 00620 vect xmin = 0, xmax = 0; // Initialize domain boundaries 00621 Cells twigs,sticks; // Twigs and sticks are special types of cells 00622 00623 #if 1 00624 startTimer("Get LET "); // Start timer 00625 int ssize = 0; // Initialize offset for send cells 00626 sendCellCnt.assign(MPISIZE,0); // Initialize cell send count 00627 sendCellDsp.assign(MPISIZE,0); // Initialize cell send displacement 00628 for( int irank=0; irank!=MPISIZE; ++irank ) { // Loop over ranks to send to 00629 getLET(cells.begin(),cells.end()-1,xminAll[irank],xmaxAll[irank]);// Determine which cells to send 00630 sendCellCnt[irank] = sendCells.size()-ssize; // Set cell send count of current rank 00631 sendCellDsp[irank] = ssize; // Set cell send displacement of current rank 00632 ssize += sendCellCnt[irank]; // Increment offset for vector send cells 00633 } // End loop over ranks 00634 stopTimer("Get LET ",printNow); // Stop timer 00635 startTimer("Alltoall C "); // Start timer 00636 MPI_Alltoall(&sendCellCnt[0],1,MPI_INT,&recvCellCnt[0],1,MPI_INT,MPI_COMM_WORLD);// Communicate the send counts 00637 int rsize = 0; // Initialize total recv count 00638 for( int irank=0; irank!=MPISIZE; ++irank ) { // Loop over ranks to recv from 00639 recvCellDsp[irank] = rsize; // Set recv displacements 00640 rsize += recvCellCnt[irank]; // Accumulate recv counts 00641 } // End loop over ranks to recv from 00642 recvCells.resize(rsize); // Resize recv buffer 00643 int bytes = sizeof(sendCells[0]); // Byte size of jbody structure 00644 for( int i=0; i!=MPISIZE; ++i ) { // Loop over ranks 00645 sendCellCnt[i] *= bytes; // Multiply by bytes 00646 sendCellDsp[i] *= bytes; // Multiply by bytes 00647 recvCellCnt[i] *= bytes; // Multiply by bytes 00648 recvCellDsp[i] *= bytes; // Multiply by bytes 00649 } // End loop over ranks 00650 MPI_Alltoallv(&sendCells[0],&sendCellCnt[0],&sendCellDsp[0],MPI_BYTE, 00651 &recvCells[0],&recvCellCnt[0],&recvCellDsp[0],MPI_BYTE,MPI_COMM_WORLD); 00652 for( int i=0; i!=MPISIZE; ++i ) { // Loop over ranks 00653 sendCellCnt[i] /= bytes; // Divide by bytes 00654 sendCellDsp[i] /= bytes; // Divide by bytes 00655 recvCellCnt[i] /= bytes; // Divide by bytes 00656 recvCellDsp[i] /= bytes; // Divide by bytes 00657 } // End loop over ranks 00658 stopTimer("Alltoall C ",printNow); // Stop timer 00659 rbodies2twigs(bodies,twigs); // Put recv bodies into twig vector 00660 startTimer("Cells2twigs "); // Start timer 00661 cells2twigs(cells,twigs,true); // Put cells into twig vector 00662 stopTimer("Cells2twigs ",printNow); // Stop timer 00663 startTimer("Recv2twigs "); // Start timer 00664 recv2twigs(bodies,twigs); // Put recv buffer into twig vector 00665 stopTimer("Recv2twigs ",printNow); // Stop timer 00666 zipTwigs(twigs,cells,sticks,true); // Zip two groups of twigs that overlap 00667 reindexBodies(bodies,twigs,cells,sticks); // Re-index bodies 00668 twigs2cells(twigs,cells,sticks); // Turn twigs to cells 00669 sendCells.clear(); // Clear send buffer 00670 recvCells.clear(); // Clear recv buffer 00671 #else 00672 int offTwigs = 0; // Initialize offset of twigs 00673 for( int l=0; l!=LEVEL; ++l ) { // Loop over levels of N-D hypercube communication 00674 getOtherDomain(xmin,xmax,l+1); // Get boundries of domains on other processes 00675 startTimer("Get LET "); // Start timer 00676 getLET(cells.begin(),cells.end()-1,xmin,xmax); // Determine which cells to send 00677 #ifdef DEBUG 00678 checkNumCells(LEVEL-l-1); 00679 checkSumMass(cells); 00680 #endif 00681 stopTimer("Get LET ",printNow); // Stop timer 00682 startTimer("Alltoall C "); // Start timer 00683 commCellsAlltoall(l); // Communicate cells by one-to-one MPI_Alltoallv 00684 if( nprocs[l][0] % 2 == 1 && nprocs[l][0] != 1 && nprocs[l+1][0] <= nprocs[l+1][1] ) {// If scatter is necessary 00685 commCellsScatter(l); // Communicate cells by scattering from leftover proc 00686 } // Endif for odd number of procs 00687 stopTimer("Alltoall C ",printNow); // Stop timer 00688 if( l == LEVEL - 1 ) rbodies2twigs(bodies,twigs); // Put recv bodies into twig vector 00689 startTimer("Cells2twigs "); // Start timer 00690 cells2twigs(cells,twigs,l==LEVEL-1); // Put cells into twig vector 00691 stopTimer("Cells2twigs ",printNow); // Stop timer 00692 startTimer("Send2twigs "); // Start timer 00693 send2twigs(bodies,twigs,offTwigs); // Put send buffer (sticks) into twig vector 00694 stopTimer("Send2twigs ",printNow); // Stop timer 00695 startTimer("Recv2twigs "); // Start timer 00696 recv2twigs(bodies,twigs); // Put recv buffer into twig vector 00697 stopTimer("Recv2twigs ",printNow); // Stop timer 00698 #ifdef DEBUG 00699 if( l == LEVEL - 1 ) { // If at last level 00700 complex SUM = 0; // Initialize accumulator 00701 for(C_iter C=twigs.begin(); C!=twigs.end(); ++C) { // Loop over twigs 00702 if( C->NDLEAF == 0 ) SUM += C->M[0]; // Add multipoles of empty twigs 00703 } // End loop over twigs 00704 print("Before recv : ",0); // Print identifier 00705 print(SUM); // Print sum of multipoles 00706 } // Endif for last level 00707 #endif 00708 zipTwigs(twigs,cells,sticks,l==LEVEL-1); // Zip two groups of twigs that overlap 00709 #ifdef DEBUG 00710 if( l == LEVEL - 1 ) { // If at last level 00711 complex SUM = 0; // Initialize accumulator 00712 for(C_iter C=twigs.begin(); C!=twigs.end(); ++C) { // Loop over twigs 00713 SUM += C->M[0]; // Add multipoles 00714 } // End loop over twigs 00715 print("After merge : ",0); // Print identifier 00716 print(SUM); // Print sum of multipoles 00717 print("sticks.size() : ",0); // Print identifier 00718 print(sticks.size()); // Print size of stick vector 00719 } // Endif for last level 00720 #endif 00721 if( l == LEVEL - 1 ) reindexBodies(bodies,twigs,cells,sticks);// Re-index bodies 00722 twigs2cells(twigs,cells,sticks); // Turn twigs to cells 00723 startTimer("Sticks2send "); // Start timer 00724 sticks2send(sticks,offTwigs); // Turn sticks to send buffer 00725 stopTimer("Sticks2send ",printNow); // Stop timer 00726 } // End loop over levels of N-D hypercube communication 00727 #endif 00728 00729 #ifdef DEBUG 00730 print("M[0] @ root : ",0); // Print identifier 00731 print((cells.end()-1)->M[0]); // Print monopole of root (should be 1 for test) 00732 print("bodies.size() : ",0); // Print identifier 00733 print(bodies.size()); // Print size of body vector 00734 #endif 00735 sendCells.clear(); // Clear send buffer 00736 recvCells.clear(); // Clear recv buffer 00737 } 00738 00739 //! Remove cells that belong to current process 00740 void eraseLocalTree(Cells &cells) { 00741 int level = int(log(MPISIZE-1) / M_LN2 / 3) + 1; // Level of process root cell 00742 if( MPISIZE == 1 ) level = 0; // Account for serial case 00743 int off = ((1 << 3 * level) - 1) / 7; // Levelwise offset of ICELL 00744 int size = (1 << 3 * level) / MPISIZE; // Number of cells to remove 00745 unsigned begin = MPIRANK * size + off; // Begin index of cells to remove 00746 unsigned end = (MPIRANK + 1) * size + off; // End index of cells to remove 00747 for( C_iter C=cells.begin(); C!=cells.end(); ++C ) { // Loop over cells 00748 int nchild = 0; // Initialize child cell counter 00749 for( int c=0; c!=C->NCHILD; ++c ) { // Loop over child cells 00750 C_iter CC = cells.begin()+C->CHILD+c; // Iterator of child cell 00751 if( CC->ICELL < begin || end <= CC->ICELL ) { // If child cell is not within the removal range 00752 C_iter CH = cells.begin()+C->CHILD+nchild; // New iterator of child cell 00753 *CH = *CC; // Copy data of child cell 00754 nchild++; // Increment child cell counter 00755 } // Endif for removal range 00756 } // End loop over child cells 00757 C->NCHILD = nchild; // Update number of child cells 00758 } // End loop over cells 00759 } 00760 }; 00761 00762 #endif