ExaFMM 1
Fast-multipole Method for exascale systems
include/parallelfmm.h
Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba
00003 
00004 Permission is hereby granted, free of charge, to any person obtaining a copy
00005 of this software and associated documentation files (the "Software"), to deal
00006 in the Software without restriction, including without limitation the rights
00007 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00008 copies of the Software, and to permit persons to whom the Software is
00009 furnished to do so, subject to the following conditions:
00010 
00011 The above copyright notice and this permission notice shall be included in
00012 all copies or substantial portions of the Software.
00013 
00014 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00015 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00017 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00019 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00020 THE SOFTWARE.
00021 */
00022 #ifndef parallelfmm_h
00023 #define parallelfmm_h
00024 #include "partition.h"
00025 
00026 //! Handles all the communication of local essential trees
00027 template<Equation equation>
00028 class ParallelFMM : public Partition<equation> {
00029 private:
00030   std::vector<int>    sendBodyCnt;                              //!< Vector of body send counts
00031   std::vector<int>    sendBodyDsp;                              //!< Vector of body send displacements
00032   std::vector<int>    recvBodyCnt;                              //!< Vector of body recv counts
00033   std::vector<int>    recvBodyDsp;                              //!< Vector of body recv displacements
00034   std::vector<int>    sendBodyRanks;                            //!< Vector of ranks to send bodies to
00035   std::vector<int>    sendBodyCellCnt;                          //!< Vector of send counts for cells of bodies
00036   std::vector<C_iter> sendBodyCells;                            //!< Vector of cell iterators for cells of bodies to send
00037   std::vector<int>    sendCellCnt;                              //!< Vector of cell send counts
00038   std::vector<int>    sendCellDsp;                              //!< Vector of cell send displacements
00039   std::vector<int>    recvCellCnt;                              //!< Vector of cell recv counts
00040   std::vector<int>    recvCellDsp;                              //!< Vector of cell recv displacements
00041   std::vector<vect>   xminAll;                                  //!< Buffer for gathering XMIN
00042   std::vector<vect>   xmaxAll;                                  //!< Buffer for gathering XMAX
00043 
00044   JBodies sendBodies;                                           //!< Send buffer for bodies
00045   JBodies recvBodies;                                           //!< Recv buffer for bodies
00046   JCells  sendCells;                                            //!< Send buffer for cells
00047   JCells  recvCells;                                            //!< Recv buffer for cells
00048 
00049 public:
00050   using Kernel<equation>::printNow;                             //!< Switch to print timings
00051   using Kernel<equation>::startTimer;                           //!< Start timer for given event
00052   using Kernel<equation>::stopTimer;                            //!< Stop timer for given event
00053   using Kernel<equation>::sortBodies;                           //!< Sort bodies according to cell index
00054   using Kernel<equation>::sortCells;                            //!< Sort cells according to cell index
00055   using Kernel<equation>::R0;                                   //!< Radius of root cell
00056   using TreeStructure<equation>::buffer;                        //!< Buffer for MPI communication & sorting
00057   using TreeStructure<equation>::getLevel;                      //!< Get level from cell index
00058   using TreeStructure<equation>::getCenter;                     //!< Get cell center and radius from cell index
00059   using TreeStructure<equation>::bodies2twigs;                  //!< Group bodies into twig cells
00060   using TreeStructure<equation>::twigs2cells;                   //!< Link twigs bottomup to create all cells in tree
00061   using Partition<equation>::isPowerOfTwo;                      //!< If n is power of two return true
00062   using Partition<equation>::splitRange;                        //!< Split range and return partial range
00063   using Partition<equation>::print;                             //!< Print in MPI
00064   using Partition<equation>::LEVEL;                             //!< Level of the MPI process binary tree
00065   using Partition<equation>::XMIN;                              //!< Minimum position vector of bodies
00066   using Partition<equation>::XMAX;                              //!< Maximum position vector of bodies
00067   using Partition<equation>::nprocs;                            //!< Number of processes in the two split groups
00068   using Partition<equation>::color;                             //!< Color for hypercube communicators
00069   using Partition<equation>::key;                               //!< Key for hypercube communicators
00070   using Partition<equation>::MPI_COMM;                          //!< Hypercube communicators
00071 
00072 private:
00073 //! Gather bounds of other domain
00074   void gatherBounds() {
00075     xminAll.resize(MPISIZE);                                    // Resize buffer for gathering xmin
00076     xmaxAll.resize(MPISIZE);                                    // Resize buffer for gathering xmax
00077     sendBodyCnt.resize(MPISIZE);                                // Resize vector of body send counts
00078     sendBodyDsp.resize(MPISIZE);                                // Resize vector of body send displacements
00079     recvBodyCnt.resize(MPISIZE);                                // Resize vector of body recv counts
00080     recvBodyDsp.resize(MPISIZE);                                // Resize vector of body recv displacements
00081     sendCellCnt.resize(MPISIZE);                                // Resize vector of cell send counts
00082     sendCellDsp.resize(MPISIZE);                                // Resize vector of cell send displacements
00083     recvCellCnt.resize(MPISIZE);                                // Resize vector of cell recv counts
00084     recvCellDsp.resize(MPISIZE);                                // Resize vector of cell recv displacements
00085     MPI_Datatype MPI_TYPE = getType(XMIN[LEVEL][0]);            // Get MPI data type
00086     MPI_Allgather(&XMIN[LEVEL][0],3,MPI_TYPE,                   // Gather XMIN
00087                   &xminAll[0][0],3,MPI_TYPE,MPI_COMM_WORLD);
00088     MPI_Allgather(&XMAX[LEVEL][0],3,MPI_TYPE,                   // Gather XMAX
00089                   &xmaxAll[0][0],3,MPI_TYPE,MPI_COMM_WORLD);
00090   }
00091 
00092 //! Get neighbor ranks to send to
00093   void getSendRank(Cells &cells) {
00094     sendBodyRanks.clear();                                      // Clear send ranks
00095     sendBodyCellCnt.clear();                                    // Clear send counts
00096     sendBodyCells.clear();                                      // Clear send body cells
00097     int oldsize = 0;                                            // Per rank offset of the number of cells to send
00098     for( int irank=0; irank!=MPISIZE; ++irank ) {               // Loop over ranks
00099       int ic = 0;                                               //  Initialize neighbor dimension counter
00100       for( int d=0; d!=3; ++d ) {                               //  Loop over dimensions
00101         if(xminAll[irank][d] < 2 * XMAX[LEVEL][d] - XMIN[LEVEL][d] &&// If the two domains are touching or overlapping
00102            xmaxAll[irank][d] > 2 * XMIN[LEVEL][d] - XMAX[LEVEL][d]) {// in all dimensions, they are neighboring domains
00103           ic++;                                                 //    Increment neighbor dimension counter
00104         }                                                       //   Endif for overlapping domains
00105       }                                                         //  End loop over dimensions
00106       ic = 3;
00107       if( ic == 3 && irank != MPIRANK ) {                       //  If ranks are neighbors in all dimensions
00108         for( C_iter C=cells.begin(); C!=cells.end(); ++C ) {    //   Loop over cells
00109           if( C->NCHILD == 0 ) {                                //    If cell is a twig
00110             bool send = false;                                  //     Initialize logical for sending
00111             if( IMAGES == 0 ) {                                 //     If free boundary condition
00112               Xperiodic = 0;                                    //      Set periodic coordinate offset
00113               real R = getDistance(C,xminAll[irank],xmaxAll[irank]);//  Get distance to other domain
00114               send |= CLET * C->R > THETA * R - EPS2;           //      If the cell seems close enough for P2P
00115             } else {                                            //     If periodic boundary condition
00116               for( int ix=-1; ix<=1; ++ix ) {                   //      Loop over x periodic direction
00117                 for( int iy=-1; iy<=1; ++iy ) {                 //       Loop over y periodic direction
00118                   for( int iz=-1; iz<=1; ++iz ) {               //        Loop over z periodic direction
00119                     Xperiodic[0] = ix * 2 * R0;                 //         Coordinate offset for x periodic direction
00120                     Xperiodic[1] = iy * 2 * R0;                 //         Coordinate offset for y periodic direction
00121                     Xperiodic[2] = iz * 2 * R0;                 //         Coordinate offset for z periodic direction
00122                     real R = getDistance(C,xminAll[irank],xmaxAll[irank]);// Get distance to other domain
00123                     send |= CLET * C->R > THETA * R - EPS2;     //         If the cell seems close enough for P2P
00124                   }                                             //        End loop over z periodic direction
00125                 }                                               //       End loop over y periodic direction
00126               }                                                 //      End loop over x periodic direction
00127             }                                                   //     Endif for periodic boundary condition
00128             if( send ) {                                        //     If the cell seems close enough for P2P
00129               sendBodyCells.push_back(C);                       //      Add cell iterator to scells
00130             }                                                   //     Endif for cell distance
00131           }                                                     //    Endif for twigs
00132         }                                                       //   End loop over cells
00133         sendBodyRanks.push_back(irank);                         //   Add current rank to sendBodyRanks
00134         sendBodyCellCnt.push_back(sendBodyCells.size()-oldsize);//   Add current cell count to sendBodyCellCnt
00135         oldsize = sendBodyCells.size();                         //   Set new offset for cell count
00136       }                                                         //  Endif for neighbor ranks
00137     }                                                           // End loop over ranks
00138   }
00139 
00140 //! Get size of data to send
00141   void getSendCount(bool comm=true) {
00142     int ic = 0, ssize = 0;                                      // Initialize counter and offset for scells
00143     sendBodyCnt.assign(MPISIZE,0);                              // Initialize send count
00144     sendBodyDsp.assign(MPISIZE,0);                              // Initialize send displacement
00145     for( int i=0; i!=int(sendBodyRanks.size()); ++i ) {         // Loop over ranks to send to & recv from
00146       int irank = sendBodyRanks[i];                             //  Rank to send to & recv from
00147       for( int c=0; c!=sendBodyCellCnt[i]; ++c,++ic ) {         //  Loop over cells to send to that rank
00148         C_iter C = sendBodyCells[ic];                           //   Set cell iterator
00149         for( B_iter B=C->LEAF; B!=C->LEAF+C->NDLEAF; ++B ) {    //   Loop over bodies in that cell
00150           JBody body;                                           //    Set compact body type for sending
00151           body.ICELL = B->ICELL;                                //    Set cell index of compact body type
00152           body.X     = B->X;                                    //    Set position of compact body type
00153           body.SRC   = B->SRC;                                  //    Set source values of compact body type
00154           sendBodies.push_back(body);                           //    Push it into the send buffer
00155         }                                                       //   End loop over bodies
00156       }                                                         //  End loop over cells
00157       sendBodyCnt[irank] = sendBodies.size()-ssize;             //  Set send count of current rank
00158       sendBodyDsp[irank] = ssize;                               //  Set send displacement of current rank
00159       ssize += sendBodyCnt[irank];                              //  Increment offset for vector scells
00160     }                                                           // End loop over ranks
00161     if( comm ) {                                                // If communication is necessary
00162       MPI_Alltoall(&sendBodyCnt[0],1,MPI_INT,&recvBodyCnt[0],1,MPI_INT,MPI_COMM_WORLD);// Communicate the send counts
00163       int rsize = 0;                                            // Initialize total recv count
00164       for( int i=0; i!=MPISIZE; ++i ) {                         // Loop over ranks to recv from
00165         recvBodyDsp[i] = rsize;                                 //  Set recv displacements
00166         rsize += recvBodyCnt[i];                                //  Accumulate recv counts
00167       }                                                         // End loop over ranks to recv from
00168       recvBodies.resize(rsize);                                 // Resize recv buffer
00169     }
00170   }
00171 
00172 //! Communicate cells by one-to-one MPI_Alltoallv
00173   void commBodiesAlltoall() {
00174     assert(isPowerOfTwo(MPISIZE));                              // Make sure the number of processes is a power of two
00175     int bytes = sizeof(sendBodies[0]);                          // Byte size of jbody structure
00176     int *scntd = new int [MPISIZE];                             // Permuted send count
00177     int *rcntd = new int [MPISIZE];                             // Permuted recv count
00178     int *rdspd = new int [MPISIZE];                             // Permuted recv displacement
00179     int *irev  = new int [MPISIZE];                             // Map original to compressed index
00180     JBodies sendBuffer = sendBodies;                            // Send buffer
00181     JBodies recvBuffer;                                         // Recv buffer
00182     for( int l=0; l!=LEVEL; ++l ) {                             // Loop over levels of N-D hypercube communication
00183       int npart = 1 << (LEVEL - l - 1);                         // Size of partition block
00184       int scnt2[2], sdsp2[2], rcnt2[2], rdsp2[2];               // Send/recv counts/displacements per level
00185       int ic = 0;                                               // Initialize counter
00186       for( int i=0; i!=2; ++i ) {                               // Loop over the two blocks
00187         scnt2[i] = 0;                                           //  Initialize send count per level
00188         for( int irank=0; irank!=MPISIZE/2; ++irank ) {         //  Loop over ranks in each block
00189           int idata = (irank / npart) * 2 * npart + irank % npart + i * npart;// Original index
00190           int isend = i * MPISIZE / 2 + irank;                  //   Compressed index
00191           irev[idata] = isend;                                  //   Map original to compressed index
00192           scntd[isend] = sendBodyCnt[idata];                    //   Permuted send count
00193           scnt2[i] += sendBodyCnt[idata] * bytes;               //   Send count per block
00194           for( int id=sendBodyDsp[idata]; id!=sendBodyDsp[idata]+sendBodyCnt[idata]; ++id,++ic ) {// Loop over bodies
00195             sendBuffer[ic] = sendBodies[id];                    //    Fill send buffer
00196           }                                                     //   End loop over bodies
00197         }                                                       //  End loop over ranks in each block
00198       }                                                         // End loop over blocks
00199       MPI_Alltoall(scntd,MPISIZE/2,MPI_INT,rcntd,MPISIZE/2,MPI_INT,MPI_COMM[l+1][2]);// Comm permuted count
00200       MPI_Alltoall(scnt2,1,MPI_INT,rcnt2,1,MPI_INT,MPI_COMM[l+1][2]);// Comm send count per block
00201       sdsp2[0] = 0; sdsp2[1] = scnt2[0];                        // Set send displacement
00202       rdsp2[0] = 0; rdsp2[1] = rcnt2[0];                        // Set recv displacement
00203       int rsize = (rdsp2[1] + rcnt2[1]) / bytes;                // Size of recieved bodies
00204       sendBodies.resize(rsize);                                 // Resize send bodies
00205       sendBuffer.resize(rsize);                                 // Resize send buffer
00206       recvBuffer.resize(rsize);                                 // Resize recv buffer
00207       MPI_Alltoallv(&sendBuffer[0],scnt2,sdsp2,MPI_BYTE,        // Communicate cells
00208                     &recvBuffer[0],rcnt2,rdsp2,MPI_BYTE,MPI_COMM[l+1][2]);// MPI_COMM[2] is for the one-to-one pair
00209       rdspd[0] = 0;                                             // Initialize permuted recv displacement
00210       for( int irank=0; irank!=MPISIZE-1; ++irank ) {           // Loop over ranks
00211         rdspd[irank+1] = rdspd[irank] + rcntd[irank];           //  Set permuted recv displacement
00212       }                                                         // End loop over ranks
00213       ic = 0;                                                   // Initiaize counter
00214       for( int i=0; i!=2; ++i ) {                               // Loop over the two blocks
00215         for( int irank=0; irank!=MPISIZE/2; ++irank ) {         //  Loop over ranks in each block
00216           int idata = (irank / npart) * 2 * npart + irank % npart + i * npart;// Original index
00217           int irecv = i * MPISIZE / 2 + irank;                  //   Compressed index
00218           recvBodyCnt[idata] = rcntd[irecv];                    //   Set recv cound
00219           idata = irev[irecv];                                  //   Set premuted index
00220           for( int id=rdspd[idata]; id!=rdspd[idata]+rcntd[idata]; ++id,++ic ) {// Loop over bodies
00221             sendBodies[ic] = recvBuffer[id];                    //    Get data from recv buffer
00222           }                                                     //   End loop over bodies
00223         }                                                       //  End loop over ranks in each block
00224       }                                                         // End loop over blocks
00225       recvBodyDsp[0] = 0;                                       // Initialize recv displacement
00226       for( int irank=0; irank!=MPISIZE-1; ++irank ) {           // Loop over ranks
00227         recvBodyDsp[irank+1] = recvBodyDsp[irank] + recvBodyCnt[irank];//  Set recv displacement
00228       }                                                         // End loop over ranks
00229       for( int irank=0; irank!=MPISIZE; ++irank ) {             // Loop over ranks
00230         sendBodyCnt[irank] = recvBodyCnt[irank];                //  Get next send count
00231         sendBodyDsp[irank] = recvBodyDsp[irank];                //  Get next send displacement
00232       }                                                         // End loop over ranks
00233     }                                                           // End loop over levels of N-D hypercube communication
00234     recvBodies = sendBodies;                                    // Copy send bodies to recv bodies
00235     delete[] scntd;                                             // Delete permuted send count
00236     delete[] rcntd;                                             // Delete permuted recv count
00237     delete[] rdspd;                                             // Delete permuted recv displacement
00238     delete[] irev;                                              // Delete map from original to compressed index
00239   }
00240 
00241 //! Get boundries of domains on other processes
00242   void getOtherDomain(vect &xmin, vect &xmax, int l) {
00243     startTimer("Get domain   ");                                // Start timer
00244     MPI_Datatype MPI_TYPE = getType(XMIN[l][0]);                // Get MPI data type
00245     vect send[2],recv[2];                                       // Send and recv buffer
00246     MPI_Request req;                                            // MPI requests
00247     send[0] = send[1] = XMIN[l];                                // Set XMIN into send buffer
00248     recv[0] = recv[1] = 0;                                      // Initialize recv buffer
00249     MPI_Alltoall(send,3,MPI_TYPE,recv,3,MPI_TYPE,MPI_COMM[l][2]);// Communicate XMIN
00250     xmin = recv[1-key[l][2]];                                   // Copy xmin from recv buffer
00251     send[0] = send[1] = XMAX[l];                                // Set XMAX into send buffer
00252     recv[0] = recv[1] = 0;                                      // Initialize recv buffer
00253     MPI_Alltoall(send,3,MPI_TYPE,recv,3,MPI_TYPE,MPI_COMM[l][2]);// Communicate XMAX
00254     xmax = recv[1-key[l][2]];                                   // Copy xmax from recv buffer
00255     if( nprocs[l-1][0] % 2 == 1 && nprocs[l][0] >= nprocs[l][1] ) {// If right half of odd group
00256       int isend = (key[l][0] + 1            ) % nprocs[l][0];   //  Send to next rank (wrapped)
00257       int irecv = (key[l][0] - 1 + nprocs[l][0]) % nprocs[l][0];//  Recv from previous rank (wrapped)
00258       send[0] = xmin;                                           //  Set xmin in send buffer
00259       MPI_Isend(send,3,MPI_TYPE,isend,0,MPI_COMM[l][0],&req);   //  Send to next rank
00260       MPI_Irecv(recv,3,MPI_TYPE,irecv,0,MPI_COMM[l][0],&req);   //  Recv from previous rank
00261       MPI_Wait(&req,MPI_STATUS_IGNORE);                         //  Wait for recv to finish
00262       if( color[l][0] != color[l][1] ) xmin = recv[0];          //  Update only if leftover process of odd group
00263       send[0] = xmax;                                           //  Set xmax in send buffer
00264       MPI_Isend(send,3,MPI_TYPE,isend,0,MPI_COMM[l][0],&req);   //  Send to next rank
00265       MPI_Irecv(recv,3,MPI_TYPE,irecv,0,MPI_COMM[l][0],&req);   //  Recv from previous rank
00266       MPI_Wait(&req,MPI_STATUS_IGNORE);                         //  Wait for recv to finish
00267       if( color[l][0] != color[l][1] ) xmax = recv[0];          //  Update only if leftover process of odd group
00268     }                                                           // Endif for right half of odd group
00269     if( nprocs[l-1][0] == 1 ) {                                 // If previous group had one process
00270       xmin = XMIN[l];                                           //  Use own XMIN value for xmin
00271       xmax = XMAX[l];                                           //  Use own XMAX value for xmax
00272     }                                                           // Endif for isolated process
00273     stopTimer("Get domain   ",printNow);                        // Stop timer 
00274   }
00275 
00276 //! Get disatnce to other domain
00277   real getDistance(C_iter C, vect xmin, vect xmax) {
00278     vect dist;                                                  // Distance vector
00279     for( int d=0; d!=3; ++d ) {                                 // Loop over dimensions
00280       dist[d] = (C->X[d] + Xperiodic[d] > xmax[d])*             //  Calculate the distance between cell C and
00281                 (C->X[d] + Xperiodic[d] - xmax[d])+             //  the nearest point in domain [xmin,xmax]^3
00282                 (C->X[d] + Xperiodic[d] < xmin[d])*             //  Take the differnece from xmin or xmax
00283                 (C->X[d] + Xperiodic[d] - xmin[d]);             //  or 0 if between xmin and xmax
00284     }                                                           // End loop over dimensions
00285     real R = std::sqrt(norm(dist));                             // Scalar distance
00286     return R;
00287   }
00288 
00289 //! Determine which cells to send
00290   void getLET(C_iter C0, C_iter C, vect xmin, vect xmax) {
00291     int level = int(log(MPISIZE-1) / M_LN2 / 3) + 1;            // Level of local root cell
00292     if( MPISIZE == 1 ) level = 0;                               // Account for serial case
00293     for( int i=0; i!=C->NCHILD; i++ ) {                         // Loop over child cells
00294       C_iter CC = C0+C->CHILD+i;                                //  Iterator for child cell
00295       bool divide = false;                                      //  Initialize logical for dividing
00296       if( IMAGES == 0 ) {                                       //  If free boundary condition
00297         Xperiodic = 0;                                          //   Set periodic coordinate offset
00298         real R = getDistance(CC,xmin,xmax);                     //   Get distance to other domain
00299         divide |= CLET * CC->R > THETA * R - EPS2;              //   If the cell seems too close and not twig
00300       } else {                                                  //  If periodic boundary condition
00301         for( int ix=-1; ix<=1; ++ix ) {                         //   Loop over x periodic direction
00302           for( int iy=-1; iy<=1; ++iy ) {                       //    Loop over y periodic direction
00303             for( int iz=-1; iz<=1; ++iz ) {                     //     Loop over z periodic direction
00304               Xperiodic[0] = ix * 2 * R0;                       //      Coordinate offset for x periodic direction
00305               Xperiodic[1] = iy * 2 * R0;                       //      Coordinate offset for y periodic direction
00306               Xperiodic[2] = iz * 2 * R0;                       //      Coordinate offset for z periodic direction
00307               real R = getDistance(CC,xmin,xmax);               //      Get distance to other domain
00308               divide |= CLET * CC->R > THETA * R - EPS2;        //      If the cell seems too close and not twig
00309             }                                                   //     End loop over z periodic direction
00310           }                                                     //    End loop over y periodic direction
00311         }                                                       //   End loop over x periodic direction
00312       }                                                         //  Endif for periodic boundary condition
00313       divide |= R0 / (1 << level) + 1e-5 < CC->R;               //  If the cell is larger than the local root cell
00314       if( divide && CC->NCHILD != 0 ) {                         //  If the cell seems too close and not twig
00315         getLET(C0,CC,xmin,xmax);                                //   Traverse the tree further
00316       } else {                                                  //  If the cell is far or a twig
00317         assert( R0 / (1 << level) + 1e-5 > CC->R );             //   Can't send cells that are larger than local root
00318         JCell cell;                                             //   Set compact cell type for sending
00319         cell.ICELL = CC->ICELL;                                 //   Set index of compact cell type
00320         cell.M     = CC->M;                                     //   Set Multipoles of compact cell type
00321         sendCells.push_back(cell);                              //    Push cell into send buffer vector
00322       }                                                         //  Endif for interaction
00323     }                                                           // End loop over child cells
00324     if( C->ICELL == 0 && C->NCHILD == 0 ) {                     // If the root cell has no children
00325       JCell cell;                                               //  Set compact cell type for sending
00326       cell.ICELL = C->ICELL;                                    //  Set index of compact cell type
00327       cell.M     = C->M;                                        //  Set Multipoles of compact cell type
00328       sendCells.push_back(cell);                                //  Push cell into send buffer vector
00329     }                                                           // Endif for root cells children
00330   }
00331 
00332 //! Communicate cells by one-to-one MPI_Alltoallv
00333   void commCellsAlltoall(int l) {
00334     const int bytes = sizeof(sendCells[0]);                     // Byte size of JCell structure
00335     int rcnt[2], scnt[2] = {0, 0};                              // Recv count, send count
00336     scnt[1-key[l+1][2]] = sendCells.size()*bytes;               // Set send count to size of send buffer * bytes
00337     MPI_Alltoall(scnt,1,MPI_INT,rcnt,1,MPI_INT,MPI_COMM[l+1][2]);// Communicate the send count to get recv count
00338     int sdsp[2] = {0, scnt[0]};                                 // Send displacement
00339     int rdsp[2] = {0, rcnt[0]};                                 // Recv displacement
00340     if( color[l+1][0] != color[l+1][1] ) {                      // If leftover process of odd group
00341       rcnt[1-key[l+1][2]] = 0;                                  //  It won't have a pair for this communication
00342     }                                                           // Endif for leftover process of odd group
00343     recvCells.resize(rcnt[1-key[l+1][2]]/bytes);                // Resize recv buffer
00344     MPI_Alltoallv(&sendCells[0],scnt,sdsp,MPI_BYTE,             // Communicate cells
00345                   &recvCells[0],rcnt,rdsp,MPI_BYTE,MPI_COMM[l+1][2]);// MPI_COMM[2] is for the one-to-one pair
00346   }
00347 
00348 //! Communicate cells by scattering from leftover processes
00349   void commCellsScatter(int l) {
00350     const int bytes = sizeof(sendCells[0]);                     // Byte size of JCell structure
00351     int numScatter = nprocs[l+1][1] - 1;                        // Number of processes to scatter to
00352     int oldSize = recvCells.size();                             // Size of recv buffer before communication
00353     int *scnt = new int [nprocs[l+1][1]];                       // Send count
00354     int *sdsp = new int [nprocs[l+1][1]];                       // Send displacement
00355     int rcnt;                                                   // Recv count
00356     if( key[l+1][1] == numScatter ) {                           // If this is the leftover proc to scatter from
00357       sdsp[0] = 0;                                              //  Initialize send displacement
00358       for(int i=0; i!=numScatter; ++i ) {                       //  Loop over processes to send to
00359         int begin = 0, end = sendCells.size();                  //   Set begin and end of range to send
00360         splitRange(begin,end,i,numScatter);                     //   Split range into numScatter and get i-th range
00361         scnt[i] = end - begin;                                  //   Set send count based on range
00362         sdsp[i+1] = sdsp[i] + scnt[i];                          //   Set send displacement based on send count
00363       }                                                         //  End loop over processes to send to
00364       scnt[numScatter] = 0;                                     //  Send count to self should be 0
00365     }                                                           // Endif for leftover proc
00366     MPI_Scatter(scnt,1,MPI_INT,&rcnt,1,MPI_INT,numScatter,MPI_COMM[l+1][1]);// Scatter the send count to get recv count
00367 
00368     recvCells.resize(oldSize+rcnt);                             // Resize recv buffer based on recv count
00369     for(int i=0; i!= nprocs[l+1][1]; ++i ) {                    // Loop over group of processes
00370       scnt[i] *= bytes;                                         //  Multiply send count by byte size of data
00371       sdsp[i] *= bytes;                                         //  Multiply send displacement by byte size of data
00372     }                                                           // End loop over group of processes
00373     rcnt *= bytes;                                              // Multiply recv count by byte size of data
00374     MPI_Scatterv(&sendCells[0],      scnt,sdsp,MPI_BYTE,        // Communicate cells via MPI_Scatterv
00375                  &recvCells[oldSize],rcnt,     MPI_BYTE,        // Offset recv buffer by oldSize
00376                  numScatter,MPI_COMM[l+1][1]);
00377     delete[] scnt;                                              // Delete send count
00378     delete[] sdsp;                                              // Delete send displacement
00379   }
00380 
00381 //! Turn recv bodies to twigs
00382   void rbodies2twigs(Bodies &bodies, Cells &twigs) {
00383     startTimer("Recv bodies  ");                                //  Start timer
00384     for( JB_iter JB=recvBodies.begin(); JB!=recvBodies.end(); ++JB ) {// Loop over recv bodies
00385       Body body;                                                //  Body structure
00386       body.IBODY = 0;                                           //  Initialize body index
00387       body.IPROC = 0;                                           //  Initialize proc index
00388       body.TRG   = 0;                                           //  Initialize target values
00389       body.ICELL = JB->ICELL;                                   //  Set index of cell
00390       body.X     = JB->X;                                       //  Set position of body
00391       body.SRC   = JB->SRC;                                     //  Set source values of body
00392       bodies.push_back(body);                                   //  Push body into bodies vector
00393     }                                                           // End loop over recv bodies
00394     buffer.resize(bodies.size());                               // Resize sort buffer
00395     stopTimer("Recv bodies  ",printNow);                        //  Stop timer 
00396     sortBodies(bodies,buffer,false);                            // Sort bodies in descending order
00397     bodies2twigs(bodies,twigs);                                 // Turn bodies to twigs
00398   }
00399 
00400 //! Turn cells to twigs
00401   void cells2twigs(Cells &cells, Cells &twigs, bool last) {
00402     while( !cells.empty() ) {                                   // While cell vector is not empty
00403       if( cells.back().NCHILD == 0 ) {                          //  If cell has no child
00404         if( cells.back().NDLEAF == 0 || !last ) {               //   If cell has no leaf or is not last iteration
00405           cells.back().NDLEAF = 0;                              //    Set number of leafs to 0
00406           twigs.push_back(cells.back());                        //    Push cell into twig vector
00407         }                                                       //   Endif for no leaf
00408       }                                                         //  Endif for no child
00409       cells.pop_back();                                         //  Pop last element from cell vector
00410     }                                                           // End while for cell vector
00411   }
00412 
00413 //! Turn send buffer to twigs
00414   void send2twigs(Bodies &bodies, Cells &twigs, int offTwigs) {
00415     for( JC_iter JC=sendCells.begin(); JC!=sendCells.begin()+offTwigs; ++JC ) {// Loop over send buffer
00416       Cell cell;                                                //  Cell structure
00417       cell.ICELL = JC->ICELL;                                   //  Set index of cell
00418       cell.M     = JC->M;                                       //  Set multipole of cell
00419       cell.NDLEAF = cell.NCHILD = 0;                            //  Set number of leafs and children
00420       cell.LEAF  = bodies.end();                                //  Set pointer to first leaf
00421       getCenter(cell);                                          //  Set center and radius
00422       twigs.push_back(cell);                                    //  Push cell into twig vector
00423     }                                                           // End loop over send buffer
00424     sendCells.clear();                                          // Clear send buffer
00425   }
00426 
00427 //! Turn recv buffer to twigs
00428   void recv2twigs(Bodies &bodies, Cells &twigs) {
00429     for( JC_iter JC=recvCells.begin(); JC!=recvCells.end(); ++JC ) {// Loop over recv buffer
00430       Cell cell;                                                //  Cell structure
00431       cell.ICELL = JC->ICELL;                                   //  Set index of cell
00432       cell.M     = JC->M;                                       //  Set multipole of cell
00433       cell.NDLEAF = cell.NCHILD = 0;                            //  Set number of leafs and children
00434       cell.LEAF  = bodies.end();                                //  Set pointer to first leaf
00435       getCenter(cell);                                          //  Set center and radius
00436       twigs.push_back(cell);                                    //  Push cell into twig vector
00437     }                                                           // End loop over recv buffer
00438   }
00439 
00440 //! Zip two groups of twigs that overlap
00441   void zipTwigs(Cells &twigs, Cells &cells, Cells &sticks, bool last) {
00442     startTimer("Sort resize  ");                                // Start timer
00443     Cells cbuffer = twigs;                                      // Sort buffer for cells
00444     stopTimer("Sort resize  ",printNow);                        // Stop timer 
00445     sortCells(twigs,cbuffer);                                   // Sort twigs in ascending order
00446     startTimer("Ziptwigs     ");                                // Start timer
00447     bigint index = -1;                                          // Initialize index counter
00448     while( !twigs.empty() ) {                                   // While twig vector is not empty
00449       if( twigs.back().ICELL != index ) {                       //  If twig's index is different from previous
00450         cells.push_back(twigs.back());                          //   Push twig into cell vector
00451         index = twigs.back().ICELL;                             //   Update index counter
00452       } else if ( twigs.back().NDLEAF == 0 || !last ) {         //  Elseif twig-twig collision
00453         cells.back().M += twigs.back().M;                       //   Accumulate the multipole
00454       } else if ( cells.back().NDLEAF == 0 ) {                  //  Elseif twig-body collision
00455         Mset M;                                                 //   Multipole for temporary storage
00456         M = cells.back().M;                                     //   Save multipoles from cells
00457         cells.back() = twigs.back();                            //   Copy twigs to cells
00458         cells.back().M = M;                                     //   Copy back multipoles to cells
00459         twigs.back().M = M - twigs.back().M;                    //   Take the difference of the two
00460         if( std::abs(twigs.back().M[0]/M[0]) > EPS ) {          //   If the difference is non-zero
00461           sticks.push_back(twigs.back());                       //    Save this difference in the sticks vector
00462         }                                                       //   Endif for non-zero difference
00463       } else {                                                  //  Else body-body collision (don't do anything)
00464       }                                                         //  Endif for collision type
00465       twigs.pop_back();                                         //  Pop last element from twig vector
00466     }                                                           // End while for twig vector
00467     stopTimer("Ziptwigs     ",printNow);                        // Stop timer 
00468     sortCells(cells,cbuffer);                                   // Sort cells in ascending order
00469     startTimer("Ziptwigs     ");                                // Start timer
00470     twigs = cells;                                              // Copy cells to twigs
00471     cells.clear();                                              // Clear cells
00472     stopTimer("Ziptwigs     ",printNow);                        // Stop timer 
00473   }
00474 
00475 //! Re-index bodies
00476   void reindexBodies(Bodies &bodies, Cells &twigs, Cells &cells ,Cells &sticks) {
00477     startTimer("Reindex      ");                                // Start timer
00478     while( !twigs.empty() ) {                                   // While twig vector is not empty
00479       if( twigs.back().NDLEAF == 0 ) {                          //  If twig has no leafs
00480         cells.push_back(twigs.back());                          //   Push twig into cell vector
00481       }                                                         //  Endif for no leafs
00482       twigs.pop_back();                                         //  Pop last element from twig vector
00483     }                                                           // End while for twig vector
00484 //    BottomUp::setIndex(bodies,-1,0,0,true);                     // Set index of bodies
00485     buffer.resize(bodies.size());                               // Resize sort buffer
00486     stopTimer("Reindex      ",printNow);                        // Stop timer 
00487 //    sortBodies(bodies,buffer,false);                            // Sort bodies in descending order
00488 //    BottomUp::grow(bodies);                                     // Grow tree structure
00489     sortBodies(bodies,buffer,false);                              // Sort bodies in descending order
00490     bodies2twigs(bodies,twigs);                                 // Turn bodies to twigs
00491     startTimer("Reindex      ");                                // Start timer
00492     for( C_iter C=twigs.begin(); C!=twigs.end(); ++C ) {        // Loop over cells
00493       if( sticks.size() > 0 ) {                                 //  If stick vector is not empty
00494         if( C->ICELL == sticks.back().ICELL ) {                 //   If twig's index is equal to stick's index
00495           C->M += sticks.back().M;                              //    Accumulate multipole
00496           sticks.pop_back();                                    //    Pop last element from stick vector
00497         }                                                       //   Endif for twig's index
00498       }                                                         //  Endif for stick vector
00499     }                                                           // End loop over cells
00500     cells.insert(cells.begin(),twigs.begin(),twigs.end());      // Add twigs to the end of cell vector
00501     cells.insert(cells.begin(),sticks.begin(),sticks.end());    // Add remaining sticks to the end of cell vector
00502     sticks.clear();                                             // Clear sticks
00503     Cells cbuffer = cells;                                      // Sort buffer for cells
00504     stopTimer("Reindex      ",printNow);                        // Stop timer 
00505     sortCells(cells,cbuffer);                                   // Sort cells in ascending order
00506     startTimer("Reindex      ");                                // Start timer
00507     twigs = cells;                                              // Copy cells to twigs
00508     cells.clear();                                              // Clear cells
00509     stopTimer("Reindex      ",printNow);                        // Stop timer 
00510   }
00511 
00512 //! Turn sticks to send buffer
00513   void sticks2send(Cells &sticks, int &offTwigs) {
00514     while( !sticks.empty() ) {                                  // While stick vector is not empty
00515       JCell cell;                                               //  Cell structure
00516       cell.ICELL = sticks.back().ICELL;                         //  Set index of cell
00517       cell.M     = sticks.back().M;                             //  Set multipole of cell
00518       sendCells.push_back(cell);                                //  Push cell into send buffer
00519       sticks.pop_back();                                        //  Pop last element of stick vector
00520     }                                                           // End while for stick vector
00521     offTwigs = sendCells.size();                                // Keep track of current send buffer size
00522   }
00523 
00524 //! Validate number of send cells
00525   void checkNumCells(int l) {                                   // Only works with octsection
00526     int maxLevel = int(log(MPISIZE-1) / M_LN2 / 3) + 1;
00527     if( MPISIZE == 1 ) maxLevel = 0;
00528     int octant0 = -1;
00529     int numCells = 0;
00530     for( JC_iter JC=sendCells.begin(); JC!=sendCells.end(); ++JC ) {
00531       int level = getLevel(JC->ICELL);
00532       int index = JC->ICELL - ((1 << 3*level) - 1) / 7;
00533       int octant = index / (1 << 3 * (level - maxLevel));
00534       if( octant != octant0 ) {
00535         octant0 = octant;
00536         numCells++;
00537       }
00538     }
00539     int numCellsExpect = (1 << (3 * maxLevel - 1)) / (1 << l);  // Isn't true for far domains
00540     if( numCellsExpect != numCells && MPIRANK == 0) std::cout << numCells << " " << numCellsExpect << std::endl;
00541   }
00542 
00543 //! Check total charge
00544   void checkSumMass(Cells &cells) {
00545     real localMass = 0;
00546     for( C_iter C=cells.begin(); C!=cells.end(); ++C ) {
00547       if( C->NCHILD == 0 ) {
00548         localMass += std::abs(C->M[0]);
00549       }
00550     }
00551     real globalMass;
00552     MPI_Allreduce(&localMass,&globalMass,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
00553     print("localMass : ",0);
00554     print(localMass);
00555     print("globalMass : ",0);
00556     print(globalMass,0);
00557     print("\n",0);
00558   }
00559 
00560 public:
00561 //! Constructor
00562   ParallelFMM() : Partition<equation>() {}
00563 //! Destructor
00564   ~ParallelFMM() {}
00565 
00566 //! Set bodies to communicate
00567   void setCommBodies(Cells &cells) {
00568     startTimer("Gather bounds");                                // Start timer
00569     gatherBounds();                                             // Gather bounds of other domain
00570     stopTimer("Gather bounds",printNow);                        // Stop timer 
00571     startTimer("Get send rank");                                // Start timer
00572     getSendRank(cells);                                         // Get neighbor ranks to send to
00573     stopTimer("Get send rank",printNow);                        // Stop timer 
00574   }
00575 
00576 //! Update bodies using the previous send count
00577   void updateBodies(bool comm=true) {
00578     startTimer("Get send cnt ");                                // Start timer
00579     getSendCount(comm);                                         // Get size of data to send
00580     stopTimer("Get send cnt ",printNow);                        // Stop timer 
00581     startTimer("Alltoall B   ");                                // Start timer
00582 #if 1
00583     int bytes = sizeof(sendBodies[0]);                          // Byte size of jbody structure
00584     for( int i=0; i!=MPISIZE; ++i ) {                           // Loop over ranks
00585       sendBodyCnt[i] *= bytes;                                  //  Multiply by bytes
00586       sendBodyDsp[i] *= bytes;                                  //  Multiply by bytes
00587       recvBodyCnt[i] *= bytes;                                  //  Multiply by bytes
00588       recvBodyDsp[i] *= bytes;                                  //  Multiply by bytes
00589     }                                                           // End loop over ranks
00590     MPI_Alltoallv(&sendBodies[0],&sendBodyCnt[0],&sendBodyDsp[0],MPI_BYTE,
00591                   &recvBodies[0],&recvBodyCnt[0],&recvBodyDsp[0],MPI_BYTE,MPI_COMM_WORLD);
00592     for( int i=0; i!=MPISIZE; ++i ) {                           // Loop over ranks
00593       sendBodyCnt[i] /= bytes;                                  //  Divide by bytes
00594       sendBodyDsp[i] /= bytes;                                  //  Divide by bytes
00595       recvBodyCnt[i] /= bytes;                                  //  Divide by bytes
00596       recvBodyDsp[i] /= bytes;                                  //  Divide by bytes
00597     }                                                           // End loop over ranks
00598 #else
00599     commBodiesAlltoall();
00600 #endif
00601     sendBodies.clear();                                         // Clear send buffer for bodies
00602     stopTimer("Alltoall B   ",printNow);                        // Stop timer 
00603   }
00604 
00605 //! Communicate bodies in the local essential tree
00606   void commBodies(Cells &cells) {
00607     setCommBodies(cells);                                       // Set bodies to communicate
00608     updateBodies();                                             // Update bodies with alltoall
00609   }
00610 
00611 //! Convert recvBodies to cells
00612   void bodies2cells(Bodies &bodies, Cells &cells) {
00613     Cells twigs,sticks;                                         // Twigs and sticks are special types of cells
00614     rbodies2twigs(bodies,twigs);                                // Put recv bodies into twig vector
00615     twigs2cells(twigs,cells,sticks);                            // Turn twigs to cells
00616   }
00617 
00618 //! Communicate cells in the local essential tree
00619   void commCells(Bodies &bodies, Cells &cells) {
00620     vect xmin = 0, xmax = 0;                                    // Initialize domain boundaries
00621     Cells twigs,sticks;                                         // Twigs and sticks are special types of cells
00622 
00623 #if 1
00624     startTimer("Get LET      ");                                // Start timer
00625     int ssize = 0;                                              // Initialize offset for send cells
00626     sendCellCnt.assign(MPISIZE,0);                              // Initialize cell send count
00627     sendCellDsp.assign(MPISIZE,0);                              // Initialize cell send displacement
00628     for( int irank=0; irank!=MPISIZE; ++irank ) {               // Loop over ranks to send to
00629       getLET(cells.begin(),cells.end()-1,xminAll[irank],xmaxAll[irank]);//  Determine which cells to send
00630       sendCellCnt[irank] = sendCells.size()-ssize;              //  Set cell send count of current rank
00631       sendCellDsp[irank] = ssize;                               //  Set cell send displacement of current rank
00632       ssize += sendCellCnt[irank];                              //  Increment offset for vector send cells
00633     }                                                           // End loop over ranks
00634     stopTimer("Get LET      ",printNow);                        // Stop timer 
00635     startTimer("Alltoall C   ");                                // Start timer
00636     MPI_Alltoall(&sendCellCnt[0],1,MPI_INT,&recvCellCnt[0],1,MPI_INT,MPI_COMM_WORLD);// Communicate the send counts
00637     int rsize = 0;                                              // Initialize total recv count
00638     for( int irank=0; irank!=MPISIZE; ++irank ) {               // Loop over ranks to recv from
00639       recvCellDsp[irank] = rsize;                               //  Set recv displacements
00640       rsize += recvCellCnt[irank];                              //  Accumulate recv counts
00641     }                                                           // End loop over ranks to recv from
00642     recvCells.resize(rsize);                                    // Resize recv buffer
00643     int bytes = sizeof(sendCells[0]);                           // Byte size of jbody structure
00644     for( int i=0; i!=MPISIZE; ++i ) {                           // Loop over ranks
00645       sendCellCnt[i] *= bytes;                                  //  Multiply by bytes
00646       sendCellDsp[i] *= bytes;                                  //  Multiply by bytes
00647       recvCellCnt[i] *= bytes;                                  //  Multiply by bytes
00648       recvCellDsp[i] *= bytes;                                  //  Multiply by bytes
00649     }                                                           // End loop over ranks
00650     MPI_Alltoallv(&sendCells[0],&sendCellCnt[0],&sendCellDsp[0],MPI_BYTE,
00651                   &recvCells[0],&recvCellCnt[0],&recvCellDsp[0],MPI_BYTE,MPI_COMM_WORLD);
00652     for( int i=0; i!=MPISIZE; ++i ) {                           // Loop over ranks
00653       sendCellCnt[i] /= bytes;                                  //  Divide by bytes
00654       sendCellDsp[i] /= bytes;                                  //  Divide by bytes
00655       recvCellCnt[i] /= bytes;                                  //  Divide by bytes
00656       recvCellDsp[i] /= bytes;                                  //  Divide by bytes
00657     }                                                           // End loop over ranks
00658     stopTimer("Alltoall C   ",printNow);                        // Stop timer 
00659     rbodies2twigs(bodies,twigs);                                // Put recv bodies into twig vector
00660     startTimer("Cells2twigs  ");                                // Start timer
00661     cells2twigs(cells,twigs,true);                              // Put cells into twig vector
00662     stopTimer("Cells2twigs  ",printNow);                        // Stop timer 
00663     startTimer("Recv2twigs   ");                                // Start timer
00664     recv2twigs(bodies,twigs);                                   // Put recv buffer into twig vector
00665     stopTimer("Recv2twigs   ",printNow);                        // Stop timer 
00666     zipTwigs(twigs,cells,sticks,true);                          // Zip two groups of twigs that overlap
00667     reindexBodies(bodies,twigs,cells,sticks);                   // Re-index bodies
00668     twigs2cells(twigs,cells,sticks);                            // Turn twigs to cells
00669     sendCells.clear();                                          // Clear send buffer
00670     recvCells.clear();                                          // Clear recv buffer
00671 #else
00672     int offTwigs = 0;                                           // Initialize offset of twigs
00673     for( int l=0; l!=LEVEL; ++l ) {                             // Loop over levels of N-D hypercube communication
00674       getOtherDomain(xmin,xmax,l+1);                            //  Get boundries of domains on other processes
00675       startTimer("Get LET      ");                              //  Start timer
00676       getLET(cells.begin(),cells.end()-1,xmin,xmax);            //  Determine which cells to send
00677 #ifdef DEBUG
00678       checkNumCells(LEVEL-l-1);
00679       checkSumMass(cells);
00680 #endif
00681       stopTimer("Get LET      ",printNow);                      //  Stop timer 
00682       startTimer("Alltoall C   ");                              //  Start timer
00683       commCellsAlltoall(l);                                     //  Communicate cells by one-to-one MPI_Alltoallv
00684       if( nprocs[l][0] % 2 == 1 && nprocs[l][0] != 1 && nprocs[l+1][0] <= nprocs[l+1][1] ) {// If scatter is necessary
00685         commCellsScatter(l);                                    //   Communicate cells by scattering from leftover proc
00686       }                                                         //  Endif for odd number of procs
00687       stopTimer("Alltoall C   ",printNow);                      //  Stop timer 
00688       if( l == LEVEL - 1 ) rbodies2twigs(bodies,twigs);         //  Put recv bodies into twig vector
00689       startTimer("Cells2twigs  ");                              //  Start timer
00690       cells2twigs(cells,twigs,l==LEVEL-1);                      //  Put cells into twig vector
00691       stopTimer("Cells2twigs  ",printNow);                      //  Stop timer 
00692       startTimer("Send2twigs   ");                              //  Start timer
00693       send2twigs(bodies,twigs,offTwigs);                        //  Put send buffer (sticks) into twig vector
00694       stopTimer("Send2twigs   ",printNow);                      //  Stop timer 
00695       startTimer("Recv2twigs   ");                              //  Start timer
00696       recv2twigs(bodies,twigs);                                 //  Put recv buffer into twig vector
00697       stopTimer("Recv2twigs   ",printNow);                      //  Stop timer 
00698 #ifdef DEBUG
00699       if( l == LEVEL - 1 ) {                                    //  If at last level
00700         complex SUM = 0;                                        //   Initialize accumulator
00701         for(C_iter C=twigs.begin(); C!=twigs.end(); ++C) {      //   Loop over twigs
00702           if( C->NDLEAF == 0 ) SUM += C->M[0];                  //    Add multipoles of empty twigs
00703         }                                                       //   End loop over twigs
00704         print("Before recv   : ",0);                            //   Print identifier
00705         print(SUM);                                             //   Print sum of multipoles
00706       }                                                         //  Endif for last level
00707 #endif
00708       zipTwigs(twigs,cells,sticks,l==LEVEL-1);                  //  Zip two groups of twigs that overlap
00709 #ifdef DEBUG
00710       if( l == LEVEL - 1 ) {                                    //  If at last level
00711         complex SUM = 0;                                        //   Initialize accumulator
00712         for(C_iter C=twigs.begin(); C!=twigs.end(); ++C) {      //   Loop over twigs
00713           SUM += C->M[0];                                       //    Add multipoles
00714         }                                                       //   End loop over twigs
00715         print("After merge   : ",0);                            //   Print identifier
00716         print(SUM);                                             //   Print sum of multipoles
00717         print("sticks.size() : ",0);                            //   Print identifier
00718         print(sticks.size());                                   //   Print size of stick vector
00719       }                                                         //  Endif for last level
00720 #endif
00721       if( l == LEVEL - 1 ) reindexBodies(bodies,twigs,cells,sticks);// Re-index bodies
00722       twigs2cells(twigs,cells,sticks);                          //  Turn twigs to cells
00723       startTimer("Sticks2send  ");                              //  Start timer
00724       sticks2send(sticks,offTwigs);                             //  Turn sticks to send buffer
00725       stopTimer("Sticks2send  ",printNow);                      //  Stop timer 
00726     }                                                           // End loop over levels of N-D hypercube communication
00727 #endif
00728 
00729 #ifdef DEBUG
00730     print("M[0] @ root   : ",0);                                // Print identifier
00731     print((cells.end()-1)->M[0]);                               // Print monopole of root (should be 1 for test)
00732     print("bodies.size() : ",0);                                // Print identifier
00733     print(bodies.size());                                       // Print size of body vector
00734 #endif
00735     sendCells.clear();                                          // Clear send buffer
00736     recvCells.clear();                                          // Clear recv buffer
00737   }
00738 
00739 //! Remove cells that belong to current process
00740   void eraseLocalTree(Cells &cells) {
00741     int level = int(log(MPISIZE-1) / M_LN2 / 3) + 1;            // Level of process root cell
00742     if( MPISIZE == 1 ) level = 0;                               // Account for serial case
00743     int off = ((1 << 3 * level) - 1) / 7;                       // Levelwise offset of ICELL
00744     int size = (1 << 3 * level) / MPISIZE;                      // Number of cells to remove
00745     unsigned begin = MPIRANK * size + off;                      // Begin index of cells to remove
00746     unsigned end = (MPIRANK + 1) * size + off;                  // End index of cells to remove
00747     for( C_iter C=cells.begin(); C!=cells.end(); ++C ) {        // Loop over cells
00748       int nchild = 0;                                           //  Initialize child cell counter
00749       for( int c=0; c!=C->NCHILD; ++c ) {                       //  Loop over child cells
00750         C_iter CC = cells.begin()+C->CHILD+c;                   //   Iterator of child cell
00751         if( CC->ICELL < begin || end <= CC->ICELL ) {           //   If child cell is not within the removal range
00752           C_iter CH = cells.begin()+C->CHILD+nchild;            //    New iterator of child cell
00753           *CH = *CC;                                            //    Copy data of child cell
00754           nchild++;                                             //    Increment child cell counter
00755         }                                                       //   Endif for removal range
00756       }                                                         //  End loop over child cells
00757       C->NCHILD = nchild;                                       //  Update number of child cells
00758     }                                                           // End loop over cells
00759   }
00760 };
00761 
00762 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines