ExaFMM 1
Fast-multipole Method for exascale systems
include/evaluator.h
Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba
00003 
00004 Permission is hereby granted, free of charge, to any person obtaining a copy
00005 of this software and associated documentation files (the "Software"), to deal
00006 in the Software without restriction, including without limitation the rights
00007 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00008 copies of the Software, and to permit persons to whom the Software is
00009 furnished to do so, subject to the following conditions:
00010 
00011 The above copyright notice and this permission notice shall be included in
00012 all copies or substantial portions of the Software.
00013 
00014 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00015 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00016 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00017 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00019 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00020 THE SOFTWARE.
00021 */
00022 #ifndef evaluator_h
00023 #define evaluator_h
00024 #include "dataset.h"
00025 #define splitFirst(Ci,Cj) Cj->NCHILD == 0 || (Ci->NCHILD != 0 && Ci->R > Cj->R)
00026 
00027 //! Interface between tree and kernel
00028 template<Equation equation>
00029 class Evaluator : public Dataset<equation> {
00030 protected:
00031   C_iter      CiB;                                              //!< icells begin per call
00032   C_iter      CiE;                                              //!< icells end per call
00033   C_iter      CjB;                                              //!< jcells begin per call
00034   C_iter      CjE;                                              //!< jcells end per call
00035   Lists       listM2L;                                          //!< M2L interaction list
00036   Lists       listM2P;                                          //!< M2P interaction list
00037   Lists       listP2P;                                          //!< P2P interaction list
00038   real        timeM2L;                                          //!< M2L execution time
00039   real        timeM2P;                                          //!< M2P execution time
00040   real        timeP2P;                                          //!< P2P execution time
00041 
00042   int         Iperiodic;                                        //!< Periodic image flag (using each bit for images)
00043   const int   Icenter;                                          //!< Periodic image flag at center
00044   Maps        flagM2L;                                          //!< Existance of periodic image for M2L
00045   Maps        flagM2P;                                          //!< Existance of periodic image for M2P
00046   Maps        flagP2P;                                          //!< Existance of periodic image for P2P
00047 
00048 public:
00049   using Kernel<equation>::printNow;                             //!< Switch to print timings
00050   using Kernel<equation>::startTimer;                           //!< Start timer for given event
00051   using Kernel<equation>::stopTimer;                            //!< Stop timer for given event
00052   using Kernel<equation>::writeTrace;                           //!< Write traces of all events
00053   using Kernel<equation>::R0;                                   //!< Radius of root cell
00054   using Kernel<equation>::Ci0;                                  //!< icells.begin()
00055   using Kernel<equation>::Cj0;                                  //!< jcells.begin()
00056   using Kernel<equation>::ALPHA;                                //!< Scaling parameter for Ewald summation
00057   using Kernel<equation>::keysHost;                             //!< Offsets for rangeHost
00058   using Kernel<equation>::rangeHost;                            //!< Offsets for sourceHost
00059   using Kernel<equation>::constHost;                            //!< Constants on host
00060   using Kernel<equation>::sourceHost;                           //!< Sources on host
00061   using Kernel<equation>::targetHost;                           //!< Targets on host
00062   using Kernel<equation>::sourceBegin;                          //!< Define map for offset of source cells
00063   using Kernel<equation>::sourceSize;                           //!< Define map for size of source cells
00064   using Kernel<equation>::targetBegin;                          //!< Define map for offset of target cells
00065   using Kernel<equation>::NP2P;                                 //!< Number of P2P kernel calls
00066   using Kernel<equation>::NM2P;                                 //!< Number of M2P kernel calls
00067   using Kernel<equation>::NM2L;                                 //!< Number of M2L kernel calls
00068   using Kernel<equation>::allocate;                             //!< Allocate GPU kernels
00069   using Kernel<equation>::hostToDevice;                         //!< Copy from host to device
00070   using Kernel<equation>::deviceToHost;                         //!< Copy from device to host
00071   using Kernel<equation>::P2M;                                  //!< Evaluate P2M kernel
00072   using Kernel<equation>::M2M;                                  //!< Evaluate M2M kernel
00073   using Kernel<equation>::M2L;                                  //!< Evaluate M2L kernel
00074   using Kernel<equation>::M2P;                                  //!< Evaluate M2P kernel
00075   using Kernel<equation>::P2P;                                  //!< Evaluate P2P kernel
00076   using Kernel<equation>::L2L;                                  //!< Evaluate L2L kernel
00077   using Kernel<equation>::L2P;                                  //!< Evaluate L2P kernel
00078   using Kernel<equation>::EwaldReal;                            //!< Evaluate Ewald real part
00079   using Dataset<equation>::initSource;                          //!< Initialize source values
00080   using Dataset<equation>::initTarget;                          //!< Initialize target values
00081 
00082 private:
00083 //! Approximate interaction between two cells
00084   inline void approximate(C_iter Ci, C_iter Cj) {
00085 #if HYBRID
00086     if( timeP2P*Cj->NDLEAF < timeM2P && timeP2P*Ci->NDLEAF*Cj->NDLEAF < timeM2L) {// If P2P is fastest
00087       evalP2P(Ci,Cj);                                           //  Evaluate on CPU, queue on GPU
00088     } else if ( timeM2P < timeP2P*Cj->NDLEAF && timeM2P*Ci->NDLEAF < timeM2L ) {// If M2P is fastest
00089       evalM2P(Ci,Cj);                                           //  Evaluate on CPU, queue on GPU
00090     } else {                                                    // If M2L is fastest
00091       evalM2L(Ci,Cj);                                           //  Evaluate on CPU, queue on GPU
00092     }                                                           // End if for kernel selection
00093 #elif TREECODE
00094     evalM2P(Ci,Cj);                                             // Evaluate on CPU, queue on GPU
00095 #else
00096     evalM2L(Ci,Cj);                                             // Evalaute on CPU, queue on GPU
00097 #endif
00098   }
00099 
00100 #if QUARK
00101   inline void interact(C_iter Ci, C_iter Cj, Quark *quark);     //!< interact() function using QUARK
00102 #endif
00103 
00104 //! Traverse a single tree using a stack
00105   void traverseStack(C_iter Ci, C_iter C) {
00106     CellStack cellStack;                                        // Stack of cells
00107     cellStack.push(C);                                          // Push pair to queue
00108     while( !cellStack.empty() ) {                               // While traversal stack is not empty
00109       C = cellStack.top();                                      //  Get cell from top of stack
00110       cellStack.pop();                                          //  Pop traversal stack
00111       for( C_iter Cj=Cj0+C->CHILD; Cj!=Cj0+C->CHILD+C->NCHILD; ++Cj ) {// Loop over cell's children
00112         vect dX = Ci->X - Cj->X - Xperiodic;                    //   Distance vector from source to target
00113         real Rq = std::sqrt(norm(dX));                          //   Scalar distance
00114         if( Rq * THETA < Ci->R + Cj->R && Cj->NCHILD == 0 ) {   //   If twigs are close
00115           evalEwaldReal(Ci,Cj);                                 //    Ewald real part
00116         } else if( Cj->NCHILD != 0 ) {                          //   If cells are not twigs
00117           cellStack.push(Cj);                                   //    Push source cell to stack
00118         }                                                       //   End if for twig cells
00119       }                                                         //  End loop over cell's children
00120     }                                                           // End while loop for traversal stack
00121   }
00122 
00123 //! Traverse a pair of trees using a queue
00124   void traverseQueue(Pair pair) {
00125     PairQueue pairQueue;                                        // Queue of interacting cell pairs
00126 #if QUARK
00127     Quark *quark = QUARK_New(4);                                // Initialize QUARK object
00128 #endif
00129     pairQueue.push(pair);                                       // Push pair to queue
00130     while( !pairQueue.empty() ) {                               // While dual traversal queue is not empty
00131       pair = pairQueue.front();                                 //  Get interaction pair from front of queue
00132       pairQueue.pop();                                          //  Pop dual traversal queue
00133       if(splitFirst(pair.first,pair.second)) {                  //  If first cell is larger
00134         C_iter C = pair.first;                                  //   Split the first cell
00135         for( C_iter Ci=Ci0+C->CHILD; Ci!=Ci0+C->CHILD+C->NCHILD; ++Ci ) {// Loop over first cell's children
00136           interact(Ci,pair.second,pairQueue);                   //    Calculate interaction between cells
00137         }                                                       //   End loop over fist cell's children
00138       } else {                                                  //  Else if second cell is larger
00139         C_iter C = pair.second;                                 //   Split the second cell
00140         for( C_iter Cj=Cj0+C->CHILD; Cj!=Cj0+C->CHILD+C->NCHILD; ++Cj ) {// Loop over second cell's children
00141           interact(pair.first,Cj,pairQueue);                    //    Calculate interaction betwen cells
00142         }                                                       //   End loop over second cell's children
00143       }                                                         //  End if for which cell to split
00144 #if QUARK
00145       if( pairQueue.size() > 100 ) {                            //  When queue size reaches threshold
00146         while( !pairQueue.empty() ) {                           //   While dual traversal queue is not empty
00147           pair = pairQueue.front();                             //    Get interaction pair from front of queue
00148           pairQueue.pop();                                      //    Pop dual traversal queue
00149           interact(pair.first,pair.second,quark);               //    Schedule interact() task on QUARK
00150         }                                                       //   End while loop for dual traversal queue
00151       }                                                         //  End if for queue size
00152 #endif
00153     }                                                           // End while loop for dual traversal queue
00154 #if QUARK
00155     QUARK_Delete(quark);                                        // Delete QUARK object 
00156     writeTrace();                                               // Write event trace to file
00157 #endif
00158   }
00159 
00160 //! Get range of periodic images
00161   int getPeriodicRange() {
00162     int prange = 0;                                             //  Range of periodic images
00163     for( int i=0; i!=IMAGES; ++i ) {                            //  Loop over periodic image sublevels
00164       prange += int(pow(3,i));                                  //   Accumulate range of periodic images
00165     }                                                           //  End loop over perioidc image sublevels
00166     return prange;                                              // Return range of periodic images
00167   }
00168 
00169 protected:
00170 //! Get level from cell index
00171   int getLevel(bigint index) {
00172     int i = index;                                              // Copy to dummy index
00173     int level = -1;                                             // Initialize level counter
00174     while( i >= 0 ) {                                           // While cell index is non-negative
00175       level++;                                                  //  Increment level
00176       i -= 1 << 3*level;                                        //  Subtract number of cells in that level
00177     }                                                           // End while loop for cell index
00178     return level;                                               // Return the level
00179   }
00180 
00181   void timeKernels();                                           //!< Time all kernels for auto-tuning
00182 
00183 //! Upward phase for periodic cells
00184   void upwardPeriodic(Cells &jcells) {
00185     Cells pccells, pjcells;                                     // Periodic center cell and jcell
00186     pccells.push_back(jcells.back());                           // Root cell is first periodic cell
00187     for( int level=0; level<IMAGES-1; ++level ) {               // Loop over sublevels of tree
00188       Cell cell;                                                //  New periodic cell at next sublevel
00189       C_iter C = pccells.end() - 1;                             //  Set previous periodic center cell as source
00190       pccells.pop_back();                                       //  Pop periodic center cell from vector
00191       for( int ix=-1; ix<=1; ++ix ) {                           //  Loop over x periodic direction
00192         for( int iy=-1; iy<=1; ++iy ) {                         //   Loop over y periodic direction
00193           for( int iz=-1; iz<=1; ++iz ) {                       //    Loop over z periodic direction
00194             if( ix != 0 || iy != 0 || iz != 0 ) {               //     If periodic cell is not at center
00195               for( int cx=-1; cx<=1; ++cx ) {                   //      Loop over x periodic direction (child)
00196                 for( int cy=-1; cy<=1; ++cy ) {                 //       Loop over y periodic direction (child)
00197                   for( int cz=-1; cz<=1; ++cz ) {               //        Loop over z periodic direction (child)
00198                     cell.X[0]  = C->X[0] + (ix * 6 + cx * 2) * C->R;//     Set new x coordinate for periodic image
00199                     cell.X[1]  = C->X[1] + (iy * 6 + cy * 2) * C->R;//     Set new y cooridnate for periodic image
00200                     cell.X[2]  = C->X[2] + (iz * 6 + cz * 2) * C->R;//     Set new z coordinate for periodic image
00201                     cell.M     = C->M;                          //         Copy multipoles to new periodic image
00202                     cell.NDLEAF = cell.NCHILD = 0;              //         Initialize NDLEAF & NCHILD
00203                     jcells.push_back(cell);                     //         Push cell into periodic jcell vector
00204                   }                                             //        End loop over z periodic direction (child)
00205                 }                                               //       End loop over y periodic direction (child)
00206               }                                                 //      End loop over x periodic direction (child)
00207             }                                                   //     Endif for periodic center cell
00208           }                                                     //    End loop over z periodic direction
00209         }                                                       //   End loop over y periodic direction
00210       }                                                         //  End loop over x periodic direction
00211       for( int ix=-1; ix<=1; ++ix ) {                           //  Loop over x periodic direction
00212         for( int iy=-1; iy<=1; ++iy ) {                         //   Loop over y periodic direction
00213           for( int iz=-1; iz<=1; ++iz ) {                       //    Loop over z periodic direction
00214             cell.X[0] = C->X[0] + ix * 2 * C->R;                //     Set new x coordinate for periodic image
00215             cell.X[1] = C->X[1] + iy * 2 * C->R;                //     Set new y cooridnate for periodic image
00216             cell.X[2] = C->X[2] + iz * 2 * C->R;                //     Set new z coordinate for periodic image
00217             cell.M = C->M;                                      //     Copy multipoles to new periodic image
00218             pjcells.push_back(cell);                            //     Push cell into periodic jcell vector
00219           }                                                     //    End loop over z periodic direction
00220         }                                                       //   End loop over y periodic direction
00221       }                                                         //  End loop over x periodic direction
00222       cell.X = C->X;                                            //  This is the center cell
00223       cell.R = 3 * C->R;                                        //  The cell size increases three times
00224       pccells.push_back(cell);                                  //  Push cell into periodic cell vector
00225       C_iter Ci = pccells.end() - 1;                            //  Set current cell as target for M2M
00226       Ci->CHILD = 0;                                            //  Set child cells for periodic M2M
00227       Ci->NCHILD = 27;                                          //  Set number of child cells for periodic M2M
00228       evalM2M(pccells,pjcells);                                 // Evaluate periodic M2M kernels for this sublevel
00229       pjcells.clear();                                          // Clear periodic jcell vector
00230     }                                                           // End loop over sublevels of tree
00231   }
00232 
00233 //! Traverse tree for periodic cells
00234   void traversePeriodic(Cells &cells, Cells &jcells) {          
00235     Xperiodic = 0;                                              // Set periodic coordinate offset
00236     Iperiodic = Icenter;                                        // Set periodic flag to center
00237     C_iter Cj = jcells.end()-1;                                 // Initialize iterator for periodic source cell
00238     for( int level=0; level<IMAGES-1; ++level ) {               // Loop over sublevels of tree
00239       for( int I=0; I!=26*27; ++I, --Cj ) {                     //  Loop over periodic images (exclude center)
00240 #if TREECODE
00241         for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) { //   Loop over cells
00242           if( Ci->NCHILD == 0 ) {                               //    If cell is twig
00243             evalM2P(Ci,Cj);                                     //     Perform M2P kernel
00244           }                                                     //    Endif for twig
00245         }                                                       //   End loop over cells
00246 #else
00247         C_iter Ci = cells.end() - 1;                            //   Set root cell as target iterator
00248         evalM2L(Ci,Cj);                                         //   Perform M2P kernel
00249 #endif
00250       }                                                         //  End loop over x periodic direction
00251     }                                                           // End loop over sublevels of tree
00252   }
00253 
00254 public:
00255 //! Constructor
00256   Evaluator() : Icenter(1 << 13) {}
00257 //! Destructor
00258   ~Evaluator() {}
00259 
00260 //! Random distribution in [-1,1]^3 cube
00261   void random(Bodies &bodies, int seed=1, int numSplit=1) {
00262     srand(seed);                                                // Set seed for random number generator
00263     for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {      // Loop over bodies
00264       if( numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit) ) {// Mimic parallel dataset
00265         seed++;                                                 //   Mimic seed at next rank
00266         srand(seed);                                            //   Set seed for random number generator
00267       }                                                         //  Endif for mimicing parallel dataset
00268       for( int d=0; d!=3; ++d ) {                               //  Loop over dimension
00269         B->X[d] = drand48() * 2 * M_PI - M_PI;                  //   Initialize positions
00270       }                                                         //  End loop over dimension
00271     }                                                           // End loop over bodies
00272     initSource(bodies);                                         // Initialize source values
00273     initTarget(bodies);                                         // Initialize target values
00274   }
00275 
00276 //! Random distribution on r = 1 sphere
00277   void sphere(Bodies &bodies, int seed=1, int numSplit=1) {
00278     srand(seed);                                                // Set seed for random number generator
00279     for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {      // Loop over bodies
00280       if( numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit) ) {// Mimic parallel dataset
00281         seed++;                                                 //   Mimic seed at next rank
00282         srand(seed);                                            //   Set seed for random number generator
00283       }                                                         //  Endif for mimicing parallel dataset
00284       for( int d=0; d!=3; ++d ) {                               //  Loop over dimension
00285         B->X[d] = drand48() * 2 - 1;                            //   Initialize positions
00286       }                                                         //  End loop over dimension
00287       real r = std::sqrt(norm(B->X));                           //  Distance from center
00288       for( int d=0; d!=3; ++d ) {                               //  Loop over dimension
00289         B->X[d] /= r * 1.1;                                     //   Normalize positions
00290       }                                                         //  End loop over dimension
00291     }                                                           // End loop over bodies
00292     initSource(bodies);                                         // Initialize source values
00293     initTarget(bodies);                                         // Initialize target values
00294   }
00295 
00296 //! Uniform distribution on [-1,1]^3 lattice (for debugging)
00297   void lattice(Bodies &bodies) {
00298     int level = int(log(bodies.size()*MPISIZE+1.)/M_LN2/3);     // Level of tree
00299     for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {      // Loop over bodies
00300       int d = 0, l = 0;                                         //  Initialize dimension and level
00301       int index = MPIRANK * bodies.size() + (B-bodies.begin()); //  Set index of body iterator
00302       vec<3,int> nx = 0;                                        //  Initialize 3-D cell index
00303       while( index != 0 ) {                                     //  Deinterleave bits while index is nonzero
00304         nx[d] += (index % 2) * (1 << l);                        //   Add deinterleaved bit to 3-D cell index
00305         index >>= 1;                                            //   Right shift the bits
00306         d = (d+1) % 3;                                          //   Increment dimension
00307         if( d == 0 ) l++;                                       //   If dimension is 0 again, increment level
00308       }                                                         //  End while loop for deinterleaving bits
00309       for( d=0; d!=3; ++d ) {                                   //  Loop over dimensions
00310         B->X[d] = -1 + (2 * nx[d] + 1.) / (1 << level);         //   Calculate cell center from 3-D cell index
00311       }                                                         //  End loop over dimensions
00312     }                                                           // End loop over bodies
00313     initSource(bodies);                                         // Initialize source values
00314     initTarget(bodies);                                         // Initialize target values
00315   }
00316 
00317 //! Add single list for kernel unit test
00318   void addM2L(C_iter Cj) {
00319     listM2L.resize(1);                                          // Resize vector of M2L interation lists
00320     flagM2L.resize(1);                                          // Resize vector of M2L periodic image flags
00321     listM2L[0].push_back(Cj);                                   // Push single cell into list
00322     flagM2L[0][Cj] |= Icenter;                                  // Flip bit of periodic image flag
00323   }
00324 
00325 //! Add single list for kernel unit test
00326   void addM2P(C_iter Cj) {
00327     listM2P.resize(1);                                          // Resize vector of M2P interation lists
00328     flagM2P.resize(1);                                          // Resize vector of M2L periodic image flags
00329     listM2P[0].push_back(Cj);                                   // Push single cell into list
00330     flagM2P[0][Cj] |= Icenter;                                  // Flip bit of periodic image flag
00331   }
00332 
00333 //! Create periodic images of bodies
00334   Bodies periodicBodies(Bodies &bodies) {
00335     Bodies jbodies;                                             // Vector for periodic images of bodies
00336     int prange = getPeriodicRange();                            // Get range of periodic images
00337     for( int ix=-prange; ix<=prange; ++ix ) {                   // Loop over x periodic direction
00338       for( int iy=-prange; iy<=prange; ++iy ) {                 //  Loop over y periodic direction
00339         for( int iz=-prange; iz<=prange; ++iz ) {               //   Loop over z periodic direction
00340           for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {//    Loop over bodies
00341             Body body = *B;                                     //     Copy current body
00342             body.X[0] += ix * 2 * R0;                           //     Shift x position
00343             body.X[1] += iy * 2 * R0;                           //     Shift y position
00344             body.X[2] += iz * 2 * R0;                           //     Shift z position
00345             jbodies.push_back(body);                            //     Push shifted body into jbodies
00346           }                                                     //    End loop over bodies
00347         }                                                       //   End loop over z periodic direction
00348       }                                                         //  End loop over y periodic direction
00349     }                                                           // End loop over x periodic direction
00350     return jbodies;                                             // Return vector for periodic images of bodies
00351   }
00352 
00353 //! Use multipole acceptance criteria to determine whether to approximate, do P2P, or subdivide
00354   void interact(C_iter Ci, C_iter Cj, PairQueue &pairQueue) {
00355     vect dX = Ci->X - Cj->X - Xperiodic;                        // Distance vector from source to target
00356     real Rq = std::sqrt(norm(dX));                              // Scalar distance
00357     if( Rq * THETA > Ci->R + Cj->R ) {                          // If distance if far enough
00358       approximate(Ci,Cj);                                       //  Use approximate kernels, e.g. M2L, M2P
00359     } else if(Ci->NCHILD == 0 && Cj->NCHILD == 0) {             // Else if both cells are leafs
00360       evalP2P(Ci,Cj);                                           //  Use P2P
00361     } else {                                                    // If cells are close but not leafs
00362       Pair pair(Ci,Cj);                                         //  Form a pair of cell iterators
00363       pairQueue.push(pair);                                     //  Push pair to queue
00364     }                                                           // End if for multipole acceptance
00365   }
00366 
00367 //! Dual tree traversal
00368   void traverse(Cells &cells, Cells &jcells) {
00369     C_iter root = cells.end() - 1;                              // Iterator for root target cell
00370     C_iter jroot = jcells.end() - 1;                            // Iterator for root source cell
00371     if( IMAGES != 0 ) {                                         // If periodic boundary condition
00372       jroot = jcells.end() - 1 - 26 * 27 * (IMAGES - 1);        //  The root is not at the end
00373     }                                                           // Endif for periodic boundary condition
00374     Ci0 = cells.begin();                                        // Set begin iterator for target cells
00375     Cj0 = jcells.begin();                                       // Set begin iterator for source cells
00376     listM2L.resize(cells.size());                               // Resize M2L interaction list
00377     listM2P.resize(cells.size());                               // Resize M2P interaction list
00378     listP2P.resize(cells.size());                               // Resize P2P interaction list
00379     flagM2L.resize(cells.size());                               // Resize M2L periodic image flag
00380     flagM2P.resize(cells.size());                               // Resize M2P periodic image flag
00381     flagP2P.resize(cells.size());                               // Resize P2P periodic image flag
00382     if( IMAGES == 0 ) {                                         // If free boundary condition
00383       Iperiodic = Icenter;                                      //  Set periodic image flag to center
00384       Xperiodic = 0;                                            //  Set periodic coordinate offset
00385       Pair pair(root,jroot);                                    //  Form pair of root cells
00386       traverseQueue(pair);                                      //  Traverse a pair of trees
00387     } else {                                                    // If periodic boundary condition
00388       int I = 0;                                                //  Initialize index of periodic image
00389       for( int ix=-1; ix<=1; ++ix ) {                           //  Loop over x periodic direction
00390         for( int iy=-1; iy<=1; ++iy ) {                         //   Loop over y periodic direction
00391           for( int iz=-1; iz<=1; ++iz, ++I ) {                  //    Loop over z periodic direction
00392             Iperiodic = 1 << I;                                 //     Set periodic image flag
00393             Xperiodic[0] = ix * 2 * R0;                         //     Coordinate offset for x periodic direction
00394             Xperiodic[1] = iy * 2 * R0;                         //     Coordinate offset for y periodic direction
00395             Xperiodic[2] = iz * 2 * R0;                         //     Coordinate offset for z periodic direction
00396             Pair pair(root,jroot);                              //     Form pair of root cells
00397             traverseQueue(pair);                                //     Traverse a pair of trees
00398           }                                                     //    End loop over z periodic direction
00399         }                                                       //   End loop over y periodic direction
00400       }                                                         //  End loop over x periodic direction
00401       for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) {   //  Loop over target cells
00402         listM2L[Ci-Ci0].sort();                                 //  Sort interaction list
00403         listM2L[Ci-Ci0].unique();                               //  Eliminate duplicate periodic entries
00404         listM2P[Ci-Ci0].sort();                                 //  Sort interaction list
00405         listM2P[Ci-Ci0].unique();                               //  Eliminate duplicate periodic entries
00406         listP2P[Ci-Ci0].sort();                                 //  Sort interaction list
00407         listP2P[Ci-Ci0].unique();                               //  Eliminate duplicate periodic entries
00408       }                                                         //  End loop over target cells
00409     }                                                           // Endif for periodic boundary condition
00410   }
00411 
00412 //! Traverse neighbor cells only (for cutoff based methods)
00413   void neighbor(Cells &cells, Cells &jcells) {
00414     C_iter root = cells.end() - 1;                              // Iterator for root target cell
00415     C_iter jroot = jcells.end() - 1;                            // Iterator for root source cell
00416     Ci0 = cells.begin();                                        // Set begin iterator for target cells
00417     Cj0 = jcells.begin();                                       // Set begin iterator for source cells
00418     listP2P.resize(cells.size());                               // Resize P2P interaction list
00419     flagP2P.resize(cells.size());                               // Resize P2P periodic image flag
00420     for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) {     // Loop over target cells
00421       if( Ci->NCHILD == 0 ) {                                   //  If cell is a twig
00422         int I = 0;                                              //   Initialize index of periodic image
00423         for( int ix=-1; ix<=1; ++ix ) {                         //   Loop over x periodic direction
00424           for( int iy=-1; iy<=1; ++iy ) {                       //    Loop over y periodic direction
00425             for( int iz=-1; iz<=1; ++iz, ++I ) {                //     Loop over z periodic direction
00426               Iperiodic = 1 << I;                               //      Set periodic image flag
00427               Xperiodic[0] = ix * 2 * R0;                       //      Coordinate offset for x periodic direction
00428               Xperiodic[1] = iy * 2 * R0;                       //      Coordinate offset for y periodic direction
00429               Xperiodic[2] = iz * 2 * R0;                       //      Coordinate offset for z periodic direction
00430               traverseStack(Ci,jroot);                          //      Traverse the source tree
00431             }                                                   //     End loop over z periodic direction
00432           }                                                     //    End loop over y periodic direction
00433         }                                                       //   End loop over x periodic direction
00434         for( B_iter B=Ci->LEAF; B!=Ci->LEAF+Ci->NDLEAF; ++B) {  //   Loop over all leafs in cell
00435           B->TRG[0] -= M_2_SQRTPI * B->SRC * ALPHA;             //    Self term of Ewald real part
00436         }                                                       //   End loop over all leafs in cell
00437       }                                                         //  End if for twig cells
00438       listP2P[Ci-Ci0].sort();                                   //  Sort interaction list
00439       listP2P[Ci-Ci0].unique();                                 //  Eliminate duplicate periodic entries
00440     }                                                           // End loop over target cells
00441   }
00442 
00443   void setSourceBody();                                         //!< Set source buffer for bodies (for GPU)
00444   void setSourceCell(bool isM);                                 //!< Set source buffer for cells (for GPU)
00445   void setTargetBody(Lists lists, Maps flags);                  //!< Set target buffer for bodies (for GPU)
00446   void setTargetCell(Lists lists, Maps flags);                  //!< Set target buffer for cells (for GPU)
00447   void getTargetBody(Lists &lists);                             //!< Get body values from target buffer (for GPU)
00448   void getTargetCell(Lists &lists, bool isM);                   //!< Get cell values from target buffer (for GPU)
00449   void clearBuffers();                                          //!< Clear GPU buffers
00450 
00451   void evalP2P(Bodies &ibodies, Bodies &jbodies, bool onCPU=false);//!< Evaluate all P2P kernels (all pairs)
00452   void evalP2M(Cells &cells);                                   //!< Evaluate all P2M kernels
00453   void evalM2M(Cells &cells, Cells &jcells);                    //!< Evaluate all M2M kernels
00454   void evalM2L(C_iter Ci, C_iter Cj);                           //!< Evaluate on CPU, queue on GPU
00455   void evalM2L(Cells &cells);                                   //!< Evaluate queued M2L kernels
00456   void evalM2P(C_iter Ci, C_iter Cj);                           //!< Evaluate on CPU, queue on GPU
00457   void evalM2P(Cells &cells);                                   //!< Evaluate queued M2P kernels
00458   void evalP2P(C_iter Ci, C_iter Cj);                           //!< Evaluate on CPU, queue on GPU
00459   void evalP2P(Cells &cells);                                   //!< Evaluate queued P2P kernels (near field)
00460   void evalL2L(Cells &cells);                                   //!< Evaluate all L2L kernels
00461   void evalL2P(Cells &cells);                                   //!< Evaluate all L2P kernels
00462   void evalEwaldReal(C_iter Ci, C_iter Cj);                     //!< Evaluate on CPU, queue on GPU
00463   void evalEwaldReal(Cells &cells);                             //!< Evaluate queued Ewald real kernels
00464 };
00465 
00466 #if cpu
00467 #include "../kernel/cpuEvaluator.cxx"
00468 #elif gpu
00469 #include "../kernel/gpuEvaluator.cxx"
00470 #endif
00471 
00472 #undef splitFirst
00473 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines