ExaFMM 1
Fast-multipole Method for exascale systems
|
00001 /* 00002 Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba 00003 00004 Permission is hereby granted, free of charge, to any person obtaining a copy 00005 of this software and associated documentation files (the "Software"), to deal 00006 in the Software without restriction, including without limitation the rights 00007 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00008 copies of the Software, and to permit persons to whom the Software is 00009 furnished to do so, subject to the following conditions: 00010 00011 The above copyright notice and this permission notice shall be included in 00012 all copies or substantial portions of the Software. 00013 00014 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00015 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00016 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00017 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00018 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00019 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00020 THE SOFTWARE. 00021 */ 00022 #ifndef evaluator_h 00023 #define evaluator_h 00024 #include "dataset.h" 00025 #define splitFirst(Ci,Cj) Cj->NCHILD == 0 || (Ci->NCHILD != 0 && Ci->R > Cj->R) 00026 00027 //! Interface between tree and kernel 00028 template<Equation equation> 00029 class Evaluator : public Dataset<equation> { 00030 protected: 00031 C_iter CiB; //!< icells begin per call 00032 C_iter CiE; //!< icells end per call 00033 C_iter CjB; //!< jcells begin per call 00034 C_iter CjE; //!< jcells end per call 00035 Lists listM2L; //!< M2L interaction list 00036 Lists listM2P; //!< M2P interaction list 00037 Lists listP2P; //!< P2P interaction list 00038 real timeM2L; //!< M2L execution time 00039 real timeM2P; //!< M2P execution time 00040 real timeP2P; //!< P2P execution time 00041 00042 int Iperiodic; //!< Periodic image flag (using each bit for images) 00043 const int Icenter; //!< Periodic image flag at center 00044 Maps flagM2L; //!< Existance of periodic image for M2L 00045 Maps flagM2P; //!< Existance of periodic image for M2P 00046 Maps flagP2P; //!< Existance of periodic image for P2P 00047 00048 public: 00049 using Kernel<equation>::printNow; //!< Switch to print timings 00050 using Kernel<equation>::startTimer; //!< Start timer for given event 00051 using Kernel<equation>::stopTimer; //!< Stop timer for given event 00052 using Kernel<equation>::writeTrace; //!< Write traces of all events 00053 using Kernel<equation>::R0; //!< Radius of root cell 00054 using Kernel<equation>::Ci0; //!< icells.begin() 00055 using Kernel<equation>::Cj0; //!< jcells.begin() 00056 using Kernel<equation>::ALPHA; //!< Scaling parameter for Ewald summation 00057 using Kernel<equation>::keysHost; //!< Offsets for rangeHost 00058 using Kernel<equation>::rangeHost; //!< Offsets for sourceHost 00059 using Kernel<equation>::constHost; //!< Constants on host 00060 using Kernel<equation>::sourceHost; //!< Sources on host 00061 using Kernel<equation>::targetHost; //!< Targets on host 00062 using Kernel<equation>::sourceBegin; //!< Define map for offset of source cells 00063 using Kernel<equation>::sourceSize; //!< Define map for size of source cells 00064 using Kernel<equation>::targetBegin; //!< Define map for offset of target cells 00065 using Kernel<equation>::NP2P; //!< Number of P2P kernel calls 00066 using Kernel<equation>::NM2P; //!< Number of M2P kernel calls 00067 using Kernel<equation>::NM2L; //!< Number of M2L kernel calls 00068 using Kernel<equation>::allocate; //!< Allocate GPU kernels 00069 using Kernel<equation>::hostToDevice; //!< Copy from host to device 00070 using Kernel<equation>::deviceToHost; //!< Copy from device to host 00071 using Kernel<equation>::P2M; //!< Evaluate P2M kernel 00072 using Kernel<equation>::M2M; //!< Evaluate M2M kernel 00073 using Kernel<equation>::M2L; //!< Evaluate M2L kernel 00074 using Kernel<equation>::M2P; //!< Evaluate M2P kernel 00075 using Kernel<equation>::P2P; //!< Evaluate P2P kernel 00076 using Kernel<equation>::L2L; //!< Evaluate L2L kernel 00077 using Kernel<equation>::L2P; //!< Evaluate L2P kernel 00078 using Kernel<equation>::EwaldReal; //!< Evaluate Ewald real part 00079 using Dataset<equation>::initSource; //!< Initialize source values 00080 using Dataset<equation>::initTarget; //!< Initialize target values 00081 00082 private: 00083 //! Approximate interaction between two cells 00084 inline void approximate(C_iter Ci, C_iter Cj) { 00085 #if HYBRID 00086 if( timeP2P*Cj->NDLEAF < timeM2P && timeP2P*Ci->NDLEAF*Cj->NDLEAF < timeM2L) {// If P2P is fastest 00087 evalP2P(Ci,Cj); // Evaluate on CPU, queue on GPU 00088 } else if ( timeM2P < timeP2P*Cj->NDLEAF && timeM2P*Ci->NDLEAF < timeM2L ) {// If M2P is fastest 00089 evalM2P(Ci,Cj); // Evaluate on CPU, queue on GPU 00090 } else { // If M2L is fastest 00091 evalM2L(Ci,Cj); // Evaluate on CPU, queue on GPU 00092 } // End if for kernel selection 00093 #elif TREECODE 00094 evalM2P(Ci,Cj); // Evaluate on CPU, queue on GPU 00095 #else 00096 evalM2L(Ci,Cj); // Evalaute on CPU, queue on GPU 00097 #endif 00098 } 00099 00100 #if QUARK 00101 inline void interact(C_iter Ci, C_iter Cj, Quark *quark); //!< interact() function using QUARK 00102 #endif 00103 00104 //! Traverse a single tree using a stack 00105 void traverseStack(C_iter Ci, C_iter C) { 00106 CellStack cellStack; // Stack of cells 00107 cellStack.push(C); // Push pair to queue 00108 while( !cellStack.empty() ) { // While traversal stack is not empty 00109 C = cellStack.top(); // Get cell from top of stack 00110 cellStack.pop(); // Pop traversal stack 00111 for( C_iter Cj=Cj0+C->CHILD; Cj!=Cj0+C->CHILD+C->NCHILD; ++Cj ) {// Loop over cell's children 00112 vect dX = Ci->X - Cj->X - Xperiodic; // Distance vector from source to target 00113 real Rq = std::sqrt(norm(dX)); // Scalar distance 00114 if( Rq * THETA < Ci->R + Cj->R && Cj->NCHILD == 0 ) { // If twigs are close 00115 evalEwaldReal(Ci,Cj); // Ewald real part 00116 } else if( Cj->NCHILD != 0 ) { // If cells are not twigs 00117 cellStack.push(Cj); // Push source cell to stack 00118 } // End if for twig cells 00119 } // End loop over cell's children 00120 } // End while loop for traversal stack 00121 } 00122 00123 //! Traverse a pair of trees using a queue 00124 void traverseQueue(Pair pair) { 00125 PairQueue pairQueue; // Queue of interacting cell pairs 00126 #if QUARK 00127 Quark *quark = QUARK_New(4); // Initialize QUARK object 00128 #endif 00129 pairQueue.push(pair); // Push pair to queue 00130 while( !pairQueue.empty() ) { // While dual traversal queue is not empty 00131 pair = pairQueue.front(); // Get interaction pair from front of queue 00132 pairQueue.pop(); // Pop dual traversal queue 00133 if(splitFirst(pair.first,pair.second)) { // If first cell is larger 00134 C_iter C = pair.first; // Split the first cell 00135 for( C_iter Ci=Ci0+C->CHILD; Ci!=Ci0+C->CHILD+C->NCHILD; ++Ci ) {// Loop over first cell's children 00136 interact(Ci,pair.second,pairQueue); // Calculate interaction between cells 00137 } // End loop over fist cell's children 00138 } else { // Else if second cell is larger 00139 C_iter C = pair.second; // Split the second cell 00140 for( C_iter Cj=Cj0+C->CHILD; Cj!=Cj0+C->CHILD+C->NCHILD; ++Cj ) {// Loop over second cell's children 00141 interact(pair.first,Cj,pairQueue); // Calculate interaction betwen cells 00142 } // End loop over second cell's children 00143 } // End if for which cell to split 00144 #if QUARK 00145 if( pairQueue.size() > 100 ) { // When queue size reaches threshold 00146 while( !pairQueue.empty() ) { // While dual traversal queue is not empty 00147 pair = pairQueue.front(); // Get interaction pair from front of queue 00148 pairQueue.pop(); // Pop dual traversal queue 00149 interact(pair.first,pair.second,quark); // Schedule interact() task on QUARK 00150 } // End while loop for dual traversal queue 00151 } // End if for queue size 00152 #endif 00153 } // End while loop for dual traversal queue 00154 #if QUARK 00155 QUARK_Delete(quark); // Delete QUARK object 00156 writeTrace(); // Write event trace to file 00157 #endif 00158 } 00159 00160 //! Get range of periodic images 00161 int getPeriodicRange() { 00162 int prange = 0; // Range of periodic images 00163 for( int i=0; i!=IMAGES; ++i ) { // Loop over periodic image sublevels 00164 prange += int(pow(3,i)); // Accumulate range of periodic images 00165 } // End loop over perioidc image sublevels 00166 return prange; // Return range of periodic images 00167 } 00168 00169 protected: 00170 //! Get level from cell index 00171 int getLevel(bigint index) { 00172 int i = index; // Copy to dummy index 00173 int level = -1; // Initialize level counter 00174 while( i >= 0 ) { // While cell index is non-negative 00175 level++; // Increment level 00176 i -= 1 << 3*level; // Subtract number of cells in that level 00177 } // End while loop for cell index 00178 return level; // Return the level 00179 } 00180 00181 void timeKernels(); //!< Time all kernels for auto-tuning 00182 00183 //! Upward phase for periodic cells 00184 void upwardPeriodic(Cells &jcells) { 00185 Cells pccells, pjcells; // Periodic center cell and jcell 00186 pccells.push_back(jcells.back()); // Root cell is first periodic cell 00187 for( int level=0; level<IMAGES-1; ++level ) { // Loop over sublevels of tree 00188 Cell cell; // New periodic cell at next sublevel 00189 C_iter C = pccells.end() - 1; // Set previous periodic center cell as source 00190 pccells.pop_back(); // Pop periodic center cell from vector 00191 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00192 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00193 for( int iz=-1; iz<=1; ++iz ) { // Loop over z periodic direction 00194 if( ix != 0 || iy != 0 || iz != 0 ) { // If periodic cell is not at center 00195 for( int cx=-1; cx<=1; ++cx ) { // Loop over x periodic direction (child) 00196 for( int cy=-1; cy<=1; ++cy ) { // Loop over y periodic direction (child) 00197 for( int cz=-1; cz<=1; ++cz ) { // Loop over z periodic direction (child) 00198 cell.X[0] = C->X[0] + (ix * 6 + cx * 2) * C->R;// Set new x coordinate for periodic image 00199 cell.X[1] = C->X[1] + (iy * 6 + cy * 2) * C->R;// Set new y cooridnate for periodic image 00200 cell.X[2] = C->X[2] + (iz * 6 + cz * 2) * C->R;// Set new z coordinate for periodic image 00201 cell.M = C->M; // Copy multipoles to new periodic image 00202 cell.NDLEAF = cell.NCHILD = 0; // Initialize NDLEAF & NCHILD 00203 jcells.push_back(cell); // Push cell into periodic jcell vector 00204 } // End loop over z periodic direction (child) 00205 } // End loop over y periodic direction (child) 00206 } // End loop over x periodic direction (child) 00207 } // Endif for periodic center cell 00208 } // End loop over z periodic direction 00209 } // End loop over y periodic direction 00210 } // End loop over x periodic direction 00211 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00212 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00213 for( int iz=-1; iz<=1; ++iz ) { // Loop over z periodic direction 00214 cell.X[0] = C->X[0] + ix * 2 * C->R; // Set new x coordinate for periodic image 00215 cell.X[1] = C->X[1] + iy * 2 * C->R; // Set new y cooridnate for periodic image 00216 cell.X[2] = C->X[2] + iz * 2 * C->R; // Set new z coordinate for periodic image 00217 cell.M = C->M; // Copy multipoles to new periodic image 00218 pjcells.push_back(cell); // Push cell into periodic jcell vector 00219 } // End loop over z periodic direction 00220 } // End loop over y periodic direction 00221 } // End loop over x periodic direction 00222 cell.X = C->X; // This is the center cell 00223 cell.R = 3 * C->R; // The cell size increases three times 00224 pccells.push_back(cell); // Push cell into periodic cell vector 00225 C_iter Ci = pccells.end() - 1; // Set current cell as target for M2M 00226 Ci->CHILD = 0; // Set child cells for periodic M2M 00227 Ci->NCHILD = 27; // Set number of child cells for periodic M2M 00228 evalM2M(pccells,pjcells); // Evaluate periodic M2M kernels for this sublevel 00229 pjcells.clear(); // Clear periodic jcell vector 00230 } // End loop over sublevels of tree 00231 } 00232 00233 //! Traverse tree for periodic cells 00234 void traversePeriodic(Cells &cells, Cells &jcells) { 00235 Xperiodic = 0; // Set periodic coordinate offset 00236 Iperiodic = Icenter; // Set periodic flag to center 00237 C_iter Cj = jcells.end()-1; // Initialize iterator for periodic source cell 00238 for( int level=0; level<IMAGES-1; ++level ) { // Loop over sublevels of tree 00239 for( int I=0; I!=26*27; ++I, --Cj ) { // Loop over periodic images (exclude center) 00240 #if TREECODE 00241 for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) { // Loop over cells 00242 if( Ci->NCHILD == 0 ) { // If cell is twig 00243 evalM2P(Ci,Cj); // Perform M2P kernel 00244 } // Endif for twig 00245 } // End loop over cells 00246 #else 00247 C_iter Ci = cells.end() - 1; // Set root cell as target iterator 00248 evalM2L(Ci,Cj); // Perform M2P kernel 00249 #endif 00250 } // End loop over x periodic direction 00251 } // End loop over sublevels of tree 00252 } 00253 00254 public: 00255 //! Constructor 00256 Evaluator() : Icenter(1 << 13) {} 00257 //! Destructor 00258 ~Evaluator() {} 00259 00260 //! Random distribution in [-1,1]^3 cube 00261 void random(Bodies &bodies, int seed=1, int numSplit=1) { 00262 srand(seed); // Set seed for random number generator 00263 for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) { // Loop over bodies 00264 if( numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit) ) {// Mimic parallel dataset 00265 seed++; // Mimic seed at next rank 00266 srand(seed); // Set seed for random number generator 00267 } // Endif for mimicing parallel dataset 00268 for( int d=0; d!=3; ++d ) { // Loop over dimension 00269 B->X[d] = drand48() * 2 * M_PI - M_PI; // Initialize positions 00270 } // End loop over dimension 00271 } // End loop over bodies 00272 initSource(bodies); // Initialize source values 00273 initTarget(bodies); // Initialize target values 00274 } 00275 00276 //! Random distribution on r = 1 sphere 00277 void sphere(Bodies &bodies, int seed=1, int numSplit=1) { 00278 srand(seed); // Set seed for random number generator 00279 for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) { // Loop over bodies 00280 if( numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit) ) {// Mimic parallel dataset 00281 seed++; // Mimic seed at next rank 00282 srand(seed); // Set seed for random number generator 00283 } // Endif for mimicing parallel dataset 00284 for( int d=0; d!=3; ++d ) { // Loop over dimension 00285 B->X[d] = drand48() * 2 - 1; // Initialize positions 00286 } // End loop over dimension 00287 real r = std::sqrt(norm(B->X)); // Distance from center 00288 for( int d=0; d!=3; ++d ) { // Loop over dimension 00289 B->X[d] /= r * 1.1; // Normalize positions 00290 } // End loop over dimension 00291 } // End loop over bodies 00292 initSource(bodies); // Initialize source values 00293 initTarget(bodies); // Initialize target values 00294 } 00295 00296 //! Uniform distribution on [-1,1]^3 lattice (for debugging) 00297 void lattice(Bodies &bodies) { 00298 int level = int(log(bodies.size()*MPISIZE+1.)/M_LN2/3); // Level of tree 00299 for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) { // Loop over bodies 00300 int d = 0, l = 0; // Initialize dimension and level 00301 int index = MPIRANK * bodies.size() + (B-bodies.begin()); // Set index of body iterator 00302 vec<3,int> nx = 0; // Initialize 3-D cell index 00303 while( index != 0 ) { // Deinterleave bits while index is nonzero 00304 nx[d] += (index % 2) * (1 << l); // Add deinterleaved bit to 3-D cell index 00305 index >>= 1; // Right shift the bits 00306 d = (d+1) % 3; // Increment dimension 00307 if( d == 0 ) l++; // If dimension is 0 again, increment level 00308 } // End while loop for deinterleaving bits 00309 for( d=0; d!=3; ++d ) { // Loop over dimensions 00310 B->X[d] = -1 + (2 * nx[d] + 1.) / (1 << level); // Calculate cell center from 3-D cell index 00311 } // End loop over dimensions 00312 } // End loop over bodies 00313 initSource(bodies); // Initialize source values 00314 initTarget(bodies); // Initialize target values 00315 } 00316 00317 //! Add single list for kernel unit test 00318 void addM2L(C_iter Cj) { 00319 listM2L.resize(1); // Resize vector of M2L interation lists 00320 flagM2L.resize(1); // Resize vector of M2L periodic image flags 00321 listM2L[0].push_back(Cj); // Push single cell into list 00322 flagM2L[0][Cj] |= Icenter; // Flip bit of periodic image flag 00323 } 00324 00325 //! Add single list for kernel unit test 00326 void addM2P(C_iter Cj) { 00327 listM2P.resize(1); // Resize vector of M2P interation lists 00328 flagM2P.resize(1); // Resize vector of M2L periodic image flags 00329 listM2P[0].push_back(Cj); // Push single cell into list 00330 flagM2P[0][Cj] |= Icenter; // Flip bit of periodic image flag 00331 } 00332 00333 //! Create periodic images of bodies 00334 Bodies periodicBodies(Bodies &bodies) { 00335 Bodies jbodies; // Vector for periodic images of bodies 00336 int prange = getPeriodicRange(); // Get range of periodic images 00337 for( int ix=-prange; ix<=prange; ++ix ) { // Loop over x periodic direction 00338 for( int iy=-prange; iy<=prange; ++iy ) { // Loop over y periodic direction 00339 for( int iz=-prange; iz<=prange; ++iz ) { // Loop over z periodic direction 00340 for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) {// Loop over bodies 00341 Body body = *B; // Copy current body 00342 body.X[0] += ix * 2 * R0; // Shift x position 00343 body.X[1] += iy * 2 * R0; // Shift y position 00344 body.X[2] += iz * 2 * R0; // Shift z position 00345 jbodies.push_back(body); // Push shifted body into jbodies 00346 } // End loop over bodies 00347 } // End loop over z periodic direction 00348 } // End loop over y periodic direction 00349 } // End loop over x periodic direction 00350 return jbodies; // Return vector for periodic images of bodies 00351 } 00352 00353 //! Use multipole acceptance criteria to determine whether to approximate, do P2P, or subdivide 00354 void interact(C_iter Ci, C_iter Cj, PairQueue &pairQueue) { 00355 vect dX = Ci->X - Cj->X - Xperiodic; // Distance vector from source to target 00356 real Rq = std::sqrt(norm(dX)); // Scalar distance 00357 if( Rq * THETA > Ci->R + Cj->R ) { // If distance if far enough 00358 approximate(Ci,Cj); // Use approximate kernels, e.g. M2L, M2P 00359 } else if(Ci->NCHILD == 0 && Cj->NCHILD == 0) { // Else if both cells are leafs 00360 evalP2P(Ci,Cj); // Use P2P 00361 } else { // If cells are close but not leafs 00362 Pair pair(Ci,Cj); // Form a pair of cell iterators 00363 pairQueue.push(pair); // Push pair to queue 00364 } // End if for multipole acceptance 00365 } 00366 00367 //! Dual tree traversal 00368 void traverse(Cells &cells, Cells &jcells) { 00369 C_iter root = cells.end() - 1; // Iterator for root target cell 00370 C_iter jroot = jcells.end() - 1; // Iterator for root source cell 00371 if( IMAGES != 0 ) { // If periodic boundary condition 00372 jroot = jcells.end() - 1 - 26 * 27 * (IMAGES - 1); // The root is not at the end 00373 } // Endif for periodic boundary condition 00374 Ci0 = cells.begin(); // Set begin iterator for target cells 00375 Cj0 = jcells.begin(); // Set begin iterator for source cells 00376 listM2L.resize(cells.size()); // Resize M2L interaction list 00377 listM2P.resize(cells.size()); // Resize M2P interaction list 00378 listP2P.resize(cells.size()); // Resize P2P interaction list 00379 flagM2L.resize(cells.size()); // Resize M2L periodic image flag 00380 flagM2P.resize(cells.size()); // Resize M2P periodic image flag 00381 flagP2P.resize(cells.size()); // Resize P2P periodic image flag 00382 if( IMAGES == 0 ) { // If free boundary condition 00383 Iperiodic = Icenter; // Set periodic image flag to center 00384 Xperiodic = 0; // Set periodic coordinate offset 00385 Pair pair(root,jroot); // Form pair of root cells 00386 traverseQueue(pair); // Traverse a pair of trees 00387 } else { // If periodic boundary condition 00388 int I = 0; // Initialize index of periodic image 00389 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00390 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00391 for( int iz=-1; iz<=1; ++iz, ++I ) { // Loop over z periodic direction 00392 Iperiodic = 1 << I; // Set periodic image flag 00393 Xperiodic[0] = ix * 2 * R0; // Coordinate offset for x periodic direction 00394 Xperiodic[1] = iy * 2 * R0; // Coordinate offset for y periodic direction 00395 Xperiodic[2] = iz * 2 * R0; // Coordinate offset for z periodic direction 00396 Pair pair(root,jroot); // Form pair of root cells 00397 traverseQueue(pair); // Traverse a pair of trees 00398 } // End loop over z periodic direction 00399 } // End loop over y periodic direction 00400 } // End loop over x periodic direction 00401 for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) { // Loop over target cells 00402 listM2L[Ci-Ci0].sort(); // Sort interaction list 00403 listM2L[Ci-Ci0].unique(); // Eliminate duplicate periodic entries 00404 listM2P[Ci-Ci0].sort(); // Sort interaction list 00405 listM2P[Ci-Ci0].unique(); // Eliminate duplicate periodic entries 00406 listP2P[Ci-Ci0].sort(); // Sort interaction list 00407 listP2P[Ci-Ci0].unique(); // Eliminate duplicate periodic entries 00408 } // End loop over target cells 00409 } // Endif for periodic boundary condition 00410 } 00411 00412 //! Traverse neighbor cells only (for cutoff based methods) 00413 void neighbor(Cells &cells, Cells &jcells) { 00414 C_iter root = cells.end() - 1; // Iterator for root target cell 00415 C_iter jroot = jcells.end() - 1; // Iterator for root source cell 00416 Ci0 = cells.begin(); // Set begin iterator for target cells 00417 Cj0 = jcells.begin(); // Set begin iterator for source cells 00418 listP2P.resize(cells.size()); // Resize P2P interaction list 00419 flagP2P.resize(cells.size()); // Resize P2P periodic image flag 00420 for( C_iter Ci=cells.begin(); Ci!=cells.end(); ++Ci ) { // Loop over target cells 00421 if( Ci->NCHILD == 0 ) { // If cell is a twig 00422 int I = 0; // Initialize index of periodic image 00423 for( int ix=-1; ix<=1; ++ix ) { // Loop over x periodic direction 00424 for( int iy=-1; iy<=1; ++iy ) { // Loop over y periodic direction 00425 for( int iz=-1; iz<=1; ++iz, ++I ) { // Loop over z periodic direction 00426 Iperiodic = 1 << I; // Set periodic image flag 00427 Xperiodic[0] = ix * 2 * R0; // Coordinate offset for x periodic direction 00428 Xperiodic[1] = iy * 2 * R0; // Coordinate offset for y periodic direction 00429 Xperiodic[2] = iz * 2 * R0; // Coordinate offset for z periodic direction 00430 traverseStack(Ci,jroot); // Traverse the source tree 00431 } // End loop over z periodic direction 00432 } // End loop over y periodic direction 00433 } // End loop over x periodic direction 00434 for( B_iter B=Ci->LEAF; B!=Ci->LEAF+Ci->NDLEAF; ++B) { // Loop over all leafs in cell 00435 B->TRG[0] -= M_2_SQRTPI * B->SRC * ALPHA; // Self term of Ewald real part 00436 } // End loop over all leafs in cell 00437 } // End if for twig cells 00438 listP2P[Ci-Ci0].sort(); // Sort interaction list 00439 listP2P[Ci-Ci0].unique(); // Eliminate duplicate periodic entries 00440 } // End loop over target cells 00441 } 00442 00443 void setSourceBody(); //!< Set source buffer for bodies (for GPU) 00444 void setSourceCell(bool isM); //!< Set source buffer for cells (for GPU) 00445 void setTargetBody(Lists lists, Maps flags); //!< Set target buffer for bodies (for GPU) 00446 void setTargetCell(Lists lists, Maps flags); //!< Set target buffer for cells (for GPU) 00447 void getTargetBody(Lists &lists); //!< Get body values from target buffer (for GPU) 00448 void getTargetCell(Lists &lists, bool isM); //!< Get cell values from target buffer (for GPU) 00449 void clearBuffers(); //!< Clear GPU buffers 00450 00451 void evalP2P(Bodies &ibodies, Bodies &jbodies, bool onCPU=false);//!< Evaluate all P2P kernels (all pairs) 00452 void evalP2M(Cells &cells); //!< Evaluate all P2M kernels 00453 void evalM2M(Cells &cells, Cells &jcells); //!< Evaluate all M2M kernels 00454 void evalM2L(C_iter Ci, C_iter Cj); //!< Evaluate on CPU, queue on GPU 00455 void evalM2L(Cells &cells); //!< Evaluate queued M2L kernels 00456 void evalM2P(C_iter Ci, C_iter Cj); //!< Evaluate on CPU, queue on GPU 00457 void evalM2P(Cells &cells); //!< Evaluate queued M2P kernels 00458 void evalP2P(C_iter Ci, C_iter Cj); //!< Evaluate on CPU, queue on GPU 00459 void evalP2P(Cells &cells); //!< Evaluate queued P2P kernels (near field) 00460 void evalL2L(Cells &cells); //!< Evaluate all L2L kernels 00461 void evalL2P(Cells &cells); //!< Evaluate all L2P kernels 00462 void evalEwaldReal(C_iter Ci, C_iter Cj); //!< Evaluate on CPU, queue on GPU 00463 void evalEwaldReal(Cells &cells); //!< Evaluate queued Ewald real kernels 00464 }; 00465 00466 #if cpu 00467 #include "../kernel/cpuEvaluator.cxx" 00468 #elif gpu 00469 #include "../kernel/gpuEvaluator.cxx" 00470 #endif 00471 00472 #undef splitFirst 00473 #endif