ExaFMM 1
Fast-multipole Method for exascale systems
|
00001 /* 00002 Copyright (C) 2011 by Rio Yokota, Simon Layton, Lorena Barba 00003 00004 Permission is hereby granted, free of charge, to any person obtaining a copy 00005 of this software and associated documentation files (the "Software"), to deal 00006 in the Software without restriction, including without limitation the rights 00007 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00008 copies of the Software, and to permit persons to whom the Software is 00009 furnished to do so, subject to the following conditions: 00010 00011 The above copyright notice and this permission notice shall be included in 00012 all copies or substantial portions of the Software. 00013 00014 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00015 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00016 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00017 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00018 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00019 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00020 THE SOFTWARE. 00021 */ 00022 #ifndef tree_h 00023 #define tree_h 00024 #include "evaluator.h" 00025 00026 //! Base class for tree structure 00027 template<Equation equation> 00028 class TreeStructure : public Evaluator<equation> { 00029 public: 00030 Bodies buffer; //!< Buffer for MPI communication & sorting 00031 00032 using Kernel<equation>::printNow; //!< Switch to print timings 00033 using Kernel<equation>::startTimer; //!< Start timer for given event 00034 using Kernel<equation>::stopTimer; //!< Stop timer for given event 00035 using Kernel<equation>::sortCells; //!< Sort cells according to cell index 00036 using Kernel<equation>::X0; //!< Center of root cell 00037 using Kernel<equation>::R0; //!< Radius of root cell 00038 using Kernel<equation>::NP2P; //!< Number of P2P kernel calls 00039 using Kernel<equation>::NM2P; //!< Number of M2P kernel calls 00040 using Kernel<equation>::NM2L; //!< Number of M2L kernel calls 00041 using Evaluator<equation>::getLevel; //!< Get level from cell index 00042 using Evaluator<equation>::timeKernels; //!< Time all kernels for auto-tuning 00043 using Evaluator<equation>::upwardPeriodic; //!< Upward phase for periodic cells 00044 using Evaluator<equation>::traverse; //!< Traverse tree to get interaction list 00045 using Evaluator<equation>::traversePeriodic; //!< Traverse tree for periodic images 00046 using Evaluator<equation>::neighbor; //!< Traverse source tree to get neighbor list 00047 using Evaluator<equation>::evalP2M; //!< Evaluate P2M kernel 00048 using Evaluator<equation>::evalM2M; //!< Evaluate M2M kernel 00049 using Evaluator<equation>::evalM2L; //!< Evaluate M2L kernel 00050 using Evaluator<equation>::evalM2P; //!< Evaluate M2P kernel 00051 using Evaluator<equation>::evalP2P; //!< Evaluate P2P kernel 00052 using Evaluator<equation>::evalL2L; //!< Evaluate L2L kernel 00053 using Evaluator<equation>::evalL2P; //!< Evaluate L2P kernel 00054 using Evaluator<equation>::evalEwaldReal; //!< Evaluate Ewald real part 00055 using Evaluator<equation>::EwaldWave; //!< Evalaute Ewald wave part 00056 00057 private: 00058 //! Get parent cell index from current cell index 00059 bigint getParent(bigint index) { 00060 int level = getLevel(index); // Get level from cell index 00061 bigint cOff = ((1 << 3 * level ) - 1) / 7; // Cell index offset of current level 00062 bigint pOff = ((1 << 3 * (level-1)) - 1) / 7; // Cell index offset of parent level 00063 bigint i = ((index-cOff) >> 3) + pOff; // Cell index of parent cell 00064 return i; // Return cell index of parent cell 00065 } 00066 00067 //! Merge sticks with cells (levelwise) 00068 void unique(Cells &cells, Cells &sticks, int begin, int &end) { 00069 int c_old = begin; // Initialize old cell counter 00070 for( int c=begin; c!=end; ++c ) { // Loop over cells in level 00071 if( cells[c].ICELL != cells[c_old].ICELL ) { // If current cell index is different from previous 00072 c_old = c; // Update old cell counter 00073 } else if( c != c_old ) { // If cell index is repeated 00074 if( cells[c].NCHILD != 0 ) { // Stick-cell collision 00075 cells[c_old].NCHILD = cells[c].NCHILD; // Copy number of children 00076 cells[c_old].NDLEAF = cells[c].NDLEAF; // Copy number of leafs 00077 cells[c_old].PARENT = cells[c].PARENT; // Copy parent link 00078 cells[c_old].CHILD = cells[c].CHILD; // Copy child link 00079 cells[c_old].LEAF = cells[c].LEAF; // Copy iterator of first leaf 00080 sticks.push_back(cells[c_old]); // Push stick into vector 00081 } // Endif for collision type 00082 cells[c_old].M += cells[c].M; // Accumulate multipole 00083 cells.erase(cells.begin()+c); // Erase colliding cell 00084 c--; // Decrement counter to account for erase 00085 end--; // Decrement end to account for erase 00086 } // Endif for repeated cell index 00087 } // End loop over cells in level 00088 } 00089 00090 //! Form parent-child mutual link 00091 void linkParent(Cells &cells, int &begin, int &end) { 00092 Cell parent; // Parent cell 00093 Cells parents; // Parent cell vector; 00094 int oldend = end; // Save old end counter 00095 parent.ICELL = getParent(cells[begin].ICELL); // Set cell index 00096 parent.M = 0; // Initialize multipole coefficients 00097 parent.L = 0; // Initlalize local coefficients 00098 parent.NDLEAF = parent.NCHILD = 0; // Initialize NDLEAF & NCHILD 00099 parent.LEAF = cells[begin].LEAF; // Set pointer to first leaf 00100 parent.CHILD = begin; // Link to child 00101 getCenter(parent); // Set cell center and radius 00102 for( int i=begin; i!=oldend; ++i ) { // Loop over cells at this level 00103 if( getParent(cells[i].ICELL) != parent.ICELL ) { // If it belongs to a new parent cell 00104 cells.push_back(parent); // Push cells into vector 00105 end++; // Increment cell counter 00106 parent.ICELL = getParent(cells[i].ICELL); // Set cell index 00107 parent.M = 0; // Initialize multipole coefficients 00108 parent.L = 0; // Initialize local coefficients 00109 parent.NDLEAF = parent.NCHILD = 0; // Initialize NDLEAF & NCHILD 00110 parent.LEAF = cells[i].LEAF; // Set pointer to first leaf 00111 parent.CHILD = i; // Link to child 00112 getCenter(parent); // Set cell center and radius 00113 } // Endif for new parent cell 00114 for( int c=0; c!=cells[i].NCHILD; ++c ) { // Loop over child cells 00115 (cells.begin()+cells[i].CHILD+c)->PARENT = i; // Link child to current 00116 } // End loop over child cells 00117 cells[i].PARENT = end; // Link to current to parent 00118 parent.NDLEAF += cells[i].NDLEAF; // Add nleaf of child to parent 00119 parents.push_back(parent); // Push parent cell into vector 00120 parent = parents.back(); // Copy information from vector 00121 parents.pop_back(); // Pop parent cell from vector 00122 parent.NCHILD++; // Increment child counter 00123 } // End loop over cells at this level 00124 cells.push_back(parent); // Push cells into vector 00125 end++; // Increment cell counter 00126 begin = oldend; // Set new begin index to old end index 00127 } 00128 00129 protected: 00130 //! Get cell center and radius from cell index 00131 void getCenter(Cell &cell) { 00132 int level = getLevel(cell.ICELL); // Get level from cell index 00133 bigint index = cell.ICELL - ((1 << 3*level) - 1) / 7; // Subtract cell index offset of current level 00134 cell.R = R0 / (1 << level); // Cell radius 00135 int d = level = 0; // Initialize dimension and level 00136 vec<3,int> nx = 0; // Initialize 3-D cell index 00137 while( index != 0 ) { // Deinterleave bits while index is nonzero 00138 nx[d] += (index % 2) * (1 << level); // Add deinterleaved bit to 3-D cell index 00139 index >>= 1; // Right shift the bits 00140 d = (d+1) % 3; // Increment dimension 00141 if( d == 0 ) level++; // If dimension is 0 again, increment level 00142 } // End while loop for deinterleaving bits 00143 for( d=0; d!=3; ++d ) { // Loop over dimensions 00144 cell.X[d] = (X0[d]-R0) + (2 *nx[d] + 1) * cell.R; // Calculate cell center from 3-D cell index 00145 } // End loop over dimensions 00146 } 00147 00148 //! Group bodies into twig cells 00149 void bodies2twigs(Bodies &bodies, Cells &twigs) { 00150 startTimer("Bodies2twigs "); // Start timer 00151 int nleaf = 0; // Initialize number of leafs 00152 bigint index = bodies[0].ICELL; // Initialize cell index 00153 B_iter firstLeaf = bodies.begin(); // Initialize body iterator for first leaf 00154 Cell cell; // Cell structure 00155 for( B_iter B=bodies.begin(); B!=bodies.end(); ++B ) { // Loop over bodies 00156 if( B->ICELL != index ) { // If it belongs to a new cell 00157 cell.NDLEAF = nleaf; // Set number of leafs 00158 cell.NCHILD = 0; // Set number of child cells 00159 cell.ICELL = index; // Set cell index 00160 cell.LEAF = firstLeaf; // Set pointer to first leaf 00161 getCenter(cell); // Set cell center and radius 00162 twigs.push_back(cell); // Push cells into vector 00163 firstLeaf = B; // Set new first leaf 00164 nleaf = 0; // Reset number of bodies 00165 index = B->ICELL; // Set new cell 00166 } // Endif for new cell 00167 nleaf++; // Increment body counter 00168 } // End loop over bodies 00169 cell.NDLEAF = nleaf; // Set number of leafs 00170 cell.NCHILD = 0; // Set number of child cells 00171 cell.ICELL = index; // Set cell index 00172 cell.LEAF = firstLeaf; // Set pointer to first leaf 00173 getCenter(cell); // Set cell center and radius 00174 twigs.push_back(cell); // Push cells into vector 00175 stopTimer("Bodies2twigs ",printNow); // Stop timer & print 00176 evalP2M(twigs); // Evaluate all P2M kernels 00177 } 00178 00179 //! Link twigs bottomup to create all cells in tree 00180 void twigs2cells(Cells &twigs, Cells &cells, Cells &sticks) { 00181 int begin = 0, end = 0; // Initialize range of cell vector 00182 int level = getLevel(twigs.back().ICELL); // Initialize level of tree 00183 startTimer("Sort resize "); // Start timer 00184 Cells cbuffer; // Sort buffer for cells 00185 cbuffer.resize(2*twigs.size()); // Resize sort buffer for cells 00186 stopTimer("Sort resize "); // Stop timer 00187 while( !twigs.empty() ) { // Keep poppig twigs until the vector is empty 00188 while( getLevel(twigs.back().ICELL) != level ) { // While cell belongs to a higher level 00189 sortCells(cells,cbuffer,false,begin,end); // Sort cells at this level 00190 startTimer("Twigs2cells "); // Start timer 00191 unique(cells,sticks,begin,end); // Get rid of duplicate cells 00192 linkParent(cells,begin,end); // Form parent-child mutual link 00193 level--; // Go up one level 00194 stopTimer("Twigs2cells "); // Stop timer 00195 } // End while for higher level 00196 startTimer("Twigs2cells "); // Start timer 00197 cells.push_back(twigs.back()); // Push cells into vector 00198 twigs.pop_back(); // Pop twigs from vector 00199 end++; // Increment cell counter 00200 stopTimer("Twigs2cells "); // Stop timer 00201 } // End while for popping twigs 00202 for( int l=level; l>0; --l ) { // Once all the twigs are done, do the rest 00203 sortCells(cells,cbuffer,false,begin,end); // Sort cells at this level 00204 startTimer("Twigs2cells "); // Start timer 00205 unique(cells,sticks,begin,end); // Get rid of duplicate cells 00206 linkParent(cells,begin,end); // Form parent-child mutual link 00207 stopTimer("Twigs2cells "); // Stop timer 00208 } // End loop over levels 00209 startTimer("Twigs2cells "); // Start timer 00210 unique(cells,sticks,begin,end); // Just in case there is a collision at root 00211 stopTimer("Twigs2cells ",printNow); // Stop timer & print 00212 evalM2M(cells,cells); // Evaluate all M2M kernels 00213 } 00214 00215 public: 00216 //! Downward phase (M2L,M2P,P2P,L2L,L2P evaluation) 00217 void downward(Cells &cells, Cells &jcells, bool periodic=true) { 00218 #if HYBRID 00219 timeKernels(); // Time all kernels for auto-tuning 00220 #endif 00221 for( C_iter C=cells.begin(); C!=cells.end(); ++C ) C->L = 0;// Initialize local coefficients 00222 if( IMAGES != 0 ) { // If periodic boundary condition 00223 startTimer("Upward P "); // Start timer 00224 upwardPeriodic(jcells); // Upward phase for periodic images 00225 stopTimer("Upward P ",printNow); // Stop timer & print 00226 } // Endif for periodic boundary condition 00227 startTimer("Traverse "); // Start timer 00228 traverse(cells,jcells); // Traverse tree to get interaction list 00229 stopTimer("Traverse ",printNow); // Stop timer & print 00230 if( IMAGES != 0 && periodic ) { // If periodic boundary condition 00231 startTimer("Traverse P "); // Start timer 00232 traversePeriodic(cells,jcells); // Traverse tree for periodic images 00233 stopTimer("Traverse P ",printNow); // Stop timer & print 00234 } // Endif for periodic boundary condition 00235 evalM2L(cells); // Evaluate queued M2L kernels (only GPU) 00236 evalM2P(cells); // Evaluate queued M2P kernels (only GPU) 00237 evalP2P(cells); // Evaluate queued P2P kernels (only GPU) 00238 evalL2L(cells); // Evaluate all L2L kernels 00239 evalL2P(cells); // Evaluate all L2P kernels 00240 if(printNow) std::cout << "P2P: " << NP2P 00241 << " M2P: " << NM2P 00242 << " M2L: " << NM2L << std::endl; 00243 } 00244 00245 //! Calculate Ewald summation 00246 void Ewald(Bodies &bodies, Cells &cells, Cells &jcells) { 00247 startTimer("Ewald wave "); // Start timer 00248 EwaldWave(bodies); // Ewald wave part 00249 stopTimer("Ewald wave ",printNow); // Stop timer & print 00250 startTimer("Ewald real "); // Start timer 00251 neighbor(cells,jcells); // Neighbor calculation for real part 00252 evalEwaldReal(cells); // Evaluate queued Ewald real kernels (only GPU) 00253 stopTimer("Ewald real ",printNow); // Stop timer & print 00254 } 00255 }; 00256 00257 #endif