5 #ifndef SFCSORTBENCH_OCTUTILS_H 6 #define SFCSORTBENCH_OCTUTILS_H 33 #include "mathUtils.h" 39 #define OCT2BLK_DECOMP_BLK_FILL_RATIO 0.5 // gurantees how fraction of the block covered by regular octants. 40 #define OCT2BLK_DECOMP_LEV_GAP 0 49 void addBoundaryNodesType1(std::vector<ot::TreeNode> &in,
50 std::vector<ot::TreeNode>& bdy,
51 unsigned int dim,
unsigned int maxDepth);
56 int refineOctree(
const std::vector<ot::TreeNode> & inp,
57 std::vector<ot::TreeNode> &out);
59 int refineAndPartitionOctree(
const std::vector<ot::TreeNode> & inp,
60 std::vector<ot::TreeNode> &out, MPI_Comm comm);
63 int createRegularOctree(std::vector<ot::TreeNode>& out,
unsigned int lev,
unsigned int dim,
unsigned int maxDepth, MPI_Comm comm);
81 int function2Octree(std::function<
double(
double,
double,
double)> fx, std::vector<ot::TreeNode> & nodes,
unsigned int maxDepth,
const double & tol ,
unsigned int elementOrder,MPI_Comm comm );
102 int function2Octree(std::function<
void(
double,
double,
double,
double*)> fx,
const unsigned int numVars,
const unsigned int* varIndex,
const unsigned int numInterpVars, std::vector<ot::TreeNode> & nodes,
unsigned int maxDepth,
const double & tol ,
unsigned int elementOrder,MPI_Comm comm );
112 void enforceSiblingsAreNotPartitioned(std::vector<ot::TreeNode> & in,MPI_Comm comm);
127 void octree2BlockDecomposition(std::vector<ot::TreeNode>& pNodes, std::vector<ot::Block>& blockList,
unsigned int maxDepth,
unsigned int & d_min,
unsigned int & d_max, DendroIntL localBegin, DendroIntL localEnd,
unsigned int eleOrder,
unsigned int coarsetLev=0);
141 void blockListToVtk(std::vector<ot::Block>& blkList,
const std::vector<ot::TreeNode>& pNodes,
char* fNamePrefix, MPI_Comm comm);
155 template <
typename T>
156 void computeSFCBucketSplitters(
const T *pNodes,
int lev,
unsigned int maxDepth,
unsigned char rot_id,DendroIntL &begin, DendroIntL &end, DendroIntL *splitters);
165 void genEdgeSearchKeys(
const T& elem,std::vector<ot::SearchKey>& sKeys);
173 void mergeKeys(std::vector<ot::SearchKey>& sKeys,std::vector<ot::Key>& keys);
178 void generateBlkEdgeSKeys(
const ot::Block & blk, std::vector<ot::SearchKey>& sKeys);
182 void generateBlkVertexSKeys(
const ot::Block & blk, std::vector<ot::SearchKey>& sKeys);
187 bool linearSearch(
const T * pNodes,
const T& key,
unsigned int n,
unsigned int sWidth,
unsigned int &result);
204 template <
typename T>
205 void partitionBasedOnSplitters(std::vector<T>& pNodes,
const T* splitters,
unsigned int numSplitters,MPI_Comm comm);
215 template <
typename T>
216 void shrinkOrExpandOctree(std::vector<T> & in,
const double ld_tol,
const unsigned int sf_k,
bool isActive,MPI_Comm activeComm, MPI_Comm globalComm);
223 template<
typename Blk>
224 void printBlockStats(
const Blk* blkList,
unsigned int n,
unsigned int maxDepth,MPI_Comm comm);
235 unsigned int rankSelectRule(
unsigned int size_global,
unsigned int rank_global,
unsigned int size_local,
unsigned int rank_i);
245 inline bool isRankSelected(
unsigned int size_global,
unsigned int rank_global,
unsigned int size_local)
247 bool isSelected=
false;
248 for(
unsigned int p=0;p<size_local;p++)
249 if(rank_global==rankSelectRule(size_global,rank_global,size_local,p))
259 template <
typename T>
260 void computeSFCBucketSplitters(
const T *pNodes,
int lev,
unsigned int maxDepth,
unsigned char rot_id,DendroIntL &begin, DendroIntL &end, DendroIntL *splitters)
262 if ((lev >= maxDepth) || (begin == end)) {
265 for (
int ii = 0; ii < NUM_CHILDREN; ii++) {
266 int index = (rotations[2 * NUM_CHILDREN * rot_id + ii] -
'0');
268 if (ii == (NUM_CHILDREN-1))
271 nextIndex = (rotations[2 * NUM_CHILDREN * rot_id + ii + 1] -
'0');
274 splitters[index] = begin;
275 splitters[nextIndex] = end;
278 splitters[nextIndex] = splitters[index];
285 register unsigned int cnum;
286 register unsigned int cnum_prev=0;
287 DendroIntL num_elements=0;
288 unsigned int rotation=0;
289 DendroIntL count[(NUM_CHILDREN+2)]={};
292 unsigned int mid_bit = maxDepth - lev - 1;
294 for (DendroIntL i=begin; i<end; ++i) {
301 cnum = (lev < pNodes[i].getLevel())? 1 +( (((pNodes[i].getZ() >> mid_bit) & 1u) << 2u) | (((pNodes[i].getY() >> mid_bit) & 1u) << 1u) | ((pNodes[i].getX() >>mid_bit) & 1u)):0;
307 DendroIntL loc[NUM_CHILDREN+1];
308 T unsorted[NUM_CHILDREN+1];
309 unsigned int live = 0;
313 for (
unsigned int ii = 0; ii < NUM_CHILDREN; ii++) {
314 int index = (rotations[2 * NUM_CHILDREN * rot_id + ii] -
'0');
316 if (ii == (NUM_CHILDREN-1))
319 nextIndex = (rotations[2 * NUM_CHILDREN * rot_id + ii + 1] -
'0');
322 splitters[index] = begin;
323 splitters[nextIndex] = splitters[index]+count[1]+ count[(index+2)];
326 splitters[nextIndex] = splitters[index] + count[(index + 2)];
335 template <
typename T>
336 void partitionBasedOnSplitters(std::vector<T>& pNodes,
const T* splitters,
unsigned int numSplitters,MPI_Comm comm)
340 MPI_Comm_rank(comm,&rank);
341 MPI_Comm_size(comm,&npes);
344 std::vector<ot::Key> splitterKeys;
345 splitterKeys.resize(numSplitters);
347 assert(npes==numSplitters);
349 for(
unsigned int p=0;p<npes;p++) {
350 splitterKeys[p] =
ot::Key(splitters[p]);
351 pNodes.push_back(splitters[p]);
355 std::vector<T> tmpVec;
356 T rootNode(m_uiDim,m_uiMaxDepth);
360 SFC::seqSort::SFC_treeSort(&(*(pNodes.begin())),pNodes.size(),tmpVec,tmpVec,tmpVec,m_uiMaxDepth,m_uiMaxDepth,rootNode,ROOT_ROTATION,1,TS_REMOVE_DUPLICATES);
361 std::swap(pNodes,tmpVec);
364 assert(seq::test::isUniqueAndSorted(pNodes));
367 SFC::seqSearch::SFC_treeSearch(&(*(splitterKeys.begin())),&(*(pNodes.begin())),0,numSplitters,0,pNodes.size(),m_uiMaxDepth,m_uiMaxDepth,ROOT_ROTATION);
379 int * sendCount=
new int[npes];
380 int * recvCount=
new int[npes];
381 int * sendOffset=
new int [npes];
382 int * recvOffset=
new int [npes];
384 unsigned int sResult;
385 unsigned int sResultPrev=0;
386 std::vector<T> sendBuffer;
389 assert((splitterKeys[0].getFlag() & OCT_FOUND));
390 sendBuffer.resize(sendBuffer.size()+(splitterKeys[0].getSearchResult()));
392 for(
unsigned int p=0;p<npes;p++) sendCount[p]=0;
394 for(
unsigned int ele=0;ele<splitterKeys[0].getSearchResult();ele++)
396 sendBuffer[ele]=pNodes[ele];
400 for(
unsigned int p=1;p<npes;p++)
402 assert( (splitterKeys[p].getFlag() & OCT_FOUND));
403 sResultPrev=splitterKeys[p-1].getSearchResult();
404 sResult=splitterKeys[p].getSearchResult();
406 if((sResultPrev+1)<pNodes.size() && ((sResultPrev+1)<sResult))
408 sendBuffer.resize(sendBuffer.size()+(sResult-sResultPrev-1));
409 for(
unsigned int ele=(sResultPrev+1);ele<(sResult);ele++)
411 sendBuffer[sendCount[p-1]+sendCount[p]]=pNodes[ele];
423 omp_par::scan(sendCount,sendOffset,npes);
424 omp_par::scan(recvCount,recvOffset,npes);
427 pNodes.resize(recvCount[npes-1]+recvOffset[npes-1]);
431 assert(sendBuffer.size()==(sendCount[npes-1]+sendOffset[npes-1]));
432 par::Mpi_Alltoallv(&(*(sendBuffer.begin())),sendCount,sendOffset,&(*(pNodes.begin())),recvCount,recvOffset,comm);
435 for(
unsigned int ele=0;ele<pNodes.size();ele++)
437 if(pNodes[ele].getLevel()==m_uiMaxDepth) std::cout<<
"rank: "<<rank<<
" ele: "<<ele<<
" pNodes: "<<pNodes[ele]<<std::endl;
443 assert(par::test::isUniqueAndSorted(pNodes,comm));
449 delete [] sendOffset;
450 delete [] recvOffset;
461 void genEdgeSearchKeys(
const T& elem,
unsigned int blkId,std::vector<ot::SearchKey>& sKeys)
463 const unsigned int domain_max = 1u<<(m_uiMaxDepth);
465 const unsigned int myX=elem.getX();
466 const unsigned int myY=elem.getY();
467 const unsigned int myZ=elem.getZ();
468 const unsigned int mySz=1u<<(m_uiMaxDepth-elem.getLevel());
469 std::vector<ot::SearchKey>::iterator hint;
472 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX-1),(myY-1),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
473 hint->addOwner(blkId);
474 hint->addStencilIndexAndDirection(OCT_DIR_LEFT_DOWN);
477 if(myX>0 && (myY+mySz)<domain_max) {
479 hint = sKeys.emplace(sKeys.end(),
ot::SearchKey((myX - 1), (myY + mySz), (myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
480 hint->addOwner(blkId);
481 hint->addStencilIndexAndDirection(OCT_DIR_LEFT_UP);
486 hint = sKeys.emplace(sKeys.end(),
ot::SearchKey((myX - 1), (myY), (myZ - 1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
487 hint->addOwner(blkId);
488 hint->addStencilIndexAndDirection(OCT_DIR_LEFT_BACK);
491 if(myX>0 && (myZ+mySz)<domain_max)
493 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX-1),(myY),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
494 hint->addOwner(blkId);
495 hint->addStencilIndexAndDirection(OCT_DIR_LEFT_FRONT);
500 if((myX+mySz) < domain_max && myY>0)
502 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX+mySz),(myY-1),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
503 hint->addOwner(blkId);
504 hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_DOWN);
507 if((myX+mySz)<domain_max && (myY+mySz)<domain_max)
510 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX+mySz),(myY+mySz),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
511 hint->addOwner(blkId);
512 hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_UP);
517 if((myX+mySz)<domain_max && myZ>0) {
519 hint = sKeys.emplace(sKeys.end(),
ot::SearchKey((myX + mySz), (myY), (myZ - 1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
520 hint->addOwner(blkId);
521 hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_BACK);
524 if((myX+mySz)<domain_max && (myZ+mySz)<domain_max)
526 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX+mySz),(myY),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
527 hint->addOwner(blkId);
528 hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_FRONT);
534 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX),(myY-1),(myZ-1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
535 hint->addOwner(blkId);
536 hint->addStencilIndexAndDirection(OCT_DIR_DOWN_BACK);
539 if(myY > 0 && (myZ+mySz)<domain_max)
541 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX),(myY-1),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
542 hint->addOwner(blkId);
543 hint->addStencilIndexAndDirection(OCT_DIR_DOWN_FRONT);
548 if((myY+mySz)<domain_max && myZ>0)
551 hint=sKeys.emplace(sKeys.end(),
ot::SearchKey((myX),(myY+mySz),(myZ-1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
552 hint->addOwner(blkId);
553 hint->addStencilIndexAndDirection(OCT_DIR_UP_BACK);
557 if((myY+mySz)<domain_max && (myZ+mySz)<domain_max) {
559 hint = sKeys.emplace(sKeys.end(),
ot::SearchKey((myX), (myY + mySz), (myZ + mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
560 hint->addOwner(blkId);
561 hint->addStencilIndexAndDirection(OCT_DIR_UP_FRONT);
571 bool linearSearch(
const T * pNodes,
const T& key,
unsigned int n,
unsigned int sWidth,
unsigned int &result)
573 unsigned int sBegin=(int)std::max(0,(
int)(n-sWidth));
574 unsigned int sEnd=(int)std::min(n,n+sWidth);
576 for(
unsigned int e=sBegin;e<sEnd;e++)
582 result=LOOK_UP_TABLE_DEFAULT;
589 template <
typename T>
590 void shrinkOrExpandOctree(std::vector<T> & in,
const double ld_tol,
const unsigned int sf_k,
bool isActive,MPI_Comm activeComm, MPI_Comm globalComm)
594 MPI_Comm_rank(globalComm,&rank_g);
595 MPI_Comm_size(globalComm,&npes_g);
601 std::cout<<
"[Shrink/Expand Error]: active communicator does not include global rank=0. "<<std::endl;
608 MPI_Comm_size(activeComm,&activeCommSz);
612 assert(activeCommSz<=npes_g);
613 if(activeCommSz>npes_g)
615 std::cout<<
"[Shrink/Expand Error]: active communicator size is larger than the global comm. "<<std::endl;
619 int * sendCount=
new int [npes_g];
620 int * recvCount=
new int [npes_g];
621 int * sendOffset=
new int [npes_g];
622 int * recvOffset=
new int [npes_g];
625 for(
unsigned int i=0;i<npes_g;i++)
628 unsigned int localSz=in.size();
630 for(
unsigned int i=0;i<activeCommSz;i++)
631 sendCount[rankSelectRule(npes_g,rank_g,activeCommSz,i)]=(((i+1)*localSz)/activeCommSz) - ((i*localSz)/activeCommSz);
638 omp_par::scan(sendCount,sendOffset,npes_g);
639 omp_par::scan(recvCount,recvOffset,npes_g);
641 std::vector<T> recvBuf;
642 recvBuf.resize(recvOffset[npes_g-1]+recvCount[npes_g-1]);
645 std::swap(in,recvBuf);
650 delete [] sendOffset;
651 delete [] recvOffset;
657 T rootTN(m_uiDim,m_uiMaxDepth);
659 SFC::parSort::SFC_treeSort(in,recvBuf,recvBuf,recvBuf,ld_tol,m_uiMaxDepth,rootTN,ROOT_ROTATION,1,TS_REMOVE_DUPLICATES,sf_k,activeComm);
660 std::swap(in,recvBuf);
662 assert(par::test::isUniqueAndSorted(in,activeComm));
669 template<
typename Blk>
670 void printBlockStats(
const Blk* blkList,
unsigned int n,
unsigned int maxDepth,MPI_Comm comm)
674 MPI_Comm_rank(comm,&rank);
675 MPI_Comm_size(comm,&npes);
677 unsigned int blk_counts[maxDepth];
678 unsigned int blk_counts_g[maxDepth];
680 for(
unsigned int i=0;i<maxDepth;i++)
683 unsigned int ele1D,index;
684 for(
unsigned int blk=0;blk<n;blk++)
686 ele1D=blkList[blk].get1DArraySize();
687 index=(blkList[blk].get1DArraySize()-2*blkList[blk].get1DPadWidth()-1)/blkList[blk].getElementOrder();
695 for(
unsigned int k=0;k<maxDepth;k++)
697 std::cout<<
"blk_lev["<<k<<
"]: ";
698 for(
unsigned int w=0;w<blk_counts_g[k];w++)
700 std::cout<<std::endl;
716 void computeOctreeStats(
const T* in,
unsigned int n,
unsigned int * octsByLevLocal,
unsigned int * octsByLevGlobal,
double& regOcts,MPI_Comm comm)
719 unsigned int octsScanByLev[m_uiMaxDepth];
721 for(
unsigned int i=0;i<m_uiMaxDepth;i++)
724 octsByLevGlobal[i]=0;
727 for(
unsigned int i=0;i<n;i++)
728 octsByLevLocal[in[i].getLevel()]++;
733 omp_par::scan(octsByLevGlobal,octsScanByLev,m_uiMaxDepth);
735 DendroIntL totalOcts=octsScanByLev[m_uiMaxDepth-1]+octsByLevGlobal[m_uiMaxDepth-1];
737 regOcts=totalOcts/(double)(1u<<(3*(m_uiMaxDepth-2)));
743 #endif //SFCSORTBENCH_OCTUTILS_H int Mpi_Alltoallv_sparse(T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm)
int Mpi_Reduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm)
int Mpi_Alltoall(T *sendbuf, T *recvbuf, int count, MPI_Comm comm)
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm)
A set of parallel utilities.
int Mpi_Allreduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm)