Dendro  5.01
Dendro in Greek language means tree. The Dendro library is a large scale (262K cores on ORNL's Titan) distributed memory adaptive octree framework. The main goal of Dendro is to perform large scale multiphysics simulations efficiently on modern supercomputers. Dendro consists of efficient parallel data structures and algorithms to perform variational (finite element) methods and finite difference methods on 2:1 balanced arbitrary adaptive octrees, which enables users to perform simulations ranging from black holes (binary black hole mergers) to blood flow in the human body, with applications ranging from relativity and astrophysics to biomedical engineering.
octUtils.h
1 //
2 // Created by milinda on 9/6/16.
3 //
4 
5 #ifndef SFCSORTBENCH_OCTUTILS_H
6 #define SFCSORTBENCH_OCTUTILS_H
7 
8 #include "key.h"
9 #include "sfcSearch.h"
10 #include "sfcSort.h"
11 #include "dendro.h"
12 
26 #include "TreeNode.h"
27 #include <vector>
28 #include <assert.h>
29 #include "mpi.h"
30 #include "parUtils.h"
31 #include <functional>
32 #include "refel.h"
33 #include "mathUtils.h"
34 #include "block.h"
35 #include "sfcSort.h"
36 #include "dendro.h"
37 #include "skey.h"
38 
39 #define OCT2BLK_DECOMP_BLK_FILL_RATIO 0.5 // guarantees what fraction of the block is covered by regular octants.
40 #define OCT2BLK_DECOMP_LEV_GAP 0
41 
42 
49 void addBoundaryNodesType1(std::vector<ot::TreeNode> &in,
50  std::vector<ot::TreeNode>& bdy,
51  unsigned int dim, unsigned int maxDepth);
52 
53 
54 
55 
56 int refineOctree(const std::vector<ot::TreeNode> & inp,
57  std::vector<ot::TreeNode> &out);
58 
59 int refineAndPartitionOctree(const std::vector<ot::TreeNode> & inp,
60  std::vector<ot::TreeNode> &out, MPI_Comm comm);
61 
62 
63 int createRegularOctree(std::vector<ot::TreeNode>& out, unsigned int lev,unsigned int dim, unsigned int maxDepth, MPI_Comm comm);
64 
65 
66 
81 int function2Octree(std::function<double(double,double,double)> fx, std::vector<ot::TreeNode> & nodes,unsigned int maxDepth, const double & tol ,unsigned int elementOrder,MPI_Comm comm );
82 
83 
84 
102 int function2Octree(std::function<void(double,double,double,double*)> fx,const unsigned int numVars,const unsigned int* varIndex,const unsigned int numInterpVars, std::vector<ot::TreeNode> & nodes,unsigned int maxDepth, const double & tol ,unsigned int elementOrder,MPI_Comm comm );
103 
112 void enforceSiblingsAreNotPartitioned(std::vector<ot::TreeNode> & in,MPI_Comm comm);
113 
114 
127 void octree2BlockDecomposition(std::vector<ot::TreeNode>& pNodes, std::vector<ot::Block>& blockList,unsigned int maxDepth,unsigned int & d_min, unsigned int & d_max, DendroIntL localBegin, DendroIntL localEnd,unsigned int eleOrder,unsigned int coarsetLev=0);
128 
129 
130 
141 void blockListToVtk(std::vector<ot::Block>& blkList, const std::vector<ot::TreeNode>& pNodes,char* fNamePrefix, MPI_Comm comm);
142 
155 template <typename T>
156 void computeSFCBucketSplitters(const T *pNodes, int lev, unsigned int maxDepth,unsigned char rot_id,DendroIntL &begin, DendroIntL &end, DendroIntL *splitters);
157 
164 template<typename T>
165 void genEdgeSearchKeys(const T& elem,std::vector<ot::SearchKey>& sKeys);
166 
167 
173 void mergeKeys(std::vector<ot::SearchKey>& sKeys,std::vector<ot::Key>& keys);
174 
175 
176 
178 void generateBlkEdgeSKeys(const ot::Block & blk, std::vector<ot::SearchKey>& sKeys);
179 
180 
182 void generateBlkVertexSKeys(const ot::Block & blk, std::vector<ot::SearchKey>& sKeys);
183 
184 
185 
186 template<typename T>
187 bool linearSearch(const T * pNodes, const T& key,unsigned int n,unsigned int sWidth,unsigned int &result);
188 
189 
190 
204 template <typename T>
205 void partitionBasedOnSplitters(std::vector<T>& pNodes, const T* splitters, unsigned int numSplitters,MPI_Comm comm);
206 
207 
208 
215 template <typename T>
216 void shrinkOrExpandOctree(std::vector<T> & in,const double ld_tol,const unsigned int sf_k,bool isActive,MPI_Comm activeComm, MPI_Comm globalComm);
217 
218 
223 template<typename Blk>
224 void printBlockStats(const Blk* blkList, unsigned int n,unsigned int maxDepth,MPI_Comm comm);
225 
226 
235 unsigned int rankSelectRule(unsigned int size_global,unsigned int rank_global, unsigned int size_local,unsigned int rank_i);
236 
237 
245 inline bool isRankSelected(unsigned int size_global,unsigned int rank_global, unsigned int size_local)
246 {
247  bool isSelected=false;
248  for(unsigned int p=0;p<size_local;p++)
249  if(rank_global==rankSelectRule(size_global,rank_global,size_local,p))
250  {
251  isSelected=true;
252  break;
253  }
254  return isSelected;
255 
256 }
257 
258 
259 template <typename T>
260 void computeSFCBucketSplitters(const T *pNodes, int lev, unsigned int maxDepth,unsigned char rot_id,DendroIntL &begin, DendroIntL &end, DendroIntL *splitters)
261 {
262  if ((lev >= maxDepth) || (begin == end)) {
263  // Special Case when the considering level exceeds the max depth.
264 
265  for (int ii = 0; ii < NUM_CHILDREN; ii++) {
266  int index = (rotations[2 * NUM_CHILDREN * rot_id + ii] - '0');
267  int nextIndex = 0;
268  if (ii == (NUM_CHILDREN-1))
269  nextIndex = ii + 1;
270  else
271  nextIndex = (rotations[2 * NUM_CHILDREN * rot_id + ii + 1] - '0');
272 
273  if (ii == 0) {
274  splitters[index] = begin;
275  splitters[nextIndex] = end;
276  continue;
277  }
278  splitters[nextIndex] = splitters[index];
279  }
280  //std::cout<<"End return "<<"maxDepth "<<maxDepth<<" Lev: "<<lev<< " Begin "<<begin <<" End "<<end<<std::endl;
281  return;
282 
283  }
284 
285  register unsigned int cnum;
286  register unsigned int cnum_prev=0;
287  DendroIntL num_elements=0;
288  unsigned int rotation=0;
289  DendroIntL count[(NUM_CHILDREN+2)]={};
290  //unsigned int pMaxDepth=(lev);
291  //pMaxDepth--;
292  unsigned int mid_bit = maxDepth - lev - 1;
293  count[0]=begin;
294  for (DendroIntL i=begin; i<end; ++i) {
295 
296  /*cnum = (lev < pNodes[i].getLevel())? 1 +(((((pNodes[i].getZ() & (1u << mid_bit)) >> mid_bit) << 2u) |
297  (((pNodes[i].getY() & (1u << mid_bit)) >> mid_bit) << 1u) |
298  ((pNodes[i].getX() & (1u << mid_bit)) >>
299  mid_bit))):0;*/
300 
301  cnum = (lev < pNodes[i].getLevel())? 1 +( (((pNodes[i].getZ() >> mid_bit) & 1u) << 2u) | (((pNodes[i].getY() >> mid_bit) & 1u) << 1u) | ((pNodes[i].getX() >>mid_bit) & 1u)):0;
302  count[cnum+1]++;
303 
304 
305  }
306 
307  DendroIntL loc[NUM_CHILDREN+1];
308  T unsorted[NUM_CHILDREN+1];
309  unsigned int live = 0;
310 
311  //if(count[1]>0) std::cout<<"For rank: "<<rank<<" count [1]: "<<count[1]<<std::endl;
312 
313  for (unsigned int ii = 0; ii < NUM_CHILDREN; ii++) {
314  int index = (rotations[2 * NUM_CHILDREN * rot_id + ii] - '0');
315  int nextIndex = 0;
316  if (ii == (NUM_CHILDREN-1))
317  nextIndex = ii + 1;
318  else
319  nextIndex = (rotations[2 * NUM_CHILDREN * rot_id + ii + 1] - '0');
320 
321  if (ii == 0) {
322  splitters[index] = begin;
323  splitters[nextIndex] = splitters[index]+count[1]+ count[(index+2)]; // number of elements which needs to come before the others due to level constraint.
324 
325  }else {
326  splitters[nextIndex] = splitters[index] + count[(index + 2)];
327  }
328  // if(count[1]>0 & !rank) std::cout<<" Spliter B:"<<index <<" "<<splitters[index]<<" Splitters E "<<nextIndex<<" "<<splitters[nextIndex]<<std::endl;
329 
330  }
331 }
332 
333 
334 
335 template <typename T>
336 void partitionBasedOnSplitters(std::vector<T>& pNodes, const T* splitters, unsigned int numSplitters,MPI_Comm comm)
337 {
338 
339  int rank, npes;
340  MPI_Comm_rank(comm,&rank);
341  MPI_Comm_size(comm,&npes);
342 
343 
344  std::vector<ot::Key> splitterKeys;
345  splitterKeys.resize(numSplitters);
346 
347  assert(npes==numSplitters);
348 
349  for(unsigned int p=0;p<npes;p++) {
350  splitterKeys[p] = ot::Key(splitters[p]);
351  pNodes.push_back(splitters[p]);
352  }
353 
354 
355  std::vector<T> tmpVec;
356  T rootNode(m_uiDim,m_uiMaxDepth);
357 
358 
359 
360  SFC::seqSort::SFC_treeSort(&(*(pNodes.begin())),pNodes.size(),tmpVec,tmpVec,tmpVec,m_uiMaxDepth,m_uiMaxDepth,rootNode,ROOT_ROTATION,1,TS_REMOVE_DUPLICATES);
361  std::swap(pNodes,tmpVec);
362  tmpVec.clear();
363 
364  assert(seq::test::isUniqueAndSorted(pNodes));
365 
366 
367  SFC::seqSearch::SFC_treeSearch(&(*(splitterKeys.begin())),&(*(pNodes.begin())),0,numSplitters,0,pNodes.size(),m_uiMaxDepth,m_uiMaxDepth,ROOT_ROTATION);
368 
369  /* if(!rank)
370  {
371  for(unsigned int p=0;p<npes;p++)
372  {
373  std::cout<<" p: "<<p<<" splitterKey: "<<splitterKeys[p]<<" searchResult: "<<splitterKeys[p].getSearchResult()<<std::endl;
374  }
375  }*/
376 
377 
378 
379  int * sendCount=new int[npes];
380  int * recvCount=new int[npes];
381  int * sendOffset=new int [npes];
382  int * recvOffset=new int [npes];
383 
384  unsigned int sResult;
385  unsigned int sResultPrev=0;
386  std::vector<T> sendBuffer;
387 
388 
389  assert((splitterKeys[0].getFlag() & OCT_FOUND));
390  sendBuffer.resize(sendBuffer.size()+(splitterKeys[0].getSearchResult()));
391 
392  for(unsigned int p=0;p<npes;p++) sendCount[p]=0;
393 
394  for(unsigned int ele=0;ele<splitterKeys[0].getSearchResult();ele++)
395  {
396  sendBuffer[ele]=pNodes[ele];
397  sendCount[0]++;
398  }
399 
400  for(unsigned int p=1;p<npes;p++)
401  {
402  assert( (splitterKeys[p].getFlag() & OCT_FOUND));
403  sResultPrev=splitterKeys[p-1].getSearchResult();
404  sResult=splitterKeys[p].getSearchResult();
405 
406  if((sResultPrev+1)<pNodes.size() && ((sResultPrev+1)<sResult))
407  {
408  sendBuffer.resize(sendBuffer.size()+(sResult-sResultPrev-1));
409  for(unsigned int ele=(sResultPrev+1);ele<(sResult);ele++)
410  {
411  sendBuffer[sendCount[p-1]+sendCount[p]]=pNodes[ele];
412  sendCount[p]++;
413  }
414  }
415  }
416 
417 
418  par::Mpi_Alltoall(sendCount,recvCount,1,comm);
419 
420  sendOffset[0]=0;
421  recvOffset[0]=0;
422 
423  omp_par::scan(sendCount,sendOffset,npes);
424  omp_par::scan(recvCount,recvOffset,npes);
425 
426  pNodes.clear();
427  pNodes.resize(recvCount[npes-1]+recvOffset[npes-1]);
428 
429  //if(!rank) std::cout<<"rank: "<<rank<<" pNodes size: "<<pNodes.size()<<std::endl;
430 
431  assert(sendBuffer.size()==(sendCount[npes-1]+sendOffset[npes-1]));
432  par::Mpi_Alltoallv(&(*(sendBuffer.begin())),sendCount,sendOffset,&(*(pNodes.begin())),recvCount,recvOffset,comm);
433 
434 
435  for(unsigned int ele=0;ele<pNodes.size();ele++)
436  {
437  if(pNodes[ele].getLevel()==m_uiMaxDepth) std::cout<<"rank: "<<rank<<" ele: "<<ele<<" pNodes: "<<pNodes[ele]<<std::endl;
438 
439  }
440 
441 
442 
443  assert(par::test::isUniqueAndSorted(pNodes,comm));
444 
445 
446 
447  delete [] sendCount;
448  delete [] recvCount;
449  delete [] sendOffset;
450  delete [] recvOffset;
451 
452 
453 
454 
455 
456 }
457 
458 
459 
460 template<typename T>
461 void genEdgeSearchKeys(const T& elem,unsigned int blkId,std::vector<ot::SearchKey>& sKeys)
462 {
463  const unsigned int domain_max = 1u<<(m_uiMaxDepth);
464 
465  const unsigned int myX=elem.getX();
466  const unsigned int myY=elem.getY();
467  const unsigned int myZ=elem.getZ();
468  const unsigned int mySz=1u<<(m_uiMaxDepth-elem.getLevel());
469  std::vector<ot::SearchKey>::iterator hint;
470  if(myX>0 && myY>0)
471  {
472  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX-1),(myY-1),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
473  hint->addOwner(blkId);
474  hint->addStencilIndexAndDirection(OCT_DIR_LEFT_DOWN);
475  }
476 
477  if(myX>0 && (myY+mySz)<domain_max) {
478 
479  hint = sKeys.emplace(sKeys.end(),ot::SearchKey((myX - 1), (myY + mySz), (myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
480  hint->addOwner(blkId);
481  hint->addStencilIndexAndDirection(OCT_DIR_LEFT_UP);
482  }
483 
484  if(myX>0 && myZ>0) {
485 
486  hint = sKeys.emplace(sKeys.end(), ot::SearchKey((myX - 1), (myY), (myZ - 1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
487  hint->addOwner(blkId);
488  hint->addStencilIndexAndDirection(OCT_DIR_LEFT_BACK);
489  }
490 
491  if(myX>0 && (myZ+mySz)<domain_max)
492  {
493  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX-1),(myY),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
494  hint->addOwner(blkId);
495  hint->addStencilIndexAndDirection(OCT_DIR_LEFT_FRONT);
496 
497  }
498 
499 
500  if((myX+mySz) < domain_max && myY>0)
501  {
502  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX+mySz),(myY-1),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
503  hint->addOwner(blkId);
504  hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_DOWN);
505  }
506 
507  if((myX+mySz)<domain_max && (myY+mySz)<domain_max)
508  {
509 
510  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX+mySz),(myY+mySz),(myZ), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
511  hint->addOwner(blkId);
512  hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_UP);
513 
514  }
515 
516 
517  if((myX+mySz)<domain_max && myZ>0) {
518 
519  hint = sKeys.emplace(sKeys.end(), ot::SearchKey((myX + mySz), (myY), (myZ - 1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
520  hint->addOwner(blkId);
521  hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_BACK);
522  }
523 
524  if((myX+mySz)<domain_max && (myZ+mySz)<domain_max)
525  {
526  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX+mySz),(myY),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
527  hint->addOwner(blkId);
528  hint->addStencilIndexAndDirection(OCT_DIR_RIGHT_FRONT);
529 
530  }
531 
532  if(myY>0 && myZ>0)
533  {
534  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX),(myY-1),(myZ-1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
535  hint->addOwner(blkId);
536  hint->addStencilIndexAndDirection(OCT_DIR_DOWN_BACK);
537  }
538 
539  if(myY > 0 && (myZ+mySz)<domain_max)
540  {
541  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX),(myY-1),(myZ+mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
542  hint->addOwner(blkId);
543  hint->addStencilIndexAndDirection(OCT_DIR_DOWN_FRONT);
544 
545  }
546 
547 
548  if((myY+mySz)<domain_max && myZ>0)
549  {
550 
551  hint=sKeys.emplace(sKeys.end(),ot::SearchKey((myX),(myY+mySz),(myZ-1), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
552  hint->addOwner(blkId);
553  hint->addStencilIndexAndDirection(OCT_DIR_UP_BACK);
554 
555  }
556 
557  if((myY+mySz)<domain_max && (myZ+mySz)<domain_max) {
558 
559  hint = sKeys.emplace(sKeys.end(), ot::SearchKey((myX), (myY + mySz), (myZ + mySz), m_uiMaxDepth, m_uiDim, m_uiMaxDepth));
560  hint->addOwner(blkId);
561  hint->addStencilIndexAndDirection(OCT_DIR_UP_FRONT);
562  }
563 
564 
565 
566 }
567 
568 
569 
570 template<typename T>
571 bool linearSearch(const T * pNodes, const T& key,unsigned int n,unsigned int sWidth,unsigned int &result)
572 {
573  unsigned int sBegin=(int)std::max(0,(int)(n-sWidth));
574  unsigned int sEnd=(int)std::min(n,n+sWidth);
575 
576  for(unsigned int e=sBegin;e<sEnd;e++)
577  if(pNodes[e]==key)
578  {
579  result=e;
580  return true;
581  }
582  result=LOOK_UP_TABLE_DEFAULT;
583  return false;
584 
585 }
586 
587 
588 
589 template <typename T>
590 void shrinkOrExpandOctree(std::vector<T> & in,const double ld_tol,const unsigned int sf_k,bool isActive,MPI_Comm activeComm, MPI_Comm globalComm)
591 {
592 
593  int rank_g,npes_g;
594  MPI_Comm_rank(globalComm,&rank_g);
595  MPI_Comm_size(globalComm,&npes_g);
596 
597  if(!rank_g)
598  {
599  if(!isActive)
600  {
601  std::cout<<"[Shrink/Expand Error]: active communicator does not include global rank=0. "<<std::endl;
602  exit(0);
603  }
604  }
605 
606  int activeCommSz=0;
607  if(!rank_g)
608  MPI_Comm_size(activeComm,&activeCommSz);
609 
610  par::Mpi_Bcast(&activeCommSz,1,0,globalComm);
611 
612  assert(activeCommSz<=npes_g);
613  if(activeCommSz>npes_g)
614  {
615  std::cout<<"[Shrink/Expand Error]: active communicator size is larger than the global comm. "<<std::endl;
616  exit(0);
617  }
618 
619  int * sendCount=new int [npes_g];
620  int * recvCount=new int [npes_g];
621  int * sendOffset=new int [npes_g];
622  int * recvOffset=new int [npes_g];
623 
624 
625  for(unsigned int i=0;i<npes_g;i++)
626  sendCount[i]=0;
627 
628  unsigned int localSz=in.size();
629 
630  for(unsigned int i=0;i<activeCommSz;i++)
631  sendCount[rankSelectRule(npes_g,rank_g,activeCommSz,i)]=(((i+1)*localSz)/activeCommSz) - ((i*localSz)/activeCommSz);
632 
633  par::Mpi_Alltoall(sendCount,recvCount,1,globalComm);
634 
635  sendOffset[0]=0;
636  recvOffset[0]=0;
637 
638  omp_par::scan(sendCount,sendOffset,npes_g);
639  omp_par::scan(recvCount,recvOffset,npes_g);
640 
641  std::vector<T> recvBuf;
642  recvBuf.resize(recvOffset[npes_g-1]+recvCount[npes_g-1]);
643 
644  par::Mpi_Alltoallv_sparse(&(*(in.begin())),sendCount,sendOffset,&(*(recvBuf.begin())),recvCount,recvOffset,globalComm);
645  std::swap(in,recvBuf);
646  recvBuf.clear();
647 
648  delete [] sendCount;
649  delete [] recvCount;
650  delete [] sendOffset;
651  delete [] recvOffset;
652 
653  // now std::vector<in> should be in comm1.
654  //std::cout<<rank<<" in: "<<in.size()<<std::endl;
655  if(isActive)
656  {
657  T rootTN(m_uiDim,m_uiMaxDepth);
658  // @note: should not need a remove duplicates. but we ge duplicates if we did not do it. Just find out why .
659  SFC::parSort::SFC_treeSort(in,recvBuf,recvBuf,recvBuf,ld_tol,m_uiMaxDepth,rootTN,ROOT_ROTATION,1,TS_REMOVE_DUPLICATES,sf_k,activeComm);
660  std::swap(in,recvBuf);
661  recvBuf.clear();
662  assert(par::test::isUniqueAndSorted(in,activeComm));
663  }
664 
665 
666 
667 }
668 
669 template<typename Blk>
670 void printBlockStats(const Blk* blkList, unsigned int n,unsigned int maxDepth,MPI_Comm comm)
671 {
672  int rank,npes;
673 
674  MPI_Comm_rank(comm,&rank);
675  MPI_Comm_size(comm,&npes);
676 
677  unsigned int blk_counts[maxDepth];
678  unsigned int blk_counts_g[maxDepth];
679 
680  for(unsigned int i=0;i<maxDepth;i++)
681  blk_counts[i]=0;
682 
683  unsigned int ele1D,index;
684  for(unsigned int blk=0;blk<n;blk++)
685  {
686  ele1D=blkList[blk].get1DArraySize();
687  index=(blkList[blk].get1DArraySize()-2*blkList[blk].get1DPadWidth()-1)/blkList[blk].getElementOrder();
688  blk_counts[index]++;
689  }
690 
691  par::Mpi_Reduce(blk_counts,blk_counts_g,maxDepth,MPI_SUM,0,comm);
692 
693  if(!rank)
694  {
695  for(unsigned int k=0;k<maxDepth;k++)
696  {
697  std::cout<<"blk_lev["<<k<<"]: ";
698  for(unsigned int w=0;w<blk_counts_g[k];w++)
699  std::cout<<"*";
700  std::cout<<std::endl;
701  }
702  }
703 
704  return ;
705 
706 }
707 
715 template<typename T>
716 void computeOctreeStats(const T* in, unsigned int n, unsigned int * octsByLevLocal,unsigned int * octsByLevGlobal, double& regOcts,MPI_Comm comm)
717 {
718 
719  unsigned int octsScanByLev[m_uiMaxDepth];
720 
721  for(unsigned int i=0;i<m_uiMaxDepth;i++)
722  {
723  octsByLevLocal[i]=0;
724  octsByLevGlobal[i]=0;
725  }
726 
727  for(unsigned int i=0;i<n;i++)
728  octsByLevLocal[in[i].getLevel()]++;
729 
730  par::Mpi_Allreduce(octsByLevLocal,octsByLevGlobal,m_uiMaxDepth,MPI_SUM,comm);
731 
732  octsScanByLev[0]=0;
733  omp_par::scan(octsByLevGlobal,octsScanByLev,m_uiMaxDepth);
734 
735  DendroIntL totalOcts=octsScanByLev[m_uiMaxDepth-1]+octsByLevGlobal[m_uiMaxDepth-1];
736 
737  regOcts=totalOcts/(double)(1u<<(3*(m_uiMaxDepth-2)));
738  return;
739 
740 }
741 
742 
743 #endif //SFCSORTBENCH_OCTUTILS_H
int Mpi_Alltoallv_sparse(T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm)
int Mpi_Reduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm)
Definition: block.h:35
int Mpi_Alltoall(T *sendbuf, T *recvbuf, int count, MPI_Comm comm)
Definition: skey.h:26
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm)
A set of parallel utilities.
Definition: key.h:26
int Mpi_Allreduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm)