Dendro 5.01
Dendro, from the Greek word for "tree", is a large-scale (262K cores on ORNL's Titan), distributed-memory, adaptive octree framework. Its main goal is to perform large-scale multiphysics simulations efficiently on modern supercomputers. Dendro provides efficient parallel data structures and algorithms for variational (finite element) and finite difference methods on 2:1 balanced, arbitrarily adaptive octrees, enabling simulations ranging from binary black hole mergers to blood flow in the human body, with applications spanning relativity, astrophysics, and biomedical engineering.
parUtils.h
#ifndef __PAR_UTILS_H_
#define __PAR_UTILS_H_

#define KEEP_HIGH 100
#define KEEP_LOW 101

#ifdef __DEBUG__
#ifndef __DEBUG_PAR__
#define __DEBUG_PAR__
#endif
#endif

#include "mpi.h"
#include <vector>
#include "dendro.h"

#ifndef KWAY
#define KWAY 128
#endif

// Data structure used to compute parallel rank.
template <typename T>
struct _T {
    unsigned int p;
    unsigned int idx;
    T val;
    DendroIntL rank;

    inline bool operator< (const _T<T> &c1) const {
        return (this->val < c1.val);
    }

    inline bool operator== (const _T<T> &c1) const {
        return (this->val == c1.val);
    }

    inline bool operator<= (const _T<T> &c1) const {
        return (this->val <= c1.val);
    }

    inline bool operator> (const _T<T> &c1) const {
        return (this->val > c1.val);
    }

    inline bool operator>= (const _T<T> &c1) const {
        return (this->val >= c1.val);
    }
};

namespace par
{
    template <typename T>
    class Mpi_datatype;

    template <>
    class Mpi_datatype<_T<double>>
    {
    public:
        static MPI_Datatype value()
        {
            static bool first = true;
            static MPI_Datatype datatype;

            if (first)
            {
                first = false;
                MPI_Type_contiguous(sizeof(_T<double>), MPI_BYTE, &datatype);
                MPI_Type_commit(&datatype);
            }

            return datatype;
        }
    };

    template <>
    class Mpi_datatype<_T<float>>
    {
    public:
        static MPI_Datatype value()
        {
            static bool first = true;
            static MPI_Datatype datatype;

            if (first)
            {
                first = false;
                MPI_Type_contiguous(sizeof(_T<float>), MPI_BYTE, &datatype);
                MPI_Type_commit(&datatype);
            }

            return datatype;
        }
    };

} // namespace par
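/*
  Usage sketch (illustrative addition, not part of the original header): the
  Mpi_datatype specializations above let _T<double> / _T<float> records be
  passed directly to MPI routines. The datatype is built once with
  MPI_Type_contiguous / MPI_Type_commit and cached in a function-local
  static, so repeated calls to value() are cheap. A hypothetical
  point-to-point exchange (variable names such as myRank are assumptions
  made for this example):

    _T<double> record;
    record.p = 0; record.idx = 0; record.rank = 0; record.val = 3.14;
    if (myRank == 0)
        MPI_Send(&record, 1, par::Mpi_datatype<_T<double>>::value(),
                 1, 0, MPI_COMM_WORLD);
    else if (myRank == 1)
        MPI_Recv(&record, 1, par::Mpi_datatype<_T<double>>::value(),
                 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
*/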

#ifdef PETSC_USE_LOG

#include "petscsys.h"

namespace par
{
extern int sortEvent;
extern int concatEvent;
extern int remdupEvent;
extern int partwEvent;
extern int searchEvent;
extern int parScatterEvent;
extern int a2avWaitEvent;
extern int all2AllvSparseEvent;
extern int all2AllvDenseEvent;
extern int allGatherEvent;
extern int reduceEvent;
extern int sendRecvEvent;
extern int allReduceEvent;
extern int all2AllEvent;
extern int allGathervEvent;
extern int gatherEvent;
extern int scanEvent;
extern int bcastEvent;
extern int splitComm2wayEvent;
extern int splitCommEvent;
} // namespace par

#define PROF_A2AV_WAIT_BEGIN PetscLogEventBegin(a2avWaitEvent, 0, 0, 0, 0);
#define PROF_A2AV_WAIT_END PetscLogEventEnd(a2avWaitEvent, 0, 0, 0, 0);

#define PROF_SPLIT_COMM_2WAY_BEGIN \
    PetscFunctionBegin;            \
    PetscLogEventBegin(splitComm2wayEvent, 0, 0, 0, 0);
#define PROF_SPLIT_COMM_2WAY_END                      \
    PetscLogEventEnd(splitComm2wayEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_SPLIT_COMM_BEGIN \
    PetscFunctionBegin;       \
    PetscLogEventBegin(splitCommEvent, 0, 0, 0, 0);
#define PROF_SPLIT_COMM_END                       \
    PetscLogEventEnd(splitCommEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_SEARCH_BEGIN \
    PetscFunctionBegin;   \
    PetscLogEventBegin(searchEvent, 0, 0, 0, 0);
#define PROF_SEARCH_END                        \
    PetscLogEventEnd(searchEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_CONCAT_BEGIN \
    PetscFunctionBegin;       \
    PetscLogEventBegin(concatEvent, 0, 0, 0, 0);
#define PROF_PAR_CONCAT_END                    \
    PetscLogEventEnd(concatEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_SCATTER_BEGIN \
    PetscFunctionBegin;        \
    PetscLogEventBegin(parScatterEvent, 0, 0, 0, 0);
#define PROF_PAR_SCATTER_END                       \
    PetscLogEventEnd(parScatterEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_SENDRECV_BEGIN \
    PetscFunctionBegin;         \
    PetscLogEventBegin(sendRecvEvent, 0, 0, 0, 0);
#define PROF_PAR_SENDRECV_END                    \
    PetscLogEventEnd(sendRecvEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_BCAST_BEGIN \
    PetscFunctionBegin;      \
    PetscLogEventBegin(bcastEvent, 0, 0, 0, 0);
#define PROF_PAR_BCAST_END                    \
    PetscLogEventEnd(bcastEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_SCAN_BEGIN \
    PetscFunctionBegin;     \
    PetscLogEventBegin(scanEvent, 0, 0, 0, 0);
#define PROF_PAR_SCAN_END                    \
    PetscLogEventEnd(scanEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_GATHER_BEGIN \
    PetscFunctionBegin;       \
    PetscLogEventBegin(gatherEvent, 0, 0, 0, 0);
#define PROF_PAR_GATHER_END                    \
    PetscLogEventEnd(gatherEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_REDUCE_BEGIN \
    PetscFunctionBegin;       \
    PetscLogEventBegin(reduceEvent, 0, 0, 0, 0);
#define PROF_PAR_REDUCE_END                    \
    PetscLogEventEnd(reduceEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALLREDUCE_BEGIN \
    PetscFunctionBegin;          \
    PetscLogEventBegin(allReduceEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLREDUCE_END                    \
    PetscLogEventEnd(allReduceEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALL_BEGIN \
    PetscFunctionBegin;        \
    PetscLogEventBegin(all2AllEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALL_END                    \
    PetscLogEventEnd(all2AllEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALLGATHER_BEGIN \
    PetscFunctionBegin;          \
    PetscLogEventBegin(allGatherEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLGATHER_END                    \
    PetscLogEventEnd(allGatherEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALLGATHERV_BEGIN \
    PetscFunctionBegin;           \
    PetscLogEventBegin(allGathervEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLGATHERV_END                    \
    PetscLogEventEnd(allGathervEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALLV_SPARSE_BEGIN \
    PetscFunctionBegin;                \
    PetscLogEventBegin(all2AllvSparseEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALLV_SPARSE_END                   \
    PetscLogEventEnd(all2AllvSparseEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALLV_DENSE_BEGIN \
    PetscFunctionBegin;               \
    PetscLogEventBegin(all2AllvDenseEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALLV_DENSE_END                   \
    PetscLogEventEnd(all2AllvDenseEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_SORT_BEGIN \
    PetscFunctionBegin; \
    PetscLogEventBegin(sortEvent, 0, 0, 0, 0);
#define PROF_SORT_END                        \
    PetscLogEventEnd(sortEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_REMDUP_BEGIN \
    PetscFunctionBegin;   \
    PetscLogEventBegin(remdupEvent, 0, 0, 0, 0);
#define PROF_REMDUP_END                        \
    PetscLogEventEnd(remdupEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#define PROF_PARTW_BEGIN \
    PetscFunctionBegin;  \
    PetscLogEventBegin(partwEvent, 0, 0, 0, 0);
#define PROF_PARTW_END                        \
    PetscLogEventEnd(partwEvent, 0, 0, 0, 0); \
    PetscFunctionReturn(0);

#else

#define PROF_A2AV_WAIT_BEGIN
#define PROF_SPLIT_COMM_2WAY_BEGIN
#define PROF_SPLIT_COMM_BEGIN
#define PROF_SEARCH_BEGIN
#define PROF_PAR_SCATTER_BEGIN
#define PROF_PAR_SENDRECV_BEGIN
#define PROF_PAR_BCAST_BEGIN
#define PROF_PAR_GATHER_BEGIN
#define PROF_PAR_SCAN_BEGIN
#define PROF_PAR_REDUCE_BEGIN
#define PROF_PAR_ALLREDUCE_BEGIN
#define PROF_PAR_ALL2ALL_BEGIN
#define PROF_PAR_ALLGATHERV_BEGIN
#define PROF_PAR_ALLGATHER_BEGIN
#define PROF_PAR_ALL2ALLV_SPARSE_BEGIN
#define PROF_PAR_ALL2ALLV_DENSE_BEGIN
#define PROF_PAR_CONCAT_BEGIN
#define PROF_SORT_BEGIN
#define PROF_REMDUP_BEGIN
#define PROF_PARTW_BEGIN

#define PROF_A2AV_WAIT_END
#define PROF_SPLIT_COMM_2WAY_END return 1;
#define PROF_SPLIT_COMM_END return 1;
#define PROF_SEARCH_END return 1;
#define PROF_PAR_SCATTER_END return 1;
#define PROF_PAR_SENDRECV_END return 1;
#define PROF_PAR_BCAST_END return 1;
#define PROF_PAR_GATHER_END return 1;
#define PROF_PAR_SCAN_END return 1;
#define PROF_PAR_REDUCE_END return 1;
#define PROF_PAR_ALLREDUCE_END return 1;
#define PROF_PAR_ALL2ALL_END return 1;
#define PROF_PAR_ALLGATHERV_END return 1;
#define PROF_PAR_ALLGATHER_END return 1;
#define PROF_PAR_ALL2ALLV_SPARSE_END return 1;
#define PROF_PAR_ALL2ALLV_DENSE_END return 1;
#define PROF_PAR_CONCAT_END return 1;
#define PROF_SORT_END return 1;
#define PROF_REMDUP_END return 1;
#define PROF_PARTW_END return 1;

#endif
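/*
  Usage sketch (illustrative addition, not part of the original header): the
  BEGIN/END macro pairs above are meant to bracket the bodies of the par::
  routines. With PETSC_USE_LOG they expand to PetscFunctionBegin /
  PetscLogEventBegin and PetscLogEventEnd / PetscFunctionReturn(0); without
  it the BEGIN macros are empty and the END macros simply return 1. A
  hypothetical wrapper (the function name is an assumption for the example):

    template <typename T>
    int mySortWrapper(std::vector<T> &in, MPI_Comm comm) {
        PROF_SORT_BEGIN
        // ... sorting work goes here ...
        PROF_SORT_END
    }

  Note that the END macros already contain the return statement, so no
  explicit return follows them.
*/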

namespace par
{

template <typename T>
int Mpi_Isend(T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request);

template <typename T>
int Mpi_Issend(T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request);

template <typename T>
int Mpi_Recv(T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Status *status);

template <typename T>
int Mpi_Irecv(T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Request *request);

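/*
  Usage sketch (illustrative addition, not part of the original header). The
  wrappers above mirror their MPI counterparts but take no explicit
  MPI_Datatype; the type is presumably deduced from the template parameter
  via par::Mpi_datatype. A ring exchange of doubles might look like the
  following (variable names are assumptions made for this example):

    int rank, npes;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &npes);

    double sendVal = (double)rank, recvVal = 0.0;
    MPI_Request sendReq, recvReq;
    MPI_Status status;

    par::Mpi_Irecv(&recvVal, 1, (rank + npes - 1) % npes, 0, comm, &recvReq);
    par::Mpi_Isend(&sendVal, 1, (rank + 1) % npes, 0, comm, &sendReq);
    MPI_Wait(&recvReq, &status);
    MPI_Wait(&sendReq, &status);
*/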
template <typename T>
int Mpi_Gather(T *sendBuffer, T *recvBuffer, int count, int root, MPI_Comm comm);

template <typename T, typename S>
int Mpi_Sendrecv(T *sendBuf, int sendCount, int dest, int sendTag,
                 S *recvBuf, int recvCount, int source, int recvTag,
                 MPI_Comm comm, MPI_Status *status);

template <typename T>
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm);

template <typename T>
int Mpi_Scan(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm);

template <typename T>
int Mpi_Reduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm);

template <typename T>
int Mpi_Allreduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm);

template <typename T>
int Mpi_Alltoall(T *sendbuf, T *recvbuf, int count, MPI_Comm comm);

template <typename T>
int Mpi_Allgatherv(T *sendbuf, int sendcount, T *recvbuf,
                   int *recvcounts, int *displs, MPI_Comm comm);

template <typename T>
int Mpi_Allgather(T *sendbuf, T *recvbuf, int count, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_sparse(T *sendbuf, int *sendcnts, int *sdispls,
                         T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_dense(T *sendbuf, int *sendcnts, int *sdispls,
                        T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_Kway(T *sbuff_, int *s_cnt_, int *sdisp_,
                       T *rbuff_, int *r_cnt_, int *rdisp_, MPI_Comm c);

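/*
  Usage sketch (illustrative addition, not part of the original header). The
  Mpi_Alltoallv_* wrappers follow the MPI_Alltoallv convention: per-rank send
  and receive counts plus displacements into contiguous buffers. Receive
  counts are typically obtained with an all-to-all of the send counts first.
  Variable names below are assumptions made for this example:

    int npes;
    MPI_Comm_size(comm, &npes);

    std::vector<int> sendcnts(npes, 0), recvcnts(npes, 0);
    std::vector<int> sdispls(npes, 0), rdispls(npes, 0);
    // ... fill sendcnts[i] with the number of values destined for rank i ...

    par::Mpi_Alltoall(sendcnts.data(), recvcnts.data(), 1, comm);

    for (int i = 1; i < npes; ++i) {
        sdispls[i] = sdispls[i - 1] + sendcnts[i - 1];
        rdispls[i] = rdispls[i - 1] + recvcnts[i - 1];
    }

    std::vector<double> sendbuf(sdispls[npes - 1] + sendcnts[npes - 1]);
    std::vector<double> recvbuf(rdispls[npes - 1] + recvcnts[npes - 1]);
    // ... pack sendbuf in destination order ...

    par::Mpi_Alltoallv_dense(sendbuf.data(), sendcnts.data(), sdispls.data(),
                             recvbuf.data(), recvcnts.data(), rdispls.data(), comm);

  Mpi_Alltoallv_sparse appears geared toward exchanges where most rank pairs
  communicate nothing, and Mpi_Alltoallv_Kway appears to stage the exchange
  in k-way rounds (see the KWAY define above); all three share this calling
  convention.
*/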
// Re-distributes an STL vector, preserving the relative ordering of the elements.
template <typename T>
int scatterValues(std::vector<T> &in, std::vector<T> &out,
                  DendroIntL outSz, MPI_Comm comm);

// A parallel search function.
template <typename T>
int maxLowerBound(const std::vector<T> &keys, const std::vector<T> &searchList,
                  std::vector<T> &results, MPI_Comm comm);

template <typename T>
unsigned int defaultWeight(const T *a);

// A parallel weighted partitioning function.
template <typename T>
int partitionW(std::vector<T> &vec,
               unsigned int (*getWeight)(const T *), MPI_Comm comm);

// A parallel concatenation function: listB is appended (globally) to listA.
template <typename T>
int concatenate(std::vector<T> &listA,
                std::vector<T> &listB, MPI_Comm comm);

// A parallel sample sort implementation.
template <typename T>
int sampleSort(std::vector<T> &in, std::vector<T> &out, std::vector<double> &stats, MPI_Comm comm);

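/*
  Usage sketch (illustrative addition, not part of the original header):
  sorting a distributed vector with sampleSort. Each process contributes its
  local chunk of `in`; on return `out` holds that process's portion of the
  globally sorted sequence. The contents of `stats` are assumed here to be
  implementation-defined statistics; see parUtils.tcc for the authoritative
  semantics.

    std::vector<double> in(1000);
    // ... fill `in` with this process's local, unsorted values ...
    std::vector<double> out;
    std::vector<double> stats;
    par::sampleSort(in, out, stats, MPI_COMM_WORLD);
*/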
/*template <typename T>
  int removeDuplicates(std::vector<T> &nodes, bool isSorted, MPI_Comm comm);*/

int splitComm2way(bool iAmEmpty, MPI_Comm *new_comm, MPI_Comm orig_comm);

// Splits a communication group into two depending on the values in isEmptyList.
int splitComm2way(const bool *isEmptyList, MPI_Comm *new_comm, MPI_Comm orig_comm);

/*
  @author Rahul Sampath
  @brief Splits a communication group into two: processors with ranks less
  than splittingRank form one group and the remaining processors form the
  second group. Both groups are sorted in ascending order of their ranks in
  the old comm.
  @param splittingRank The rank used for splitting the communicator
  @param orig_comm The comm group that needs to be split.
  @param new_comm The new comm group.
 */
int splitCommUsingSplittingRank(int splittingRank, MPI_Comm *new_comm, MPI_Comm orig_comm);

// Splits a communication group into two, the first having a power-of-2 number of processors.
unsigned int splitCommBinary(MPI_Comm orig_comm, MPI_Comm *new_comm);

// Splits a communication group into two, the first having a power-of-2 number of processors.
unsigned int splitCommBinaryNoFlip(MPI_Comm orig_comm, MPI_Comm *new_comm);

// Merges lists A and B, retaining either the low or the high elements in list A.
template <typename T>
void MergeLists(std::vector<T> &listA, std::vector<T> &listB, int KEEP_WHAT);

// The main operation in the parallel bitonic sort algorithm: the compare-split operation.
template <typename T>
void MergeSplit(std::vector<T> &local_list, int which_keys, int partner, MPI_Comm comm);

template <typename T>
void Par_bitonic_sort_incr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

template <typename T>
void Par_bitonic_sort_decr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

template <typename T>
void Par_bitonic_merge_incr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

// Parallel bitonic sort that expects the number of processors to be a power of 2.
template <typename T>
void bitonicSort_binary(std::vector<T> &in, MPI_Comm comm);

// Parallel bitonic sort that does not expect the number of processors to be a power of 2.
template <typename T>
void bitonicSort(std::vector<T> &in, MPI_Comm comm);

template <typename T>
void parallel_rank(const T *in, unsigned int sz, DendroIntL *out, MPI_Comm comm);

} // namespace par
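/*
  Usage sketch (illustrative addition, not part of the original header).
  bitonicSort sorts the distributed vector in place across the communicator;
  bitonicSort_binary is the variant that expects a power-of-2 process count.
  parallel_rank appears, from its signature, to write the global rank of
  each of the sz local entries of `in` into `out`; that reading is an
  assumption here, so consult parUtils.tcc for the authoritative semantics.

    std::vector<double> vals(100);
    // ... fill `vals` with this process's local values ...
    par::bitonicSort(vals, MPI_COMM_WORLD);

    std::vector<DendroIntL> gRank(vals.size());
    par::parallel_rank(vals.data(), (unsigned int)vals.size(), gRank.data(),
                       MPI_COMM_WORLD);
*/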

#ifdef USE_OLD_SORT
#include "parUtils_old.tcc"
#else
#include "parUtils.tcc"
#endif

#endif