#ifndef __PAR_UTILS_H_
#define __PAR_UTILS_H_

// Comparison operators of the _T helper (the data structure used to compute
// parallel rank, defined at parUtils.h:32); all of them compare the 'val' member.
inline bool operator< (const _T<T> &c1) const {
  return (this->val < c1.val);
}

inline bool operator== (const _T<T> &c1) const {
  return (this->val == c1.val);
}

inline bool operator<= (const _T<T> &c1) const {
  return (this->val <= c1.val);
}

inline bool operator> (const _T<T> &c1) const {
  return (this->val > c1.val);
}

inline bool operator>= (const _T<T> &c1) const {
  return (this->val >= c1.val);
}

// MPI datatype registration for _T<double>: the datatype is created and
// committed once on first use, then cached in a function-local static.
static MPI_Datatype value() {
  static bool first = true;
  static MPI_Datatype datatype;
  if (first) {
    first = false;
    MPI_Type_contiguous(sizeof(_T<double>), MPI_BYTE, &datatype);
    MPI_Type_commit(&datatype);
  }
  return datatype;
}

// The same registration for _T<float>.
static MPI_Datatype value() {
  static bool first = true;
  static MPI_Datatype datatype;
  if (first) {
    first = false;
    MPI_Type_contiguous(sizeof(_T<float>), MPI_BYTE, &datatype);
    MPI_Type_commit(&datatype);
  }
  return datatype;
}
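The two value() bodies above follow the pattern this header uses to register an MPI datatype for a C++ type exactly once: a contiguous byte type is created and committed on first use and cached in a function-local static. A minimal sketch of the same pattern for a hypothetical user struct (the names Particle and par::Mpi_datatype are illustrative assumptions, not copied from the header):

#include <mpi.h>

struct Particle {            // hypothetical user type
  double x, y, z;
  int id;
};

namespace par {
  template <typename T> class Mpi_datatype;   // primary template assumed to be declared by the library

  template <> class Mpi_datatype<Particle> {  // specialization written in the style shown above
    public:
      static MPI_Datatype value() {
        static bool first = true;
        static MPI_Datatype datatype;
        if (first) {
          first = false;
          // Treat the struct as an opaque block of bytes, as done for _T<double> and _T<float> above.
          MPI_Type_contiguous(sizeof(Particle), MPI_BYTE, &datatype);
          MPI_Type_commit(&datatype);
        }
        return datatype;
      }
  };
}

The committed handle can then be passed to any MPI call, e.g. MPI_Send(buf, n, par::Mpi_datatype<Particle>::value(), dest, tag, comm).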
#include "petscsys.h"

extern int sortEvent;
extern int concatEvent;
extern int remdupEvent;
extern int partwEvent;
extern int searchEvent;
extern int parScatterEvent;
extern int a2avWaitEvent;
extern int all2AllvSparseEvent;
extern int all2AllvDenseEvent;
extern int allGatherEvent;
extern int reduceEvent;
extern int sendRecvEvent;
extern int allReduceEvent;
extern int all2AllEvent;
extern int allGathervEvent;
extern int gatherEvent;
extern int scanEvent;
extern int bcastEvent;
extern int splitComm2wayEvent;
extern int splitCommEvent;
#define PROF_A2AV_WAIT_BEGIN PetscLogEventBegin(a2avWaitEvent, 0, 0, 0, 0);
#define PROF_A2AV_WAIT_END PetscLogEventEnd(a2avWaitEvent, 0, 0, 0, 0);

#define PROF_SPLIT_COMM_2WAY_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(splitComm2wayEvent, 0, 0, 0, 0);
#define PROF_SPLIT_COMM_2WAY_END \
  PetscLogEventEnd(splitComm2wayEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_SPLIT_COMM_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(splitCommEvent, 0, 0, 0, 0);
#define PROF_SPLIT_COMM_END \
  PetscLogEventEnd(splitCommEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_SEARCH_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(searchEvent, 0, 0, 0, 0);
#define PROF_SEARCH_END \
  PetscLogEventEnd(searchEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_CONCAT_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(concatEvent, 0, 0, 0, 0);
#define PROF_PAR_CONCAT_END \
  PetscLogEventEnd(concatEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_SCATTER_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(parScatterEvent, 0, 0, 0, 0);
#define PROF_PAR_SCATTER_END \
  PetscLogEventEnd(parScatterEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_SENDRECV_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(sendRecvEvent, 0, 0, 0, 0);
#define PROF_PAR_SENDRECV_END \
  PetscLogEventEnd(sendRecvEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_BCAST_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(bcastEvent, 0, 0, 0, 0);
#define PROF_PAR_BCAST_END \
  PetscLogEventEnd(bcastEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_SCAN_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(scanEvent, 0, 0, 0, 0);
#define PROF_PAR_SCAN_END \
  PetscLogEventEnd(scanEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_GATHER_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(gatherEvent, 0, 0, 0, 0);
#define PROF_PAR_GATHER_END \
  PetscLogEventEnd(gatherEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_REDUCE_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(reduceEvent, 0, 0, 0, 0);
#define PROF_PAR_REDUCE_END \
  PetscLogEventEnd(reduceEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALLREDUCE_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(allReduceEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLREDUCE_END \
  PetscLogEventEnd(allReduceEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALL_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(all2AllEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALL_END \
  PetscLogEventEnd(all2AllEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALLGATHER_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(allGatherEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLGATHER_END \
  PetscLogEventEnd(allGatherEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALLGATHERV_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(allGathervEvent, 0, 0, 0, 0);
#define PROF_PAR_ALLGATHERV_END \
  PetscLogEventEnd(allGathervEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALLV_SPARSE_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(all2AllvSparseEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALLV_SPARSE_END \
  PetscLogEventEnd(all2AllvSparseEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PAR_ALL2ALLV_DENSE_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(all2AllvDenseEvent, 0, 0, 0, 0);
#define PROF_PAR_ALL2ALLV_DENSE_END \
  PetscLogEventEnd(all2AllvDenseEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_SORT_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(sortEvent, 0, 0, 0, 0);
#define PROF_SORT_END \
  PetscLogEventEnd(sortEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_REMDUP_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(remdupEvent, 0, 0, 0, 0);
#define PROF_REMDUP_END \
  PetscLogEventEnd(remdupEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_PARTW_BEGIN \
  PetscFunctionBegin; \
  PetscLogEventBegin(partwEvent, 0, 0, 0, 0);
#define PROF_PARTW_END \
  PetscLogEventEnd(partwEvent, 0, 0, 0, 0); \
  PetscFunctionReturn(0);

#define PROF_A2AV_WAIT_BEGIN
#define PROF_SPLIT_COMM_2WAY_BEGIN
#define PROF_SPLIT_COMM_BEGIN
#define PROF_SEARCH_BEGIN
#define PROF_PAR_SCATTER_BEGIN
#define PROF_PAR_SENDRECV_BEGIN
#define PROF_PAR_BCAST_BEGIN
#define PROF_PAR_GATHER_BEGIN
#define PROF_PAR_SCAN_BEGIN
#define PROF_PAR_REDUCE_BEGIN
#define PROF_PAR_ALLREDUCE_BEGIN
#define PROF_PAR_ALL2ALL_BEGIN
#define PROF_PAR_ALLGATHERV_BEGIN
#define PROF_PAR_ALLGATHER_BEGIN
#define PROF_PAR_ALL2ALLV_SPARSE_BEGIN
#define PROF_PAR_ALL2ALLV_DENSE_BEGIN
#define PROF_PAR_CONCAT_BEGIN
#define PROF_SORT_BEGIN
#define PROF_REMDUP_BEGIN
#define PROF_PARTW_BEGIN

#define PROF_A2AV_WAIT_END
#define PROF_SPLIT_COMM_2WAY_END return 1;
#define PROF_SPLIT_COMM_END return 1;
#define PROF_SEARCH_END return 1;
#define PROF_PAR_SCATTER_END return 1;
#define PROF_PAR_SENDRECV_END return 1;
#define PROF_PAR_BCAST_END return 1;
#define PROF_PAR_GATHER_END return 1;
#define PROF_PAR_SCAN_END return 1;
#define PROF_PAR_REDUCE_END return 1;
#define PROF_PAR_ALLREDUCE_END return 1;
#define PROF_PAR_ALL2ALL_END return 1;
#define PROF_PAR_ALLGATHERV_END return 1;
#define PROF_PAR_ALLGATHER_END return 1;
#define PROF_PAR_ALL2ALLV_SPARSE_END return 1;
#define PROF_PAR_ALL2ALLV_DENSE_END return 1;
#define PROF_PAR_CONCAT_END return 1;
#define PROF_SORT_END return 1;
#define PROF_REMDUP_END return 1;
#define PROF_PARTW_END return 1;
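When PETSc logging is enabled, the BEGIN/END macros above are meant to bracket the body of the corresponding wrapper so that each operation shows up as a PETSc log event; the empty and return-1 variants let the same code compile without PETSc profiling. A hedged sketch of how a wrapper might use them (the body is illustrative only; par::Mpi_datatype is the datatype trait assumed earlier):

template <typename T>
int Mpi_Scan(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm) {
  PROF_PAR_SCAN_BEGIN   // PetscFunctionBegin + PetscLogEventBegin(scanEvent, ...) in a PETSc build, nothing otherwise

  MPI_Scan(sendbuf, recvbuf, count, par::Mpi_datatype<T>::value(), op, comm);

  PROF_PAR_SCAN_END     // PetscLogEventEnd(scanEvent, ...) + PetscFunctionReturn(0), or plain 'return 1;' in the stub build
}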
template <typename T>
int Mpi_Isend(T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request);

template <typename T>
int Mpi_Issend(T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request);

template <typename T>
int Mpi_Recv(T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Status *status);

template <typename T>
int Mpi_Irecv(T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Request *request);
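These are thin typed wrappers around the corresponding MPI point-to-point calls; the MPI datatype is derived from T instead of being passed explicitly. A small usage sketch, assuming the wrappers live in the par namespace like the rest of this header (exchange one int with a neighbouring rank):

#include <mpi.h>
#include "parUtils.h"

void exchangeWithNeighbor(int myVal, int &theirVal, int neighborRank, MPI_Comm comm) {
  MPI_Request sendReq, recvReq;
  MPI_Status  status;

  // Post the receive first, then the non-blocking send (tag 1 is arbitrary).
  par::Mpi_Irecv(&theirVal, 1, neighborRank, 1, comm, &recvReq);
  par::Mpi_Isend(&myVal,    1, neighborRank, 1, comm, &sendReq);

  // Wait for both transfers to finish before theirVal is used.
  MPI_Wait(&recvReq, &status);
  MPI_Wait(&sendReq, &status);
}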
template <typename T>
int Mpi_Gather(T *sendBuffer, T *recvBuffer, int count, int root, MPI_Comm comm);

template <typename T, typename S>
int Mpi_Sendrecv(T *sendBuf, int sendCount, int dest, int sendTag,
                 S *recvBuf, int recvCount, int source, int recvTag,
                 MPI_Comm comm, MPI_Status *status);

template <typename T>
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm);

template <typename T>
int Mpi_Scan(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm);

template <typename T>
int Mpi_Reduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm);

template <typename T>
int Mpi_Allreduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm);
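As with the point-to-point wrappers, these collectives only add type safety over the underlying MPI_Gather, MPI_Sendrecv, MPI_Bcast, MPI_Scan, MPI_Reduce and MPI_Allreduce calls. For example, a global element count could be computed like this (DendroIntL is the library's wide integer type used by scatterValues and parallel_rank further below; the par namespace is assumed):

#include "parUtils.h"

DendroIntL globalElementCount(DendroIntL localCount, MPI_Comm comm) {
  DendroIntL globalCount = 0;
  // MPI_SUM over all ranks; every rank receives the total.
  par::Mpi_Allreduce(&localCount, &globalCount, 1, MPI_SUM, comm);
  return globalCount;
}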
template <typename T>
int Mpi_Alltoall(T *sendbuf, T *recvbuf, int count, MPI_Comm comm);

template <typename T>
int Mpi_Allgatherv(T *sendbuf, int sendcount, T *recvbuf,
                   int *recvcounts, int *displs, MPI_Comm comm);

template <typename T>
int Mpi_Allgather(T *sendbuf, T *recvbuf, int count, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_sparse(T *sendbuf, int *sendcnts, int *sdispls,
                         T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_dense(T *sendbuf, int *sendcnts, int *sdispls,
                        T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm);

template <typename T>
int Mpi_Alltoallv_Kway(T *sbuff_, int *s_cnt_, int *sdisp_,
                       T *rbuff_, int *r_cnt_, int *rdisp_, MPI_Comm c);
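The three Alltoallv variants share the MPI_Alltoallv argument convention: per-destination send counts and displacements, and per-source receive counts and displacements, measured in elements of T. A sketch that sets up such a pattern (the choice of the _dense variant here is arbitrary; the same call shape applies to _sparse and _Kway):

#include <mpi.h>
#include <vector>
#include "parUtils.h"

// Every rank sends (r + 1) copies of its own rank id to rank r.
void demoAlltoallv(MPI_Comm comm) {
  int npes, rank;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &rank);

  std::vector<int> sendcnts(npes), sdispls(npes), recvcnts(npes), rdispls(npes);
  for (int r = 0; r < npes; ++r) sendcnts[r] = r + 1;      // how much this rank sends to rank r
  for (int r = 0; r < npes; ++r) recvcnts[r] = rank + 1;   // how much arrives from each source rank
  sdispls[0] = rdispls[0] = 0;
  for (int r = 1; r < npes; ++r) {
    sdispls[r] = sdispls[r - 1] + sendcnts[r - 1];
    rdispls[r] = rdispls[r - 1] + recvcnts[r - 1];
  }

  std::vector<int> sendbuf(sdispls[npes - 1] + sendcnts[npes - 1], rank);
  std::vector<int> recvbuf(rdispls[npes - 1] + recvcnts[npes - 1]);

  par::Mpi_Alltoallv_dense(&sendbuf[0], &sendcnts[0], &sdispls[0],
                           &recvbuf[0], &recvcnts[0], &rdispls[0], comm);
}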
template <typename T>
int scatterValues(std::vector<T> &in, std::vector<T> &out, DendroIntL outSz, MPI_Comm comm);
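scatterValues re-distributes a distributed std::vector so that this rank ends up with exactly outSz elements while the global ordering is preserved (see the description at the end of this page). A sketch that rebalances a distributed array into nearly equal shares; the even-split computation is the caller's choice, not part of the API:

#include <mpi.h>
#include <vector>
#include "parUtils.h"

void rebalanceEqually(std::vector<double> &localData, MPI_Comm comm) {
  int npes, rank;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &rank);

  // Total number of elements across all ranks.
  DendroIntL localSz = (DendroIntL)localData.size(), globalSz = 0;
  par::Mpi_Allreduce(&localSz, &globalSz, 1, MPI_SUM, comm);

  // Request an (almost) equal share on every rank.
  DendroIntL outSz = (globalSz / npes) + ((rank < (int)(globalSz % npes)) ? 1 : 0);

  std::vector<double> balanced;
  par::scatterValues(localData, balanced, outSz, comm);
  localData.swap(balanced);
}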
template <typename T>
int maxLowerBound(const std::vector<T> &keys, const std::vector<T> &searchList,
                  std::vector<T> &results, MPI_Comm comm);
template <typename T>
unsigned int defaultWeight(const T *a);
template <typename T>
int partitionW(std::vector<T> &vec, unsigned int (*getWeight)(const T *), MPI_Comm comm);
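partitionW repartitions a distributed std::vector so that the total user-defined weight per rank is roughly balanced; defaultWeight above suggests a uniform default, but the sketch below passes an explicit callback with the exact signature partitionW expects (the Cell type and its 'work' field are illustrative):

#include <vector>
#include "parUtils.h"

struct Cell { double value; unsigned int work; };   // hypothetical element type

// Weight callback matching unsigned int (*)(const T *).
unsigned int cellWeight(const Cell *c) { return c->work; }

void balanceByWork(std::vector<Cell> &cells, MPI_Comm comm) {
  // Redistributes 'cells' so that the summed 'work' per rank is approximately equal.
  par::partitionW<Cell>(cells, cellWeight, comm);
}

A user-defined type like Cell would also need an Mpi_datatype-style registration so it can be communicated, as sketched near the top of this page.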
template <typename T>
int concatenate(std::vector<T> &listA, std::vector<T> &listB, MPI_Comm comm);
template <typename T>
int sampleSort(std::vector<T> &in, std::vector<T> &out, std::vector<double> &stats, MPI_Comm comm);
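sampleSort globally sorts a distributed std::vector: out presumably receives this rank's portion of the globally sorted sequence, and stats is an output vector of run statistics whose exact contents are not documented in this excerpt. A minimal sketch:

#include <cstdlib>
#include <vector>
#include "parUtils.h"

void sortDistributed(MPI_Comm comm) {
  // Arbitrary local keys.
  std::vector<long> in(1000);
  for (size_t i = 0; i < in.size(); ++i) in[i] = std::rand();

  std::vector<long>   out;     // this rank's part of the globally sorted data
  std::vector<double> stats;   // filled by the implementation
  par::sampleSort(in, out, stats, comm);
}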
int splitComm2way(bool iAmEmpty, MPI_Comm *new_comm, MPI_Comm orig_comm);

int splitComm2way(const bool *isEmptyList, MPI_Comm *new_comm, MPI_Comm orig_comm);

int splitCommUsingSplittingRank(int splittingRank, MPI_Comm *new_comm, MPI_Comm orig_comm);
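splitComm2way partitions the ranks of orig_comm into two sub-communicators, either from a local flag or from a precomputed per-rank list, and splitCommUsingSplittingRank splits at a given rank boundary. A sketch that separates ranks that currently own no data from those that do (par namespace assumed):

#include <mpi.h>
#include <vector>
#include "parUtils.h"

void dropEmptyRanks(const std::vector<double> &localData, MPI_Comm comm) {
  bool iAmEmpty = localData.empty();

  MPI_Comm newComm;
  // Ranks with data end up in one sub-communicator, empty ranks in the other.
  par::splitComm2way(iAmEmpty, &newComm, comm);

  if (!iAmEmpty) {
    // ... continue the computation on newComm using only the non-empty ranks ...
  }
}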
template <typename T>
void MergeLists(std::vector<T> &listA, std::vector<T> &listB, int KEEP_WHAT);
template <typename T>
void MergeSplit(std::vector<T> &local_list, int which_keys, int partner, MPI_Comm comm);
template <typename T>
void Par_bitonic_sort_incr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

template <typename T>
void Par_bitonic_sort_decr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

template <typename T>
void Par_bitonic_merge_incr(std::vector<T> &local_list, int proc_set_size, MPI_Comm comm);

template <typename T>
void bitonicSort_binary(std::vector<T> &in, MPI_Comm comm);
template <typename T>
void bitonicSort(std::vector<T> &in, MPI_Comm comm);
template <typename T>
void parallel_rank(const T *in, unsigned int sz, DendroIntL *out, MPI_Comm comm);
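parallel_rank computes, for each local element, a rank in the globally sorted order across the communicator (the _T helper near the top of this page is described as a data structure for computing parallel rank). A sketch, assuming out[i] receives the global rank of in[i]:

#include <cstddef>
#include <vector>
#include "parUtils.h"

void globalRanks(const std::vector<double> &values, std::vector<DendroIntL> &ranks, MPI_Comm comm) {
  ranks.resize(values.size());
  const double *in  = values.empty() ? NULL : &values[0];
  DendroIntL   *out = ranks.empty()  ? NULL : &ranks[0];
  // Presumably collective: every rank in comm should take part, even with sz == 0.
  par::parallel_rank(in, (unsigned int)values.size(), out, comm);
}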
#include "parUtils_old.tcc"
#include "parUtils.tcc"

Referenced functions and types:

int Mpi_Alltoallv_sparse(T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm)
int Mpi_Allgatherv(T *sendbuf, int sendcount, T *recvbuf, int *recvcounts, int *displs, MPI_Comm comm)
int Mpi_Reduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm)
void MergeLists(std::vector< T > &listA, std::vector< T > &listB, int KEEP_WHAT)
Merges lists A and B, retaining either the low or the high elements in list A.
int splitComm2way(const bool *isEmptyList, MPI_Comm *new_comm, MPI_Comm orig_comm)
Splits a communication group into two depending on the values in isEmptyList. Both the groups are sor...
Definition: parUtils.cpp:243
void Par_bitonic_merge_incr(std::vector< T > &local_list, int proc_set_size, MPI_Comm comm)
int Mpi_Alltoall(T *sendbuf, T *recvbuf, int count, MPI_Comm comm)
int maxLowerBound(const std::vector< T > &keys, const std::vector< T > &searchList, std::vector< T > &results, MPI_Comm comm)
A parallel search function.
void Par_bitonic_sort_incr(std::vector< T > &local_list, int proc_set_size, MPI_Comm comm)
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm)
int Mpi_Allgather(T *sendbuf, T *recvbuf, int count, MPI_Comm comm)
void MergeSplit(std::vector< T > &local_list, int which_keys, int partner, MPI_Comm comm)
The main operation in the parallel bitonic sort algorithm. This implements the compare-split operatio...
void bitonicSort_binary(std::vector< T > &in, MPI_Comm comm)
An implementation of parallel bitonic sort that expects the number of processors to be a power of 2...
unsigned int splitCommBinary(MPI_Comm orig_comm, MPI_Comm *new_comm)
Splits a communication group into two, the first having a power of 2 number of processors and the oth...
Definition: parUtils.cpp:21
int partitionW(std::vector< T > &vec, unsigned int(*getWeight)(const T *), MPI_Comm comm)
A parallel weighted partitioning function. In our implementation, we do not pose any restriction on t...
An abstract class used for communicating messages using user-defined datatypes. The user must impleme...
Definition: zoltan_hilbert.h:76
int concatenate(std::vector< T > &listA, std::vector< T > &listB, MPI_Comm comm)
A parallel concatenation function. listB is appended (globally) to listA and the result is stored in ...
int Mpi_Allreduce(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm)
void bitonicSort(std::vector< T > &in, MPI_Comm comm)
An implementation of parallel bitonic sort that does not expect the number of processors to be a powe...
Collection of Generic Parallel Functions: Sorting, Partitioning, Searching,...
Definition: zoltan_hilbert.h:72
int sampleSort(std::vector< T > &in, std::vector< T > &out, std::vector< double > &stats, MPI_Comm comm)
A parallel sample sort implementation. In our implementation, we do not pose any restriction on the i...
int Mpi_Sendrecv(T *sendBuf, int sendCount, int dest, int sendTag, S *recvBuf, int recvCount, int source, int recvTag, MPI_Comm comm, MPI_Status *status)
unsigned int splitCommBinaryNoFlip(MPI_Comm orig_comm, MPI_Comm *new_comm)
Splits a communication group into two, the first having a power of 2 number of processors and the oth...
Definition: parUtils.cpp:73
_T: a data structure used to compute parallel rank.
Definition: parUtils.h:32
int Mpi_Scan(T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm)
void Par_bitonic_sort_decr(std::vector< T > &local_list, int proc_set_size, MPI_Comm comm)
int Mpi_Alltoallv_dense(T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm)
int Mpi_Gather(T *sendBuffer, T *recvBuffer, int count, int root, MPI_Comm comm)
int scatterValues(std::vector< T > &in, std::vector< T > &out, DendroIntL outSz, MPI_Comm comm)
Re-distributes an STL vector, preserving the relative ordering of the elements.