Dendro  5.01
Dendro in Greek language means tree. The Dendro library is a large scale (262K cores on ORNL's Titan) distributed memory adaptive octree framework. The main goal of Dendro is to perform large scale multiphysics simulations efficiently on modern supercomputers. Dendro consists of efficient parallel data structures and algorithms to perform variational (finite element) methods and finite difference methods on 2:1 balanced arbitrary adaptive octrees, which enables users to perform simulations ranging from black holes (binary black hole mergers) to blood flow in the human body, with applications ranging from relativity and astrophysics to biomedical engineering.
block_cu.h
1 //
2 // Created by milinda on 8/22/18.
3 //
10 #ifndef DENDRO_5_0_BLOCK_CU_H
11 #define DENDRO_5_0_BLOCK_CU_H
12 
13 #include "dendro.h"
14 
15 #ifdef __CUDACC__
16 #define CUDA_CALLABLE_MEMBER __host__ __device__
17 #else
18 #define CUDA_CALLABLE_MEMBER
19 #endif
20 
21 
22 namespace cuda
23 {
24 
25  class _Block
26  {
27 
28  private:
29 
31  double m_uiPtMin[3];
32 
34  double m_uiPtMax[3];
35 
37  unsigned int m_uiOffset;
38 
40  unsigned int m_uiBFlag;
41 
43  unsigned int m_uiSz[3];
44 
46  unsigned int m_uiAlignSz[3];
47 
49  double m_uiDx[3];
50 
51 
52  public:
53 
55  CUDA_CALLABLE_MEMBER _Block()
56  {
57 
58  m_uiPtMin[0]=0.0;
59  m_uiPtMin[1]=0.0;
60  m_uiPtMin[2]=0.0;
61 
62 
63  m_uiPtMax[0]=0.0;
64  m_uiPtMax[1]=0.0;
65  m_uiPtMax[2]=0.0;
66 
67  m_uiOffset=0;
68 
69  m_uiBFlag=0;
70 
71  m_uiSz[0]=0;
72  m_uiSz[1]=0;
73  m_uiSz[2]=0;
74 
75  m_uiDx[0]=0.0;
76  m_uiDx[1]=0.0;
77  m_uiDx[2]=0.0;
78 
79  m_uiAlignSz[0]=0;
80  m_uiAlignSz[1]=0;
81  m_uiAlignSz[2]=0;
82 
83 
84 
85 
86  }
87 
89  CUDA_CALLABLE_MEMBER _Block(const double * p_ptmin, const double * p_ptmax, unsigned int p_offset, unsigned int p_bflag, const unsigned int * p_sz, const double * p_dx )
90  {
91 
92  m_uiPtMin[0]=p_ptmin[0];
93  m_uiPtMin[1]=p_ptmin[1];
94  m_uiPtMin[2]=p_ptmin[2];
95 
96 
97  m_uiPtMax[0]=p_ptmax[0];
98  m_uiPtMax[1]=p_ptmax[1];
99  m_uiPtMax[2]=p_ptmin[2];
100 
101  m_uiOffset=p_offset;
102 
103  m_uiBFlag=p_bflag;
104 
105  m_uiSz[0]=p_sz[0];
106  m_uiSz[1]=p_sz[1];
107  m_uiSz[2]=p_sz[2];
108 
109  m_uiDx[0]=p_dx[0];
110  m_uiDx[1]=p_dx[1];
111  m_uiDx[2]=p_dx[2];
112 
113  ((m_uiSz[0] & ((1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG)-1))==0)? m_uiAlignSz[0]=m_uiSz[0] : m_uiAlignSz[0]=((m_uiSz[0]/(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG))+1)*(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG);
114  m_uiAlignSz[1]=m_uiSz[1];
115  m_uiAlignSz[2]=m_uiSz[2];
116  //((m_uiSz[1] & ((1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG)-1))==0)? m_uiAlignSz[1]=m_uiSz[1] : m_uiAlignSz[1]=((m_uiSz[1]/(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG))+1)*(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG);
117  //((m_uiSz[2] & ((1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG)-1))==0)? m_uiAlignSz[2]=m_uiSz[2] : m_uiAlignSz[2]=((m_uiSz[2]/(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG))+1)*(1u<<DENDRO_BLOCK_ALIGN_FACTOR_LOG);
118 
119 
120 
121  }
122 
124  CUDA_CALLABLE_MEMBER const double * getPtMin() const
125  {
126  return m_uiPtMin;
127  }
128 
129 
131  CUDA_CALLABLE_MEMBER const double * getPtMax() const
132  {
133  return m_uiPtMax;
134  }
135 
137  CUDA_CALLABLE_MEMBER unsigned int getOffset() const
138  {
139  return m_uiOffset;
140  }
141 
142 
144  CUDA_CALLABLE_MEMBER unsigned int getBFlag() const
145  {
146  return m_uiBFlag;
147  }
148 
150  CUDA_CALLABLE_MEMBER const unsigned int * getSz() const
151  {
152  return m_uiSz;
153  }
154 
155 
157  CUDA_CALLABLE_MEMBER const unsigned int * getAlignedSz() const
158  {
159  return m_uiAlignSz;
160  }
161 
162 
164  CUDA_CALLABLE_MEMBER const unsigned int getAlignedBlockSz() const
165  {
166  return m_uiAlignSz[0]*m_uiAlignSz[1]*m_uiAlignSz[2];
167  }
168 
169 
172  CUDA_CALLABLE_MEMBER const double * getDx() const
173  {
174  return m_uiDx;
175  }
176 
177 
178  };
179 
180 
181 } // end of namespace cuda
182 
183 
184 
185 
186 
187 
188 
189 
190 
191 
192 
193 
194 
195 
196 
197 
198 
199 
200 
201 
202 
203 
204 
205 #endif //DENDRO_5_0_BLOCK_CU_H
206 
207 
208 
CUDA_CALLABLE_MEMBER const double * getDx() const
Definition: block_cu.h:172
CUDA_CALLABLE_MEMBER _Block(const double *p_ptmin, const double *p_ptmax, unsigned int p_offset, unsigned int p_bflag, const unsigned int *p_sz, const double *p_dx)
Definition: block_cu.h:89
CUDA_CALLABLE_MEMBER _Block()
Definition: block_cu.h:55
Contains utility function for the host related to GPUs.
Definition: block_cu.h:22
CUDA_CALLABLE_MEMBER const unsigned int * getSz() const
Definition: block_cu.h:150
CUDA_CALLABLE_MEMBER unsigned int getOffset() const
Definition: block_cu.h:137
CUDA_CALLABLE_MEMBER const unsigned int * getAlignedSz() const
Definition: block_cu.h:157
Definition: block_cu.h:25
CUDA_CALLABLE_MEMBER const double * getPtMin() const
Definition: block_cu.h:124
CUDA_CALLABLE_MEMBER unsigned int getBFlag() const
Definition: block_cu.h:144
CUDA_CALLABLE_MEMBER const double * getPtMax() const
Definition: block_cu.h:131
CUDA_CALLABLE_MEMBER const unsigned int getAlignedBlockSz() const
Definition: block_cu.h:164