Dendro  5.01
Dendro in Greek language means tree. The Dendro library is a large scale (262K cores on ORNL's Titan) distributed memory adaptive octree framework. The main goal of Dendro is to perform large scale multiphysics simulations efficeiently in mordern supercomputers. Dendro consists of efficient parallel data structures and algorithms to perform variational ( finite element) methods and finite difference mthods on 2:1 balanced arbitary adaptive octrees which enables the users to perform simulations raning from black holes (binary black hole mergers) to blood flow in human body, where applications ranging from relativity, astrophysics to biomedical engineering.
cg.h
1 //
2 // Created by milinda on 12/6/17.
9 //
10 
11 #ifndef SFCSORTBENCH_CG_H
12 #define SFCSORTBENCH_CG_H
13 
14 #include "mathUtils.h"
15 #include "mesh.h"
16 #include "operators.h"
17 
18 // Code taken from : http://math.nist.gov/iml++/
19 //*****************************************************************
20 // Iterative template routine -- CG
21 //
22 // CG solves the symmetric positive definite linear
23 // system Ax=b using the Conjugate Gradient method.
24 //
25 // CG follows the algorithm described on p. 15 in the
26 // SIAM Templates book.
27 //
28 // The return value indicates convergence within max_iter (input)
29 // iterations (0), or no convergence within max_iter iterations (1).
30 //
31 // Upon successful return, output arguments have the following values:
32 //
33 // x -- approximate solution to Ax = b
34 // max_iter -- the number of iterations performed before the
35 // tolerance was reached
36 // tol -- the residual after the final iteration
37 //
38 //
39 // Note: The above method is modified for simple, CG without preconditioning.
40 //
41 //*****************************************************************
42 namespace linalg
43 {
44 
45  template <typename Real >
46  int CG(ot::Mesh* mesh, Real *x,Real *b, int max_iter, Real& tol)
47  {
48 
49  Real resid,alpha,beta,rho,rho_1;
50  int status=1; // 0 indicates it has solved the system within the specified max_iter, 1 otherwise.
51 
52  if(mesh->isActive())
53  {
54 
55  int activeRank=mesh->getMPIRank();
56  int activeNpes=mesh->getMPICommSize();
57  MPI_Comm activeComm=mesh->getMPICommunicator();
58 
59  const unsigned int dof=mesh->getDegOfFreedom();
60  const unsigned int nodeLocalBegin=mesh->getNodeLocalBegin();
61  const unsigned int nodeLocalEnd=mesh->getNodeLocalEnd();
62  const unsigned int local_dof=nodeLocalEnd-nodeLocalBegin;
63 
64 
65  // need to release the memory before exit.
66  Real * p=new Real[dof];
67  Real * z=new Real[dof];
68  Real * q=new Real[dof];
69 
70  Real * Ax=new Real[dof];
71  Real * Ap=new Real[dof];
72  Real * r0=new Real[dof];
73  Real * r1=new Real[dof];
74 
75 
76  Real normb = normLInfty(b+nodeLocalBegin,local_dof,activeComm);
77  par::Mpi_Bcast(&normb,1,0,activeComm);
78  fem::operators::poisson::matvec(mesh,x,Ax);
79 
80  char fPrefix[256];
81  sprintf(fPrefix,"%s_%d","cg",0);
82  const char * varNames[]={"U"};
83  const double * var[]={Ax};
84  io::vtk::mesh2vtuFine(mesh,fPrefix,0,NULL,NULL,1,varNames,var);
85 
86  for(unsigned int i=nodeLocalBegin;i<nodeLocalEnd;i++)
87  {
88  r0[i]=b[i]-Ax[i];
89  p[i]=r0[i];
90  }
91 
92 
93  if (normb == 0.0)
94  normb = 1;
95 
96  Real normr=normLInfty(r0+nodeLocalBegin,local_dof,activeComm);
97  par::Mpi_Bcast(&normr,1,0,activeComm);
98  if(!activeRank) std::cout<<"initial residual : "<<(normr/normb)<<std::endl;
99 
100  if ((resid = normr / normb) <= tol) {
101  tol = resid;
102  max_iter = 0;
103 
104  delete [] p;
105  delete [] z;
106  delete [] q;
107  delete [] Ax;
108  delete [] Ap;
109  delete [] r0;
110  delete [] r1;
111 
112  status=0;
113  }
114 
115  if(status!=0)
116  {
117 
118  for(unsigned int i=1;i<=max_iter;i++)
119  {
120  fem::operators::poisson::matvec(mesh,p,Ap, false);
121 
122  alpha=(dot(r0+nodeLocalBegin,r0+nodeLocalBegin,local_dof,activeComm)/dot(p+nodeLocalBegin,Ap+nodeLocalBegin,local_dof,activeComm));
123  par::Mpi_Bcast(&alpha,1,0,activeComm);
124 
125  //if(!activeRank) std::cout<<"<r_0,r_0> : "<<dot(r0+nodeLocalBegin,r0+nodeLocalBegin,local_dof,activeComm)<<" <p,Ap>: "<<dot(p+nodeLocalBegin,Ap+nodeLocalBegin,local_dof,activeComm)<<" alpha "<<alpha<<std::endl;
126 
127  /*Real normAx=normL2(Ax+nodeLocalBegin,local_dof,activeComm);
128  Real normx=normL2(x+nodeLocalBegin,local_dof,activeComm);
129  if(!activeRank)std::cout<<"l2(Ax): "<<normAx<<" l2(x): "<<normx<<std::endl;*/
130 
131  //if(!activeRank) std::cout<<"rank: " <<activeRank<<" alpha: "<<alpha<<std::endl;
132  //std::cout<<"alpha: "<<alpha<<std::endl;
133 
134  for(unsigned int e=nodeLocalBegin;e<nodeLocalEnd;e++)
135  {
136  x[e]+=alpha*p[e];
137  r1[e]=r0[e]-alpha*Ap[e];
138  }
139 
140  normr=normLInfty(r1+nodeLocalBegin,local_dof,activeComm);
141  par::Mpi_Bcast(&normr,1,0,activeComm);
142 
143  if((!activeRank) && (i%10==0)) std::cout<<" iteration : "<<i<<" residual : "<<resid<<std::endl;
144 
145  if ((resid = normr / normb) <= tol) {
146 
147  if((!activeRank)) std::cout<<" iteration : "<<i<<" residual : "<<resid<<std::endl;
148  tol = resid;
149  max_iter = i;
150 
151  delete [] p;
152  delete [] z;
153  delete [] q;
154  delete [] Ax;
155  delete [] Ap;
156  delete [] r0;
157  delete [] r1;
158 
159  status=0;
160  break;
161  }
162 
163  beta=(dot(r1+nodeLocalBegin,r1+nodeLocalBegin,local_dof,activeComm)/dot(r0+nodeLocalBegin,r0+nodeLocalBegin,local_dof,activeComm));
164  par::Mpi_Bcast(&beta,1,0,activeComm);
165 
166  //if(!activeRank) std::cout<<"<r_1,r_1> : "<<dot(r1+nodeLocalBegin,r1+nodeLocalBegin,local_dof,activeComm)<<" <r_0,r_0>: "<<dot(r0+nodeLocalBegin,r0+nodeLocalBegin,local_dof,activeComm)<<" beta "<<beta<<std::endl;
167 
168 
169 
170  for(unsigned int e=nodeLocalBegin;e<nodeLocalEnd;e++)
171  {
172  p[e]=r1[e]+beta*p[e];
173  r0[e]=r1[e];
174  }
175 
176  fem::operators::poisson::applyDirichletBoundary(mesh,p,0.0);
177  fem::operators::poisson::applyDirichletBoundary(mesh,r0,0.0);
178 
179 
180  char fPrefix[256];
181  sprintf(fPrefix,"%s_%d","cg",i);
182  const char * varNames[]={"U"};
183  const double * var[]={x};
184  io::vtk::mesh2vtuFine(mesh,fPrefix,0,NULL,NULL,1,varNames,var);
185 
186 
187  }
188 
189  if(status!=0)
190  {
191  tol = resid;
192  delete [] p;
193  delete [] z;
194  delete [] q;
195  delete [] Ax;
196  delete [] r0;
197  delete [] r1;
198  status=1;
199 
200  }
201 
202 
203 
204  }
205 
206 
207  }
208 
209  // bcast act as a barrier for active and inactive meshes.
210  par::Mpi_Bcast(&tol,1,0,mesh->getMPIGlobalCommunicator());
211  return status;
212 
213  }
214 
215 
216 }
217 
218 
219 
220 #endif //SFCSORTBENCH_CG_H
MPI_Comm getMPIGlobalCommunicator() const
returns the global communicator
Definition: mesh.h:1099
Definition: mesh.h:179
unsigned int getNodeLocalBegin() const
return the location of local node begin
Definition: mesh.h:1043
int Mpi_Bcast(T *buffer, int count, int root, MPI_Comm comm)
This is modified to perform matrix-free iterative methods for solving linear systems that generated f...
Definition: cg.h:42
unsigned int getDegOfFreedom() const
returns the dof for a partition
Definition: mesh.h:1052
unsigned int getMPIRank() const
returns the rank
Definition: mesh.h:1108
bool isActive() const
returns true if mesh is active
Definition: mesh.h:1173
unsigned int getNodeLocalEnd() const
return the location of local node end
Definition: mesh.h:1045
MPI_Comm getMPICommunicator() const
returns the communicator (acitve)
Definition: mesh.h:1096
unsigned int getMPICommSize() const
returns the comm size:
Definition: mesh.h:1111