15 #ifndef SFCSORTBENCH_FEMMATVEC_H 16 #define SFCSORTBENCH_FEMMATVEC_H 18 #define NUM_VERTEX_BUCKETS 27 19 #define NUM_EDGE_BUCKETS 54 20 #define NUM_FACE_BUCKETS 36 21 #define NUM_INTERNAL_BUCKETS 8 22 #define NUM_TOTAL_BUCKETS 125 24 #define IDX(i,j,k) i+nx*(j+k*ny) 25 #define IDX2D(i,j) j*ny+i 33 #include "operators.h" 35 #define PT_EXTERNAL 0 // if the point is external to a given element 36 #define PT_INTERNAL 1 // if the point is internal but not a DG node. 37 #define PT_INTERNAL_DG_NODE 2 // if the point is internal and it is a DG node. 39 #define SFC_MVEC_TABLE_DEFAULT UCHAR_MAX 40 #define SFC_MVEC_TABLE_Rz 9 41 #define SFC_MVEC_TABLE_Ry 3 50 static const unsigned char SFC_MATVEC_BUCKET_TABLE[m_uiDim*m_uiDim*m_uiDim][NUM_CHILDREN]={
51 {0,255,255,255,255,255,255,255},
52 {0,1,255,255,255,255,255,255},
53 {1,255,255,255,255,255,255,255},
54 {0,2,255,255,255,255,255,255},
55 {0,1,2,3,255,255,255,255},
56 {1,3,255,255,255,255,255,255},
57 {2,255,255,255,255,255,255,255},
58 {2,3,255,255,255,255,255,255},
59 {3,255,255,255,255,255,255,255},
60 {0,4,255,255,255,255,255,255},
61 {0,1,4,5,255,255,255,255},
62 {1,5,255,255,255,255,255,255},
63 {0,2,4,6,255,255,255,255},
65 {1,3,5,7,255,255,255,255},
66 {2,6,255,255,255,255,255,255},
67 {2,3,6,7,255,255,255,255},
68 {3,7,255,255,255,255,255,255},
69 {4,255,255,255,255,255,255,255},
70 {4,5,255,255,255,255,255,255},
71 {5,255,255,255,255,255,255,255},
72 {4,6,255,255,255,255,255,255},
73 {4,5,6,7,255,255,255,255},
74 {5,7,255,255,255,255,255,255},
75 {6,255,255,255,255,255,255,255},
76 {6,7,255,255,255,255,255,255},
77 {7,255,255,255,255,255,255,255}
96 inline unsigned int computeIJK(
const T& pt,
const ot::TreeNode elem,
const unsigned int dx,
const unsigned int logDx,
unsigned int *ijk);
100 inline bool isInternal(
const T& pt,
const ot::TreeNode elem,
const unsigned int dx,
const unsigned int logDx);
108 template <
typename T,
typename da>
109 inline void matvec(
const T *pt_coords,
const da* in,
unsigned int pBegin,
unsigned int pEnd,
const unsigned int lev,
const unsigned int maxDepth,
const unsigned int order,
const ot::TreeNode& pOctant,
const RefElement * refEl,da * out,
unsigned int mVecType);
123 inline unsigned int fem::seq::computeIJK(
const T& pt,
const ot::TreeNode elem,
const unsigned int dx,
const unsigned int logDx,
unsigned int *ijk)
147 if(elem.
minX()<=pt.xint() && elem.minY()<=pt.yint() && elem.minZ()<=pt.zint() && pt.xint()<=elem.maxX() && pt.yint()<=elem.maxY() && pt.zint() <=elem.maxZ())
150 if(!((pt.xint()-elem.
minX())&((1u<<logDx)-1)) && !((pt.yint()-elem.minY())&((1u<<logDx)-1)) && !((pt.zint()-elem.minZ())&((1u<<(logDx))-1)) )
152 ijk[0]=(pt.xint()-elem.
minX())>>logDx;
153 ijk[1]=(pt.yint()-elem.minY())>>logDx;
154 ijk[2]=(pt.zint()-elem.minZ())>>logDx;
158 return PT_INTERNAL_DG_NODE;
175 inline bool fem::seq::isInternal(
const T& pt,
const ot::TreeNode elem,
const unsigned int dx,
const unsigned int logDx)
178 return (elem.
minX()<=pt.xint() && elem.minY()<=pt.yint() && elem.minZ()<=pt.zint() && pt.xint()<=elem.maxX() && pt.yint()<=elem.maxY() && pt.zint() <=elem.maxZ()) ?
true :
false;
183 template <
typename T,
typename da>
184 inline void fem::seq::matvec(
const T *pt_coords,
const da* in,
unsigned int pBegin,
unsigned int pEnd,
const unsigned int lev,
const unsigned int maxDepth,
const unsigned int order,
const ot::TreeNode& pOctant,
const RefElement * refEl,da * out,
unsigned int mVecType){
187 const unsigned int nPe=(order+1)*(order+1)*(order+1);
188 const unsigned int pMaxDepthBit=maxDepth-lev-1;
190 assert(((1u<<(maxDepth-lev)) % order)==0);
192 const unsigned int sz=(1u<<(maxDepth-pOctant.
getLevel()));
193 const unsigned int szb2=sz>>1u;
195 const unsigned int dx=(1u<<(maxDepth-lev))/order;
196 const unsigned int dxb2=(1u<<(maxDepth-lev-1))/order;
200 const unsigned int nx=order+1;
201 const unsigned int ny=order+1;
202 const unsigned int nz=order+1;
204 register unsigned int ijk[3];
206 const unsigned int pMin[]={pOctant.
minX(),pOctant.minY(),pOctant.minZ()};
207 const unsigned int pMax[]={pOctant.maxX(),pOctant.maxY(),pOctant.maxZ()};
208 const unsigned int pMid[]={(pMax[0]+pMin[0])>>1u,(pMax[1]+pMin[1])>>1u,(pMax[2]+pMin[2])>>1u};
210 unsigned int bCounts[NUM_CHILDREN];
211 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
216 for(
unsigned int p=pBegin;p<pEnd;p++)
224 register unsigned int pt_status;
228 childElem[3]=
ot::TreeNode(pOctant.
minX()+szb2,pOctant.minY()+szb2,pOctant.minZ(),pOctant.
getLevel()+1,m_uiDim,maxDepth);
231 childElem[5]=
ot::TreeNode(pOctant.
minX()+szb2,pOctant.minY(),pOctant.minZ()+szb2,pOctant.
getLevel()+1,m_uiDim,maxDepth);
232 childElem[6]=
ot::TreeNode(pOctant.
minX(),pOctant.minY()+szb2,pOctant.minZ()+szb2,pOctant.
getLevel()+1,m_uiDim,maxDepth);
233 childElem[7]=
ot::TreeNode(pOctant.
minX()+szb2,pOctant.minY()+szb2,pOctant.minZ()+szb2,pOctant.
getLevel()+1,m_uiDim,maxDepth);
238 #ifdef MATVEC_PROFILE 239 dendro::timer::sfcmatvec::t_pts_p1_count.start();
242 std::vector<unsigned int> duplicateCoordIndex;
244 for(
unsigned int p=pBegin;p<pEnd;p++)
251 if(pt_coords[p].zint()>pMid[2]) ijk[2]=2;
252 if(pt_coords[p].yint()>pMid[1]) ijk[1]=2;
253 if(pt_coords[p].xint()>pMid[0]) ijk[0]=2;
255 if(pt_coords[p].zint()<pMid[2]) ijk[2]=0;
256 if(pt_coords[p].yint()<pMid[1]) ijk[1]=0;
257 if(pt_coords[p].xint()<pMid[0]) ijk[0]=0;
259 if(pt_coords[p].zint()==pMid[2]) ijk[2]=1;
260 if(pt_coords[p].yint()==pMid[1]) ijk[1]=1;
261 if(pt_coords[p].xint()==pMid[0]) ijk[0]=1;
263 if(ijk[0]!=1 && ijk[1]!=1 && ijk[2]!=1)
266 cnum=(((ijk[2]>>1u)<<2u) | ((ijk[1]>>1u)<<1u) | ((ijk[0]>>1u)));
270 duplicateCoordIndex.push_back(p);
275 #ifdef MATVEC_PROFILE 276 dendro::timer::sfcmatvec::t_pts_p1_count.stop();
279 #ifdef MATVEC_PROFILE 280 dendro::timer::sfcmatvec::t_pts_p2_count.start();
284 for(
unsigned int index=0;index<duplicateCoordIndex.size();index++)
290 if(pt_coords[duplicateCoordIndex[index]].zint()>pMid[2]) ijk[2]=2;
291 if(pt_coords[duplicateCoordIndex[index]].yint()>pMid[1]) ijk[1]=2;
292 if(pt_coords[duplicateCoordIndex[index]].xint()>pMid[0]) ijk[0]=2;
294 if(pt_coords[duplicateCoordIndex[index]].zint()<pMid[2]) ijk[2]=0;
295 if(pt_coords[duplicateCoordIndex[index]].yint()<pMid[1]) ijk[1]=0;
296 if(pt_coords[duplicateCoordIndex[index]].xint()<pMid[0]) ijk[0]=0;
298 if(pt_coords[duplicateCoordIndex[index]].zint()==pMid[2]) ijk[2]=1;
299 if(pt_coords[duplicateCoordIndex[index]].yint()==pMid[1]) ijk[1]=1;
300 if(pt_coords[duplicateCoordIndex[index]].xint()==pMid[0]) ijk[0]=1;
302 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
305 cnum=SFC_MATVEC_BUCKET_TABLE[(ijk[2]*SFC_MVEC_TABLE_Rz+ijk[1]*SFC_MVEC_TABLE_Ry+ijk[0])][child];
317 #ifdef MATVEC_PROFILE 318 dendro::timer::sfcmatvec::t_pts_p2_count.stop();
324 #ifdef MATVEC_PROFILE 325 dendro::timer::sfcmatvec::t_malloc.start();
328 da** u_internal=
new da*[NUM_CHILDREN];
329 da** v_internal=
new da*[NUM_CHILDREN];
330 T** pt_internal=
new T*[NUM_CHILDREN];
333 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
335 u_internal[child]=NULL;
336 v_internal[child]=NULL;
337 pt_internal[child]=NULL;
339 if(bCounts[child]!=0)
348 #ifdef MATVEC_PROFILE 349 dendro::timer::sfcmatvec::t_malloc.stop();
352 DendroIntL counts[NUM_CHILDREN];
353 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
356 #ifdef MATVEC_PROFILE 357 dendro::timer::sfcmatvec::t_pts_p1_cpy.start();
361 for(
unsigned int p=pBegin;p<pEnd;p++)
368 if(pt_coords[p].zint()>pMid[2]) ijk[2]=2;
369 if(pt_coords[p].yint()>pMid[1]) ijk[1]=2;
370 if(pt_coords[p].xint()>pMid[0]) ijk[0]=2;
372 if(pt_coords[p].zint()<pMid[2]) ijk[2]=0;
373 if(pt_coords[p].yint()<pMid[1]) ijk[1]=0;
374 if(pt_coords[p].xint()<pMid[0]) ijk[0]=0;
376 if(pt_coords[p].zint()==pMid[2]) ijk[2]=1;
377 if(pt_coords[p].yint()==pMid[1]) ijk[1]=1;
378 if(pt_coords[p].xint()==pMid[0]) ijk[0]=1;
381 if(ijk[0]!=1 && ijk[1]!=1 && ijk[2]!=1)
383 cnum=(((ijk[2]>>1u)<<2u) | ((ijk[1]>>1u)<<1u) | ((ijk[0]>>1u)));
384 u_internal[cnum][counts[cnum]]=in[p];
385 pt_internal[cnum][counts[cnum]]=pt_coords[p];
391 #ifdef MATVEC_PROFILE 392 dendro::timer::sfcmatvec::t_pts_p1_cpy.stop();
395 #ifdef MATVEC_PROFILE 396 dendro::timer::sfcmatvec::t_pts_p2_cpy.start();
400 for(
unsigned int index=0;index<duplicateCoordIndex.size();index++) {
406 if (pt_coords[duplicateCoordIndex[index]].zint() > pMid[2]) ijk[2] = 2;
407 if (pt_coords[duplicateCoordIndex[index]].yint() > pMid[1]) ijk[1] = 2;
408 if (pt_coords[duplicateCoordIndex[index]].xint() > pMid[0]) ijk[0] = 2;
410 if (pt_coords[duplicateCoordIndex[index]].zint() < pMid[2]) ijk[2] = 0;
411 if (pt_coords[duplicateCoordIndex[index]].yint() < pMid[1]) ijk[1] = 0;
412 if (pt_coords[duplicateCoordIndex[index]].xint() < pMid[0]) ijk[0] = 0;
414 if (pt_coords[duplicateCoordIndex[index]].zint() == pMid[2]) ijk[2] = 1;
415 if (pt_coords[duplicateCoordIndex[index]].yint() == pMid[1]) ijk[1] = 1;
416 if (pt_coords[duplicateCoordIndex[index]].xint() == pMid[0]) ijk[0] = 1;
418 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
420 cnum=SFC_MATVEC_BUCKET_TABLE[(ijk[2]*SFC_MVEC_TABLE_Rz+ijk[1]*SFC_MVEC_TABLE_Ry+ijk[0])][child];
424 u_internal[cnum][counts[cnum]]=in[duplicateCoordIndex[index]];
425 pt_internal[cnum][counts[cnum]]=pt_coords[duplicateCoordIndex[index]];
431 #ifdef MATVEC_PROFILE 432 dendro::timer::sfcmatvec::t_pts_p2_cpy.stop();
436 #ifdef MATVEC_PROFILE 437 dendro::timer::sfcmatvec::t_malloc.start();
439 da * parentVal=
new da[nPe];
441 #ifdef MATVEC_PROFILE 442 dendro::timer::sfcmatvec::t_malloc.stop();
445 unsigned int pNodeCount=0;
446 bool computeParent=
false;
448 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
450 if(bCounts[child]<nPe)
460 #ifdef MATVEC_PROFILE 461 dendro::timer::sfcmatvec::t_parent_bucket.start();
470 for(
unsigned int p=pBegin;p<pEnd;p++)
472 if(fem::seq::computeIJK(pt_coords[p],pOctant,dx,logDx,ijk)==PT_INTERNAL_DG_NODE)
475 parentVal[IDX(ijk[0],ijk[1],ijk[2])]=in[p];
483 #ifdef MATVEC_PROFILE 484 dendro::timer::sfcmatvec::t_parent_bucket.stop();
489 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
496 if(bCounts[child]>nPe)
498 fem::seq::matvec(pt_internal[child],u_internal[child],0,bCounts[child],lev+1,maxDepth,order,childElem[child],refEl,v_internal[child],mVecType);
504 da* interp3D_out=NULL;
506 da* interp2D_in=NULL;
507 da* interp2D_out=NULL;
509 da* interp1D_in=NULL;
510 da* interp1D_out=NULL;
512 da* u_unzip=
new da[nPe];
513 da* v_unzip=
new da[nPe];
515 bool* unzipStatus=
new bool[nPe];
516 bool interpReq[OCT_DIR_TOTAL];
517 bool interpCpy[OCT_DIR_TOTAL];
519 for(
unsigned int i=0;i<nPe;i++)
520 unzipStatus[i]=
false;
522 for(
unsigned int i=0;i<OCT_DIR_TOTAL;i++)
529 #ifdef MATVEC_PROFILE 530 dendro::timer::sfcmatvec::t_pts_bucket.start();
533 for(
unsigned int p=0;p<bCounts[child];p++)
535 v_internal[child][p]=(da)0;
536 pt_status=fem::seq::computeIJK(pt_internal[child][p],childElem[child],dxb2,logDxb2,ijk);
537 if(pt_status==PT_INTERNAL_DG_NODE)
539 u_unzip[IDX(ijk[0],ijk[1],ijk[2])]=u_internal[child][p];
540 unzipStatus[IDX(ijk[0],ijk[1],ijk[2])]=
true;
546 #ifdef MATVEC_PROFILE 547 dendro::timer::sfcmatvec::t_pts_bucket.stop();
552 if(bCounts[child]!=nPe) {
555 #ifdef MATVEC_PROFILE 556 dendro::timer::sfcmatvec::t_p2cInterp.start();
559 interp3D_out=
new da[nPe];
560 interp2D_in=
new da[(order+1)*(order+1)];
561 interp2D_out=
new da[(order+1)*(order+1)];
563 interp1D_in=
new da[(order+1)];
564 interp1D_out=
new da[(order+1)];
570 const unsigned int intepCheckIndex=1;
573 if(!unzipStatus[IDX(0,0,intepCheckIndex)])
575 interpReq[OCT_DIR_LEFT_DOWN]=
true;
580 if(!unzipStatus[IDX(0,order,intepCheckIndex)])
582 interpReq[OCT_DIR_LEFT_UP]=
true;
587 if(!unzipStatus[IDX(0,intepCheckIndex,0)])
589 interpReq[OCT_DIR_LEFT_BACK]=
true;
594 if(!unzipStatus[IDX(0,intepCheckIndex,order)])
596 interpReq[OCT_DIR_LEFT_FRONT]=
true;
601 if(!unzipStatus[IDX(order,0,intepCheckIndex)])
603 interpReq[OCT_DIR_RIGHT_DOWN]=
true;
608 if(!unzipStatus[IDX(order,order,intepCheckIndex)])
610 interpReq[OCT_DIR_RIGHT_UP]=
true;
615 if(!unzipStatus[IDX(order,intepCheckIndex,0)])
617 interpReq[OCT_DIR_RIGHT_BACK]=
true;
622 if(!unzipStatus[IDX(order,intepCheckIndex,order)])
624 interpReq[OCT_DIR_RIGHT_FRONT]=
true;
629 if(!unzipStatus[IDX(intepCheckIndex,0,0)])
631 interpReq[OCT_DIR_DOWN_BACK]=
true;
636 if(!unzipStatus[IDX(intepCheckIndex,order,0)])
638 interpReq[OCT_DIR_UP_BACK]=
true;
643 if(!unzipStatus[IDX(intepCheckIndex,0,order)])
645 interpReq[OCT_DIR_DOWN_FRONT]=
true;
650 if(!unzipStatus[IDX(intepCheckIndex,order,order)])
652 interpReq[OCT_DIR_UP_FRONT]=
true;
658 if(!unzipStatus[IDX(0,intepCheckIndex,intepCheckIndex)])
660 interpReq[OCT_DIR_LEFT]=
true;
666 if(!unzipStatus[IDX(order,intepCheckIndex,intepCheckIndex)])
668 interpReq[OCT_DIR_RIGHT]=
true;
675 if(!unzipStatus[IDX(intepCheckIndex,0,intepCheckIndex)])
677 interpReq[OCT_DIR_DOWN]=
true;
683 if(!unzipStatus[IDX(intepCheckIndex,order,intepCheckIndex)])
685 interpReq[OCT_DIR_UP]=
true;
691 if(!unzipStatus[IDX(intepCheckIndex,intepCheckIndex,0)])
693 interpReq[OCT_DIR_BACK]=
true;
699 if(!unzipStatus[IDX(intepCheckIndex,intepCheckIndex,order)])
701 interpReq[OCT_DIR_FRONT]=
true;
707 assert(pNodeCount==nPe);
708 if(pNodeCount!=nPe) std::cout<<
"[Error]"<<__func__<<
" pNodeCount: "<<pNodeCount<<std::endl;
712 if(interpReq[OCT_DIR_LEFT])
718 for(
unsigned int k=0;k<(order+1);k++)
719 for(
unsigned int j=0;j<(order+1);j++)
720 interp2D_in[IDX2D(j,k)]=parentVal[IDX(0,j,k)];
725 for(
unsigned int k=0;k<(order+1);k++)
726 for(
unsigned int j=0;j<(order+1);j++)
727 u_unzip[IDX(0,j,k)]=interp2D_out[IDX2D(j,k)];
730 interpCpy[OCT_DIR_LEFT_DOWN]=
true;
731 interpCpy[OCT_DIR_LEFT_UP]=
true;
732 interpCpy[OCT_DIR_LEFT_BACK]=
true;
733 interpCpy[OCT_DIR_LEFT_FRONT]=
true;
738 if(interpReq[OCT_DIR_RIGHT])
744 for(
unsigned int k=0;k<(order+1);k++)
745 for(
unsigned int j=0;j<(order+1);j++)
746 interp2D_in[IDX2D(j,k)]=parentVal[IDX(order,j,k)];
750 for(
unsigned int k=0;k<(order+1);k++)
751 for(
unsigned int j=0;j<(order+1);j++)
752 u_unzip[IDX(order,j,k)]=interp2D_out[IDX2D(j,k)];
754 interpCpy[OCT_DIR_RIGHT_DOWN]=
true;
755 interpCpy[OCT_DIR_RIGHT_UP]=
true;
756 interpCpy[OCT_DIR_RIGHT_BACK]=
true;
757 interpCpy[OCT_DIR_RIGHT_FRONT]=
true;
762 if(interpReq[OCT_DIR_DOWN])
769 for(
unsigned int k=0;k<(order+1);k++)
770 for(
unsigned int i=0;i<(order+1);i++)
771 interp2D_in[IDX2D(i,k)]=parentVal[IDX(i,0,k)];
775 for(
unsigned int k=0;k<(order+1);k++)
776 for(
unsigned int i=0;i<(order+1);i++)
777 u_unzip[IDX(i,0,k)]=interp2D_out[IDX2D(i,k)];
779 interpCpy[OCT_DIR_RIGHT_DOWN]=
true;
780 interpCpy[OCT_DIR_LEFT_DOWN]=
true;
781 interpCpy[OCT_DIR_DOWN_BACK]=
true;
782 interpCpy[OCT_DIR_DOWN_FRONT]=
true;
787 if(interpReq[OCT_DIR_UP])
794 for(
unsigned int k=0;k<(order+1);k++)
795 for(
unsigned int i=0;i<(order+1);i++)
796 interp2D_in[IDX2D(i,k)]=parentVal[IDX(i,order,k)];
800 for(
unsigned int k=0;k<(order+1);k++)
801 for(
unsigned int i=0;i<(order+1);i++)
802 u_unzip[IDX(i,order,k)]=interp2D_out[IDX2D(i,k)];
805 interpCpy[OCT_DIR_RIGHT_UP]=
true;
806 interpCpy[OCT_DIR_LEFT_UP]=
true;
807 interpCpy[OCT_DIR_UP_BACK]=
true;
808 interpCpy[OCT_DIR_UP_FRONT]=
true;
813 if(interpReq[OCT_DIR_BACK])
819 for(
unsigned int j=0;j<(order+1);j++)
820 for(
unsigned int i=0;i<(order+1);i++)
821 interp2D_in[IDX2D(i,j)]=parentVal[IDX(i,j,0)];
826 for(
unsigned int j=0;j<(order+1);j++)
827 for(
unsigned int i=0;i<(order+1);i++)
828 u_unzip[IDX(i,j,0)]=interp2D_out[IDX2D(i,j)];
830 interpCpy[OCT_DIR_RIGHT_BACK]=
true;
831 interpCpy[OCT_DIR_LEFT_BACK]=
true;
832 interpCpy[OCT_DIR_UP_BACK]=
true;
833 interpCpy[OCT_DIR_DOWN_BACK]=
true;
838 if(interpReq[OCT_DIR_FRONT])
844 for(
unsigned int j=0;j<(order+1);j++)
845 for(
unsigned int i=0;i<(order+1);i++)
846 interp2D_in[IDX2D(i,j)]=parentVal[IDX(i,j,order)];
851 for(
unsigned int j=0;j<(order+1);j++)
852 for(
unsigned int i=0;i<(order+1);i++)
853 u_unzip[IDX(i,j,order)]=interp2D_out[IDX2D(i,j)];
856 interpCpy[OCT_DIR_RIGHT_FRONT]=
true;
857 interpCpy[OCT_DIR_LEFT_FRONT]=
true;
858 interpCpy[OCT_DIR_UP_FRONT]=
true;
859 interpCpy[OCT_DIR_DOWN_FRONT]=
true;
864 if( interpReq[OCT_DIR_LEFT_DOWN] && (!interpCpy[OCT_DIR_LEFT_DOWN]))
870 for(
unsigned int k=0;k<(order+1);k++)
871 interp1D_in[k]=parentVal[IDX(0,0,k)];
875 for(
unsigned int k=0;k<(order+1);k++)
876 u_unzip[IDX(0,0,k)]=interp1D_out[k];
880 if( interpReq[OCT_DIR_LEFT_UP] && (!interpCpy[OCT_DIR_LEFT_UP]))
886 for(
unsigned int k=0;k<(order+1);k++)
887 interp1D_in[k]=parentVal[IDX(0,order,k)];
891 for(
unsigned int k=0;k<(order+1);k++)
892 u_unzip[IDX(0,order,k)]=interp1D_out[k];
896 if(interpReq[OCT_DIR_LEFT_BACK] && (!interpCpy[OCT_DIR_LEFT_BACK]))
902 for(
unsigned int j=0;j<(order+1);j++)
903 interp1D_in[j]=parentVal[IDX(0,j,0)];
907 for(
unsigned int j=0;j<(order+1);j++)
908 u_unzip[IDX(0,j,0)]=interp1D_out[j];
913 if(interpReq[OCT_DIR_LEFT_FRONT] && (!interpCpy[OCT_DIR_LEFT_FRONT]))
919 for(
unsigned int j=0;j<(order+1);j++)
920 interp1D_in[j]=parentVal[IDX(0,j,order)];
925 for(
unsigned int j=0;j<(order+1);j++)
926 u_unzip[IDX(0,j,order)]=interp1D_out[j];
933 if(interpReq[OCT_DIR_RIGHT_DOWN] && (!interpCpy[OCT_DIR_RIGHT_DOWN]))
939 for(
unsigned int k=0;k<(order+1);k++)
940 interp1D_in[k]=parentVal[IDX(order,0,k)];
944 for(
unsigned int k=0;k<(order+1);k++)
945 u_unzip[IDX(order,0,k)]=interp1D_out[k];
949 if(interpReq[OCT_DIR_RIGHT_UP] && (!interpCpy[OCT_DIR_RIGHT_UP]))
955 for(
unsigned int k=0;k<(order+1);k++)
956 interp1D_in[k]=parentVal[IDX(order,order,k)];
960 for(
unsigned int k=0;k<(order+1);k++)
961 u_unzip[IDX(order,order,k)]=interp1D_out[k];
965 if(interpReq[OCT_DIR_RIGHT_BACK] && (!interpCpy[OCT_DIR_RIGHT_BACK]))
970 for(
unsigned int j=0;j<(order+1);j++)
971 interp1D_in[j]=parentVal[IDX(order,j,0)];
976 for(
unsigned int j=0;j<(order+1);j++)
977 u_unzip[IDX(order,j,0)]=interp1D_out[j];
981 if(interpReq[OCT_DIR_RIGHT_FRONT] && (!interpCpy[OCT_DIR_RIGHT_FRONT]))
987 for(
unsigned int j=0;j<(order+1);j++)
988 interp1D_in[j]=parentVal[IDX(order,j,order)];
992 for(
unsigned int j=0;j<(order+1);j++)
993 u_unzip[IDX(order,j,order)]=interp1D_out[j];
998 if(interpReq[OCT_DIR_DOWN_BACK] && (!interpCpy[OCT_DIR_DOWN_BACK]))
1004 for(
unsigned int i=0;i<(order+1);i++)
1005 interp1D_in[i]=parentVal[IDX(i,0,0)];
1009 for(
unsigned int i=0;i<(order+1);i++)
1010 u_unzip[IDX(i,0,0)]=interp1D_out[i];
1015 if(interpReq[OCT_DIR_DOWN_FRONT] && (!interpCpy[OCT_DIR_DOWN_FRONT]))
1021 for(
unsigned int i=0;i<(order+1);i++)
1022 interp1D_in[i]=parentVal[IDX(i,0,order)];
1026 for(
unsigned int i=0;i<(order+1);i++)
1027 u_unzip[IDX(i,0,order)]=interp1D_out[i];
1031 if(interpReq[OCT_DIR_UP_BACK] && (!interpCpy[OCT_DIR_UP_BACK]))
1037 for(
unsigned int i=0;i<(order+1);i++)
1038 interp1D_in[i]=parentVal[IDX(i,order,0)];
1042 for(
unsigned int i=0;i<(order+1);i++)
1043 u_unzip[IDX(i,order,0)]=interp1D_out[i];
1047 if(interpReq[OCT_DIR_UP_FRONT] && (!interpCpy[OCT_DIR_UP_FRONT]))
1053 for(
unsigned int i=0;i<(order+1);i++)
1054 interp1D_in[i]=parentVal[IDX(i,order,order)];
1058 for(
unsigned int i=0;i<(order+1);i++)
1059 u_unzip[IDX(i,order,order)]=interp1D_out[i];
1063 #ifdef MATVEC_PROFILE 1064 dendro::timer::sfcmatvec::t_p2cInterp.stop();
1071 fem::operators::poisson::elementMassMatvec(u_unzip,childElem[child],refEl,v_unzip);
1072 else if(mVecType==1)
1073 fem::operators::poisson::elementStiffnessMatvec(u_unzip,childElem[child],refEl,v_unzip);
1077 bool * accumStatus =
new bool [nPe];
1078 bool * c2pInterp=
new bool[nPe];
1080 for(
unsigned int node=0;node<nPe;node++)
1082 accumStatus[node]=
false;
1083 c2pInterp[node]=
false;
1087 if(bCounts[child]!=nPe)
1090 #ifdef MATVEC_PROFILE 1091 dendro::timer::sfcmatvec::t_c2pInterp.start();
1096 for(
unsigned int node=0;node<nPe;node++)
1097 parentVal[node]=(da)0;
1099 bool vertexAcc[NUM_CHILDREN];
1100 for(
unsigned int i=0;i<NUM_CHILDREN;i++)
1104 if(interpReq[OCT_DIR_LEFT])
1110 for(
unsigned int k=0;k<(order+1);k++)
1111 for(
unsigned int j=0;j<(order+1);j++)
1112 interp2D_in[IDX2D(j,k)]=v_unzip[IDX(0,j,k)];
1117 for(
unsigned int k=1;k<(order);k++)
1118 for(
unsigned int j=1;j<(order);j++)
1120 parentVal[IDX(0,j,k)]=interp2D_out[IDX2D(j,k)];
1121 c2pInterp[IDX(0,j,k)]=
true;
1128 if(interpReq[OCT_DIR_RIGHT])
1134 for(
unsigned int k=0;k<(order+1);k++)
1135 for(
unsigned int j=0;j<(order+1);j++)
1136 interp2D_in[IDX2D(j,k)]=v_unzip[IDX(order,j,k)];
1140 for(
unsigned int k=1;k<(order);k++)
1141 for(
unsigned int j=1;j<(order);j++)
1143 parentVal[IDX(order,j,k)]=interp2D_out[IDX2D(j,k)];
1144 c2pInterp[IDX(order,j,k)]=
true;
1151 if(interpReq[OCT_DIR_DOWN])
1158 for(
unsigned int k=0;k<(order+1);k++)
1159 for(
unsigned int i=0;i<(order+1);i++)
1160 interp2D_in[IDX2D(i,k)]=v_unzip[IDX(i,0,k)];
1164 for(
unsigned int k=1;k<(order);k++)
1165 for(
unsigned int i=1;i<(order);i++)
1167 parentVal[IDX(i,0,k)]=interp2D_out[IDX2D(i,k)];
1168 c2pInterp[IDX(i,0,k)]=
true;
1176 if(interpReq[OCT_DIR_UP])
1183 for(
unsigned int k=0;k<(order+1);k++)
1184 for(
unsigned int i=0;i<(order+1);i++)
1185 interp2D_in[IDX2D(i,k)]=v_unzip[IDX(i,order,k)];
1189 for(
unsigned int k=1;k<(order);k++)
1190 for(
unsigned int i=1;i<(order);i++)
1192 parentVal[IDX(i,order,k)]=interp2D_out[IDX2D(i,k)];
1193 c2pInterp[IDX(i,order,k)]=
true;
1201 if(interpReq[OCT_DIR_BACK])
1207 for(
unsigned int j=0;j<(order+1);j++)
1208 for(
unsigned int i=0;i<(order+1);i++)
1209 interp2D_in[IDX2D(i,j)]=v_unzip[IDX(i,j,0)];
1214 for(
unsigned int j=1;j<(order);j++)
1215 for(
unsigned int i=1;i<(order);i++)
1217 parentVal[IDX(i,j,0)]=interp2D_out[IDX2D(i,j)];
1218 c2pInterp[IDX(i,j,0)]=
true;
1227 if(interpReq[OCT_DIR_FRONT])
1233 for(
unsigned int j=0;j<(order+1);j++)
1234 for(
unsigned int i=0;i<(order+1);i++)
1235 interp2D_in[IDX2D(i,j)]=v_unzip[IDX(i,j,order)];
1240 for(
unsigned int j=1;j<(order);j++)
1241 for(
unsigned int i=1;i<(order);i++)
1243 parentVal[IDX(i,j,order)]=interp2D_out[IDX2D(i,j)];
1244 c2pInterp[IDX(i,j,order)]=
true;
1253 if( interpReq[OCT_DIR_LEFT_DOWN])
1259 for(
unsigned int k=0;k<(order+1);k++)
1260 interp1D_in[k]=v_unzip[IDX(0,0,k)];
1264 for(
unsigned int k=1;k<(order);k++)
1266 parentVal[IDX(0,0,k)]=interp1D_out[k];
1267 c2pInterp[IDX(0,0,k)]=
true;
1273 parentVal[IDX(0,0,0)]=interp1D_out[0];
1274 c2pInterp[IDX(0,0,0)]=
true;
1280 parentVal[IDX(0,0,order)]=interp1D_out[order];
1281 c2pInterp[IDX(0,0,order)]=
true;
1290 if( interpReq[OCT_DIR_LEFT_UP])
1296 for(
unsigned int k=0;k<(order+1);k++)
1297 interp1D_in[k]=v_unzip[IDX(0,order,k)];
1301 for(
unsigned int k=1;k<(order);k++)
1303 parentVal[IDX(0,order,k)]=interp1D_out[k];
1304 c2pInterp[IDX(0,order,k)]=
true;
1310 parentVal[IDX(0,order,0)]=interp1D_out[0];
1311 c2pInterp[IDX(0,order,0)]=
true;
1317 parentVal[IDX(0,order,order)]=interp1D_out[order];
1318 c2pInterp[IDX(0,order,order)]=
true;
1327 if(interpReq[OCT_DIR_LEFT_BACK])
1333 for(
unsigned int j=0;j<(order+1);j++)
1334 interp1D_in[j]=v_unzip[IDX(0,j,0)];
1338 for(
unsigned int j=1;j<(order);j++)
1340 parentVal[IDX(0,j,0)]=interp1D_out[j];
1341 c2pInterp[IDX(0,j,0)]=
true;
1348 parentVal[IDX(0,0,0)]=interp1D_out[0];
1349 c2pInterp[IDX(0,0,0)]=
true;
1355 parentVal[IDX(0,order,0)]=interp1D_out[order];
1356 c2pInterp[IDX(0,order,0)]=
true;
1366 if(interpReq[OCT_DIR_LEFT_FRONT])
1372 for(
unsigned int j=0;j<(order+1);j++)
1373 interp1D_in[j]=v_unzip[IDX(0,j,order)];
1378 for(
unsigned int j=1;j<(order);j++)
1380 parentVal[IDX(0,j,order)]=interp1D_out[j];
1381 c2pInterp[IDX(0,j,order)]=
true;
1387 parentVal[IDX(0,0,order)]=interp1D_out[0];
1388 c2pInterp[IDX(0,0,order)]=
true;
1394 parentVal[IDX(0,order,order)]=interp1D_out[order];
1395 c2pInterp[IDX(0,order,order)]=
true;
1406 if(interpReq[OCT_DIR_RIGHT_DOWN])
1412 for(
unsigned int k=0;k<(order+1);k++)
1413 interp1D_in[k]=v_unzip[IDX(order,0,k)];
1417 for(
unsigned int k=1;k<(order);k++)
1419 parentVal[IDX(order,0,k)]=interp1D_out[k];
1420 c2pInterp[IDX(order,0,k)]=
true;
1427 parentVal[IDX(order,0,0)]=interp1D_out[0];
1428 c2pInterp[IDX(order,0,0)]=
true;
1434 parentVal[IDX(order,0,order)]=interp1D_out[order];
1435 c2pInterp[IDX(order,0,order)]=
true;
1444 if(interpReq[OCT_DIR_RIGHT_UP])
1450 for(
unsigned int k=0;k<(order+1);k++)
1451 interp1D_in[k]=v_unzip[IDX(order,order,k)];
1455 for(
unsigned int k=1;k<order;k++)
1457 parentVal[IDX(order,order,k)]=interp1D_out[k];
1458 c2pInterp[IDX(order,order,k)]=
true;
1465 parentVal[IDX(order,order,0)]=interp1D_out[0];
1466 c2pInterp[IDX(order,order,0)]=
true;
1472 parentVal[IDX(order,order,order)]=interp1D_out[order];
1473 c2pInterp[IDX(order,order,order)]=
true;
1483 if(interpReq[OCT_DIR_RIGHT_BACK])
1488 for(
unsigned int j=0;j<(order+1);j++)
1489 interp1D_in[j]=v_unzip[IDX(order,j,0)];
1494 for(
unsigned int j=1;j<(order);j++)
1496 parentVal[IDX(order,j,0)]=interp1D_out[j];
1497 c2pInterp[IDX(order,j,0)]=
true;
1503 parentVal[IDX(order,0,0)]=interp1D_out[0];
1504 c2pInterp[IDX(order,0,0)]=
true;
1510 parentVal[IDX(order,order,0)]=interp1D_out[order];
1511 c2pInterp[IDX(order,order,0)]=
true;
1522 if(interpReq[OCT_DIR_RIGHT_FRONT])
1528 for(
unsigned int j=0;j<(order+1);j++)
1529 interp1D_in[j]=v_unzip[IDX(order,j,order)];
1533 for(
unsigned int j=1;j<(order);j++)
1535 parentVal[IDX(order,j,order)]=interp1D_out[j];
1536 c2pInterp[IDX(order,j,order)]=
true;
1542 parentVal[IDX(order,0,order)]=interp1D_out[0];
1543 c2pInterp[IDX(order,0,order)]=
true;
1549 parentVal[IDX(order,order,order)]=interp1D_out[order];
1550 c2pInterp[IDX(order,order,order)]=
true;
1561 if(interpReq[OCT_DIR_DOWN_BACK])
1567 for(
unsigned int i=0;i<(order+1);i++)
1568 interp1D_in[i]=v_unzip[IDX(i,0,0)];
1572 for(
unsigned int i=1;i<(order);i++)
1574 parentVal[IDX(i,0,0)]=interp1D_out[i];
1575 c2pInterp[IDX(i,0,0)]=
true;
1581 parentVal[IDX(0,0,0)]=interp1D_out[0];
1582 c2pInterp[IDX(0,0,0)]=
true;
1588 parentVal[IDX(order,0,0)]=interp1D_out[order];
1589 c2pInterp[IDX(order,0,0)]=
true;
1599 if(interpReq[OCT_DIR_DOWN_FRONT])
1605 for(
unsigned int i=0;i<(order+1);i++)
1606 interp1D_in[i]=v_unzip[IDX(i,0,order)];
1610 for(
unsigned int i=1;i<(order);i++)
1612 parentVal[IDX(i,0,order)]=interp1D_out[i];
1613 c2pInterp[IDX(i,0,order)]=
true;
1619 parentVal[IDX(0,0,order)]=interp1D_out[0];
1620 c2pInterp[IDX(0,0,order)]=
true;
1626 parentVal[IDX(order,0,order)]=interp1D_out[order];
1627 c2pInterp[IDX(order,0,order)]=
true;
1638 if(interpReq[OCT_DIR_UP_BACK])
1644 for(
unsigned int i=0;i<(order+1);i++)
1645 interp1D_in[i]=v_unzip[IDX(i,order,0)];
1649 for(
unsigned int i=1;i<(order);i++)
1651 parentVal[IDX(i,order,0)]=interp1D_out[i];
1652 c2pInterp[IDX(i,order,0)]=
true;
1659 parentVal[IDX(0,order,0)]=interp1D_out[0];
1660 c2pInterp[IDX(0,order,0)]=
true;
1667 parentVal[IDX(order,order,0)]=interp1D_out[order];
1668 c2pInterp[IDX(order,order,0)]=
true;
1679 if(interpReq[OCT_DIR_UP_FRONT])
1685 for(
unsigned int i=0;i<(order+1);i++)
1686 interp1D_in[i]=v_unzip[IDX(i,order,order)];
1691 for(
unsigned int i=1;i<(order);i++)
1693 parentVal[IDX(i,order,order)]=interp1D_out[i];
1694 c2pInterp[IDX(i,order,order)]=
true;
1702 parentVal[IDX(0,order,order)]=interp1D_out[0];
1703 c2pInterp[IDX(0,order,order)]=
true;
1709 parentVal[IDX(order,order,order)]=interp1D_out[order];
1710 c2pInterp[IDX(order,order,order)]=
true;
1727 #ifdef MATVEC_PROFILE 1728 dendro::timer::sfcmatvec::t_c2pInterp.stop();
1732 #ifdef MATVEC_PROFILE 1733 dendro::timer::sfcmatvec::t_accum.start();
1735 for(
unsigned int p=pBegin;p<pEnd;p++) {
1736 if (fem::seq::computeIJK(pt_coords[p], pOctant, dx, logDx, ijk) == PT_INTERNAL_DG_NODE)
1738 out[p]+=parentVal[IDX(ijk[0],ijk[1],ijk[2])];
1739 if(c2pInterp[IDX(ijk[0],ijk[1],ijk[2])])accumStatus[IDX(ijk[0],ijk[1],ijk[2])]=
true;
1744 #ifdef MATVEC_PROFILE 1745 dendro::timer::sfcmatvec::t_accum.stop();
1753 #ifdef MATVEC_PROFILE 1754 dendro::timer::sfcmatvec::t_accum.start();
1757 for(
unsigned int p=0;p<bCounts[child];p++)
1759 pt_status=fem::seq::computeIJK(pt_internal[child][p],childElem[child],dxb2,logDxb2,ijk);
1760 if(pt_status==PT_INTERNAL_DG_NODE && !accumStatus[IDX(ijk[0],ijk[1],ijk[2])])
1761 v_internal[child][p]=v_unzip[IDX(ijk[0],ijk[1],ijk[2])];
1766 #ifdef MATVEC_PROFILE 1767 dendro::timer::sfcmatvec::t_accum.stop();
1771 #ifdef MATVEC_PROFILE 1772 dendro::timer::sfcmatvec::t_malloc.start();
1775 delete [] accumStatus;
1776 delete [] c2pInterp;
1778 delete [] unzipStatus;
1779 delete [] interp3D_out;
1781 delete [] interp2D_in;
1782 delete [] interp2D_out;
1784 delete [] interp1D_in;
1785 delete [] interp1D_out;
1790 #ifdef MATVEC_PROFILE 1791 dendro::timer::sfcmatvec::t_malloc.stop();
1836 #ifdef MATVEC_PROFILE 1837 dendro::timer::sfcmatvec::t_pts_p1_accum.start();
1840 for(
unsigned int p=pBegin;p<pEnd;p++)
1847 if(pt_coords[p].zint()>pMid[2]) ijk[2]=2;
1848 if(pt_coords[p].yint()>pMid[1]) ijk[1]=2;
1849 if(pt_coords[p].xint()>pMid[0]) ijk[0]=2;
1851 if(pt_coords[p].zint()<pMid[2]) ijk[2]=0;
1852 if(pt_coords[p].yint()<pMid[1]) ijk[1]=0;
1853 if(pt_coords[p].xint()<pMid[0]) ijk[0]=0;
1855 if(pt_coords[p].zint()==pMid[2]) ijk[2]=1;
1856 if(pt_coords[p].yint()==pMid[1]) ijk[1]=1;
1857 if(pt_coords[p].xint()==pMid[0]) ijk[0]=1;
1860 if(ijk[0]!=1 && ijk[1]!=1 && ijk[2]!=1) {
1862 cnum = (((ijk[2] >> 1u) << 2u) | ((ijk[1] >> 1u) << 1u) | ((ijk[0] >> 1u)));
1863 out[p]+=v_internal[cnum][counts[cnum]];
1869 #ifdef MATVEC_PROFILE 1870 dendro::timer::sfcmatvec::t_pts_p1_accum.stop();
1873 #ifdef MATVEC_PROFILE 1874 dendro::timer::sfcmatvec::t_pts_p2_accum.start();
1877 for(
unsigned int index=0;index<duplicateCoordIndex.size();index++) {
1883 if (pt_coords[duplicateCoordIndex[index]].zint() > pMid[2]) ijk[2] = 2;
1884 if (pt_coords[duplicateCoordIndex[index]].yint() > pMid[1]) ijk[1] = 2;
1885 if (pt_coords[duplicateCoordIndex[index]].xint() > pMid[0]) ijk[0] = 2;
1887 if (pt_coords[duplicateCoordIndex[index]].zint() < pMid[2]) ijk[2] = 0;
1888 if (pt_coords[duplicateCoordIndex[index]].yint() < pMid[1]) ijk[1] = 0;
1889 if (pt_coords[duplicateCoordIndex[index]].xint() < pMid[0]) ijk[0] = 0;
1891 if (pt_coords[duplicateCoordIndex[index]].zint() == pMid[2]) ijk[2] = 1;
1892 if (pt_coords[duplicateCoordIndex[index]].yint() == pMid[1]) ijk[1] = 1;
1893 if (pt_coords[duplicateCoordIndex[index]].xint() == pMid[0]) ijk[0] = 1;
1895 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
1897 cnum=SFC_MATVEC_BUCKET_TABLE[(ijk[2]*SFC_MVEC_TABLE_Rz+ijk[1]*SFC_MVEC_TABLE_Ry+ijk[0])][child];
1901 out[duplicateCoordIndex[index]]+=v_internal[cnum][counts[cnum]];
1909 #ifdef MATVEC_PROFILE 1910 dendro::timer::sfcmatvec::t_pts_p2_accum.stop();
1915 #ifdef MATVEC_PROFILE 1916 dendro::timer::sfcmatvec::t_malloc.start();
1920 for(
unsigned int child=0;child<NUM_CHILDREN;child++)
1922 delete [] u_internal[child];
1923 delete [] v_internal[child];
1924 delete [] pt_internal[child];
1928 delete [] parentVal;
1929 delete [] u_internal;
1930 delete [] v_internal;
1931 delete [] pt_internal;
1933 #ifdef MATVEC_PROFILE 1934 dendro::timer::sfcmatvec::t_malloc.stop();
1942 #endif //SFCSORTBENCH_FEMMATVEC_H void I2D_Parent2Child(const double *in, double *out, unsigned int childNum) const
Definition: refel.h:460
unsigned int minX() const
returns true min corner(anchor) and the max corner coordinates of a octant.
Definition: TreeNode.h:164
int getNextHighestPowerOfTwo(unsigned int n)
Definition: binUtils.cpp:61
A class to manage octants.
Definition: TreeNode.h:35
void I1D_Parent2Child(const double *in, double *out, unsigned int childNUm) const
Definition: refel.h:555
workspace variables allocations
Definition: matvec.h:44
unsigned int fastLog2(unsigned int num)
Definition: binUtils.cpp:15
void I1D_Child2Parent(const double *in, double *out, unsigned int childNUm) const
Definition: refel.h:596
void I2D_Child2Parent(const double *in, double *out, unsigned int childNum) const
Definition: refel.h:505
unsigned int getLevel() const
return the level of the of the octant
Definition: TreeNode.h:387
Collection of Generic Sequential Functions.
A set of efficient functions that use binary operations to perform some small computations.
unsigned int getBit(T val, unsigned int i)
gets the i^th bit on the value val
Definition: binUtils.h:70