Actual source code: bddcprivate.c
petsc-3.8.4 2018-03-24
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <petscdmplex.h>
5: #include <petscblaslapack.h>
6: #include <petsc/private/sfimpl.h>
7: #include <petsc/private/dmpleximpl.h>
9: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
11: /* if range is true, it returns B s.t. span{B} = range(A)
12: if range is false, it returns B s.t. range(B) _|_ range(A) */
13: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
14: {
15: #if !defined(PETSC_USE_COMPLEX)
16: PetscScalar *uwork,*data,*U, ds = 0.;
17: PetscReal *sing;
18: PetscBLASInt bM,bN,lwork,lierr,di = 1;
19: PetscInt ulw,i,nr,nc,n;
23: #if defined(PETSC_MISSING_LAPACK_GESVD)
24: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
25: #else
26: MatGetSize(A,&nr,&nc);
27: if (!nr || !nc) return(0);
29: /* workspace */
30: if (!work) {
31: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
32: PetscMalloc1(ulw,&uwork);
33: } else {
34: ulw = lw;
35: uwork = work;
36: }
37: n = PetscMin(nr,nc);
38: if (!rwork) {
39: PetscMalloc1(n,&sing);
40: } else {
41: sing = rwork;
42: }
44: /* SVD */
45: PetscMalloc1(nr*nr,&U);
46: PetscBLASIntCast(nr,&bM);
47: PetscBLASIntCast(nc,&bN);
48: PetscBLASIntCast(ulw,&lwork);
49: MatDenseGetArray(A,&data);
50: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
51: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
52: PetscFPTrapPop();
53: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
54: MatDenseRestoreArray(A,&data);
55: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
56: if (!rwork) {
57: PetscFree(sing);
58: }
59: if (!work) {
60: PetscFree(uwork);
61: }
62: /* create B */
63: if (!range) {
64: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
65: MatDenseGetArray(*B,&data);
66: PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
67: } else {
68: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
69: MatDenseGetArray(*B,&data);
70: PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
71: }
72: MatDenseRestoreArray(*B,&data);
73: PetscFree(U);
74: #endif
75: #else /* PETSC_USE_COMPLEX */
77: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
78: #endif
79: return(0);
80: }
82: /* TODO REMOVE */
83: #if defined(PRINT_GDET)
84: static int inc = 0;
85: static int lev = 0;
86: #endif
88: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
89: {
91: Mat GE,GEd;
92: PetscInt rsize,csize,esize;
93: PetscScalar *ptr;
96: ISGetSize(edge,&esize);
97: if (!esize) return(0);
98: ISGetSize(extrow,&rsize);
99: ISGetSize(extcol,&csize);
101: /* gradients */
102: ptr = work + 5*esize;
103: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
104: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
105: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
106: MatDestroy(&GE);
108: /* constants */
109: ptr += rsize*csize;
110: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
111: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
112: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
113: MatDestroy(&GE);
114: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
115: MatDestroy(&GEd);
117: if (corners) {
118: Mat GEc;
119: PetscScalar *vals,v;
121: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
122: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
123: MatDenseGetArray(GEd,&vals);
124: /* v = PetscAbsScalar(vals[0]) */;
125: v = 1.;
126: cvals[0] = vals[0]/v;
127: cvals[1] = vals[1]/v;
128: MatDenseRestoreArray(GEd,&vals);
129: MatScale(*GKins,1./v);
130: #if defined(PRINT_GDET)
131: {
132: PetscViewer viewer;
133: char filename[256];
134: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
135: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
136: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
137: PetscObjectSetName((PetscObject)GEc,"GEc");
138: MatView(GEc,viewer);
139: PetscObjectSetName((PetscObject)(*GKins),"GK");
140: MatView(*GKins,viewer);
141: PetscObjectSetName((PetscObject)GEd,"Gproj");
142: MatView(GEd,viewer);
143: PetscViewerDestroy(&viewer);
144: }
145: #endif
146: MatDestroy(&GEd);
147: MatDestroy(&GEc);
148: }
150: return(0);
151: }
153: PetscErrorCode PCBDDCNedelecSupport(PC pc)
154: {
155: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
156: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
157: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
158: Vec tvec;
159: PetscSF sfv;
160: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
161: MPI_Comm comm;
162: IS lned,primals,allprimals,nedfieldlocal;
163: IS *eedges,*extrows,*extcols,*alleedges;
164: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
165: PetscScalar *vals,*work;
166: PetscReal *rwork;
167: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
168: PetscInt ne,nv,Lv,order,n,field;
169: PetscInt n_neigh,*neigh,*n_shared,**shared;
170: PetscInt i,j,extmem,cum,maxsize,nee;
171: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
172: PetscInt *sfvleaves,*sfvroots;
173: PetscInt *corners,*cedges;
174: PetscInt *ecount,**eneighs,*vcount,**vneighs;
175: #if defined(PETSC_USE_DEBUG)
176: PetscInt *emarks;
177: #endif
178: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
179: PetscErrorCode ierr;
182: /* If the discrete gradient is defined for a subset of dofs and global is true,
183: it assumes G is given in global ordering for all the dofs.
184: Otherwise, the ordering is global for the Nedelec field */
185: order = pcbddc->nedorder;
186: conforming = pcbddc->conforming;
187: field = pcbddc->nedfield;
188: global = pcbddc->nedglobal;
189: setprimal = PETSC_FALSE;
190: print = PETSC_FALSE;
191: singular = PETSC_FALSE;
193: /* Command line customization */
194: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
195: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
196: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
197: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
198: /* print debug info TODO: to be removed */
199: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
200: PetscOptionsEnd();
202: /* Return if there are no edges in the decomposition and the problem is not singular */
203: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
204: ISLocalToGlobalMappingGetSize(al2g,&n);
205: PetscObjectGetComm((PetscObject)pc,&comm);
206: if (!singular) {
207: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
208: lrc[0] = PETSC_FALSE;
209: for (i=0;i<n;i++) {
210: if (PetscRealPart(vals[i]) > 2.) {
211: lrc[0] = PETSC_TRUE;
212: break;
213: }
214: }
215: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
216: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
217: if (!lrc[1]) return(0);
218: }
220: /* Get Nedelec field */
221: MatISSetUpSF(pc->pmat);
222: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
223: if (pcbddc->n_ISForDofsLocal && field >= 0) {
224: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
225: nedfieldlocal = pcbddc->ISForDofsLocal[field];
226: ISGetLocalSize(nedfieldlocal,&ne);
227: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
228: ne = n;
229: nedfieldlocal = NULL;
230: global = PETSC_TRUE;
231: } else if (field == PETSC_DECIDE) {
232: PetscInt rst,ren,*idx;
234: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
235: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
236: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
237: for (i=rst;i<ren;i++) {
238: PetscInt nc;
240: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
242: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243: }
244: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
245: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246: PetscMalloc1(n,&idx);
247: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
248: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
249: } else {
250: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
251: }
253: /* Sanity checks */
254: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
255: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
256: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);
258: /* Just set primal dofs and return */
259: if (setprimal) {
260: IS enedfieldlocal;
261: PetscInt *eidxs;
263: PetscMalloc1(ne,&eidxs);
264: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
265: if (nedfieldlocal) {
266: ISGetIndices(nedfieldlocal,&idxs);
267: for (i=0,cum=0;i<ne;i++) {
268: if (PetscRealPart(vals[idxs[i]]) > 2.) {
269: eidxs[cum++] = idxs[i];
270: }
271: }
272: ISRestoreIndices(nedfieldlocal,&idxs);
273: } else {
274: for (i=0,cum=0;i<ne;i++) {
275: if (PetscRealPart(vals[i]) > 2.) {
276: eidxs[cum++] = i;
277: }
278: }
279: }
280: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
281: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
282: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
283: PetscFree(eidxs);
284: ISDestroy(&nedfieldlocal);
285: ISDestroy(&enedfieldlocal);
286: return(0);
287: }
289: /* Compute some l2g maps */
290: if (nedfieldlocal) {
291: IS is;
293: /* need to map from the local Nedelec field to local numbering */
294: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
295: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
296: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
297: ISLocalToGlobalMappingCreateIS(is,&al2g);
298: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
299: if (global) {
300: PetscObjectReference((PetscObject)al2g);
301: el2g = al2g;
302: } else {
303: IS gis;
305: ISRenumber(is,NULL,NULL,&gis);
306: ISLocalToGlobalMappingCreateIS(gis,&el2g);
307: ISDestroy(&gis);
308: }
309: ISDestroy(&is);
310: } else {
311: /* restore default */
312: pcbddc->nedfield = -1;
313: /* one ref for the destruction of al2g, one for el2g */
314: PetscObjectReference((PetscObject)al2g);
315: PetscObjectReference((PetscObject)al2g);
316: el2g = al2g;
317: fl2g = NULL;
318: }
320: /* Start communication to drop connections for interior edges (for cc analysis only) */
321: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
322: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
323: if (nedfieldlocal) {
324: ISGetIndices(nedfieldlocal,&idxs);
325: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
326: ISRestoreIndices(nedfieldlocal,&idxs);
327: } else {
328: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
329: }
330: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
334: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
335: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
336: if (global) {
337: PetscInt rst;
339: MatGetOwnershipRange(G,&rst,NULL);
340: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
341: if (matis->sf_rootdata[i] < 2) {
342: matis->sf_rootdata[cum++] = i + rst;
343: }
344: }
345: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
346: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
347: } else {
348: PetscInt *tbz;
350: PetscMalloc1(ne,&tbz);
351: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
352: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353: ISGetIndices(nedfieldlocal,&idxs);
354: for (i=0,cum=0;i<ne;i++)
355: if (matis->sf_leafdata[idxs[i]] == 1)
356: tbz[cum++] = i;
357: ISRestoreIndices(nedfieldlocal,&idxs);
358: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
359: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
360: PetscFree(tbz);
361: }
362: } else { /* we need the entire G to infer the nullspace */
363: PetscObjectReference((PetscObject)pcbddc->discretegradient);
364: G = pcbddc->discretegradient;
365: }
367: /* Extract subdomain relevant rows of G */
368: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
369: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
370: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
371: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
372: ISDestroy(&lned);
373: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
374: MatDestroy(&lGall);
375: MatISGetLocalMat(lGis,&lG);
377: /* SF for nodal dofs communications */
378: MatGetLocalSize(G,NULL,&Lv);
379: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
380: PetscObjectReference((PetscObject)vl2g);
381: ISLocalToGlobalMappingGetSize(vl2g,&nv);
382: PetscSFCreate(comm,&sfv);
383: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
384: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
385: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
386: i = singular ? 2 : 1;
387: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
389: /* Destroy temporary G created in MATIS format and modified G */
390: PetscObjectReference((PetscObject)lG);
391: MatDestroy(&lGis);
392: MatDestroy(&G);
394: if (print) {
395: PetscObjectSetName((PetscObject)lG,"initial_lG");
396: MatView(lG,NULL);
397: }
399: /* Save lG for values insertion in change of basis */
400: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
402: /* Analyze the edge-nodes connections (duplicate lG) */
403: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
404: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
405: PetscBTCreate(nv,&btv);
406: PetscBTCreate(ne,&bte);
407: PetscBTCreate(ne,&btb);
408: PetscBTCreate(ne,&btbd);
409: PetscBTCreate(nv,&btvcand);
410: /* need to import the boundary specification to ensure the
411: proper detection of coarse edges' endpoints */
412: if (pcbddc->DirichletBoundariesLocal) {
413: IS is;
415: if (fl2g) {
416: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
417: } else {
418: is = pcbddc->DirichletBoundariesLocal;
419: }
420: ISGetLocalSize(is,&cum);
421: ISGetIndices(is,&idxs);
422: for (i=0;i<cum;i++) {
423: if (idxs[i] >= 0) {
424: PetscBTSet(btb,idxs[i]);
425: PetscBTSet(btbd,idxs[i]);
426: }
427: }
428: ISRestoreIndices(is,&idxs);
429: if (fl2g) {
430: ISDestroy(&is);
431: }
432: }
433: if (pcbddc->NeumannBoundariesLocal) {
434: IS is;
436: if (fl2g) {
437: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
438: } else {
439: is = pcbddc->NeumannBoundariesLocal;
440: }
441: ISGetLocalSize(is,&cum);
442: ISGetIndices(is,&idxs);
443: for (i=0;i<cum;i++) {
444: if (idxs[i] >= 0) {
445: PetscBTSet(btb,idxs[i]);
446: }
447: }
448: ISRestoreIndices(is,&idxs);
449: if (fl2g) {
450: ISDestroy(&is);
451: }
452: }
454: /* Count neighs per dof */
455: PetscCalloc1(ne,&ecount);
456: PetscMalloc1(ne,&eneighs);
457: ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
458: for (i=1,cum=0;i<n_neigh;i++) {
459: cum += n_shared[i];
460: for (j=0;j<n_shared[i];j++) {
461: ecount[shared[i][j]]++;
462: }
463: }
464: if (ne) {
465: PetscMalloc1(cum,&eneighs[0]);
466: }
467: for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
468: PetscMemzero(ecount,ne*sizeof(PetscInt));
469: for (i=1;i<n_neigh;i++) {
470: for (j=0;j<n_shared[i];j++) {
471: PetscInt k = shared[i][j];
472: eneighs[k][ecount[k]] = neigh[i];
473: ecount[k]++;
474: }
475: }
476: for (i=0;i<ne;i++) {
477: PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
478: }
479: ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
480: PetscCalloc1(nv,&vcount);
481: PetscMalloc1(nv,&vneighs);
482: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
483: for (i=1,cum=0;i<n_neigh;i++) {
484: cum += n_shared[i];
485: for (j=0;j<n_shared[i];j++) {
486: vcount[shared[i][j]]++;
487: }
488: }
489: if (nv) {
490: PetscMalloc1(cum,&vneighs[0]);
491: }
492: for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
493: PetscMemzero(vcount,nv*sizeof(PetscInt));
494: for (i=1;i<n_neigh;i++) {
495: for (j=0;j<n_shared[i];j++) {
496: PetscInt k = shared[i][j];
497: vneighs[k][vcount[k]] = neigh[i];
498: vcount[k]++;
499: }
500: }
501: for (i=0;i<nv;i++) {
502: PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
503: }
504: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
506: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
507: for proper detection of coarse edges' endpoints */
508: PetscBTCreate(ne,&btee);
509: for (i=0;i<ne;i++) {
510: if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
511: PetscBTSet(btee,i);
512: }
513: }
514: PetscMalloc1(ne,&marks);
515: if (!conforming) {
516: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
517: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
518: }
519: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
520: MatSeqAIJGetArray(lGe,&vals);
521: cum = 0;
522: for (i=0;i<ne;i++) {
523: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
524: if (!PetscBTLookup(btee,i)) {
525: marks[cum++] = i;
526: continue;
527: }
528: /* set badly connected edge dofs as primal */
529: if (!conforming) {
530: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
531: marks[cum++] = i;
532: PetscBTSet(bte,i);
533: for (j=ii[i];j<ii[i+1];j++) {
534: PetscBTSet(btv,jj[j]);
535: }
536: } else {
537: /* every edge dofs should be connected trough a certain number of nodal dofs
538: to other edge dofs belonging to coarse edges
539: - at most 2 endpoints
540: - order-1 interior nodal dofs
541: - no undefined nodal dofs (nconn < order)
542: */
543: PetscInt ends = 0,ints = 0, undef = 0;
544: for (j=ii[i];j<ii[i+1];j++) {
545: PetscInt v = jj[j],k;
546: PetscInt nconn = iit[v+1]-iit[v];
547: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
548: if (nconn > order) ends++;
549: else if (nconn == order) ints++;
550: else undef++;
551: }
552: if (undef || ends > 2 || ints != order -1) {
553: marks[cum++] = i;
554: PetscBTSet(bte,i);
555: for (j=ii[i];j<ii[i+1];j++) {
556: PetscBTSet(btv,jj[j]);
557: }
558: }
559: }
560: }
561: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
562: if (!order && ii[i+1] != ii[i]) {
563: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
564: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
565: }
566: }
567: PetscBTDestroy(&btee);
568: MatSeqAIJRestoreArray(lGe,&vals);
569: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
570: if (!conforming) {
571: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
572: MatDestroy(&lGt);
573: }
574: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
576: /* identify splitpoints and corner candidates */
577: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
578: if (print) {
579: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
580: MatView(lGe,NULL);
581: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
582: MatView(lGt,NULL);
583: }
584: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
585: MatSeqAIJGetArray(lGt,&vals);
586: for (i=0;i<nv;i++) {
587: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
588: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
589: if (!order) { /* variable order */
590: PetscReal vorder = 0.;
592: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
593: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
594: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
595: ord = 1;
596: }
597: #if defined(PETSC_USE_DEBUG)
598: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
599: #endif
600: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
601: if (PetscBTLookup(btbd,jj[j])) {
602: bdir = PETSC_TRUE;
603: break;
604: }
605: if (vc != ecount[jj[j]]) {
606: sneighs = PETSC_FALSE;
607: } else {
608: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
609: for (k=0;k<vc;k++) {
610: if (vn[k] != en[k]) {
611: sneighs = PETSC_FALSE;
612: break;
613: }
614: }
615: }
616: }
617: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
618: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
619: PetscBTSet(btv,i);
620: } else if (test == ord) {
621: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
622: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
623: PetscBTSet(btv,i);
624: } else {
625: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
626: PetscBTSet(btvcand,i);
627: }
628: }
629: }
630: PetscFree(ecount);
631: PetscFree(vcount);
632: if (ne) {
633: PetscFree(eneighs[0]);
634: }
635: if (nv) {
636: PetscFree(vneighs[0]);
637: }
638: PetscFree(eneighs);
639: PetscFree(vneighs);
640: PetscBTDestroy(&btbd);
642: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
643: if (order != 1) {
644: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
645: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
646: for (i=0;i<nv;i++) {
647: if (PetscBTLookup(btvcand,i)) {
648: PetscBool found = PETSC_FALSE;
649: for (j=ii[i];j<ii[i+1] && !found;j++) {
650: PetscInt k,e = jj[j];
651: if (PetscBTLookup(bte,e)) continue;
652: for (k=iit[e];k<iit[e+1];k++) {
653: PetscInt v = jjt[k];
654: if (v != i && PetscBTLookup(btvcand,v)) {
655: found = PETSC_TRUE;
656: break;
657: }
658: }
659: }
660: if (!found) {
661: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i);
662: PetscBTClear(btvcand,i);
663: } else {
664: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i);
665: }
666: }
667: }
668: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
669: }
670: MatSeqAIJRestoreArray(lGt,&vals);
671: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
672: MatDestroy(&lGe);
674: /* Get the local G^T explicitly */
675: MatDestroy(&lGt);
676: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
677: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
679: /* Mark interior nodal dofs */
680: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
681: PetscBTCreate(nv,&btvi);
682: for (i=1;i<n_neigh;i++) {
683: for (j=0;j<n_shared[i];j++) {
684: PetscBTSet(btvi,shared[i][j]);
685: }
686: }
687: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
689: /* communicate corners and splitpoints */
690: PetscMalloc1(nv,&vmarks);
691: PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
692: PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
693: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
695: if (print) {
696: IS tbz;
698: cum = 0;
699: for (i=0;i<nv;i++)
700: if (sfvleaves[i])
701: vmarks[cum++] = i;
703: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
704: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
705: ISView(tbz,NULL);
706: ISDestroy(&tbz);
707: }
709: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
710: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
711: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
712: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
714: /* Zero rows of lGt corresponding to identified corners
715: and interior nodal dofs */
716: cum = 0;
717: for (i=0;i<nv;i++) {
718: if (sfvleaves[i]) {
719: vmarks[cum++] = i;
720: PetscBTSet(btv,i);
721: }
722: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
723: }
724: PetscBTDestroy(&btvi);
725: if (print) {
726: IS tbz;
728: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
729: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
730: ISView(tbz,NULL);
731: ISDestroy(&tbz);
732: }
733: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
734: PetscFree(vmarks);
735: PetscSFDestroy(&sfv);
736: PetscFree2(sfvleaves,sfvroots);
738: /* Recompute G */
739: MatDestroy(&lG);
740: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
741: if (print) {
742: PetscObjectSetName((PetscObject)lG,"used_lG");
743: MatView(lG,NULL);
744: PetscObjectSetName((PetscObject)lGt,"used_lGt");
745: MatView(lGt,NULL);
746: }
748: /* Get primal dofs (if any) */
749: cum = 0;
750: for (i=0;i<ne;i++) {
751: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
752: }
753: if (fl2g) {
754: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
755: }
756: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
757: if (print) {
758: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
759: ISView(primals,NULL);
760: }
761: PetscBTDestroy(&bte);
762: /* TODO: what if the user passed in some of them ? */
763: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
764: ISDestroy(&primals);
766: /* Compute edge connectivity */
767: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
768: MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
769: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
770: if (fl2g) {
771: PetscBT btf;
772: PetscInt *iia,*jja,*iiu,*jju;
773: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
775: /* create CSR for all local dofs */
776: PetscMalloc1(n+1,&iia);
777: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
778: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
779: iiu = pcbddc->mat_graph->xadj;
780: jju = pcbddc->mat_graph->adjncy;
781: } else if (pcbddc->use_local_adj) {
782: rest = PETSC_TRUE;
783: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
784: } else {
785: free = PETSC_TRUE;
786: PetscMalloc2(n+1,&iiu,n,&jju);
787: iiu[0] = 0;
788: for (i=0;i<n;i++) {
789: iiu[i+1] = i+1;
790: jju[i] = -1;
791: }
792: }
794: /* import sizes of CSR */
795: iia[0] = 0;
796: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
798: /* overwrite entries corresponding to the Nedelec field */
799: PetscBTCreate(n,&btf);
800: ISGetIndices(nedfieldlocal,&idxs);
801: for (i=0;i<ne;i++) {
802: PetscBTSet(btf,idxs[i]);
803: iia[idxs[i]+1] = ii[i+1]-ii[i];
804: }
806: /* iia in CSR */
807: for (i=0;i<n;i++) iia[i+1] += iia[i];
809: /* jja in CSR */
810: PetscMalloc1(iia[n],&jja);
811: for (i=0;i<n;i++)
812: if (!PetscBTLookup(btf,i))
813: for (j=0;j<iiu[i+1]-iiu[i];j++)
814: jja[iia[i]+j] = jju[iiu[i]+j];
816: /* map edge dofs connectivity */
817: if (jj) {
818: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
819: for (i=0;i<ne;i++) {
820: PetscInt e = idxs[i];
821: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
822: }
823: }
824: ISRestoreIndices(nedfieldlocal,&idxs);
825: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
826: if (rest) {
827: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
828: }
829: if (free) {
830: PetscFree2(iiu,jju);
831: }
832: PetscBTDestroy(&btf);
833: } else {
834: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
835: }
837: /* Analyze interface for edge dofs */
838: PCBDDCAnalyzeInterface(pc);
839: pcbddc->mat_graph->twodim = PETSC_FALSE;
841: /* Get coarse edges in the edge space */
842: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
843: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
845: if (fl2g) {
846: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
847: PetscMalloc1(nee,&eedges);
848: for (i=0;i<nee;i++) {
849: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
850: }
851: } else {
852: eedges = alleedges;
853: primals = allprimals;
854: }
856: /* Mark fine edge dofs with their coarse edge id */
857: PetscMemzero(marks,ne*sizeof(PetscInt));
858: ISGetLocalSize(primals,&cum);
859: ISGetIndices(primals,&idxs);
860: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
861: ISRestoreIndices(primals,&idxs);
862: if (print) {
863: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
864: ISView(primals,NULL);
865: }
867: maxsize = 0;
868: for (i=0;i<nee;i++) {
869: PetscInt size,mark = i+1;
871: ISGetLocalSize(eedges[i],&size);
872: ISGetIndices(eedges[i],&idxs);
873: for (j=0;j<size;j++) marks[idxs[j]] = mark;
874: ISRestoreIndices(eedges[i],&idxs);
875: maxsize = PetscMax(maxsize,size);
876: }
878: /* Find coarse edge endpoints */
879: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
880: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
881: for (i=0;i<nee;i++) {
882: PetscInt mark = i+1,size;
884: ISGetLocalSize(eedges[i],&size);
885: if (!size && nedfieldlocal) continue;
886: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
887: ISGetIndices(eedges[i],&idxs);
888: if (print) {
889: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
890: ISView(eedges[i],NULL);
891: }
892: for (j=0;j<size;j++) {
893: PetscInt k, ee = idxs[j];
894: if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee);
895: for (k=ii[ee];k<ii[ee+1];k++) {
896: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]);
897: if (PetscBTLookup(btv,jj[k])) {
898: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]);
899: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
900: PetscInt k2;
901: PetscBool corner = PETSC_FALSE;
902: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
903: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
904: /* it's a corner if either is connected with an edge dof belonging to a different cc or
905: if the edge dof lie on the natural part of the boundary */
906: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
907: corner = PETSC_TRUE;
908: break;
909: }
910: }
911: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
912: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]);
913: PetscBTSet(btv,jj[k]);
914: } else {
915: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
916: }
917: }
918: }
919: }
920: ISRestoreIndices(eedges[i],&idxs);
921: }
922: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
923: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
924: PetscBTDestroy(&btb);
926: /* Reset marked primal dofs */
927: ISGetLocalSize(primals,&cum);
928: ISGetIndices(primals,&idxs);
929: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
930: ISRestoreIndices(primals,&idxs);
932: /* Now use the initial lG */
933: MatDestroy(&lG);
934: MatDestroy(&lGt);
935: lG = lGinit;
936: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
938: /* Compute extended cols indices */
939: PetscBTCreate(nv,&btvc);
940: PetscBTCreate(nee,&bter);
941: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
942: MatSeqAIJGetMaxRowNonzeros(lG,&i);
943: i *= maxsize;
944: PetscCalloc1(nee,&extcols);
945: PetscMalloc2(i,&extrow,i,&gidxs);
946: eerr = PETSC_FALSE;
947: for (i=0;i<nee;i++) {
948: PetscInt size,found = 0;
950: cum = 0;
951: ISGetLocalSize(eedges[i],&size);
952: if (!size && nedfieldlocal) continue;
953: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
954: ISGetIndices(eedges[i],&idxs);
955: PetscBTMemzero(nv,btvc);
956: for (j=0;j<size;j++) {
957: PetscInt k,ee = idxs[j];
958: for (k=ii[ee];k<ii[ee+1];k++) {
959: PetscInt vv = jj[k];
960: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
961: else if (!PetscBTLookupSet(btvc,vv)) found++;
962: }
963: }
964: ISRestoreIndices(eedges[i],&idxs);
965: PetscSortRemoveDupsInt(&cum,extrow);
966: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
967: PetscSortIntWithArray(cum,gidxs,extrow);
968: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
969: /* it may happen that endpoints are not defined at this point
970: if it is the case, mark this edge for a second pass */
971: if (cum != size -1 || found != 2) {
972: PetscBTSet(bter,i);
973: if (print) {
974: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
975: ISView(eedges[i],NULL);
976: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
977: ISView(extcols[i],NULL);
978: }
979: eerr = PETSC_TRUE;
980: }
981: }
982: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
983: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
984: if (done) {
985: PetscInt *newprimals;
987: PetscMalloc1(ne,&newprimals);
988: ISGetLocalSize(primals,&cum);
989: ISGetIndices(primals,&idxs);
990: PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
991: ISRestoreIndices(primals,&idxs);
992: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
993: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
994: for (i=0;i<nee;i++) {
995: PetscBool has_candidates = PETSC_FALSE;
996: if (PetscBTLookup(bter,i)) {
997: PetscInt size,mark = i+1;
999: ISGetLocalSize(eedges[i],&size);
1000: ISGetIndices(eedges[i],&idxs);
1001: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1002: for (j=0;j<size;j++) {
1003: PetscInt k,ee = idxs[j];
1004: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1005: for (k=ii[ee];k<ii[ee+1];k++) {
1006: /* set all candidates located on the edge as corners */
1007: if (PetscBTLookup(btvcand,jj[k])) {
1008: PetscInt k2,vv = jj[k];
1009: has_candidates = PETSC_TRUE;
1010: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv);
1011: PetscBTSet(btv,vv);
1012: /* set all edge dofs connected to candidate as primals */
1013: for (k2=iit[vv];k2<iit[vv+1];k2++) {
1014: if (marks[jjt[k2]] == mark) {
1015: PetscInt k3,ee2 = jjt[k2];
1016: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2);
1017: newprimals[cum++] = ee2;
1018: /* finally set the new corners */
1019: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1020: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]);
1021: PetscBTSet(btv,jj[k3]);
1022: }
1023: }
1024: }
1025: } else {
1026: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]);
1027: }
1028: }
1029: }
1030: if (!has_candidates) { /* circular edge */
1031: PetscInt k, ee = idxs[0],*tmarks;
1033: PetscCalloc1(ne,&tmarks);
1034: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i);
1035: for (k=ii[ee];k<ii[ee+1];k++) {
1036: PetscInt k2;
1037: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]);
1038: PetscBTSet(btv,jj[k]);
1039: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1040: }
1041: for (j=0;j<size;j++) {
1042: if (tmarks[idxs[j]] > 1) {
1043: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]);
1044: newprimals[cum++] = idxs[j];
1045: }
1046: }
1047: PetscFree(tmarks);
1048: }
1049: ISRestoreIndices(eedges[i],&idxs);
1050: }
1051: ISDestroy(&extcols[i]);
1052: }
1053: PetscFree(extcols);
1054: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1055: PetscSortRemoveDupsInt(&cum,newprimals);
1056: if (fl2g) {
1057: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1058: ISDestroy(&primals);
1059: for (i=0;i<nee;i++) {
1060: ISDestroy(&eedges[i]);
1061: }
1062: PetscFree(eedges);
1063: }
1064: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1065: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1066: PetscFree(newprimals);
1067: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1068: ISDestroy(&primals);
1069: PCBDDCAnalyzeInterface(pc);
1070: pcbddc->mat_graph->twodim = PETSC_FALSE;
1071: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1072: if (fl2g) {
1073: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1074: PetscMalloc1(nee,&eedges);
1075: for (i=0;i<nee;i++) {
1076: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1077: }
1078: } else {
1079: eedges = alleedges;
1080: primals = allprimals;
1081: }
1082: PetscCalloc1(nee,&extcols);
1084: /* Mark again */
1085: PetscMemzero(marks,ne*sizeof(PetscInt));
1086: for (i=0;i<nee;i++) {
1087: PetscInt size,mark = i+1;
1089: ISGetLocalSize(eedges[i],&size);
1090: ISGetIndices(eedges[i],&idxs);
1091: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1092: ISRestoreIndices(eedges[i],&idxs);
1093: }
1094: if (print) {
1095: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1096: ISView(primals,NULL);
1097: }
1099: /* Recompute extended cols */
1100: eerr = PETSC_FALSE;
1101: for (i=0;i<nee;i++) {
1102: PetscInt size;
1104: cum = 0;
1105: ISGetLocalSize(eedges[i],&size);
1106: if (!size && nedfieldlocal) continue;
1107: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1108: ISGetIndices(eedges[i],&idxs);
1109: for (j=0;j<size;j++) {
1110: PetscInt k,ee = idxs[j];
1111: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1112: }
1113: ISRestoreIndices(eedges[i],&idxs);
1114: PetscSortRemoveDupsInt(&cum,extrow);
1115: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1116: PetscSortIntWithArray(cum,gidxs,extrow);
1117: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1118: if (cum != size -1) {
1119: if (print) {
1120: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1121: ISView(eedges[i],NULL);
1122: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1123: ISView(extcols[i],NULL);
1124: }
1125: eerr = PETSC_TRUE;
1126: }
1127: }
1128: }
1129: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1130: PetscFree2(extrow,gidxs);
1131: PetscBTDestroy(&bter);
1132: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1133: /* an error should not occur at this point */
1134: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1136: /* Check the number of endpoints */
1137: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1138: PetscMalloc1(2*nee,&corners);
1139: PetscMalloc1(nee,&cedges);
1140: for (i=0;i<nee;i++) {
1141: PetscInt size, found = 0, gc[2];
1143: /* init with defaults */
1144: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1145: ISGetLocalSize(eedges[i],&size);
1146: if (!size && nedfieldlocal) continue;
1147: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1148: ISGetIndices(eedges[i],&idxs);
1149: PetscBTMemzero(nv,btvc);
1150: for (j=0;j<size;j++) {
1151: PetscInt k,ee = idxs[j];
1152: for (k=ii[ee];k<ii[ee+1];k++) {
1153: PetscInt vv = jj[k];
1154: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1155: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1156: corners[i*2+found++] = vv;
1157: }
1158: }
1159: }
1160: if (found != 2) {
1161: PetscInt e;
1162: if (fl2g) {
1163: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1164: } else {
1165: e = idxs[0];
1166: }
1167: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1168: }
1170: /* get primal dof index on this coarse edge */
1171: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1172: if (gc[0] > gc[1]) {
1173: PetscInt swap = corners[2*i];
1174: corners[2*i] = corners[2*i+1];
1175: corners[2*i+1] = swap;
1176: }
1177: cedges[i] = idxs[size-1];
1178: ISRestoreIndices(eedges[i],&idxs);
1179: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1180: }
1181: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1182: PetscBTDestroy(&btvc);
1184: #if defined(PETSC_USE_DEBUG)
1185: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1186: not interfere with neighbouring coarse edges */
1187: PetscMalloc1(nee+1,&emarks);
1188: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1189: for (i=0;i<nv;i++) {
1190: PetscInt emax = 0,eemax = 0;
1192: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1193: PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1194: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1195: for (j=1;j<nee+1;j++) {
1196: if (emax < emarks[j]) {
1197: emax = emarks[j];
1198: eemax = j;
1199: }
1200: }
1201: /* not relevant for edges */
1202: if (!eemax) continue;
1204: for (j=ii[i];j<ii[i+1];j++) {
1205: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1206: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1207: }
1208: }
1209: }
1210: PetscFree(emarks);
1211: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1212: #endif
1214: /* Compute extended rows indices for edge blocks of the change of basis */
1215: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1216: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1217: extmem *= maxsize;
1218: PetscMalloc1(extmem*nee,&extrow);
1219: PetscMalloc1(nee,&extrows);
1220: PetscCalloc1(nee,&extrowcum);
1221: for (i=0;i<nv;i++) {
1222: PetscInt mark = 0,size,start;
1224: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1225: for (j=ii[i];j<ii[i+1];j++)
1226: if (marks[jj[j]] && !mark)
1227: mark = marks[jj[j]];
1229: /* not relevant */
1230: if (!mark) continue;
1232: /* import extended row */
1233: mark--;
1234: start = mark*extmem+extrowcum[mark];
1235: size = ii[i+1]-ii[i];
1236: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1237: PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1238: extrowcum[mark] += size;
1239: }
1240: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1241: MatDestroy(&lGt);
1242: PetscFree(marks);
1244: /* Compress extrows */
1245: cum = 0;
1246: for (i=0;i<nee;i++) {
1247: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1248: PetscSortRemoveDupsInt(&size,start);
1249: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1250: cum = PetscMax(cum,size);
1251: }
1252: PetscFree(extrowcum);
1253: PetscBTDestroy(&btv);
1254: PetscBTDestroy(&btvcand);
1256: /* Workspace for lapack inner calls and VecSetValues */
1257: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1259: /* Create change of basis matrix (preallocation can be improved) */
1260: MatCreate(comm,&T);
1261: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1262: pc->pmat->rmap->N,pc->pmat->rmap->N);
1263: MatSetType(T,MATAIJ);
1264: MatSeqAIJSetPreallocation(T,10,NULL);
1265: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1266: MatSetLocalToGlobalMapping(T,al2g,al2g);
1267: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1268: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1269: ISLocalToGlobalMappingDestroy(&al2g);
1271: /* Defaults to identity */
1272: MatCreateVecs(pc->pmat,&tvec,NULL);
1273: VecSet(tvec,1.0);
1274: MatDiagonalSet(T,tvec,INSERT_VALUES);
1275: VecDestroy(&tvec);
1277: /* Create discrete gradient for the coarser level if needed */
1278: MatDestroy(&pcbddc->nedcG);
1279: ISDestroy(&pcbddc->nedclocal);
1280: if (pcbddc->current_level < pcbddc->max_levels) {
1281: ISLocalToGlobalMapping cel2g,cvl2g;
1282: IS wis,gwis;
1283: PetscInt cnv,cne;
1285: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1286: if (fl2g) {
1287: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1288: } else {
1289: PetscObjectReference((PetscObject)wis);
1290: pcbddc->nedclocal = wis;
1291: }
1292: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1293: ISDestroy(&wis);
1294: ISRenumber(gwis,NULL,&cne,&wis);
1295: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1296: ISDestroy(&wis);
1297: ISDestroy(&gwis);
1299: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1300: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1301: ISDestroy(&wis);
1302: ISRenumber(gwis,NULL,&cnv,&wis);
1303: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1304: ISDestroy(&wis);
1305: ISDestroy(&gwis);
1307: MatCreate(comm,&pcbddc->nedcG);
1308: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1309: MatSetType(pcbddc->nedcG,MATAIJ);
1310: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1311: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1312: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1313: ISLocalToGlobalMappingDestroy(&cel2g);
1314: ISLocalToGlobalMappingDestroy(&cvl2g);
1315: }
1316: ISLocalToGlobalMappingDestroy(&vl2g);
1318: #if defined(PRINT_GDET)
1319: inc = 0;
1320: lev = pcbddc->current_level;
1321: #endif
1323: /* Insert values in the change of basis matrix */
1324: for (i=0;i<nee;i++) {
1325: Mat Gins = NULL, GKins = NULL;
1326: IS cornersis = NULL;
1327: PetscScalar cvals[2];
1329: if (pcbddc->nedcG) {
1330: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1331: }
1332: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1333: if (Gins && GKins) {
1334: PetscScalar *data;
1335: const PetscInt *rows,*cols;
1336: PetscInt nrh,nch,nrc,ncc;
1338: ISGetIndices(eedges[i],&cols);
1339: /* H1 */
1340: ISGetIndices(extrows[i],&rows);
1341: MatGetSize(Gins,&nrh,&nch);
1342: MatDenseGetArray(Gins,&data);
1343: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1344: MatDenseRestoreArray(Gins,&data);
1345: ISRestoreIndices(extrows[i],&rows);
1346: /* complement */
1347: MatGetSize(GKins,&nrc,&ncc);
1348: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1349: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1350: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1351: MatDenseGetArray(GKins,&data);
1352: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1353: MatDenseRestoreArray(GKins,&data);
1355: /* coarse discrete gradient */
1356: if (pcbddc->nedcG) {
1357: PetscInt cols[2];
1359: cols[0] = 2*i;
1360: cols[1] = 2*i+1;
1361: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1362: }
1363: ISRestoreIndices(eedges[i],&cols);
1364: }
1365: ISDestroy(&extrows[i]);
1366: ISDestroy(&extcols[i]);
1367: ISDestroy(&cornersis);
1368: MatDestroy(&Gins);
1369: MatDestroy(&GKins);
1370: }
1371: ISLocalToGlobalMappingDestroy(&el2g);
1373: /* Start assembling */
1374: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1375: if (pcbddc->nedcG) {
1376: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1377: }
1379: /* Free */
1380: if (fl2g) {
1381: ISDestroy(&primals);
1382: for (i=0;i<nee;i++) {
1383: ISDestroy(&eedges[i]);
1384: }
1385: PetscFree(eedges);
1386: }
1388: /* hack mat_graph with primal dofs on the coarse edges */
1389: {
1390: PCBDDCGraph graph = pcbddc->mat_graph;
1391: PetscInt *oqueue = graph->queue;
1392: PetscInt *ocptr = graph->cptr;
1393: PetscInt ncc,*idxs;
1395: /* find first primal edge */
1396: if (pcbddc->nedclocal) {
1397: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1398: } else {
1399: if (fl2g) {
1400: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1401: }
1402: idxs = cedges;
1403: }
1404: cum = 0;
1405: while (cum < nee && cedges[cum] < 0) cum++;
1407: /* adapt connected components */
1408: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1409: graph->cptr[0] = 0;
1410: for (i=0,ncc=0;i<graph->ncc;i++) {
1411: PetscInt lc = ocptr[i+1]-ocptr[i];
1412: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1413: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1414: graph->queue[graph->cptr[ncc]] = cedges[cum];
1415: ncc++;
1416: lc--;
1417: cum++;
1418: while (cum < nee && cedges[cum] < 0) cum++;
1419: }
1420: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1421: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1422: ncc++;
1423: }
1424: graph->ncc = ncc;
1425: if (pcbddc->nedclocal) {
1426: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1427: }
1428: PetscFree2(ocptr,oqueue);
1429: }
1430: ISLocalToGlobalMappingDestroy(&fl2g);
1431: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1432: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1433: MatDestroy(&conn);
1435: ISDestroy(&nedfieldlocal);
1436: PetscFree(extrow);
1437: PetscFree2(work,rwork);
1438: PetscFree(corners);
1439: PetscFree(cedges);
1440: PetscFree(extrows);
1441: PetscFree(extcols);
1442: MatDestroy(&lG);
1444: /* Complete assembling */
1445: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1446: if (pcbddc->nedcG) {
1447: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1448: #if 0
1449: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1450: MatView(pcbddc->nedcG,NULL);
1451: #endif
1452: }
1454: /* set change of basis */
1455: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1456: MatDestroy(&T);
1458: return(0);
1459: }
1461: /* the near-null space of BDDC carries information on quadrature weights,
1462: and these can be collinear -> so cheat with MatNullSpaceCreate
1463: and create a suitable set of basis vectors first */
/*
   PCBDDCNullSpaceCreate - wraps MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp).

   Before the call, a few entries of each quad_vecs[i] are temporarily overwritten;
   after the call, the very same entries are set back to zero, so the vectors are
   handed back with those entries zeroed.

   Input:
     comm      - communicator passed to MatNullSpaceCreate
     has_const - forwarded to MatNullSpaceCreate; also selects which entries are touched
     nvecs     - number of vectors in quad_vecs
     quad_vecs - the vectors stored in the null space object
   Output:
     nnsp      - the object created by MatNullSpaceCreate

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is true and the local
   ownership range of a vector holds fewer than 2*nvecs entries.
*/
1464: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1465: {
1467: PetscInt i;
/* first pass: write the temporary entries */
1470: for (i=0;i<nvecs;i++) {
1471: PetscInt first,last;
1473: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1474: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
/* only the process whose ownership range contains global index i writes into vector i */
1475: if (i>=first && i < last) {
1476: PetscScalar *data;
1477: VecGetArray(quad_vecs[i],&data);
1478: if (!has_const) {
/* global entry i of vector i is set to one */
1479: data[i-first] = 1.;
1480: } else {
/* global entries 2*i and 2*i+1 get +1/sqrt(2) and -1/sqrt(2): the pair sums to zero
   (presumably to stay orthogonal to the constant vector -- TODO confirm) */
1481: data[2*i-first] = 1./PetscSqrtReal(2.);
1482: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1483: }
1484: VecRestoreArray(quad_vecs[i],&data);
1485: }
/* the state is bumped on every process, including those that did not write */
1486: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1487: }
1488: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
/* second pass: zero the entries written above; each vector is unlocked (VecLockPop)
   for the write and locked again (VecLockPush) at the end of the iteration */
1489: for (i=0;i<nvecs;i++) { /* reset vectors */
1490: PetscInt first,last;
1491: VecLockPop(quad_vecs[i]);
1492: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1493: if (i>=first && i < last) {
1494: PetscScalar *data;
1495: VecGetArray(quad_vecs[i],&data);
1496: if (!has_const) {
1497: data[i-first] = 0.;
1498: } else {
1499: data[2*i-first] = 0.;
1500: data[2*i-first+1] = 0.;
1501: }
1502: VecRestoreArray(quad_vecs[i],&data);
1503: }
1504: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1505: VecLockPush(quad_vecs[i]);
1506: }
1507: return(0);
1508: }
/*
   PCBDDCComputeNoNetFlux - creates a MatNullSpace (*nnsp) whose vectors are filled with
   per-face and per-edge entries of a locally computed "quadrature" vector.

   Input:
     A         - matrix used to create the global vectors, to get the local-to-global map
                 and (when vl2l is given) to size the local work vector
     divudotp  - MATIS matrix; its local matrix is applied to a vector of ones
     transpose - if false the local vector is loc_divudotp^T * 1, otherwise loc_divudotp * 1;
                 it also selects the row (false) or column (true) local-to-global map of A
     vl2l      - optional IS used to scatter the local vector into a vector shaped like
                 the local matrix of A; may be NULL
     graph     - BDDC graph providing face/edge candidates and neighbour sets
   Output:
     nnsp      - null space object created through PCBDDCNullSpaceCreate
*/
1510: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1511: {
1512: Mat loc_divudotp;
1513: Vec p,v,vins,quad_vec,*quad_vecs;
1514: ISLocalToGlobalMapping map;
1515: IS *faces,*edges;
1516: PetscScalar *vals;
1517: const PetscScalar *array;
1518: PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne;
1519: PetscMPIInt rank;
1520: PetscErrorCode ierr;
1523: PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
/* number of vectors needed locally: 2 when the graph is flagged two-dimensional,
   otherwise one more than the largest graph->count found at the first dof of any edge */
1524: if (graph->twodim) {
1525: lmaxneighs = 2;
1526: } else {
1527: lmaxneighs = 1;
1528: for (i=0;i<ne;i++) {
1529: const PetscInt *idxs;
1530: ISGetIndices(edges[i],&idxs);
1531: lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]);
1532: ISRestoreIndices(edges[i],&idxs);
1533: }
1534: lmaxneighs++; /* graph count does not include self */
1535: }
/* all processes agree on the same number of vectors (global maximum) */
1536: MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
/* vals is a scratch buffer large enough for the biggest edge or face */
1537: maxsize = 0;
1538: for (i=0;i<ne;i++) {
1539: PetscInt nn;
1540: ISGetLocalSize(edges[i],&nn);
1541: maxsize = PetscMax(maxsize,nn);
1542: }
1543: for (i=0;i<nf;i++) {
1544: PetscInt nn;
1545: ISGetLocalSize(faces[i],&nn);
1546: maxsize = PetscMax(maxsize,nn);
1547: }
1548: PetscMalloc1(maxsize,&vals);
1549: /* create vectors to hold quadrature weights */
1550: MatCreateVecs(A,&quad_vec,NULL);
1551: if (!transpose) {
1552: MatGetLocalToGlobalMapping(A,&map,NULL);
1553: } else {
1554: MatGetLocalToGlobalMapping(A,NULL,&map);
1555: }
1556: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1557: VecDestroy(&quad_vec);
/* the null space object is created first (without the constant), then the vectors are
   unlocked so they can be filled below; they are locked again after assembly */
1558: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1559: for (i=0;i<maxneighs;i++) {
1560: VecLockPop(quad_vecs[i]);
1561: VecSetLocalToGlobalMapping(quad_vecs[i],map);
1562: }
1564: /* compute local quad vec */
1565: MatISGetLocalMat(divudotp,&loc_divudotp);
1566: if (!transpose) {
1567: MatCreateVecs(loc_divudotp,&v,&p);
1568: } else {
1569: MatCreateVecs(loc_divudotp,&p,&v);
1570: }
/* p is a vector of ones; v = loc_divudotp^T p (or loc_divudotp p when transpose is set) */
1571: VecSet(p,1.);
1572: if (!transpose) {
1573: MatMultTranspose(loc_divudotp,p,v);
1574: } else {
1575: MatMult(loc_divudotp,p,v);
1576: }
/* with vl2l, v is scattered into a fresh vector compatible with the local matrix of A;
   without it, vins is just an alias of v (and must not be destroyed separately) */
1577: if (vl2l) {
1578: Mat lA;
1579: VecScatter sc;
1581: MatISGetLocalMat(A,&lA);
1582: MatCreateVecs(lA,&vins,NULL);
1583: VecScatterCreate(v,vl2l,vins,NULL,&sc);
1584: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1585: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1586: VecScatterDestroy(&sc);
1587: } else {
1588: vins = v;
1589: }
1590: VecGetArrayRead(vins,&array);
1591: VecDestroy(&p);
1593: /* insert in global quadrature vecs */
1594: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1595: for (i=0;i<nf;i++) {
1596: const PetscInt *idxs;
1597: PetscInt idx,nn,j;
1599: ISGetIndices(faces[i],&idxs);
1600: ISGetLocalSize(faces[i],&nn);
1601: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
/* pick the destination vector from the position of this rank among the neighbours of the
   first dof. NOTE(review): idx = -(idx+1) decodes PetscFindInt's "not found" result into
   the insertion slot; this assumes rank is not stored in neighbours_set -- confirm */
1602: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1603: idx = -(idx+1);
1604: VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1605: ISRestoreIndices(faces[i],&idxs);
1606: }
/* same treatment for the edges */
1607: for (i=0;i<ne;i++) {
1608: const PetscInt *idxs;
1609: PetscInt idx,nn,j;
1611: ISGetIndices(edges[i],&idxs);
1612: ISGetLocalSize(edges[i],&nn);
1613: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1614: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1615: idx = -(idx+1);
1616: VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1617: ISRestoreIndices(edges[i],&idxs);
1618: }
1619: PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
1620: VecRestoreArrayRead(vins,&array);
1621: if (vl2l) {
1622: VecDestroy(&vins);
1623: }
1624: VecDestroy(&v);
1625: PetscFree(vals);
1627: /* assemble near null space */
1628: for (i=0;i<maxneighs;i++) {
1629: VecAssemblyBegin(quad_vecs[i]);
1630: }
1631: for (i=0;i<maxneighs;i++) {
1632: VecAssemblyEnd(quad_vecs[i]);
1633: VecLockPush(quad_vecs[i]);
1634: }
/* local handles are released here; presumably *nnsp keeps its own references to the
   vectors -- TODO confirm against MatNullSpaceCreate */
1635: VecDestroyVecs(maxneighs,&quad_vecs);
1636: return(0);
1637: }
/*
   PCBDDCComputeLocalTopologyInfo - fills the "Local" index sets stored in the PC_BDDC
   data (dofs splitting, Dirichlet and Neumann boundaries, user primal vertices) from
   their global counterparts or from other sources, and runs consistency checks on the
   ones that are already given in local form.

   The option -pc_bddc_monolithic skips the whole dofs-splitting part.
*/
1640: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1641: {
1643: Vec local,global;
1644: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1645: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1646: PetscBool monolithic = PETSC_FALSE;
1649: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1650: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1651: PetscOptionsEnd();
1652: /* need to convert from global to local topology information and remove references to information in global ordering */
/* work vectors handed to PCBDDCGlobalToLocal: one shaped like pmat, one like the local matrix */
1653: MatCreateVecs(pc->pmat,&global,NULL);
1654: MatCreateVecs(matis->A,&local,NULL);
1655: if (monolithic) goto boundary;
1657: if (pcbddc->user_provided_isfordofs) {
/* user gave the splitting in global form: convert each IS to local and drop the global copies */
1658: if (pcbddc->n_ISForDofs) {
1659: PetscInt i;
1660: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1661: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1662: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1663: ISDestroy(&pcbddc->ISForDofs[i]);
1664: }
1665: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1666: pcbddc->n_ISForDofs = 0;
1667: PetscFree(pcbddc->ISForDofs);
1668: }
1669: } else {
1670: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1671: DM dm;
/* first source: a DM attached to the PC, or failing that to the preconditioning matrix */
1673: PCGetDM(pc, &dm);
1674: if (!dm) {
1675: MatGetDM(pc->pmat, &dm);
1676: }
1677: if (dm) {
1678: IS *fields;
1679: PetscInt nf,i;
1680: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1681: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1682: for (i=0;i<nf;i++) {
1683: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1684: ISDestroy(&fields[i]);
1685: }
1686: PetscFree(fields);
1687: pcbddc->n_ISForDofsLocal = nf;
1688: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1689: PetscContainer c;
1691: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1692: if (c) {
1693: MatISLocalFields lf;
1694: PetscContainerGetPointer(c,(void**)&lf);
1695: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1696: } else { /* fallback, create the default fields if bs > 1 */
/* i first holds the block size, then is reused as the loop counter;
   field i is the stride IS starting at i with step bs and n/bs entries */
1697: PetscInt i, n = matis->A->rmap->n;
1698: MatGetBlockSize(pc->pmat,&i);
1699: if (i > 1) {
1700: pcbddc->n_ISForDofsLocal = i;
1701: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1702: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1703: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1704: }
1705: }
1706: }
1707: }
1708: } else {
/* a local splitting is already present: run the consistency check (MPI_LAND) on each field */
1709: PetscInt i;
1710: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1711: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1712: }
1713: }
1714: }
/* boundary and primal-vertex information is handled even in the monolithic case */
1716: boundary:
/* Dirichlet: convert from global if only the global IS exists, otherwise check the local one with MPI_LAND */
1717: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1718: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1719: } else if (pcbddc->DirichletBoundariesLocal) {
1720: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1721: }
/* Neumann: same pattern, but the local IS is checked with MPI_LOR */
1722: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1723: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1724: } else if (pcbddc->NeumannBoundariesLocal) {
1725: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1726: }
/* user primal vertices are only converted; no consistency check is run on a local IS here */
1727: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1728: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1729: }
1730: VecDestroy(&global);
1731: VecDestroy(&local);
1733: return(0);
1734: }
/*
   PCBDDCConsistencyCheckIS - replaces *is with a new IS obtained by combining, through
   the star forest of the MATIS matrix, the membership flags of the local dofs.

   Input:
     pc  - the preconditioner; pc->pmat must be of type MATIS (its data is read as Mat_IS)
     mop - MPI_LAND or MPI_LOR; any other value raises PETSC_ERR_SUP
   Input/Output:
     is  - on entry, IS of local indices; on exit the old IS has been destroyed and *is
           holds the local indices (in increasing order) whose flag is true after the
           leaf->root reduction with mop followed by the root->leaf broadcast
*/
1736: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1737: {
1738: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1739: PetscErrorCode ierr;
1740: IS nis;
1741: const PetscInt *idxs;
1742: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1743: PetscBool *ld;
1746: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1747: MatISSetUpSF(pc->pmat);
/* the SF root/leaf buffers of the MATIS are reused here as arrays of PetscBool;
   root data is initialised to the neutral element of the reduction (true for AND, false for OR) */
1748: if (mop == MPI_LAND) {
1749: /* init rootdata with true */
1750: ld = (PetscBool*) matis->sf_rootdata;
1751: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1752: } else {
1753: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1754: }
1755: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1756: ISGetLocalSize(*is,&nd);
1757: ISGetIndices(*is,&idxs);
/* flag the local dofs listed in *is; indices outside [0,n) are silently ignored */
1758: ld = (PetscBool*) matis->sf_leafdata;
1759: for (i=0;i<nd;i++)
1760: if (-1 < idxs[i] && idxs[i] < n)
1761: ld[idxs[i]] = PETSC_TRUE;
1762: ISRestoreIndices(*is,&idxs);
/* combine the flags on the roots, then send the combined result back to the leaves */
1763: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1764: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1765: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1766: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
/* output buffer: nd entries for AND, n entries for OR.
   NOTE(review): presumably AND can only remove indices from the input set while OR may
   add any of the n local dofs -- confirm that nd is always a safe bound for AND */
1767: if (mop == MPI_LAND) {
1768: PetscMalloc1(nd,&nidxs);
1769: } else {
1770: PetscMalloc1(n,&nidxs);
1771: }
/* ld still points at the leaf data, which now holds the combined flags */
1772: for (i=0,nnd=0;i<n;i++)
1773: if (ld[i])
1774: nidxs[nnd++] = i;
/* nidxs is handed over to the new IS (PETSC_OWN_POINTER), so it is not freed here */
1775: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1776: ISDestroy(is);
1777: *is = nis;
1778: return(0);
1779: }
/*
   PCBDDCBenignRemoveInterior - computes z from r by scattering r to the "D" work vector,
   solving with pcbddc->ksp_D and scattering the solution back into a zeroed z.
   When a change-of-basis matrix is present, its transpose is applied to r beforehand
   and the matrix itself is applied to the result afterwards.

   Does nothing (returns immediately, z untouched) when pcbddc->benign_have_null is false.

   Input:
     pc - the preconditioner (pc->data is read both as PC_IS and as PC_BDDC)
     r  - input vector
   Output:
     z  - output vector
*/
1781: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1782: {
1783: PC_IS *pcis = (PC_IS*)(pc->data);
1784: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1785: PetscErrorCode ierr;
1788: if (!pcbddc->benign_have_null) {
1789: return(0);
1790: }
1791: if (pcbddc->ChangeOfBasisMatrix) {
1792: Vec swap;
/* work_change = T^T r; then the two handles are exchanged: the local name r now refers to
   the transformed vector, while pcbddc->work_change temporarily holds the caller's r */
1794: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1795: swap = pcbddc->work_change;
1796: pcbddc->work_change = r;
1797: r = swap;
1798: }
1799: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1800: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1801: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
/* entries of z not reached by the reverse scatter stay zero */
1802: VecSet(z,0.);
1803: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1804: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1805: if (pcbddc->ChangeOfBasisMatrix) {
/* undo the handle exchange (local r is the original work vector), then z = T * z
   using the work vector as temporary storage */
1806: pcbddc->work_change = r;
1807: VecCopy(z,pcbddc->work_change);
1808: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1809: }
1810: return(0);
1811: }
/*
   PCBDDCBenignMatMult_Private_Private - shared implementation of the shell-matrix
   product: y = op(ctx->A) x, optionally preceded by an in-place transformation of a
   copy of x ("right" application) and followed by an in-place transformation of y
   ("left" application). For every index set in ctx->benign_zerodiag_subs the last
   index (idxs[nz-1]) plays a special role with respect to the first nz-1 ones.

   Input:
     A         - shell matrix; its context is a PCBDDCBenignMatMult_ctx
     x         - input vector (its values are left unchanged on exit)
     transpose - if true, ctx->A is applied transposed and the roles of
                 ctx->apply_left / ctx->apply_right are exchanged
   Output:
     y         - result vector
*/
1813: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1814: {
1815: PCBDDCBenignMatMult_ctx ctx;
1816: PetscErrorCode ierr;
1817: PetscBool apply_right,apply_left,reset_x;
1820: MatShellGetContext(A,&ctx);
1821: if (transpose) {
1822: apply_right = ctx->apply_left;
1823: apply_left = ctx->apply_right;
1824: } else {
1825: apply_right = ctx->apply_right;
1826: apply_left = ctx->apply_left;
1827: }
1828: reset_x = PETSC_FALSE;
1829: if (apply_right) {
1830: const PetscScalar *ax;
1831: PetscInt nl,i;
/* work on a copy of x stored in ctx->work (assumes ctx->work holds at least nl scalars
   -- TODO confirm where it is allocated) */
1833: VecGetLocalSize(x,&nl);
1834: VecGetArrayRead(x,&ax);
1835: PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1836: VecRestoreArrayRead(x,&ax);
1837: for (i=0;i<ctx->benign_n;i++) {
1838: PetscScalar sum,val;
1839: const PetscInt *idxs;
1840: PetscInt nz,j;
1841: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1842: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
/* sum collects the original values of the first nz-1 entries; with apply_p0 the value of
   the last entry is additionally added to each of them */
1843: sum = 0.;
1844: if (ctx->apply_p0) {
1845: val = ctx->work[idxs[nz-1]];
1846: for (j=0;j<nz-1;j++) {
1847: sum += ctx->work[idxs[j]];
1848: ctx->work[idxs[j]] += val;
1849: }
1850: } else {
1851: for (j=0;j<nz-1;j++) {
1852: sum += ctx->work[idxs[j]];
1853: }
1854: }
/* in both cases the last entry is decreased by the sum of the others */
1855: ctx->work[idxs[nz-1]] -= sum;
1856: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1857: }
/* temporarily make x use the modified copy; undone by VecResetArray after the product */
1858: VecPlaceArray(x,ctx->work);
1859: reset_x = PETSC_TRUE;
1860: }
1861: if (transpose) {
1862: MatMultTranspose(ctx->A,x,y);
1863: } else {
1864: MatMult(ctx->A,x,y);
1865: }
1866: if (reset_x) {
1867: VecResetArray(x);
1868: }
1869: if (apply_left) {
1870: PetscScalar *ay;
1871: PetscInt i;
1873: VecGetArray(y,&ay);
1874: for (i=0;i<ctx->benign_n;i++) {
1875: PetscScalar sum,val;
1876: const PetscInt *idxs;
1877: PetscInt nz,j;
1878: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1879: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
/* the last entry of y is subtracted from each of the first nz-1 entries;
   with apply_p0 the last entry is then increased by the sum of their original values,
   otherwise it is set to zero */
1880: val = -ay[idxs[nz-1]];
1881: if (ctx->apply_p0) {
1882: sum = 0.;
1883: for (j=0;j<nz-1;j++) {
1884: sum += ay[idxs[j]];
1885: ay[idxs[j]] += val;
1886: }
1887: ay[idxs[nz-1]] += sum;
1888: } else {
1889: for (j=0;j<nz-1;j++) {
1890: ay[idxs[j]] += val;
1891: }
1892: ay[idxs[nz-1]] = 0.;
1893: }
1894: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1895: }
1896: VecRestoreArray(y,&ay);
1897: }
1898: return(0);
1899: }
1901: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1902: {
1906: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
1907: return(0);
1908: }
1910: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1911: {
1915: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
1916: return(0);
1917: }
1919: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1920: {
1921: PC_IS *pcis = (PC_IS*)pc->data;
1922: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1923: PCBDDCBenignMatMult_ctx ctx;
1924: PetscErrorCode ierr;
1927: if (!restore) {
1928: Mat A_IB,A_BI;
1929: PetscScalar *work;
1930: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
1932: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
1933: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
1934: PetscMalloc1(pcis->n,&work);
1935: MatCreate(PETSC_COMM_SELF,&A_IB);
1936: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
1937: MatSetType(A_IB,MATSHELL);
1938: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
1939: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1940: PetscNew(&ctx);
1941: MatShellSetContext(A_IB,ctx);
1942: ctx->apply_left = PETSC_TRUE;
1943: ctx->apply_right = PETSC_FALSE;
1944: ctx->apply_p0 = PETSC_FALSE;
1945: ctx->benign_n = pcbddc->benign_n;
1946: if (reuse) {
1947: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1948: ctx->free = PETSC_FALSE;
1949: } else { /* TODO: could be optimized for successive solves */
1950: ISLocalToGlobalMapping N_to_D;
1951: PetscInt i;
1953: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
1954: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
1955: for (i=0;i<pcbddc->benign_n;i++) {
1956: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
1957: }
1958: ISLocalToGlobalMappingDestroy(&N_to_D);
1959: ctx->free = PETSC_TRUE;
1960: }
1961: ctx->A = pcis->A_IB;
1962: ctx->work = work;
1963: MatSetUp(A_IB);
1964: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
1965: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
1966: pcis->A_IB = A_IB;
1968: /* A_BI as A_IB^T */
1969: MatCreateTranspose(A_IB,&A_BI);
1970: pcbddc->benign_original_mat = pcis->A_BI;
1971: pcis->A_BI = A_BI;
1972: } else {
1973: if (!pcbddc->benign_original_mat) {
1974: return(0);
1975: }
1976: MatShellGetContext(pcis->A_IB,&ctx);
1977: MatDestroy(&pcis->A_IB);
1978: pcis->A_IB = ctx->A;
1979: ctx->A = NULL;
1980: MatDestroy(&pcis->A_BI);
1981: pcis->A_BI = pcbddc->benign_original_mat;
1982: pcbddc->benign_original_mat = NULL;
1983: if (ctx->free) {
1984: PetscInt i;
1985: for (i=0;i<ctx->benign_n;i++) {
1986: ISDestroy(&ctx->benign_zerodiag_subs[i]);
1987: }
1988: PetscFree(ctx->benign_zerodiag_subs);
1989: }
1990: PetscFree(ctx->work);
1991: PetscFree(ctx);
1992: }
1993: return(0);
1994: }
1996: /* used just in bddc debug mode */
1997: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
1998: {
1999: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2000: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2001: Mat An;
2005: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2006: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2007: if (is1) {
2008: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2009: MatDestroy(&An);
2010: } else {
2011: *B = An;
2012: }
2013: return(0);
2014: }
2016: /* TODO: add reuse flag */
2017: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2018: {
2019: Mat Bt;
2020: PetscScalar *a,*bdata;
2021: const PetscInt *ii,*ij;
2022: PetscInt m,n,i,nnz,*bii,*bij;
2023: PetscBool flg_row;
2027: MatGetSize(A,&n,&m);
2028: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2029: MatSeqAIJGetArray(A,&a);
2030: nnz = n;
2031: for (i=0;i<ii[n];i++) {
2032: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2033: }
2034: PetscMalloc1(n+1,&bii);
2035: PetscMalloc1(nnz,&bij);
2036: PetscMalloc1(nnz,&bdata);
2037: nnz = 0;
2038: bii[0] = 0;
2039: for (i=0;i<n;i++) {
2040: PetscInt j;
2041: for (j=ii[i];j<ii[i+1];j++) {
2042: PetscScalar entry = a[j];
2043: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
2044: bij[nnz] = ij[j];
2045: bdata[nnz] = entry;
2046: nnz++;
2047: }
2048: }
2049: bii[i+1] = nnz;
2050: }
2051: MatSeqAIJRestoreArray(A,&a);
2052: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2053: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2054: {
2055: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2056: b->free_a = PETSC_TRUE;
2057: b->free_ij = PETSC_TRUE;
2058: }
2059: *B = Bt;
2060: return(0);
2061: }
2063: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2064: {
2065: Mat B = NULL;
2066: DM dm;
2067: IS is_dummy,*cc_n;
2068: ISLocalToGlobalMapping l2gmap_dummy;
2069: PCBDDCGraph graph;
2070: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2071: PetscInt i,n;
2072: PetscInt *xadj,*adjncy;
2073: PetscBool isplex = PETSC_FALSE;
2074: PetscErrorCode ierr;
2077: PCBDDCGraphCreate(&graph);
2078: PCGetDM(pc,&dm);
2079: if (!dm) {
2080: MatGetDM(pc->pmat,&dm);
2081: }
2082: if (dm) {
2083: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2084: }
2085: if (isplex) { /* this code has been modified from plexpartition.c */
2086: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2087: PetscInt *adj = NULL;
2088: IS cellNumbering;
2089: const PetscInt *cellNum;
2090: PetscBool useCone, useClosure;
2091: PetscSection section;
2092: PetscSegBuffer adjBuffer;
2093: PetscSF sfPoint;
2097: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2098: DMGetPointSF(dm, &sfPoint);
2099: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2100: /* Build adjacency graph via a section/segbuffer */
2101: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2102: PetscSectionSetChart(section, pStart, pEnd);
2103: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2104: /* Always use FVM adjacency to create partitioner graph */
2105: DMPlexGetAdjacencyUseCone(dm, &useCone);
2106: DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2107: DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2108: DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2109: DMPlexGetCellNumbering(dm, &cellNumbering);
2110: ISGetIndices(cellNumbering, &cellNum);
2111: for (n = 0, p = pStart; p < pEnd; p++) {
2112: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2113: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2114: adjSize = PETSC_DETERMINE;
2115: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2116: for (a = 0; a < adjSize; ++a) {
2117: const PetscInt point = adj[a];
2118: if (pStart <= point && point < pEnd) {
2119: PetscInt *PETSC_RESTRICT pBuf;
2120: PetscSectionAddDof(section, p, 1);
2121: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2122: *pBuf = point;
2123: }
2124: }
2125: n++;
2126: }
2127: DMPlexSetAdjacencyUseCone(dm, useCone);
2128: DMPlexSetAdjacencyUseClosure(dm, useClosure);
2129: /* Derive CSR graph from section/segbuffer */
2130: PetscSectionSetUp(section);
2131: PetscSectionGetStorageSize(section, &size);
2132: PetscMalloc1(n+1, &xadj);
2133: for (idx = 0, p = pStart; p < pEnd; p++) {
2134: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2135: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2136: }
2137: xadj[n] = size;
2138: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2139: /* Clean up */
2140: PetscSegBufferDestroy(&adjBuffer);
2141: PetscSectionDestroy(§ion);
2142: PetscFree(adj);
2143: graph->xadj = xadj;
2144: graph->adjncy = adjncy;
2145: } else {
2146: Mat A;
2147: PetscBool filter = PETSC_FALSE, isseqaij, flg_row;
2149: MatISGetLocalMat(pc->pmat,&A);
2150: if (!A->rmap->N || !A->cmap->N) {
2151: *ncc = 0;
2152: *cc = NULL;
2153: return(0);
2154: }
2155: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2156: if (!isseqaij && filter) {
2157: PetscBool isseqdense;
2159: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2160: if (!isseqdense) {
2161: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2162: } else { /* TODO: rectangular case and LDA */
2163: PetscScalar *array;
2164: PetscReal chop=1.e-6;
2166: MatDuplicate(A,MAT_COPY_VALUES,&B);
2167: MatDenseGetArray(B,&array);
2168: MatGetSize(B,&n,NULL);
2169: for (i=0;i<n;i++) {
2170: PetscInt j;
2171: for (j=i+1;j<n;j++) {
2172: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2173: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2174: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2175: }
2176: }
2177: MatDenseRestoreArray(B,&array);
2178: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2179: }
2180: } else {
2181: PetscObjectReference((PetscObject)A);
2182: B = A;
2183: }
2184: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2186: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2187: if (filter) {
2188: PetscScalar *data;
2189: PetscInt j,cum;
2191: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2192: MatSeqAIJGetArray(B,&data);
2193: cum = 0;
2194: for (i=0;i<n;i++) {
2195: PetscInt t;
2197: for (j=xadj[i];j<xadj[i+1];j++) {
2198: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2199: continue;
2200: }
2201: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2202: }
2203: t = xadj_filtered[i];
2204: xadj_filtered[i] = cum;
2205: cum += t;
2206: }
2207: MatSeqAIJRestoreArray(B,&data);
2208: graph->xadj = xadj_filtered;
2209: graph->adjncy = adjncy_filtered;
2210: } else {
2211: graph->xadj = xadj;
2212: graph->adjncy = adjncy;
2213: }
2214: }
2215: /* compute local connected components using PCBDDCGraph */
2216: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2217: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2218: ISDestroy(&is_dummy);
2219: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2220: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2221: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2222: PCBDDCGraphComputeConnectedComponents(graph);
2224: /* partial clean up */
2225: PetscFree2(xadj_filtered,adjncy_filtered);
2226: if (B) {
2227: PetscBool flg_row;
2228: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2229: MatDestroy(&B);
2230: }
2231: if (isplex) {
2232: PetscFree(xadj);
2233: PetscFree(adjncy);
2234: }
2236: /* get back data */
2237: if (isplex) {
2238: if (ncc) *ncc = graph->ncc;
2239: if (cc || primalv) {
2240: Mat A;
2241: PetscBT btv,btvt;
2242: PetscSection subSection;
2243: PetscInt *ids,cum,cump,*cids,*pids;
2245: DMPlexGetSubdomainSection(dm,&subSection);
2246: MatISGetLocalMat(pc->pmat,&A);
2247: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2248: PetscBTCreate(A->rmap->n,&btv);
2249: PetscBTCreate(A->rmap->n,&btvt);
2251: cids[0] = 0;
2252: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2253: PetscInt j;
2255: PetscBTMemzero(A->rmap->n,btvt);
2256: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2257: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2259: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2260: for (k = 0; k < 2*size; k += 2) {
2261: PetscInt s, p = closure[k], off, dof, cdof;
2263: PetscSectionGetConstraintDof(subSection, p, &cdof);
2264: PetscSectionGetOffset(subSection,p,&off);
2265: PetscSectionGetDof(subSection,p,&dof);
2266: for (s = 0; s < dof-cdof; s++) {
2267: if (PetscBTLookupSet(btvt,off+s)) continue;
2268: if (!PetscBTLookup(btv,off+s)) {
2269: ids[cum++] = off+s;
2270: } else { /* cross-vertex */
2271: pids[cump++] = off+s;
2272: }
2273: }
2274: }
2275: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2276: }
2277: cids[i+1] = cum;
2278: /* mark dofs as already assigned */
2279: for (j = cids[i]; j < cids[i+1]; j++) {
2280: PetscBTSet(btv,ids[j]);
2281: }
2282: }
2283: if (cc) {
2284: PetscMalloc1(graph->ncc,&cc_n);
2285: for (i = 0; i < graph->ncc; i++) {
2286: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2287: }
2288: *cc = cc_n;
2289: }
2290: if (primalv) {
2291: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2292: }
2293: PetscFree3(ids,cids,pids);
2294: PetscBTDestroy(&btv);
2295: PetscBTDestroy(&btvt);
2296: }
2297: } else {
2298: if (ncc) *ncc = graph->ncc;
2299: if (cc) {
2300: PetscMalloc1(graph->ncc,&cc_n);
2301: for (i=0;i<graph->ncc;i++) {
2302: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2303: }
2304: *cc = cc_n;
2305: }
2306: if (primalv) *primalv = NULL;
2307: }
2308: /* clean up graph */
2309: graph->xadj = 0;
2310: graph->adjncy = 0;
2311: PCBDDCGraphDestroy(&graph);
2312: return(0);
2313: }
2315: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2316: {
2317: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2318: PC_IS* pcis = (PC_IS*)(pc->data);
2319: IS dirIS = NULL;
2320: PetscInt i;
2324: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2325: if (zerodiag) {
2326: Mat A;
2327: Vec vec3_N;
2328: PetscScalar *vals;
2329: const PetscInt *idxs;
2330: PetscInt nz,*count;
2332: /* p0 */
2333: VecSet(pcis->vec1_N,0.);
2334: PetscMalloc1(pcis->n,&vals);
2335: ISGetLocalSize(zerodiag,&nz);
2336: ISGetIndices(zerodiag,&idxs);
2337: for (i=0;i<nz;i++) vals[i] = 1.;
2338: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2339: VecAssemblyBegin(pcis->vec1_N);
2340: VecAssemblyEnd(pcis->vec1_N);
2341: /* v_I */
2342: VecSetRandom(pcis->vec2_N,NULL);
2343: for (i=0;i<nz;i++) vals[i] = 0.;
2344: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2345: ISRestoreIndices(zerodiag,&idxs);
2346: ISGetIndices(pcis->is_B_local,&idxs);
2347: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2348: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2349: ISRestoreIndices(pcis->is_B_local,&idxs);
2350: if (dirIS) {
2351: PetscInt n;
2353: ISGetLocalSize(dirIS,&n);
2354: ISGetIndices(dirIS,&idxs);
2355: for (i=0;i<n;i++) vals[i] = 0.;
2356: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2357: ISRestoreIndices(dirIS,&idxs);
2358: }
2359: VecAssemblyBegin(pcis->vec2_N);
2360: VecAssemblyEnd(pcis->vec2_N);
2361: VecDuplicate(pcis->vec1_N,&vec3_N);
2362: VecSet(vec3_N,0.);
2363: MatISGetLocalMat(pc->pmat,&A);
2364: MatMult(A,pcis->vec1_N,vec3_N);
2365: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2366: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2367: PetscFree(vals);
2368: VecDestroy(&vec3_N);
2370: /* there should not be any pressure dofs lying on the interface */
2371: PetscCalloc1(pcis->n,&count);
2372: ISGetIndices(pcis->is_B_local,&idxs);
2373: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2374: ISRestoreIndices(pcis->is_B_local,&idxs);
2375: ISGetIndices(zerodiag,&idxs);
2376: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]);
2377: ISRestoreIndices(zerodiag,&idxs);
2378: PetscFree(count);
2379: }
2380: ISDestroy(&dirIS);
2382: /* check PCBDDCBenignGetOrSetP0 */
2383: VecSetRandom(pcis->vec1_global,NULL);
2384: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2385: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2386: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2387: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2388: for (i=0;i<pcbddc->benign_n;i++) {
2389: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2390: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2391: }
2392: return(0);
2393: }
/*
  Detects the local zero-diagonal ("pressure") dofs and prepares the data of the benign
  change of basis.

  In order, the routine:
    - discards the benign data of a previous call (benign_sf, benign_B0,
      benign_zerodiag_subs, benign_n);
    - if local field information is available, takes the last field (or the one chosen with
      -pc_bddc_pressure_field) as pressures, minus the dofs of the IS composed with pc under
      the name "__KSPFETIDP_lP" when present;
    - computes the zero-diagonal dofs of pcbddc->local_mat and splits them according to
      pcbddc->local_subs, keeping only the pieces that pass the validity checks;
    - optionally builds pcbddc->divudotp (when compute_nonetflux is set and it does not exist);
    - builds pcbddc->benign_change and the p0 index arrays, and replaces pcbddc->local_mat
      by the compressed projected matrix when benign_change_explicit is set;
    - stores the retained pieces in pcbddc->benign_zerodiag_subs and reduces have_null over
      the communicator of pc into benign_null (logical AND) and benign_have_null (logical OR).

  Output:
    zerodiaglocal - the zero-diagonal dofs finally retained (may be NULL)
*/
2395: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2396: {
2397: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2398: IS pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2399: PetscInt nz,n;
2400: PetscInt *interior_dofs,n_interior_dofs,nneu;
2401: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
/* drop whatever a previous call left behind */
2405: PetscSFDestroy(&pcbddc->benign_sf);
2406: MatDestroy(&pcbddc->benign_B0);
2407: for (n=0;n<pcbddc->benign_n;n++) {
2408: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2409: }
2410: PetscFree(pcbddc->benign_zerodiag_subs);
2411: pcbddc->benign_n = 0;
2413: /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2414: otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2415: Checks if all the pressure dofs in each subdomain have a zero diagonal
2416: If not, a change of basis on pressures is not needed
2417: since the local Schur complements are already SPD
2418: */
2419: has_null_pressures = PETSC_TRUE;
2420: have_null = PETSC_TRUE;
2421: if (pcbddc->n_ISForDofsLocal) {
2422: IS iP = NULL;
2423: PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
2425: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2426: PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2427: PetscOptionsEnd();
2428: if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2429: /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2430: ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2431: ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2432: ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2433: ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2434: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2435: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2436: if (iP) {
2437: IS newpressures;
2439: ISDifference(pressures,iP,&newpressures);
2440: ISDestroy(&pressures);
2441: pressures = newpressures;
2442: }
2443: ISSorted(pressures,&sorted);
2444: if (!sorted) {
2445: ISSort(pressures);
2446: }
2447: } else {
2448: pressures = NULL;
2449: }
2450: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2451: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2452: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2453: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2454: ISSorted(zerodiag,&sorted);
2455: if (!sorted) {
2456: ISSort(zerodiag);
2457: }
/* zerodiag_save keeps its own reference to the unfiltered zero-diagonal set; it is used
   below as fallback when extracting divudotp and destroyed right after that section */
2458: PetscObjectReference((PetscObject)zerodiag);
2459: zerodiag_save = zerodiag;
2460: ISGetLocalSize(zerodiag,&nz);
2461: if (!nz) {
2462: if (n) have_null = PETSC_FALSE;
2463: has_null_pressures = PETSC_FALSE;
2464: ISDestroy(&zerodiag);
2465: }
2466: recompute_zerodiag = PETSC_FALSE;
2467: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2468: zerodiag_subs = NULL;
2469: pcbddc->benign_n = 0;
2470: n_interior_dofs = 0;
2471: interior_dofs = NULL;
2472: nneu = 0;
2473: if (pcbddc->NeumannBoundariesLocal) {
2474: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2475: }
/* the B(v_I,p0) test is performed when no Neumann boundary IS was given or on levels other than 0 */
2476: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2477: if (checkb) { /* need to compute interior nodes */
2478: PetscInt n,i,j;
2479: PetscInt n_neigh,*neigh,*n_shared,**shared;
2480: PetscInt *iwork;
/* note: this n shadows the function-level n and holds the size of the row local-to-global map */
2482: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2483: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2484: PetscCalloc1(n,&iwork);
2485: PetscMalloc1(n,&interior_dofs);
/* count, for every local dof, the neighbours from index 1 on that share it; dofs with a zero count are interior */
2486: for (i=1;i<n_neigh;i++)
2487: for (j=0;j<n_shared[i];j++)
2488: iwork[shared[i][j]] += 1;
2489: for (i=0;i<n;i++)
2490: if (!iwork[i])
2491: interior_dofs[n_interior_dofs++] = i;
2492: PetscFree(iwork);
2493: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2494: }
2495: if (has_null_pressures) {
2496: IS *subs;
2497: PetscInt nsubs,i,j,nl;
2498: const PetscInt *idxs;
2499: PetscScalar *array;
2500: Vec *work;
2501: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2503: subs = pcbddc->local_subs;
2504: nsubs = pcbddc->n_local_subs;
2505: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2506: if (checkb) {
2507: VecDuplicateVecs(matis->y,2,&work);
2508: ISGetLocalSize(zerodiag,&nl);
2509: ISGetIndices(zerodiag,&idxs);
2510: /* work[0] = 1_p */
2511: VecSet(work[0],0.);
2512: VecGetArray(work[0],&array);
2513: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2514: VecRestoreArray(work[0],&array);
2515: /* work[1] = 1_v */
2516: VecSet(work[1],1.);
2517: VecGetArray(work[1],&array);
2518: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2519: VecRestoreArray(work[1],&array);
2520: ISRestoreIndices(zerodiag,&idxs);
2521: }
2522: if (nsubs > 1) {
2523: PetscCalloc1(nsubs,&zerodiag_subs);
2524: for (i=0;i<nsubs;i++) {
2525: ISLocalToGlobalMapping l2g;
2526: IS t_zerodiag_subs;
2527: PetscInt nl;
/* restrict the zero-diagonal dofs to the i-th local subdomain (numbering local to subs[i]) */
2529: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2530: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2531: ISGetLocalSize(t_zerodiag_subs,&nl);
2532: if (nl) {
2533: PetscBool valid = PETSC_TRUE;
2535: if (checkb) {
/* apply the local matrix to the indicator of the pressures of this subdomain and look at the
   result on the non-pressure interior dofs: any entry above PETSC_SMALL invalidates the piece */
2536: VecSet(matis->x,0);
2537: ISGetLocalSize(subs[i],&nl);
2538: ISGetIndices(subs[i],&idxs);
2539: VecGetArray(matis->x,&array);
2540: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2541: VecRestoreArray(matis->x,&array);
2542: ISRestoreIndices(subs[i],&idxs);
2543: VecPointwiseMult(matis->x,work[0],matis->x);
2544: MatMult(matis->A,matis->x,matis->y);
2545: VecPointwiseMult(matis->y,work[1],matis->y);
2546: VecGetArray(matis->y,&array);
2547: for (j=0;j<n_interior_dofs;j++) {
2548: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2549: valid = PETSC_FALSE;
2550: break;
2551: }
2552: }
2553: VecRestoreArray(matis->y,&array);
2554: }
2555: if (valid && nneu) {
2556: const PetscInt *idxs;
2557: PetscInt nzb;
/* a subdomain containing any dof of the Neumann boundary IS is not retained */
2559: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2560: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2561: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2562: if (nzb) valid = PETSC_FALSE;
2563: }
2564: if (valid && pressures) {
2565: IS t_pressure_subs;
/* the pressures of the subdomain must coincide with its zero-diagonal dofs */
2566: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2567: ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2568: ISDestroy(&t_pressure_subs);
2569: }
2570: if (valid) {
2571: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2572: pcbddc->benign_n++;
2573: } else {
2574: recompute_zerodiag = PETSC_TRUE;
2575: }
2576: }
2577: ISDestroy(&t_zerodiag_subs);
2578: ISLocalToGlobalMappingDestroy(&l2g);
2579: }
2580: } else { /* there's just one subdomain (or zero if they have not been detected) */
2581: PetscBool valid = PETSC_TRUE;
2583: if (nneu) valid = PETSC_FALSE;
2584: if (valid && pressures) {
2585: ISEqual(pressures,zerodiag,&valid);
2586: }
2587: if (valid && checkb) {
2588: MatMult(matis->A,work[0],matis->x);
2589: VecPointwiseMult(matis->x,work[1],matis->x);
2590: VecGetArray(matis->x,&array);
2591: for (j=0;j<n_interior_dofs;j++) {
2592: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2593: valid = PETSC_FALSE;
2594: break;
2595: }
2596: }
2597: VecRestoreArray(matis->x,&array);
2598: }
2599: if (valid) {
2600: pcbddc->benign_n = 1;
2601: PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2602: PetscObjectReference((PetscObject)zerodiag);
2603: zerodiag_subs[0] = zerodiag;
2604: }
2605: }
2606: if (checkb) {
2607: VecDestroyVecs(2,&work);
2608: }
2609: }
2610: PetscFree(interior_dofs);
/* nothing retained: forget the zero-diagonal set; on non-empty subdomains also clear both flags */
2612: if (!pcbddc->benign_n) {
2613: PetscInt n;
2615: ISDestroy(&zerodiag);
2616: recompute_zerodiag = PETSC_FALSE;
2617: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2618: if (n) {
2619: has_null_pressures = PETSC_FALSE;
2620: have_null = PETSC_FALSE;
2621: }
2622: }
2624: /* final check for null pressures */
2625: if (zerodiag && pressures) {
2626: PetscInt nz,np;
2627: ISGetLocalSize(zerodiag,&nz);
2628: ISGetLocalSize(pressures,&np);
2629: if (nz != np) have_null = PETSC_FALSE;
2630: }
/* some pieces were rejected: rebuild zerodiag as the sorted union of the retained ones */
2632: if (recompute_zerodiag) {
2633: ISDestroy(&zerodiag);
2634: if (pcbddc->benign_n == 1) {
2635: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2636: zerodiag = zerodiag_subs[0];
2637: } else {
2638: PetscInt i,nzn,*new_idxs;
2640: nzn = 0;
2641: for (i=0;i<pcbddc->benign_n;i++) {
2642: PetscInt ns;
2643: ISGetLocalSize(zerodiag_subs[i],&ns);
2644: nzn += ns;
2645: }
2646: PetscMalloc1(nzn,&new_idxs);
2647: nzn = 0;
2648: for (i=0;i<pcbddc->benign_n;i++) {
2649: PetscInt ns,*idxs;
2650: ISGetLocalSize(zerodiag_subs[i],&ns);
2651: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2652: PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2653: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2654: nzn += ns;
2655: }
2656: PetscSortInt(nzn,new_idxs);
2657: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2658: }
2659: have_null = PETSC_FALSE;
2660: }
2662: /* Prepare matrix to compute no-net-flux */
2663: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2664: Mat A,loc_divudotp;
2665: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2666: IS row,col,isused = NULL;
2667: PetscInt M,N,n,st,n_isused;
/* rows of the local matrix to extract: the pressure field if known, else the unfiltered zero-diagonal dofs */
2669: if (pressures) {
2670: isused = pressures;
2671: } else {
2672: isused = zerodiag_save;
2673: }
2674: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2675: MatISGetLocalMat(pc->pmat,&A);
2676: MatGetLocalSize(A,&n,NULL);
2677: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2678: n_isused = 0;
2679: if (isused) {
2680: ISGetLocalSize(isused,&n_isused);
2681: }
/* inclusive prefix sum over the ranks; subtracting the local count gives the first global row of this process */
2682: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2683: st = st-n_isused;
2684: if (n) {
2685: const PetscInt *gidxs;
2687: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2688: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2689: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2690: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2691: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2692: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2693: } else {
2694: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2695: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2696: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2697: }
2698: MatGetSize(pc->pmat,NULL,&N);
2699: ISGetSize(row,&M);
2700: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2701: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2702: ISDestroy(&row);
2703: ISDestroy(&col);
2704: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2705: MatSetType(pcbddc->divudotp,MATIS);
2706: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2707: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2708: ISLocalToGlobalMappingDestroy(&rl2g);
2709: ISLocalToGlobalMappingDestroy(&cl2g);
2710: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2711: MatDestroy(&loc_divudotp);
2712: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2713: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2714: }
2715: ISDestroy(&zerodiag_save);
2717: /* change of basis and p0 dofs */
2718: if (has_null_pressures) {
2719: IS zerodiagc;
2720: const PetscInt *idxs,*idxsc;
2721: PetscInt i,s,*nnz;
2723: ISGetLocalSize(zerodiag,&nz);
2724: ISComplement(zerodiag,0,n,&zerodiagc);
2725: ISGetIndices(zerodiagc,&idxsc);
2726: /* local change of basis for pressures */
2727: MatDestroy(&pcbddc->benign_change);
2728: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2729: MatSetType(pcbddc->benign_change,MATAIJ);
2730: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
/* per-row preallocation: 1 for the identity rows, 2 for a pressure row, nzs for the last pressure row of a piece */
2731: PetscMalloc1(n,&nnz);
2732: for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2733: for (i=0;i<pcbddc->benign_n;i++) {
2734: PetscInt nzs,j;
2736: ISGetLocalSize(zerodiag_subs[i],&nzs);
2737: ISGetIndices(zerodiag_subs[i],&idxs);
2738: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2739: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2740: ISRestoreIndices(zerodiag_subs[i],&idxs);
2741: }
2742: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2743: PetscFree(nnz);
2744: /* set identity on velocities */
2745: for (i=0;i<n-nz;i++) {
2746: MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2747: }
2748: ISRestoreIndices(zerodiagc,&idxsc);
2749: ISDestroy(&zerodiagc);
2750: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2751: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2752: /* set change on pressures */
2753: for (s=0;s<pcbddc->benign_n;s++) {
2754: PetscScalar *array;
2755: PetscInt nzs;
2757: ISGetLocalSize(zerodiag_subs[s],&nzs);
2758: ISGetIndices(zerodiag_subs[s],&idxs);
/* every pressure row but the last: 1 on its own column and 1 on the column of the last pressure dof */
2759: for (i=0;i<nzs-1;i++) {
2760: PetscScalar vals[2];
2761: PetscInt cols[2];
2763: cols[0] = idxs[i];
2764: cols[1] = idxs[nzs-1];
2765: vals[0] = 1.;
2766: vals[1] = 1.;
2767: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2768: }
/* row of the last pressure dof: -1 on the other pressure columns of the piece and 1 on its own */
2769: PetscMalloc1(nzs,&array);
2770: for (i=0;i<nzs-1;i++) array[i] = -1.;
2771: array[nzs-1] = 1.;
2772: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2773: /* store local idxs for p0 */
2774: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2775: ISRestoreIndices(zerodiag_subs[s],&idxs);
2776: PetscFree(array);
2777: }
2778: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2779: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2780: /* project if needed */
2781: if (pcbddc->benign_change_explicit) {
2782: Mat M;
2784: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2785: MatDestroy(&pcbddc->local_mat);
2786: MatSeqAIJCompress(M,&pcbddc->local_mat);
2787: MatDestroy(&M);
2788: }
2789: /* store global idxs for p0 */
2790: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2791: }
/* the array of retained pieces (possibly NULL) is handed over to the PC data */
2792: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2793: ISDestroy(&pressures);
2795: /* determines if the coarse solver will be singular or not */
2796: MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2797: /* determines if the problem has subdomains with 0 pressure block */
/* NOTE(review): this second reduction also sends have_null (not has_null_pressures) - confirm that this is intended */
2798: MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2799: *zerodiaglocal = zerodiag;
2800: return(0);
2801: }
2803: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2804: {
2805: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2806: PetscScalar *array;
2810: if (!pcbddc->benign_sf) {
2811: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2812: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2813: }
2814: if (get) {
2815: VecGetArrayRead(v,(const PetscScalar**)&array);
2816: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2817: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2818: VecRestoreArrayRead(v,(const PetscScalar**)&array);
2819: } else {
2820: VecGetArray(v,&array);
2821: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2822: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2823: VecRestoreArray(v,&array);
2824: }
2825: return(0);
2826: }
2828: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2829: {
2830: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2834: /* TODO: add error checking
2835: - avoid nested pop (or push) calls.
2836: - cannot push before pop.
2837: - cannot call this if pcbddc->local_mat is NULL
2838: */
2839: if (!pcbddc->benign_n) {
2840: return(0);
2841: }
2842: if (pop) {
2843: if (pcbddc->benign_change_explicit) {
2844: IS is_p0;
2845: MatReuse reuse;
2847: /* extract B_0 */
2848: reuse = MAT_INITIAL_MATRIX;
2849: if (pcbddc->benign_B0) {
2850: reuse = MAT_REUSE_MATRIX;
2851: }
2852: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2853: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2854: /* remove rows and cols from local problem */
2855: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2856: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2857: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2858: ISDestroy(&is_p0);
2859: } else {
2860: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2861: PetscScalar *vals;
2862: PetscInt i,n,*idxs_ins;
2864: VecGetLocalSize(matis->y,&n);
2865: PetscMalloc2(n,&idxs_ins,n,&vals);
2866: if (!pcbddc->benign_B0) {
2867: PetscInt *nnz;
2868: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2869: MatSetType(pcbddc->benign_B0,MATAIJ);
2870: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2871: PetscMalloc1(pcbddc->benign_n,&nnz);
2872: for (i=0;i<pcbddc->benign_n;i++) {
2873: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2874: nnz[i] = n - nnz[i];
2875: }
2876: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2877: PetscFree(nnz);
2878: }
2880: for (i=0;i<pcbddc->benign_n;i++) {
2881: PetscScalar *array;
2882: PetscInt *idxs,j,nz,cum;
2884: VecSet(matis->x,0.);
2885: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2886: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2887: for (j=0;j<nz;j++) vals[j] = 1.;
2888: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2889: VecAssemblyBegin(matis->x);
2890: VecAssemblyEnd(matis->x);
2891: VecSet(matis->y,0.);
2892: MatMult(matis->A,matis->x,matis->y);
2893: VecGetArray(matis->y,&array);
2894: cum = 0;
2895: for (j=0;j<n;j++) {
2896: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2897: vals[cum] = array[j];
2898: idxs_ins[cum] = j;
2899: cum++;
2900: }
2901: }
2902: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
2903: VecRestoreArray(matis->y,&array);
2904: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2905: }
2906: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2907: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2908: PetscFree2(idxs_ins,vals);
2909: }
2910: } else { /* push */
2911: if (pcbddc->benign_change_explicit) {
2912: PetscInt i;
2914: for (i=0;i<pcbddc->benign_n;i++) {
2915: PetscScalar *B0_vals;
2916: PetscInt *B0_cols,B0_ncol;
2918: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2919: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
2920: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
2921: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
2922: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2923: }
2924: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2925: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2926: } else {
2927: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
2928: }
2929: }
2930: return(0);
2931: }
/*
   PCBDDCAdaptiveSelection - computes the adaptive constraints of BDDC by solving,
   for each subset in sub_schurs->is_subs, a generalized eigenvalue problem with LAPACK.

   Input Parameter:
.  pc - the BDDC preconditioner context; pcbddc->sub_schurs must be available, with
        explicit Schur complements (sub_schurs->schur_explicit)

   Output (stored in the PC_BDDC context):
+  adaptive_constraints_n        - number of constraints for each vertex/subset
.  adaptive_constraints_idxs_ptr - offsets in adaptive_constraints_idxs
.  adaptive_constraints_data_ptr - offsets in adaptive_constraints_data
.  adaptive_constraints_idxs     - indices of the dofs involved in each group of constraints
-  adaptive_constraints_data     - selected eigenvectors, subset_size values per constraint

   Notes:
   The matrices sub_schurs->sum_S_Ej_inv_all and sub_schurs->sum_S_Ej_tilda_all are
   destroyed before returning (when at least one subset has nonzero size).
   Only the hermitian and positive definite case is implemented.
*/
2933: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2934: {
2935: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2936: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2937: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
2938: PetscBLASInt *B_iwork,*B_ifail;
2939: PetscScalar *work,lwork;
2940: PetscScalar *St,*S,*eigv;
2941: PetscScalar *Sarray,*Starray;
2942: PetscReal *eigs,thresh;
2943: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
2944: PetscBool allocated_S_St;
2945: #if defined(PETSC_USE_COMPLEX)
2946: PetscReal *rwork;
2947: #endif
2948: PetscErrorCode ierr;
/* sanity checks on the data needed by the adaptive selection */
2951: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
2952: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
2953: if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef);
2955: if (pcbddc->dbg_flag) {
2956: PetscViewerFlush(pcbddc->dbg_viewer);
2957: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
2958: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
2959: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
2960: }
2962: if (pcbddc->dbg_flag) {
2963: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
2964: }
2966: /* max size of subsets */
2967: mss = 0;
2968: for (i=0;i<sub_schurs->n_subs;i++) {
2969: PetscInt subset_size;
2971: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2972: mss = PetscMax(mss,subset_size);
2973: }
2975: /* min/max and threshold */
/* nmax defaults to the largest subset size and nmin to zero when the user values are not positive */
2976: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
2977: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
2978: nmax = PetscMax(nmin,nmax);
/* private copies of S and St are needed only if a minimum number of constraints is requested,
   since in that case the matrices are reloaded for a second eigensolve (see below) */
2979: allocated_S_St = PETSC_FALSE;
2980: if (nmin) {
2981: allocated_S_St = PETSC_TRUE;
2982: }
2984: /* allocate lapack workspace */
/* cum:  total number of dofs in the subsets (size of the index storage)
   cum2: upper bound for the eigenvector storage, at most nmax vectors per subset */
2985: cum = cum2 = 0;
2986: maxneigs = 0;
2987: for (i=0;i<sub_schurs->n_subs;i++) {
2988: PetscInt n,subset_size;
2990: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2991: n = PetscMin(subset_size,nmax);
2992: cum += subset_size;
2993: cum2 += subset_size*n;
2994: maxneigs = PetscMax(maxneigs,n);
2995: }
2996: if (mss) {
2997: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
2998: PetscBLASInt B_itype = 1;
2999: PetscBLASInt B_N = mss;
3000: PetscReal zero = 0.0;
3001: PetscReal eps = 0.0; /* dlamch? */
/* workspace query: with B_lwork = -1 the routine is called only to get the workspace size,
   which is returned in lwork; the matrix arguments are not provided (NULL pointers) */
3003: B_lwork = -1;
3004: S = NULL;
3005: St = NULL;
3006: eigs = NULL;
3007: eigv = NULL;
3008: B_iwork = NULL;
3009: B_ifail = NULL;
3010: #if defined(PETSC_USE_COMPLEX)
3011: rwork = NULL;
3012: #endif
3013: thresh = 1.0;
3014: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3015: #if defined(PETSC_USE_COMPLEX)
3016: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3017: #else
3018: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3019: #endif
/* NOTE(review): on error this returns before the PetscFPTrapPop() below is reached,
   so the push above is not matched on that path - confirm whether this is acceptable */
3020: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3021: PetscFPTrapPop();
3022: } else {
3023: /* TODO */
3024: }
3025: } else {
3026: lwork = 0;
3027: }
3029: nv = 0;
3030: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3031: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3032: }
/* the workspace size is taken from the real part of the value returned by the query */
3033: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3034: if (allocated_S_St) {
3035: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3036: }
3037: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3038: #if defined(PETSC_USE_COMPLEX)
3039: PetscMalloc1(7*mss,&rwork);
3040: #endif
/* storage for the output: one entry per vertex (nv) plus what has been estimated for the subsets */
3041: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3042: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3043: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3044: nv+cum,&pcbddc->adaptive_constraints_idxs,
3045: nv+cum2,&pcbddc->adaptive_constraints_data);
3046: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
/* from here on, cum counts the groups of constraints stored so far and
   cumarray is the offset of the current subset in Sarray and Starray */
3048: maxneigs = 0;
3049: cum = cumarray = 0;
3050: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3051: pcbddc->adaptive_constraints_data_ptr[0] = 0;
/* each vertex is stored as a single constraint on one dof with unit weight */
3052: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3053: const PetscInt *idxs;
3055: ISGetIndices(sub_schurs->is_vertices,&idxs);
3056: for (cum=0;cum<nv;cum++) {
3057: pcbddc->adaptive_constraints_n[cum] = 1;
3058: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3059: pcbddc->adaptive_constraints_data[cum] = 1.0;
3060: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3061: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3062: }
3063: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3064: }
3066: if (mss) { /* multilevel */
3067: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3068: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3069: }
/* main loop: one generalized eigenproblem for each subset */
3071: thresh = pcbddc->adaptive_threshold;
3072: for (i=0;i<sub_schurs->n_subs;i++) {
3073: const PetscInt *idxs;
3074: PetscReal upper,lower;
3075: PetscInt j,subset_size,eigs_start = 0;
3076: PetscBLASInt B_N;
3077: PetscBool same_data = PETSC_FALSE;
/* interval of eigenvalues of interest: (thresh,max] with deluxe scaling, (0,1/thresh] otherwise */
3079: if (pcbddc->use_deluxe_scaling) {
3080: upper = PETSC_MAX_REAL;
3081: lower = thresh;
3082: } else {
3083: upper = 1./thresh;
3084: lower = 0.;
3085: }
3086: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3087: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3088: PetscBLASIntCast(subset_size,&B_N);
3089: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3090: if (sub_schurs->is_hermitian) {
3091: PetscInt j,k;
3092: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3093: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3094: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3095: }
/* hermitian case: only the entries with k >= j of each block are copied */
3096: for (j=0;j<subset_size;j++) {
3097: for (k=j;k<subset_size;k++) {
3098: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3099: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3100: }
3101: }
3102: } else {
3103: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3104: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3105: }
3106: } else {
/* no private copies: work directly on the arrays of the two matrices */
3107: S = Sarray + cumarray;
3108: St = Starray + cumarray;
3109: }
3110: /* see if we can save some work */
3111: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3112: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3113: }
3115: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3116: B_neigs = 0;
3117: } else {
3118: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
3119: PetscBLASInt B_itype = 1;
3120: PetscBLASInt B_IL, B_IU;
3121: PetscReal eps = -1.0; /* dlamch? */
3122: PetscInt nmin_s;
3123: PetscBool compute_range = PETSC_FALSE;
3125: if (pcbddc->dbg_flag) {
3126: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]);
3127: }
/* select eigenvalues by value only if the threshold is larger than one and the two matrices differ;
   otherwise they are selected by index (at most nmax of them) */
3129: compute_range = PETSC_FALSE;
3130: if (thresh > 1.+PETSC_SMALL && !same_data) {
3131: compute_range = PETSC_TRUE;
3132: }
/* St and S are passed as the first and second matrix of the pencil; according to the LAPACK
   documentation of xSYGVX, itype = 1 corresponds to the problem St*x = lambda*S*x */
3134: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3135: if (compute_range) {
3137: /* ask for eigenvalues larger than thresh */
3138: #if defined(PETSC_USE_COMPLEX)
3139: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3140: #else
3141: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3142: #endif
3143: } else if (!same_data) {
/* ask for the eigenvalues with (one-based) indices from 1 to min(B_N,nmax) */
3144: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3145: B_IL = 1;
3146: #if defined(PETSC_USE_COMPLEX)
3147: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3148: #else
3149: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3150: #endif
3151: } else { /* same_data is true, so just get the adaptive functional requested by the user */
/* no eigensolve here: the first nmax columns of the identity are used as eigenvectors, with
   eigenvalues set to 1/PETSC_SMALL; note that nmax and nmin are overwritten.
   NOTE(review): B_ierr is not assigned in this branch, so the check after PetscFPTrapPop()
   sees the value left by a previous LAPACK call - confirm it is always zero here */
3152: PetscInt k;
3153: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3154: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3155: PetscBLASIntCast(nmax,&B_neigs);
3156: nmin = nmax;
3157: PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3158: for (k=0;k<nmax;k++) {
3159: eigs[k] = 1./PETSC_SMALL;
3160: eigv[k*(subset_size+1)] = 1.0;
3161: }
3162: }
3163: PetscFPTrapPop();
3164: if (B_ierr) {
3165: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3166: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3167: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3168: }
/* too many eigenpairs: keep nmax of them; with deluxe scaling the last nmax computed are kept
   (eigs_start is the offset of the first one), otherwise the first nmax */
3170: if (B_neigs > nmax) {
3171: if (pcbddc->dbg_flag) {
3172: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3173: }
3174: if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax;
3175: B_neigs = nmax;
3176: }
/* too few eigenpairs: ask by index for the missing ones, appending them after those already computed */
3178: nmin_s = PetscMin(nmin,B_N);
3179: if (B_neigs < nmin_s) {
3180: PetscBLASInt B_neigs2;
3182: if (pcbddc->use_deluxe_scaling) {
3183: B_IL = B_N - nmin_s + 1;
3184: B_IU = B_N - B_neigs;
3185: } else {
3186: B_IL = B_neigs + 1;
3187: B_IU = nmin_s;
3188: }
3189: if (pcbddc->dbg_flag) {
3190: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3191: }
/* reload S and St from the original arrays before calling LAPACK again */
3192: if (sub_schurs->is_hermitian) {
3193: PetscInt j,k;
3194: for (j=0;j<subset_size;j++) {
3195: for (k=j;k<subset_size;k++) {
3196: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3197: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3198: }
3199: }
3200: } else {
3201: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3202: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3203: }
3204: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3205: #if defined(PETSC_USE_COMPLEX)
3206: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3207: #else
3208: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3209: #endif
3210: PetscFPTrapPop();
3211: B_neigs += B_neigs2;
3212: }
3213: if (B_ierr) {
3214: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3215: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3216: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3217: }
3218: if (pcbddc->dbg_flag) {
3219: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
/* NOTE(review): the zero test below uses eigs[j] while the printed values use eigs[j+eigs_start];
   the two differ when eigs_start is nonzero - confirm which index is intended */
3220: for (j=0;j<B_neigs;j++) {
3221: if (eigs[j] == 0.0) {
3222: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3223: } else {
3224: if (pcbddc->use_deluxe_scaling) {
3225: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3226: } else {
3227: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3228: }
3229: }
3230: }
3231: }
3232: } else {
3233: /* TODO */
3234: }
3235: }
3236: /* change the basis back to the original one */
3237: if (sub_schurs->change) {
3238: Mat change,phi,phit;
3240: if (pcbddc->dbg_flag > 2) {
3241: PetscInt ii;
3242: for (ii=0;ii<B_neigs;ii++) {
3243: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3244: for (j=0;j<B_N;j++) {
3245: #if defined(PETSC_USE_COMPLEX)
3246: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3247: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3248: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3249: #else
3250: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3251: #endif
3252: }
3253: }
3254: }
/* phit is a dense matrix created on top of the selected eigenvectors stored in eigv; the product
   change*phit is computed in phi and then copied into phit, i.e. back into the eigv storage */
3255: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3256: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3257: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3258: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3259: MatDestroy(&phit);
3260: MatDestroy(&phi);
3261: }
/* store the constraints computed for this subset; subsets without constraints do not advance cum */
3262: maxneigs = PetscMax(B_neigs,maxneigs);
3263: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3264: if (B_neigs) {
3265: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
3267: if (pcbddc->dbg_flag > 1) {
3268: PetscInt ii;
3269: for (ii=0;ii<B_neigs;ii++) {
3270: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3271: for (j=0;j<B_N;j++) {
3272: #if defined(PETSC_USE_COMPLEX)
3273: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3274: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3275: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3276: #else
3277: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3278: #endif
3279: }
3280: }
3281: }
3282: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3283: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3284: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3285: cum++;
3286: }
3287: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3288: /* shift for next computation */
3289: cumarray += subset_size*subset_size;
3290: }
3291: if (pcbddc->dbg_flag) {
3292: PetscViewerFlush(pcbddc->dbg_viewer);
3293: }
3295: if (mss) {
3296: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3297: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3298: /* destroy matrices (junk) */
3299: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3300: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3301: }
/* release the workspace */
3302: if (allocated_S_St) {
3303: PetscFree2(S,St);
3304: }
3305: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3306: #if defined(PETSC_USE_COMPLEX)
3307: PetscFree(rwork);
3308: #endif
/* debug output: maximum number of constraints per subset over all the processes */
3309: if (pcbddc->dbg_flag) {
3310: PetscInt maxneigs_r;
3311: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3312: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3313: }
3314: return(0);
3315: }
3317: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3318: {
3319: PetscScalar *coarse_submat_vals;
3323: /* Setup local scatters R_to_B and (optionally) R_to_D */
3324: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3325: PCBDDCSetUpLocalScatters(pc);
3327: /* Setup local neumann solver ksp_R */
3328: /* PCBDDCSetUpLocalScatters should be called first! */
3329: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3331: /*
3332: Setup local correction and local part of coarse basis.
3333: Gives back the dense local part of the coarse matrix in column major ordering
3334: */
3335: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3337: /* Compute total number of coarse nodes and setup coarse solver */
3338: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3340: /* free */
3341: PetscFree(coarse_submat_vals);
3342: return(0);
3343: }
3345: PetscErrorCode PCBDDCResetCustomization(PC pc)
3346: {
3347: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3351: ISDestroy(&pcbddc->user_primal_vertices);
3352: ISDestroy(&pcbddc->user_primal_vertices_local);
3353: ISDestroy(&pcbddc->NeumannBoundaries);
3354: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3355: ISDestroy(&pcbddc->DirichletBoundaries);
3356: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3357: PetscFree(pcbddc->onearnullvecs_state);
3358: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3359: PCBDDCSetDofsSplitting(pc,0,NULL);
3360: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3361: return(0);
3362: }
3364: PetscErrorCode PCBDDCResetTopography(PC pc)
3365: {
3366: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3367: PetscInt i;
3371: MatDestroy(&pcbddc->nedcG);
3372: ISDestroy(&pcbddc->nedclocal);
3373: MatDestroy(&pcbddc->discretegradient);
3374: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3375: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3376: MatDestroy(&pcbddc->switch_static_change);
3377: VecDestroy(&pcbddc->work_change);
3378: MatDestroy(&pcbddc->ConstraintMatrix);
3379: MatDestroy(&pcbddc->divudotp);
3380: ISDestroy(&pcbddc->divudotp_vl2l);
3381: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3382: for (i=0;i<pcbddc->n_local_subs;i++) {
3383: ISDestroy(&pcbddc->local_subs[i]);
3384: }
3385: pcbddc->n_local_subs = 0;
3386: PetscFree(pcbddc->local_subs);
3387: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3388: pcbddc->graphanalyzed = PETSC_FALSE;
3389: pcbddc->recompute_topography = PETSC_TRUE;
3390: return(0);
3391: }
3393: PetscErrorCode PCBDDCResetSolvers(PC pc)
3394: {
3395: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3399: VecDestroy(&pcbddc->coarse_vec);
3400: if (pcbddc->coarse_phi_B) {
3401: PetscScalar *array;
3402: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3403: PetscFree(array);
3404: }
3405: MatDestroy(&pcbddc->coarse_phi_B);
3406: MatDestroy(&pcbddc->coarse_phi_D);
3407: MatDestroy(&pcbddc->coarse_psi_B);
3408: MatDestroy(&pcbddc->coarse_psi_D);
3409: VecDestroy(&pcbddc->vec1_P);
3410: VecDestroy(&pcbddc->vec1_C);
3411: MatDestroy(&pcbddc->local_auxmat2);
3412: MatDestroy(&pcbddc->local_auxmat1);
3413: VecDestroy(&pcbddc->vec1_R);
3414: VecDestroy(&pcbddc->vec2_R);
3415: ISDestroy(&pcbddc->is_R_local);
3416: VecScatterDestroy(&pcbddc->R_to_B);
3417: VecScatterDestroy(&pcbddc->R_to_D);
3418: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3419: KSPReset(pcbddc->ksp_D);
3420: KSPReset(pcbddc->ksp_R);
3421: KSPReset(pcbddc->coarse_ksp);
3422: MatDestroy(&pcbddc->local_mat);
3423: PetscFree(pcbddc->primal_indices_local_idxs);
3424: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3425: PetscFree(pcbddc->global_primal_indices);
3426: ISDestroy(&pcbddc->coarse_subassembling);
3427: MatDestroy(&pcbddc->benign_change);
3428: VecDestroy(&pcbddc->benign_vec);
3429: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3430: MatDestroy(&pcbddc->benign_B0);
3431: PetscSFDestroy(&pcbddc->benign_sf);
3432: if (pcbddc->benign_zerodiag_subs) {
3433: PetscInt i;
3434: for (i=0;i<pcbddc->benign_n;i++) {
3435: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3436: }
3437: PetscFree(pcbddc->benign_zerodiag_subs);
3438: }
3439: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3440: return(0);
3441: }
3443: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3444: {
3445: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3446: PC_IS *pcis = (PC_IS*)pc->data;
3447: VecType impVecType;
3448: PetscInt n_constraints,n_R,old_size;
3452: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3453: n_R = pcis->n - pcbddc->n_vertices;
3454: VecGetType(pcis->vec1_N,&impVecType);
3455: /* local work vectors (try to avoid unneeded work)*/
3456: /* R nodes */
3457: old_size = -1;
3458: if (pcbddc->vec1_R) {
3459: VecGetSize(pcbddc->vec1_R,&old_size);
3460: }
3461: if (n_R != old_size) {
3462: VecDestroy(&pcbddc->vec1_R);
3463: VecDestroy(&pcbddc->vec2_R);
3464: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3465: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3466: VecSetType(pcbddc->vec1_R,impVecType);
3467: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3468: }
3469: /* local primal dofs */
3470: old_size = -1;
3471: if (pcbddc->vec1_P) {
3472: VecGetSize(pcbddc->vec1_P,&old_size);
3473: }
3474: if (pcbddc->local_primal_size != old_size) {
3475: VecDestroy(&pcbddc->vec1_P);
3476: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3477: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3478: VecSetType(pcbddc->vec1_P,impVecType);
3479: }
3480: /* local explicit constraints */
3481: old_size = -1;
3482: if (pcbddc->vec1_C) {
3483: VecGetSize(pcbddc->vec1_C,&old_size);
3484: }
3485: if (n_constraints && n_constraints != old_size) {
3486: VecDestroy(&pcbddc->vec1_C);
3487: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3488: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3489: VecSetType(pcbddc->vec1_C,impVecType);
3490: }
3491: return(0);
3492: }
3494: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3495: {
3496: PetscErrorCode ierr;
3497: /* pointers to pcis and pcbddc */
3498: PC_IS* pcis = (PC_IS*)pc->data;
3499: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3500: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3501: /* submatrices of local problem */
3502: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3503: /* submatrices of local coarse problem */
3504: Mat S_VV,S_CV,S_VC,S_CC;
3505: /* working matrices */
3506: Mat C_CR;
3507: /* additional working stuff */
3508: PC pc_R;
3509: Mat F,Brhs = NULL;
3510: Vec dummy_vec;
3511: PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3512: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3513: PetscScalar *work;
3514: PetscInt *idx_V_B;
3515: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3516: PetscInt i,n_R,n_D,n_B;
3518: /* some shortcuts to scalars */
3519: PetscScalar one=1.0,m_one=-1.0;
3522: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3524: /* Set Non-overlapping dimensions */
3525: n_vertices = pcbddc->n_vertices;
3526: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3527: n_B = pcis->n_B;
3528: n_D = pcis->n - n_B;
3529: n_R = pcis->n - n_vertices;
3531: /* vertices in boundary numbering */
3532: PetscMalloc1(n_vertices,&idx_V_B);
3533: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3534: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);
3536: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3537: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3538: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3539: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3540: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3541: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3542: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3543: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3544: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3545: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3547: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3548: KSPGetPC(pcbddc->ksp_R,&pc_R);
3549: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3550: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3551: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3552: lda_rhs = n_R;
3553: need_benign_correction = PETSC_FALSE;
3554: if (isLU || isILU || isCHOL) {
3555: PCFactorGetMatrix(pc_R,&F);
3556: } else if (sub_schurs && sub_schurs->reuse_solver) {
3557: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3558: MatFactorType type;
3560: F = reuse_solver->F;
3561: MatGetFactorType(F,&type);
3562: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3563: MatGetSize(F,&lda_rhs,NULL);
3564: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3565: } else {
3566: F = NULL;
3567: }
3569: /* determine if we can use a sparse right-hand side */
3570: sparserhs = PETSC_FALSE;
3571: if (F) {
3572: const MatSolverPackage solver;
3574: MatFactorGetSolverPackage(F,&solver);
3575: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3576: }
3578: /* allocate workspace */
3579: n = 0;
3580: if (n_constraints) {
3581: n += lda_rhs*n_constraints;
3582: }
3583: if (n_vertices) {
3584: n = PetscMax(2*lda_rhs*n_vertices,n);
3585: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3586: }
3587: if (!pcbddc->symmetric_primal) {
3588: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3589: }
3590: PetscMalloc1(n,&work);
3592: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3593: dummy_vec = NULL;
3594: if (need_benign_correction && lda_rhs != n_R && F) {
3595: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3596: }
3598: /* Precompute quantities needed for preprocessing and application of BDDC */
3599: if (n_constraints) {
3600: Mat M1,M2,M3,C_B;
3601: IS is_aux;
3602: PetscScalar *array,*array2;
3604: MatDestroy(&pcbddc->local_auxmat1);
3605: MatDestroy(&pcbddc->local_auxmat2);
3607: /* Extract constraints on R nodes: C_{CR} */
3608: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3609: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3610: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
3612: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3613: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3614: if (!sparserhs) {
3615: PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3616: for (i=0;i<n_constraints;i++) {
3617: const PetscScalar *row_cmat_values;
3618: const PetscInt *row_cmat_indices;
3619: PetscInt size_of_constraint,j;
3621: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3622: for (j=0;j<size_of_constraint;j++) {
3623: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3624: }
3625: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3626: }
3627: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3628: } else {
3629: Mat tC_CR;
3631: MatScale(C_CR,-1.0);
3632: if (lda_rhs != n_R) {
3633: PetscScalar *aa;
3634: PetscInt r,*ii,*jj;
3635: PetscBool done;
3637: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3638: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3639: MatSeqAIJGetArray(C_CR,&aa);
3640: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3641: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3642: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3643: } else {
3644: PetscObjectReference((PetscObject)C_CR);
3645: tC_CR = C_CR;
3646: }
3647: MatCreateTranspose(tC_CR,&Brhs);
3648: MatDestroy(&tC_CR);
3649: }
3650: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3651: if (F) {
3652: if (need_benign_correction) {
3653: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3655: /* rhs is already zero on interior dofs, no need to change the rhs */
3656: PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3657: }
3658: MatMatSolve(F,Brhs,local_auxmat2_R);
3659: if (need_benign_correction) {
3660: PetscScalar *marr;
3661: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3663: MatDenseGetArray(local_auxmat2_R,&marr);
3664: if (lda_rhs != n_R) {
3665: for (i=0;i<n_constraints;i++) {
3666: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3667: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3668: VecResetArray(dummy_vec);
3669: }
3670: } else {
3671: for (i=0;i<n_constraints;i++) {
3672: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3673: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3674: VecResetArray(pcbddc->vec1_R);
3675: }
3676: }
3677: MatDenseRestoreArray(local_auxmat2_R,&marr);
3678: }
3679: } else {
3680: PetscScalar *marr;
3682: MatDenseGetArray(local_auxmat2_R,&marr);
3683: for (i=0;i<n_constraints;i++) {
3684: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3685: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3686: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3687: VecResetArray(pcbddc->vec1_R);
3688: VecResetArray(pcbddc->vec2_R);
3689: }
3690: MatDenseRestoreArray(local_auxmat2_R,&marr);
3691: }
3692: if (sparserhs) {
3693: MatScale(C_CR,-1.0);
3694: }
3695: MatDestroy(&Brhs);
3696: if (!pcbddc->switch_static) {
3697: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3698: MatDenseGetArray(pcbddc->local_auxmat2,&array);
3699: MatDenseGetArray(local_auxmat2_R,&array2);
3700: for (i=0;i<n_constraints;i++) {
3701: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
3702: VecPlaceArray(pcis->vec1_B,array+i*n_B);
3703: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3704: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3705: VecResetArray(pcis->vec1_B);
3706: VecResetArray(pcbddc->vec1_R);
3707: }
3708: MatDenseRestoreArray(local_auxmat2_R,&array2);
3709: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
3710: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3711: } else {
3712: if (lda_rhs != n_R) {
3713: IS dummy;
3715: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
3716: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
3717: ISDestroy(&dummy);
3718: } else {
3719: PetscObjectReference((PetscObject)local_auxmat2_R);
3720: pcbddc->local_auxmat2 = local_auxmat2_R;
3721: }
3722: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3723: }
3724: ISDestroy(&is_aux);
3725: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
3726: MatScale(M3,m_one);
3727: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
3728: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
3729: if (isCHOL) {
3730: MatCholeskyFactor(M3,NULL,NULL);
3731: } else {
3732: MatLUFactor(M3,NULL,NULL,NULL);
3733: }
3734: VecSet(pcbddc->vec1_C,one);
3735: MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
3736: MatMatSolve(M3,M2,M1);
3737: MatDestroy(&M2);
3738: MatDestroy(&M3);
3739: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3740: MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
3741: MatDestroy(&C_B);
3742: MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3743: MatDestroy(&M1);
3744: }
3746: /* Get submatrices from subdomain matrix */
3747: if (n_vertices) {
3748: IS is_aux;
3749: PetscBool isseqaij;
3751: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3752: IS tis;
3754: ISDuplicate(pcbddc->is_R_local,&tis);
3755: ISSort(tis);
3756: ISComplement(tis,0,pcis->n,&is_aux);
3757: ISDestroy(&tis);
3758: } else {
3759: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
3760: }
3761: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
3762: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
3763: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
3764: if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
3765: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
3766: }
3767: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
3768: ISDestroy(&is_aux);
3769: }
3771: /* Matrix of coarse basis functions (local) */
3772: if (pcbddc->coarse_phi_B) {
3773: PetscInt on_B,on_primal,on_D=n_D;
3774: if (pcbddc->coarse_phi_D) {
3775: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
3776: }
3777: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
3778: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
3779: PetscScalar *marray;
3781: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
3782: PetscFree(marray);
3783: MatDestroy(&pcbddc->coarse_phi_B);
3784: MatDestroy(&pcbddc->coarse_psi_B);
3785: MatDestroy(&pcbddc->coarse_phi_D);
3786: MatDestroy(&pcbddc->coarse_psi_D);
3787: }
3788: }
3790: if (!pcbddc->coarse_phi_B) {
3791: PetscScalar *marr;
3793: /* memory size */
3794: n = n_B*pcbddc->local_primal_size;
3795: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
3796: if (!pcbddc->symmetric_primal) n *= 2;
3797: PetscCalloc1(n,&marr);
3798: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
3799: marr += n_B*pcbddc->local_primal_size;
3800: if (pcbddc->switch_static || pcbddc->dbg_flag) {
3801: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
3802: marr += n_D*pcbddc->local_primal_size;
3803: }
3804: if (!pcbddc->symmetric_primal) {
3805: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
3806: marr += n_B*pcbddc->local_primal_size;
3807: if (pcbddc->switch_static || pcbddc->dbg_flag) {
3808: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
3809: }
3810: } else {
3811: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
3812: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
3813: if (pcbddc->switch_static || pcbddc->dbg_flag) {
3814: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
3815: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
3816: }
3817: }
3818: }
3820: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
3821: p0_lidx_I = NULL;
3822: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
3823: const PetscInt *idxs;
3825: ISGetIndices(pcis->is_I_local,&idxs);
3826: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
3827: for (i=0;i<pcbddc->benign_n;i++) {
3828: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
3829: }
3830: ISRestoreIndices(pcis->is_I_local,&idxs);
3831: }
3833: /* vertices */
3834: if (n_vertices) {
3835: PetscBool restoreavr = PETSC_FALSE;
3837: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
3839: if (n_R) {
3840: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
3841: PetscBLASInt B_N,B_one = 1;
3842: PetscScalar *x,*y;
3844: MatScale(A_RV,m_one);
3845: if (need_benign_correction) {
3846: ISLocalToGlobalMapping RtoN;
3847: IS is_p0;
3848: PetscInt *idxs_p0,n;
3850: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
3851: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
3852: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
3853: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
3854: ISLocalToGlobalMappingDestroy(&RtoN);
3855: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
3856: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
3857: ISDestroy(&is_p0);
3858: }
3860: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
3861: if (!sparserhs || need_benign_correction) {
3862: if (lda_rhs == n_R) {
3863: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
3864: } else {
3865: PetscScalar *av,*array;
3866: const PetscInt *xadj,*adjncy;
3867: PetscInt n;
3868: PetscBool flg_row;
3870: array = work+lda_rhs*n_vertices;
3871: PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
3872: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
3873: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3874: MatSeqAIJGetArray(A_RV,&av);
3875: for (i=0;i<n;i++) {
3876: PetscInt j;
3877: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
3878: }
3879: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3880: MatDestroy(&A_RV);
3881: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
3882: }
3883: if (need_benign_correction) {
3884: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3885: PetscScalar *marr;
3887: MatDenseGetArray(A_RV,&marr);
3888: /* need \Phi^T A_RV = (I+L)A_RV, L given by
3890: | 0 0 0 | (V)
3891: L = | 0 0 -1 | (P-p0)
3892: | 0 0 -1 | (p0)
3894: */
3895: for (i=0;i<reuse_solver->benign_n;i++) {
3896: const PetscScalar *vals;
3897: const PetscInt *idxs,*idxs_zero;
3898: PetscInt n,j,nz;
3900: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
3901: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3902: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
3903: for (j=0;j<n;j++) {
3904: PetscScalar val = vals[j];
3905: PetscInt k,col = idxs[j];
3906: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
3907: }
3908: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
3909: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3910: }
3911: MatDenseRestoreArray(A_RV,&marr);
3912: }
3913: PetscObjectReference((PetscObject)A_RV);
3914: Brhs = A_RV;
3915: } else {
3916: Mat tA_RVT,A_RVT;
3918: if (!pcbddc->symmetric_primal) {
3919: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
3920: } else {
3921: restoreavr = PETSC_TRUE;
3922: MatScale(A_VR,-1.0);
3923: PetscObjectReference((PetscObject)A_VR);
3924: A_RVT = A_VR;
3925: }
3926: if (lda_rhs != n_R) {
3927: PetscScalar *aa;
3928: PetscInt r,*ii,*jj;
3929: PetscBool done;
3931: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3932: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3933: MatSeqAIJGetArray(A_RVT,&aa);
3934: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
3935: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3936: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3937: } else {
3938: PetscObjectReference((PetscObject)A_RVT);
3939: tA_RVT = A_RVT;
3940: }
3941: MatCreateTranspose(tA_RVT,&Brhs);
3942: MatDestroy(&tA_RVT);
3943: MatDestroy(&A_RVT);
3944: }
3945: if (F) {
3946: /* need to correct the rhs */
3947: if (need_benign_correction) {
3948: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3949: PetscScalar *marr;
3951: MatDenseGetArray(Brhs,&marr);
3952: if (lda_rhs != n_R) {
3953: for (i=0;i<n_vertices;i++) {
3954: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3955: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
3956: VecResetArray(dummy_vec);
3957: }
3958: } else {
3959: for (i=0;i<n_vertices;i++) {
3960: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3961: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
3962: VecResetArray(pcbddc->vec1_R);
3963: }
3964: }
3965: MatDenseRestoreArray(Brhs,&marr);
3966: }
3967: MatMatSolve(F,Brhs,A_RRmA_RV);
3968: if (restoreavr) {
3969: MatScale(A_VR,-1.0);
3970: }
3971: /* need to correct the solution */
3972: if (need_benign_correction) {
3973: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3974: PetscScalar *marr;
3976: MatDenseGetArray(A_RRmA_RV,&marr);
3977: if (lda_rhs != n_R) {
3978: for (i=0;i<n_vertices;i++) {
3979: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3980: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3981: VecResetArray(dummy_vec);
3982: }
3983: } else {
3984: for (i=0;i<n_vertices;i++) {
3985: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3986: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3987: VecResetArray(pcbddc->vec1_R);
3988: }
3989: }
3990: MatDenseRestoreArray(A_RRmA_RV,&marr);
3991: }
3992: } else {
3993: MatDenseGetArray(Brhs,&y);
3994: for (i=0;i<n_vertices;i++) {
3995: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
3996: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
3997: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3998: VecResetArray(pcbddc->vec1_R);
3999: VecResetArray(pcbddc->vec2_R);
4000: }
4001: MatDenseRestoreArray(Brhs,&y);
4002: }
4003: MatDestroy(&A_RV);
4004: MatDestroy(&Brhs);
4005: /* S_VV and S_CV */
4006: if (n_constraints) {
4007: Mat B;
4009: PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4010: for (i=0;i<n_vertices;i++) {
4011: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4012: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4013: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4014: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4015: VecResetArray(pcis->vec1_B);
4016: VecResetArray(pcbddc->vec1_R);
4017: }
4018: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4019: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4020: MatDestroy(&B);
4021: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4022: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4023: MatScale(S_CV,m_one);
4024: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4025: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4026: MatDestroy(&B);
4027: }
4028: if (lda_rhs != n_R) {
4029: MatDestroy(&A_RRmA_RV);
4030: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4031: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4032: }
4033: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4034: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4035: if (need_benign_correction) {
4036: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4037: PetscScalar *marr,*sums;
4039: PetscMalloc1(n_vertices,&sums);
4040: MatDenseGetArray(S_VVt,&marr);
4041: for (i=0;i<reuse_solver->benign_n;i++) {
4042: const PetscScalar *vals;
4043: const PetscInt *idxs,*idxs_zero;
4044: PetscInt n,j,nz;
4046: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4047: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4048: for (j=0;j<n_vertices;j++) {
4049: PetscInt k;
4050: sums[j] = 0.;
4051: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4052: }
4053: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4054: for (j=0;j<n;j++) {
4055: PetscScalar val = vals[j];
4056: PetscInt k;
4057: for (k=0;k<n_vertices;k++) {
4058: marr[idxs[j]+k*n_vertices] += val*sums[k];
4059: }
4060: }
4061: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4062: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4063: }
4064: PetscFree(sums);
4065: MatDenseRestoreArray(S_VVt,&marr);
4066: MatDestroy(&A_RV_bcorr);
4067: }
4068: MatDestroy(&A_RRmA_RV);
4069: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4070: MatDenseGetArray(A_VV,&x);
4071: MatDenseGetArray(S_VVt,&y);
4072: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4073: MatDenseRestoreArray(A_VV,&x);
4074: MatDenseRestoreArray(S_VVt,&y);
4075: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4076: MatDestroy(&S_VVt);
4077: } else {
4078: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4079: }
4080: MatDestroy(&A_VV);
4082: /* coarse basis functions */
4083: for (i=0;i<n_vertices;i++) {
4084: PetscScalar *y;
4086: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4087: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4088: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4089: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4090: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4091: y[n_B*i+idx_V_B[i]] = 1.0;
4092: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4093: VecResetArray(pcis->vec1_B);
4095: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4096: PetscInt j;
4098: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4099: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4100: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4101: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4102: VecResetArray(pcis->vec1_D);
4103: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4104: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4105: }
4106: VecResetArray(pcbddc->vec1_R);
4107: }
4108: /* if n_R == 0 the object is not destroyed */
4109: MatDestroy(&A_RV);
4110: }
4111: VecDestroy(&dummy_vec);
4113: if (n_constraints) {
4114: Mat B;
4116: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4117: MatScale(S_CC,m_one);
4118: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4119: MatScale(S_CC,m_one);
4120: if (n_vertices) {
4121: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4122: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4123: } else {
4124: Mat S_VCt;
4126: if (lda_rhs != n_R) {
4127: MatDestroy(&B);
4128: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4129: MatSeqDenseSetLDA(B,lda_rhs);
4130: }
4131: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4132: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4133: MatDestroy(&S_VCt);
4134: }
4135: }
4136: MatDestroy(&B);
4137: /* coarse basis functions */
4138: for (i=0;i<n_constraints;i++) {
4139: PetscScalar *y;
4141: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4142: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4143: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4144: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4145: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4146: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4147: VecResetArray(pcis->vec1_B);
4148: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4149: PetscInt j;
4151: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4152: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4153: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4154: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4155: VecResetArray(pcis->vec1_D);
4156: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4157: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4158: }
4159: VecResetArray(pcbddc->vec1_R);
4160: }
4161: }
4162: if (n_constraints) {
4163: MatDestroy(&local_auxmat2_R);
4164: }
4165: PetscFree(p0_lidx_I);
4167: /* coarse matrix entries relative to B_0 */
4168: if (pcbddc->benign_n) {
4169: Mat B0_B,B0_BPHI;
4170: IS is_dummy;
4171: PetscScalar *data;
4172: PetscInt j;
4174: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4175: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4176: ISDestroy(&is_dummy);
4177: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4178: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4179: MatDenseGetArray(B0_BPHI,&data);
4180: for (j=0;j<pcbddc->benign_n;j++) {
4181: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4182: for (i=0;i<pcbddc->local_primal_size;i++) {
4183: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4184: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4185: }
4186: }
4187: MatDenseRestoreArray(B0_BPHI,&data);
4188: MatDestroy(&B0_B);
4189: MatDestroy(&B0_BPHI);
4190: }
4192: /* compute other basis functions for non-symmetric problems */
4193: if (!pcbddc->symmetric_primal) {
4194: Mat B_V=NULL,B_C=NULL;
4195: PetscScalar *marray;
4197: if (n_constraints) {
4198: Mat S_CCT,C_CRT;
4200: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4201: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4202: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4203: MatDestroy(&S_CCT);
4204: if (n_vertices) {
4205: Mat S_VCT;
4207: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4208: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4209: MatDestroy(&S_VCT);
4210: }
4211: MatDestroy(&C_CRT);
4212: } else {
4213: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4214: }
4215: if (n_vertices && n_R) {
4216: PetscScalar *av,*marray;
4217: const PetscInt *xadj,*adjncy;
4218: PetscInt n;
4219: PetscBool flg_row;
4221: /* B_V = B_V - A_VR^T */
4222: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4223: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4224: MatSeqAIJGetArray(A_VR,&av);
4225: MatDenseGetArray(B_V,&marray);
4226: for (i=0;i<n;i++) {
4227: PetscInt j;
4228: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4229: }
4230: MatDenseRestoreArray(B_V,&marray);
4231: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4232: MatDestroy(&A_VR);
4233: }
4235: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4236: if (n_vertices) {
4237: MatDenseGetArray(B_V,&marray);
4238: for (i=0;i<n_vertices;i++) {
4239: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4240: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4241: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4242: VecResetArray(pcbddc->vec1_R);
4243: VecResetArray(pcbddc->vec2_R);
4244: }
4245: MatDenseRestoreArray(B_V,&marray);
4246: }
4247: if (B_C) {
4248: MatDenseGetArray(B_C,&marray);
4249: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4250: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4251: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4252: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4253: VecResetArray(pcbddc->vec1_R);
4254: VecResetArray(pcbddc->vec2_R);
4255: }
4256: MatDenseRestoreArray(B_C,&marray);
4257: }
4258: /* coarse basis functions */
4259: for (i=0;i<pcbddc->local_primal_size;i++) {
4260: PetscScalar *y;
4262: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4263: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4264: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4265: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4266: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4267: if (i<n_vertices) {
4268: y[n_B*i+idx_V_B[i]] = 1.0;
4269: }
4270: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4271: VecResetArray(pcis->vec1_B);
4273: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4274: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4275: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4276: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4277: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4278: VecResetArray(pcis->vec1_D);
4279: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4280: }
4281: VecResetArray(pcbddc->vec1_R);
4282: }
4283: MatDestroy(&B_V);
4284: MatDestroy(&B_C);
4285: }
4287: /* free memory */
4288: PetscFree(idx_V_B);
4289: MatDestroy(&S_VV);
4290: MatDestroy(&S_CV);
4291: MatDestroy(&S_VC);
4292: MatDestroy(&S_CC);
4293: PetscFree(work);
4294: if (n_vertices) {
4295: MatDestroy(&A_VR);
4296: }
4297: if (n_constraints) {
4298: MatDestroy(&C_CR);
4299: }
4300: /* Checking coarse_sub_mat and coarse basis functions */
4301: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4302: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4303: if (pcbddc->dbg_flag) {
4304: Mat coarse_sub_mat;
4305: Mat AUXMAT,TM1,TM2,TM3,TM4;
4306: Mat coarse_phi_D,coarse_phi_B;
4307: Mat coarse_psi_D,coarse_psi_B;
4308: Mat A_II,A_BB,A_IB,A_BI;
4309: Mat C_B,CPHI;
4310: IS is_dummy;
4311: Vec mones;
4312: MatType checkmattype=MATSEQAIJ;
4313: PetscReal real_value;
4315: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4316: Mat A;
4317: PCBDDCBenignProject(pc,NULL,NULL,&A);
4318: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4319: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4320: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4321: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4322: MatDestroy(&A);
4323: } else {
4324: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4325: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4326: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4327: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4328: }
4329: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4330: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4331: if (!pcbddc->symmetric_primal) {
4332: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4333: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4334: }
4335: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4337: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4338: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4339: PetscViewerFlush(pcbddc->dbg_viewer);
4340: if (!pcbddc->symmetric_primal) {
4341: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4342: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4343: MatDestroy(&AUXMAT);
4344: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4345: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4346: MatDestroy(&AUXMAT);
4347: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4348: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4349: MatDestroy(&AUXMAT);
4350: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4351: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4352: MatDestroy(&AUXMAT);
4353: } else {
4354: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4355: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4356: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4357: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4358: MatDestroy(&AUXMAT);
4359: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4360: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4361: MatDestroy(&AUXMAT);
4362: }
4363: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4364: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4365: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4366: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4367: if (pcbddc->benign_n) {
4368: Mat B0_B,B0_BPHI;
4369: PetscScalar *data,*data2;
4370: PetscInt j;
4372: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4373: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4374: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4375: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4376: MatDenseGetArray(TM1,&data);
4377: MatDenseGetArray(B0_BPHI,&data2);
4378: for (j=0;j<pcbddc->benign_n;j++) {
4379: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4380: for (i=0;i<pcbddc->local_primal_size;i++) {
4381: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4382: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4383: }
4384: }
4385: MatDenseRestoreArray(TM1,&data);
4386: MatDenseRestoreArray(B0_BPHI,&data2);
4387: MatDestroy(&B0_B);
4388: ISDestroy(&is_dummy);
4389: MatDestroy(&B0_BPHI);
4390: }
4391: #if 0
4392: {
4393: PetscViewer viewer;
4394: char filename[256];
4395: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4396: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4397: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4398: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4399: MatView(coarse_sub_mat,viewer);
4400: PetscObjectSetName((PetscObject)TM1,"projected");
4401: MatView(TM1,viewer);
4402: if (pcbddc->coarse_phi_B) {
4403: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4404: MatView(pcbddc->coarse_phi_B,viewer);
4405: }
4406: if (pcbddc->coarse_phi_D) {
4407: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4408: MatView(pcbddc->coarse_phi_D,viewer);
4409: }
4410: if (pcbddc->coarse_psi_B) {
4411: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4412: MatView(pcbddc->coarse_psi_B,viewer);
4413: }
4414: if (pcbddc->coarse_psi_D) {
4415: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4416: MatView(pcbddc->coarse_psi_D,viewer);
4417: }
4418: PetscViewerDestroy(&viewer);
4419: }
4420: #endif
4421: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4422: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4423: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4424: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4426: /* check constraints */
4427: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4428: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4429: if (!pcbddc->benign_n) { /* TODO: add benign case */
4430: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4431: } else {
4432: PetscScalar *data;
4433: Mat tmat;
4434: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4435: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4436: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4437: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4438: MatDestroy(&tmat);
4439: }
4440: MatCreateVecs(CPHI,&mones,NULL);
4441: VecSet(mones,-1.0);
4442: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4443: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4444: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4445: if (!pcbddc->symmetric_primal) {
4446: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4447: VecSet(mones,-1.0);
4448: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4449: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4450: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4451: }
4452: MatDestroy(&C_B);
4453: MatDestroy(&CPHI);
4454: ISDestroy(&is_dummy);
4455: VecDestroy(&mones);
4456: PetscViewerFlush(pcbddc->dbg_viewer);
4457: MatDestroy(&A_II);
4458: MatDestroy(&A_BB);
4459: MatDestroy(&A_IB);
4460: MatDestroy(&A_BI);
4461: MatDestroy(&TM1);
4462: MatDestroy(&TM2);
4463: MatDestroy(&TM3);
4464: MatDestroy(&TM4);
4465: MatDestroy(&coarse_phi_D);
4466: MatDestroy(&coarse_phi_B);
4467: if (!pcbddc->symmetric_primal) {
4468: MatDestroy(&coarse_psi_D);
4469: MatDestroy(&coarse_psi_B);
4470: }
4471: MatDestroy(&coarse_sub_mat);
4472: }
4473: /* get back data */
4474: *coarse_submat_vals_n = coarse_submat_vals;
4475: return(0);
4476: }
/*
   MatCreateSubMatrixUnsorted - extracts the submatrix of A selected by isrow/iscol
   without requiring the two index sets to be sorted.

   Input:
     A     - matrix to extract from
     isrow - row indices (may be unsorted)
     iscol - column indices (may be unsorted; may be the very same IS object as isrow)
   Output:
     B     - the extracted matrix; a reference is taken on it before the temporary
             array returned by MatCreateSubMatrices() is destroyed

   Steps visible below: every unsorted index set is replaced by a sorted copy, the
   extraction is performed with the sorted sets, and the result is passed through
   MatPermute() with the inverse of each sorting permutation (presumably so that B
   follows the ordering requested by the caller - confirm against MatPermute docs).
*/
4478: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4479: {
4480: Mat *work_mat;
4481: IS isrow_s,iscol_s;
4482: PetscBool rsorted,csorted;
4483: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
/* query sortedness and local size of both index sets */
4487: ISSorted(isrow,&rsorted);
4488: ISSorted(iscol,&csorted);
4489: ISGetLocalSize(isrow,&rsize);
4490: ISGetLocalSize(iscol,&csize);
/* rows: build a sorted copy of isrow and keep the sorting permutation in idxs_perm_r */
4492: if (!rsorted) {
4493: const PetscInt *idxs;
4494: PetscInt *idxs_sorted,i;
4496: PetscMalloc1(rsize,&idxs_perm_r);
4497: PetscMalloc1(rsize,&idxs_sorted);
/* start from the identity permutation */
4498: for (i=0;i<rsize;i++) {
4499: idxs_perm_r[i] = i;
4500: }
4501: ISGetIndices(isrow,&idxs);
4502: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
/* gather the indices in the order given by the permutation */
4503: for (i=0;i<rsize;i++) {
4504: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4505: }
4506: ISRestoreIndices(isrow,&idxs);
/* idxs_sorted is handed to the IS with PETSC_OWN_POINTER and is not freed in this function */
4507: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4508: } else {
/* already sorted: use isrow itself; the extra reference balances the ISDestroy(&isrow_s) at the end */
4509: PetscObjectReference((PetscObject)isrow);
4510: isrow_s = isrow;
4511: }
/* columns: same treatment, except that the sorted row IS is shared when isrow and iscol are the same object */
4513: if (!csorted) {
4514: if (isrow == iscol) {
4515: PetscObjectReference((PetscObject)isrow_s);
4516: iscol_s = isrow_s;
4517: } else {
4518: const PetscInt *idxs;
4519: PetscInt *idxs_sorted,i;
4521: PetscMalloc1(csize,&idxs_perm_c);
4522: PetscMalloc1(csize,&idxs_sorted);
4523: for (i=0;i<csize;i++) {
4524: idxs_perm_c[i] = i;
4525: }
4526: ISGetIndices(iscol,&idxs);
4527: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4528: for (i=0;i<csize;i++) {
4529: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4530: }
4531: ISRestoreIndices(iscol,&idxs);
4532: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4533: }
4534: } else {
4535: PetscObjectReference((PetscObject)iscol);
4536: iscol_s = iscol;
4537: }
/* extraction with the sorted index sets; work_mat is an array holding one matrix */
4539: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
/* at least one index set had to be sorted: permute the extracted matrix */
4541: if (!rsorted || !csorted) {
4542: Mat new_mat;
4543: IS is_perm_r,is_perm_c;
4545: if (!rsorted) {
4546: PetscInt *idxs_r,i;
4547: PetscMalloc1(rsize,&idxs_r);
/* idxs_r is the inverse of the sorting permutation */
4548: for (i=0;i<rsize;i++) {
4549: idxs_r[idxs_perm_r[i]] = i;
4550: }
4551: PetscFree(idxs_perm_r);
4552: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4553: } else {
/* rows were sorted already: identity permutation */
4554: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4555: }
4556: ISSetPermutation(is_perm_r);
4558: if (!csorted) {
/* rows and columns share the sorted IS (isrow == iscol case above), so they share the permutation too */
4559: if (isrow_s == iscol_s) {
4560: PetscObjectReference((PetscObject)is_perm_r);
4561: is_perm_c = is_perm_r;
4562: } else {
4563: PetscInt *idxs_c,i;
/* NOTE(review): if SETERRQ returns here (as PETSc error macros usually do), isrow_s, iscol_s, is_perm_r and work_mat are not released - confirm whether that matters for this "should never happen" path */
4564: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4565: PetscMalloc1(csize,&idxs_c);
4566: for (i=0;i<csize;i++) {
4567: idxs_c[idxs_perm_c[i]] = i;
4568: }
4569: PetscFree(idxs_perm_c);
4570: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4571: }
4572: } else {
4573: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4574: }
4575: ISSetPermutation(is_perm_c);
/* replace the extracted matrix by its permuted version */
4577: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4578: MatDestroy(&work_mat[0]);
4579: work_mat[0] = new_mat;
4580: ISDestroy(&is_perm_r);
4581: ISDestroy(&is_perm_c);
4582: }
/* take a reference for the caller before the temporary array of matrices is destroyed */
4584: PetscObjectReference((PetscObject)work_mat[0]);
4585: *B = work_mat[0];
4586: MatDestroyMatrices(1,&work_mat);
4587: ISDestroy(&isrow_s);
4588: ISDestroy(&iscol_s);
4589: return(0);
4590: }
4592: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4593: {
4594: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4595: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4596: Mat new_mat,lA;
4597: IS is_local,is_global;
4598: PetscInt local_size;
4599: PetscBool isseqaij;
4603: MatDestroy(&pcbddc->local_mat);
4604: MatGetSize(matis->A,&local_size,NULL);
4605: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4606: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4607: ISDestroy(&is_local);
4608: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4609: ISDestroy(&is_global);
4611: /* check */
4612: if (pcbddc->dbg_flag) {
4613: Vec x,x_change;
4614: PetscReal error;
4616: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4617: VecSetRandom(x,NULL);
4618: MatMult(ChangeOfBasisMatrix,x,x_change);
4619: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4620: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4621: MatMult(new_mat,matis->x,matis->y);
4622: if (!pcbddc->change_interior) {
4623: const PetscScalar *x,*y,*v;
4624: PetscReal lerror = 0.;
4625: PetscInt i;
4627: VecGetArrayRead(matis->x,&x);
4628: VecGetArrayRead(matis->y,&y);
4629: VecGetArrayRead(matis->counter,&v);
4630: for (i=0;i<local_size;i++)
4631: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4632: lerror = PetscAbsScalar(x[i]-y[i]);
4633: VecRestoreArrayRead(matis->x,&x);
4634: VecRestoreArrayRead(matis->y,&y);
4635: VecRestoreArrayRead(matis->counter,&v);
4636: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4637: if (error > PETSC_SMALL) {
4638: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4639: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4640: } else {
4641: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4642: }
4643: }
4644: }
4645: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4646: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4647: VecAXPY(x,-1.0,x_change);
4648: VecNorm(x,NORM_INFINITY,&error);
4649: if (error > PETSC_SMALL) {
4650: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4651: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4652: } else {
4653: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4654: }
4655: }
4656: VecDestroy(&x);
4657: VecDestroy(&x_change);
4658: }
4660: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4661: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
4663: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4664: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4665: if (isseqaij) {
4666: MatDestroy(&pcbddc->local_mat);
4667: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4668: if (lA) {
4669: Mat work;
4670: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4671: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4672: MatDestroy(&work);
4673: }
4674: } else {
4675: Mat work_mat;
4677: MatDestroy(&pcbddc->local_mat);
4678: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4679: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4680: MatDestroy(&work_mat);
4681: if (lA) {
4682: Mat work;
4683: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4684: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4685: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4686: MatDestroy(&work);
4687: }
4688: }
4689: if (matis->A->symmetric_set) {
4690: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4691: #if !defined(PETSC_USE_COMPLEX)
4692: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4693: #endif
4694: }
4695: MatDestroy(&new_mat);
4696: return(0);
4697: }
4699: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4700: {
4701: PC_IS* pcis = (PC_IS*)(pc->data);
4702: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4703: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4704: PetscInt *idx_R_local=NULL;
4705: PetscInt n_vertices,i,j,n_R,n_D,n_B;
4706: PetscInt vbs,bs;
4707: PetscBT bitmask=NULL;
4708: PetscErrorCode ierr;
4711: /*
4712: No need to setup local scatters if
4713: - primal space is unchanged
4714: AND
4715: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4716: AND
4717: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4718: */
4719: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
4720: return(0);
4721: }
4722: /* destroy old objects */
4723: ISDestroy(&pcbddc->is_R_local);
4724: VecScatterDestroy(&pcbddc->R_to_B);
4725: VecScatterDestroy(&pcbddc->R_to_D);
4726: /* Set Non-overlapping dimensions */
4727: n_B = pcis->n_B;
4728: n_D = pcis->n - n_B;
4729: n_vertices = pcbddc->n_vertices;
4731: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
4733: /* create auxiliary bitmask and allocate workspace */
4734: if (!sub_schurs || !sub_schurs->reuse_solver) {
4735: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
4736: PetscBTCreate(pcis->n,&bitmask);
4737: for (i=0;i<n_vertices;i++) {
4738: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
4739: }
4741: for (i=0, n_R=0; i<pcis->n; i++) {
4742: if (!PetscBTLookup(bitmask,i)) {
4743: idx_R_local[n_R++] = i;
4744: }
4745: }
4746: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
4747: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4749: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4750: ISGetLocalSize(reuse_solver->is_R,&n_R);
4751: }
4753: /* Block code */
4754: vbs = 1;
4755: MatGetBlockSize(pcbddc->local_mat,&bs);
4756: if (bs>1 && !(n_vertices%bs)) {
4757: PetscBool is_blocked = PETSC_TRUE;
4758: PetscInt *vary;
4759: if (!sub_schurs || !sub_schurs->reuse_solver) {
4760: PetscMalloc1(pcis->n/bs,&vary);
4761: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
4762: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
4763: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
4764: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
4765: for (i=0; i<pcis->n/bs; i++) {
4766: if (vary[i]!=0 && vary[i]!=bs) {
4767: is_blocked = PETSC_FALSE;
4768: break;
4769: }
4770: }
4771: PetscFree(vary);
4772: } else {
4773: /* Verify directly the R set */
4774: for (i=0; i<n_R/bs; i++) {
4775: PetscInt j,node=idx_R_local[bs*i];
4776: for (j=1; j<bs; j++) {
4777: if (node != idx_R_local[bs*i+j]-j) {
4778: is_blocked = PETSC_FALSE;
4779: break;
4780: }
4781: }
4782: }
4783: }
4784: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
4785: vbs = bs;
4786: for (i=0;i<n_R/vbs;i++) {
4787: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
4788: }
4789: }
4790: }
4791: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
4792: if (sub_schurs && sub_schurs->reuse_solver) {
4793: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4795: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4796: ISDestroy(&reuse_solver->is_R);
4797: PetscObjectReference((PetscObject)pcbddc->is_R_local);
4798: reuse_solver->is_R = pcbddc->is_R_local;
4799: } else {
4800: PetscFree(idx_R_local);
4801: }
4803: /* print some info if requested */
4804: if (pcbddc->dbg_flag) {
4805: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4806: PetscViewerFlush(pcbddc->dbg_viewer);
4807: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4808: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
4809: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
4810: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
4811: PetscViewerFlush(pcbddc->dbg_viewer);
4812: }
4814: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
4815: if (!sub_schurs || !sub_schurs->reuse_solver) {
4816: IS is_aux1,is_aux2;
4817: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
4819: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4820: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
4821: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
4822: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
4823: for (i=0; i<n_D; i++) {
4824: PetscBTSet(bitmask,is_indices[i]);
4825: }
4826: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
4827: for (i=0, j=0; i<n_R; i++) {
4828: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
4829: aux_array1[j++] = i;
4830: }
4831: }
4832: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
4833: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4834: for (i=0, j=0; i<n_B; i++) {
4835: if (!PetscBTLookup(bitmask,is_indices[i])) {
4836: aux_array2[j++] = i;
4837: }
4838: }
4839: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4840: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
4841: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
4842: ISDestroy(&is_aux1);
4843: ISDestroy(&is_aux2);
4845: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4846: PetscMalloc1(n_D,&aux_array1);
4847: for (i=0, j=0; i<n_R; i++) {
4848: if (PetscBTLookup(bitmask,idx_R_local[i])) {
4849: aux_array1[j++] = i;
4850: }
4851: }
4852: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
4853: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4854: ISDestroy(&is_aux1);
4855: }
4856: PetscBTDestroy(&bitmask);
4857: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4858: } else {
4859: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4860: IS tis;
4861: PetscInt schur_size;
4863: ISGetLocalSize(reuse_solver->is_B,&schur_size);
4864: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
4865: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
4866: ISDestroy(&tis);
4867: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4868: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
4869: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4870: ISDestroy(&tis);
4871: }
4872: }
4873: return(0);
4874: }
4877: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
4878: {
4879: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
4880: PC_IS *pcis = (PC_IS*)pc->data;
4881: PC pc_temp;
4882: Mat A_RR;
4883: MatReuse reuse;
4884: PetscScalar m_one = -1.0;
4885: PetscReal value;
4886: PetscInt n_D,n_R;
4887: PetscBool check_corr[2],issbaij;
4889: /* prefixes stuff */
4890: char dir_prefix[256],neu_prefix[256],str_level[16];
4891: size_t len;
4895: /* compute prefixes */
4896: PetscStrcpy(dir_prefix,"");
4897: PetscStrcpy(neu_prefix,"");
4898: if (!pcbddc->current_level) {
4899: PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
4900: PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
4901: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4902: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4903: } else {
4904: PetscStrcpy(str_level,"");
4905: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4906: PetscStrlen(((PetscObject)pc)->prefix,&len);
4907: len -= 15; /* remove "pc_bddc_coarse_" */
4908: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4909: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4910: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
4911: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
4912: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4913: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4914: PetscStrcat(dir_prefix,str_level);
4915: PetscStrcat(neu_prefix,str_level);
4916: }
4918: /* DIRICHLET PROBLEM */
4919: if (dirichlet) {
4920: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4921: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4922: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
4923: if (pcbddc->dbg_flag) {
4924: Mat A_IIn;
4926: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
4927: MatDestroy(&pcis->A_II);
4928: pcis->A_II = A_IIn;
4929: }
4930: }
4931: if (pcbddc->local_mat->symmetric_set) {
4932: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
4933: }
4934: /* Matrix for Dirichlet problem is pcis->A_II */
4935: n_D = pcis->n - pcis->n_B;
4936: if (!pcbddc->ksp_D) { /* create object if not yet build */
4937: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
4938: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
4939: /* default */
4940: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
4941: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
4942: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
4943: KSPGetPC(pcbddc->ksp_D,&pc_temp);
4944: if (issbaij) {
4945: PCSetType(pc_temp,PCCHOLESKY);
4946: } else {
4947: PCSetType(pc_temp,PCLU);
4948: }
4949: /* Allow user's customization */
4950: KSPSetFromOptions(pcbddc->ksp_D);
4951: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4952: }
4953: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
4954: if (sub_schurs && sub_schurs->reuse_solver) {
4955: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4957: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
4958: }
4959: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
4960: if (!n_D) {
4961: KSPGetPC(pcbddc->ksp_D,&pc_temp);
4962: PCSetType(pc_temp,PCNONE);
4963: }
4964: /* Set Up KSP for Dirichlet problem of BDDC */
4965: KSPSetUp(pcbddc->ksp_D);
4966: /* set ksp_D into pcis data */
4967: KSPDestroy(&pcis->ksp_D);
4968: PetscObjectReference((PetscObject)pcbddc->ksp_D);
4969: pcis->ksp_D = pcbddc->ksp_D;
4970: }
4972: /* NEUMANN PROBLEM */
4973: A_RR = 0;
4974: if (neumann) {
4975: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4976: PetscInt ibs,mbs;
4977: PetscBool issbaij, reuse_neumann_solver;
4978: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4980: reuse_neumann_solver = PETSC_FALSE;
4981: if (sub_schurs && sub_schurs->reuse_solver) {
4982: IS iP;
4984: reuse_neumann_solver = PETSC_TRUE;
4985: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
4986: if (iP) reuse_neumann_solver = PETSC_FALSE;
4987: }
4988: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
4989: ISGetSize(pcbddc->is_R_local,&n_R);
4990: if (pcbddc->ksp_R) { /* already created ksp */
4991: PetscInt nn_R;
4992: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
4993: PetscObjectReference((PetscObject)A_RR);
4994: MatGetSize(A_RR,&nn_R,NULL);
4995: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
4996: KSPReset(pcbddc->ksp_R);
4997: MatDestroy(&A_RR);
4998: reuse = MAT_INITIAL_MATRIX;
4999: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5000: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5001: MatDestroy(&A_RR);
5002: reuse = MAT_INITIAL_MATRIX;
5003: } else { /* safe to reuse the matrix */
5004: reuse = MAT_REUSE_MATRIX;
5005: }
5006: }
5007: /* last check */
5008: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5009: MatDestroy(&A_RR);
5010: reuse = MAT_INITIAL_MATRIX;
5011: }
5012: } else { /* first time, so we need to create the matrix */
5013: reuse = MAT_INITIAL_MATRIX;
5014: }
5015: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5016: MatGetBlockSize(pcbddc->local_mat,&mbs);
5017: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5018: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5019: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5020: if (matis->A == pcbddc->local_mat) {
5021: MatDestroy(&pcbddc->local_mat);
5022: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5023: } else {
5024: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5025: }
5026: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5027: if (matis->A == pcbddc->local_mat) {
5028: MatDestroy(&pcbddc->local_mat);
5029: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5030: } else {
5031: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5032: }
5033: }
5034: /* extract A_RR */
5035: if (reuse_neumann_solver) {
5036: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5038: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5039: MatDestroy(&A_RR);
5040: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5041: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5042: } else {
5043: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5044: }
5045: } else {
5046: MatDestroy(&A_RR);
5047: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5048: PetscObjectReference((PetscObject)A_RR);
5049: }
5050: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5051: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5052: }
5053: if (pcbddc->local_mat->symmetric_set) {
5054: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5055: }
5056: if (!pcbddc->ksp_R) { /* create object if not present */
5057: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5058: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5059: /* default */
5060: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5061: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5062: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5063: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5064: if (issbaij) {
5065: PCSetType(pc_temp,PCCHOLESKY);
5066: } else {
5067: PCSetType(pc_temp,PCLU);
5068: }
5069: /* Allow user's customization */
5070: KSPSetFromOptions(pcbddc->ksp_R);
5071: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
5072: }
5073: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5074: if (!n_R) {
5075: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5076: PCSetType(pc_temp,PCNONE);
5077: }
5078: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5079: /* Reuse solver if it is present */
5080: if (reuse_neumann_solver) {
5081: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5083: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5084: }
5085: /* Set Up KSP for Neumann problem of BDDC */
5086: KSPSetUp(pcbddc->ksp_R);
5087: }
5089: if (pcbddc->dbg_flag) {
5090: PetscViewerFlush(pcbddc->dbg_viewer);
5091: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5092: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5093: }
5095: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5096: check_corr[0] = check_corr[1] = PETSC_FALSE;
5097: if (pcbddc->NullSpace_corr[0]) {
5098: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5099: }
5100: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5101: check_corr[0] = PETSC_TRUE;
5102: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5103: }
5104: if (neumann && pcbddc->NullSpace_corr[2]) {
5105: check_corr[1] = PETSC_TRUE;
5106: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5107: }
5109: /* check Dirichlet and Neumann solvers */
5110: if (pcbddc->dbg_flag) {
5111: if (dirichlet) { /* Dirichlet */
5112: VecSetRandom(pcis->vec1_D,NULL);
5113: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5114: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5115: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5116: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5117: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5118: if (check_corr[0]) {
5119: PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5120: }
5121: PetscViewerFlush(pcbddc->dbg_viewer);
5122: }
5123: if (neumann) { /* Neumann */
5124: VecSetRandom(pcbddc->vec1_R,NULL);
5125: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5126: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5127: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5128: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5129: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5130: if (check_corr[1]) {
5131: PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5132: }
5133: PetscViewerFlush(pcbddc->dbg_viewer);
5134: }
5135: }
5136: /* free Neumann problem's matrix */
5137: MatDestroy(&A_RR);
5138: return(0);
5139: }
/*
   PCBDDCSolveSubstructureCorrection - performs the local solve on the R dofs, with the
   right-hand side read from, and the solution written back to, inout_B (and inout_D).

   pc             - the preconditioner; pc->data is accessed as a PC_BDDC
   inout_B        - on input the right-hand side on B, on output the solution on B
   inout_D        - same on D; only accessed when pcbddc->switch_static is set
   applytranspose - selects the transposed solve; the local_auxmat1/local_auxmat2 terms
                    are then applied before the solve instead of after it

   The solve is done with pcbddc->ksp_R, in place on pcbddc->vec1_R, unless the Schur
   solver is reused and switch_static is off, in which case
   MatFactorSolveSchurComplement[Transpose]() is applied to reuse_solver->F.
*/
5141: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5142: {
5143: PetscErrorCode ierr;
5144: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5145: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* true only when sub_schurs exists and carries reuse data; note that the inner blocks
   below declare a PCBDDCReuseSolvers with the same name, shadowing this flag */
5146: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
/* the entries of vec1_R that are not filled by the scatters below start from zero */
5149: if (!reuse_solver) {
5150: VecSet(pcbddc->vec1_R,0.);
5151: }
/* --- load the right-hand side --- */
5152: if (!pcbddc->switch_static) {
/* transposed application: the auxiliary matrices act on the rhs, directly on B */
5153: if (applytranspose && pcbddc->local_auxmat1) {
5154: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5155: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5156: }
5157: if (!reuse_solver) {
/* B -> R (R_to_B used in reverse) */
5158: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5159: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5160: } else {
5161: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
/* B -> rhs of the Schur complement solve */
5163: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5164: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5165: }
5166: } else {
/* switch_static: both the B and the D parts contribute to the rhs on R */
5167: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5168: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5169: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5170: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
/* here local_auxmat2 is applied to the whole R vector; the updated B part is scattered into vec1_R again */
5171: if (applytranspose && pcbddc->local_auxmat1) {
5172: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5173: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5174: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5175: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5176: }
5177: }
/* --- solve --- */
5178: if (!reuse_solver || pcbddc->switch_static) {
/* in-place solve on vec1_R */
5179: if (applytranspose) {
5180: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5181: } else {
5182: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5183: }
5184: } else {
5185: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
/* Schur complement solve with the reused factorization: rhs_B -> sol_B */
5187: if (applytranspose) {
5188: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5189: } else {
5190: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5191: }
5192: }
/* --- return the solution: inout_B is zeroed first, then filled by the scatters --- */
5193: VecSet(inout_B,0.);
5194: if (!pcbddc->switch_static) {
5195: if (!reuse_solver) {
5196: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5197: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5198: } else {
5199: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5201: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5202: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5203: }
/* non-transposed application: the auxiliary matrices act on the solution, directly on B */
5204: if (!applytranspose && pcbddc->local_auxmat1) {
5205: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5206: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5207: }
5208: } else {
/* first scatter: inout_B must hold the solution before local_auxmat1 is applied to it */
5209: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5210: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5211: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5212: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5213: if (!applytranspose && pcbddc->local_auxmat1) {
5214: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5215: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5216: }
/* second scatter: propagates the (possibly updated) vec1_R to both outputs */
5217: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5218: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5219: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5220: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5221: }
5222: return(0);
5223: }
5225: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5226: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5227: {
5229: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5230: PC_IS* pcis = (PC_IS*) (pc->data);
5231: const PetscScalar zero = 0.0;
5234: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5235: if (!pcbddc->benign_apply_coarse_only) {
5236: if (applytranspose) {
5237: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5238: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5239: } else {
5240: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5241: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5242: }
5243: } else {
5244: VecSet(pcbddc->vec1_P,zero);
5245: }
5247: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5248: if (pcbddc->benign_n) {
5249: PetscScalar *array;
5250: PetscInt j;
5252: VecGetArray(pcbddc->vec1_P,&array);
5253: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5254: VecRestoreArray(pcbddc->vec1_P,&array);
5255: }
5257: /* start communications from local primal nodes to rhs of coarse solver */
5258: VecSet(pcbddc->coarse_vec,zero);
5259: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5260: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5262: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5263: if (pcbddc->coarse_ksp) {
5264: Mat coarse_mat;
5265: Vec rhs,sol;
5266: MatNullSpace nullsp;
5267: PetscBool isbddc = PETSC_FALSE;
5269: if (pcbddc->benign_have_null) {
5270: PC coarse_pc;
5272: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5273: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5274: /* we need to propagate to coarser levels the need for a possible benign correction */
5275: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5276: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5277: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5278: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5279: }
5280: }
5281: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5282: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5283: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5284: MatGetNullSpace(coarse_mat,&nullsp);
5285: if (nullsp) {
5286: MatNullSpaceRemove(nullsp,rhs);
5287: }
5288: if (applytranspose) {
5289: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5290: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5291: } else {
5292: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5293: PC coarse_pc;
5295: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5296: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5297: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5298: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5299: } else {
5300: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5301: }
5302: }
5303: /* we don't need the benign correction at coarser levels anymore */
5304: if (pcbddc->benign_have_null && isbddc) {
5305: PC coarse_pc;
5306: PC_BDDC* coarsepcbddc;
5308: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5309: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5310: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5311: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5312: }
5313: if (nullsp) {
5314: MatNullSpaceRemove(nullsp,sol);
5315: }
5316: }
5318: /* Local solution on R nodes */
5319: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5320: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5321: }
5322: /* communications from coarse sol to local primal nodes */
5323: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5324: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5326: /* Sum contributions from the two levels */
5327: if (!pcbddc->benign_apply_coarse_only) {
5328: if (applytranspose) {
5329: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5330: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5331: } else {
5332: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5333: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5334: }
5335: /* store p0 */
5336: if (pcbddc->benign_n) {
5337: PetscScalar *array;
5338: PetscInt j;
5340: VecGetArray(pcbddc->vec1_P,&array);
5341: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5342: VecRestoreArray(pcbddc->vec1_P,&array);
5343: }
5344: } else { /* expand the coarse solution */
5345: if (applytranspose) {
5346: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5347: } else {
5348: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5349: }
5350: }
5351: return(0);
5352: }
5354: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5355: {
5357: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5358: PetscScalar *array;
5359: Vec from,to;
5362: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5363: from = pcbddc->coarse_vec;
5364: to = pcbddc->vec1_P;
5365: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5366: Vec tvec;
5368: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5369: VecResetArray(tvec);
5370: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5371: VecGetArray(tvec,&array);
5372: VecPlaceArray(from,array);
5373: VecRestoreArray(tvec,&array);
5374: }
5375: } else { /* from local to global -> put data in coarse right hand side */
5376: from = pcbddc->vec1_P;
5377: to = pcbddc->coarse_vec;
5378: }
5379: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5380: return(0);
5381: }
5383: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5384: {
5386: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5387: PetscScalar *array;
5388: Vec from,to;
5391: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5392: from = pcbddc->coarse_vec;
5393: to = pcbddc->vec1_P;
5394: } else { /* from local to global -> put data in coarse right hand side */
5395: from = pcbddc->vec1_P;
5396: to = pcbddc->coarse_vec;
5397: }
5398: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5399: if (smode == SCATTER_FORWARD) {
5400: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5401: Vec tvec;
5403: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5404: VecGetArray(to,&array);
5405: VecPlaceArray(tvec,array);
5406: VecRestoreArray(to,&array);
5407: }
5408: } else {
5409: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5410: VecResetArray(from);
5411: }
5412: }
5413: return(0);
5414: }
5416: /* uncomment for testing purposes */
5417: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5418: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5419: {
5420: PetscErrorCode ierr;
5421: PC_IS* pcis = (PC_IS*)(pc->data);
5422: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5423: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5424: /* one and zero */
5425: PetscScalar one=1.0,zero=0.0;
5426: /* space to store constraints and their local indices */
5427: PetscScalar *constraints_data;
5428: PetscInt *constraints_idxs,*constraints_idxs_B;
5429: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
5430: PetscInt *constraints_n;
5431: /* iterators */
5432: PetscInt i,j,k,total_counts,total_counts_cc,cum;
5433: /* BLAS integers */
5434: PetscBLASInt lwork,lierr;
5435: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
5436: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
5437: /* reuse */
5438: PetscInt olocal_primal_size,olocal_primal_size_cc;
5439: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
5440: /* change of basis */
5441: PetscBool qr_needed;
5442: PetscBT change_basis,qr_needed_idx;
5443: /* auxiliary stuff */
5444: PetscInt *nnz,*is_indices;
5445: PetscInt ncc;
5446: /* some quantities */
5447: PetscInt n_vertices,total_primal_vertices,valid_constraints;
5448: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5451: /* Destroy Mat objects computed previously */
5452: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5453: MatDestroy(&pcbddc->ConstraintMatrix);
5454: MatDestroy(&pcbddc->switch_static_change);
5455: /* save info on constraints from previous setup (if any) */
5456: olocal_primal_size = pcbddc->local_primal_size;
5457: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5458: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5459: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5460: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5461: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5462: PetscFree(pcbddc->primal_indices_local_idxs);
5464: if (!pcbddc->adaptive_selection) {
5465: IS ISForVertices,*ISForFaces,*ISForEdges;
5466: MatNullSpace nearnullsp;
5467: const Vec *nearnullvecs;
5468: Vec *localnearnullsp;
5469: PetscScalar *array;
5470: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
5471: PetscBool nnsp_has_cnst;
5472: /* LAPACK working arrays for SVD or POD */
5473: PetscBool skip_lapack,boolforchange;
5474: PetscScalar *work;
5475: PetscReal *singular_vals;
5476: #if defined(PETSC_USE_COMPLEX)
5477: PetscReal *rwork;
5478: #endif
5479: #if defined(PETSC_MISSING_LAPACK_GESVD)
5480: PetscScalar *temp_basis,*correlation_mat;
5481: #else
5482: PetscBLASInt dummy_int=1;
5483: PetscScalar dummy_scalar=1.;
5484: #endif
5486: /* Get index sets for faces, edges and vertices from graph */
5487: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5488: /* print some info */
5489: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5490: PetscInt nv;
5492: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5493: ISGetSize(ISForVertices,&nv);
5494: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5495: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5496: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5497: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5498: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5499: PetscViewerFlush(pcbddc->dbg_viewer);
5500: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5501: }
5503: /* free unneeded index sets */
5504: if (!pcbddc->use_vertices) {
5505: ISDestroy(&ISForVertices);
5506: }
5507: if (!pcbddc->use_edges) {
5508: for (i=0;i<n_ISForEdges;i++) {
5509: ISDestroy(&ISForEdges[i]);
5510: }
5511: PetscFree(ISForEdges);
5512: n_ISForEdges = 0;
5513: }
5514: if (!pcbddc->use_faces) {
5515: for (i=0;i<n_ISForFaces;i++) {
5516: ISDestroy(&ISForFaces[i]);
5517: }
5518: PetscFree(ISForFaces);
5519: n_ISForFaces = 0;
5520: }
5522: /* check if near null space is attached to global mat */
5523: MatGetNearNullSpace(pc->pmat,&nearnullsp);
5524: if (nearnullsp) {
5525: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5526: /* remove any stored info */
5527: MatNullSpaceDestroy(&pcbddc->onearnullspace);
5528: PetscFree(pcbddc->onearnullvecs_state);
5529: /* store information for BDDC solver reuse */
5530: PetscObjectReference((PetscObject)nearnullsp);
5531: pcbddc->onearnullspace = nearnullsp;
5532: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5533: for (i=0;i<nnsp_size;i++) {
5534: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5535: }
5536: } else { /* if near null space is not provided BDDC uses constants by default */
5537: nnsp_size = 0;
5538: nnsp_has_cnst = PETSC_TRUE;
5539: }
5540: /* get max number of constraints on a single cc */
5541: max_constraints = nnsp_size;
5542: if (nnsp_has_cnst) max_constraints++;
5544: /*
5545: Evaluate maximum storage size needed by the procedure
5546: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5547: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5548: There can be multiple constraints per connected component
5549: */
5550: n_vertices = 0;
5551: if (ISForVertices) {
5552: ISGetSize(ISForVertices,&n_vertices);
5553: }
5554: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5555: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
5557: total_counts = n_ISForFaces+n_ISForEdges;
5558: total_counts *= max_constraints;
5559: total_counts += n_vertices;
5560: PetscBTCreate(total_counts,&change_basis);
5562: total_counts = 0;
5563: max_size_of_constraint = 0;
5564: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5565: IS used_is;
5566: if (i<n_ISForEdges) {
5567: used_is = ISForEdges[i];
5568: } else {
5569: used_is = ISForFaces[i-n_ISForEdges];
5570: }
5571: ISGetSize(used_is,&j);
5572: total_counts += j;
5573: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5574: }
5575: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
5577: /* get local part of global near null space vectors */
5578: PetscMalloc1(nnsp_size,&localnearnullsp);
5579: for (k=0;k<nnsp_size;k++) {
5580: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5581: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5582: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5583: }
5585: /* whether or not to skip lapack calls */
5586: skip_lapack = PETSC_TRUE;
5587: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5589: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5590: if (!skip_lapack) {
5591: PetscScalar temp_work;
5593: #if defined(PETSC_MISSING_LAPACK_GESVD)
5594: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5595: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5596: PetscMalloc1(max_constraints,&singular_vals);
5597: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5598: #if defined(PETSC_USE_COMPLEX)
5599: PetscMalloc1(3*max_constraints,&rwork);
5600: #endif
5601: /* now we evaluate the optimal workspace using query with lwork=-1 */
5602: PetscBLASIntCast(max_constraints,&Blas_N);
5603: PetscBLASIntCast(max_constraints,&Blas_LDA);
5604: lwork = -1;
5605: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5606: #if !defined(PETSC_USE_COMPLEX)
5607: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5608: #else
5609: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5610: #endif
5611: PetscFPTrapPop();
5612: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5613: #else /* on missing GESVD */
5614: /* SVD */
5615: PetscInt max_n,min_n;
5616: max_n = max_size_of_constraint;
5617: min_n = max_constraints;
5618: if (max_size_of_constraint < max_constraints) {
5619: min_n = max_size_of_constraint;
5620: max_n = max_constraints;
5621: }
5622: PetscMalloc1(min_n,&singular_vals);
5623: #if defined(PETSC_USE_COMPLEX)
5624: PetscMalloc1(5*min_n,&rwork);
5625: #endif
5626: /* now we evaluate the optimal workspace using query with lwork=-1 */
5627: lwork = -1;
5628: PetscBLASIntCast(max_n,&Blas_M);
5629: PetscBLASIntCast(min_n,&Blas_N);
5630: PetscBLASIntCast(max_n,&Blas_LDA);
5631: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5632: #if !defined(PETSC_USE_COMPLEX)
5633: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5634: #else
5635: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5636: #endif
5637: PetscFPTrapPop();
5638: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5639: #endif /* on missing GESVD */
5640: /* Allocate optimal workspace */
5641: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5642: PetscMalloc1(lwork,&work);
5643: }
5644: /* Now we can loop on constraining sets */
5645: total_counts = 0;
5646: constraints_idxs_ptr[0] = 0;
5647: constraints_data_ptr[0] = 0;
5648: /* vertices */
5649: if (n_vertices) {
5650: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5651: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5652: for (i=0;i<n_vertices;i++) {
5653: constraints_n[total_counts] = 1;
5654: constraints_data[total_counts] = 1.0;
5655: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5656: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5657: total_counts++;
5658: }
5659: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5660: n_vertices = total_counts;
5661: }
5663: /* edges and faces */
5664: total_counts_cc = total_counts;
5665: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
5666: IS used_is;
5667: PetscBool idxs_copied = PETSC_FALSE;
5669: if (ncc<n_ISForEdges) {
5670: used_is = ISForEdges[ncc];
5671: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
5672: } else {
5673: used_is = ISForFaces[ncc-n_ISForEdges];
5674: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
5675: }
5676: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
5678: ISGetSize(used_is,&size_of_constraint);
5679: ISGetIndices(used_is,(const PetscInt**)&is_indices);
5680: /* change of basis should not be performed on local periodic nodes */
5681: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
5682: if (nnsp_has_cnst) {
5683: PetscScalar quad_value;
5685: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5686: idxs_copied = PETSC_TRUE;
5688: if (!pcbddc->use_nnsp_true) {
5689: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
5690: } else {
5691: quad_value = 1.0;
5692: }
5693: for (j=0;j<size_of_constraint;j++) {
5694: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
5695: }
5696: temp_constraints++;
5697: total_counts++;
5698: }
5699: for (k=0;k<nnsp_size;k++) {
5700: PetscReal real_value;
5701: PetscScalar *ptr_to_data;
5703: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5704: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
5705: for (j=0;j<size_of_constraint;j++) {
5706: ptr_to_data[j] = array[is_indices[j]];
5707: }
5708: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5709: /* check if array is null on the connected component */
5710: PetscBLASIntCast(size_of_constraint,&Blas_N);
5711: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
5712: if (real_value > 0.0) { /* keep indices and values */
5713: temp_constraints++;
5714: total_counts++;
5715: if (!idxs_copied) {
5716: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5717: idxs_copied = PETSC_TRUE;
5718: }
5719: }
5720: }
5721: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
5722: valid_constraints = temp_constraints;
5723: if (!pcbddc->use_nnsp_true && temp_constraints) {
5724: if (temp_constraints == 1) { /* just normalize the constraint */
5725: PetscScalar norm,*ptr_to_data;
5727: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
5728: PetscBLASIntCast(size_of_constraint,&Blas_N);
5729: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
5730: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
5731: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
5732: } else { /* perform SVD */
5733: PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
5734: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
5736: #if defined(PETSC_MISSING_LAPACK_GESVD)
5737: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
5738: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
5739: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
5740: the constraints basis will differ (by a complex factor with absolute value equal to 1)
5741: from that computed using LAPACKgesvd
5742: -> This is due to a different computation of eigenvectors in LAPACKheev
5743: -> The quality of the POD-computed basis will be the same */
5744: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
5745: /* Store upper triangular part of correlation matrix */
5746: PetscBLASIntCast(size_of_constraint,&Blas_N);
5747: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5748: for (j=0;j<temp_constraints;j++) {
5749: for (k=0;k<j+1;k++) {
5750: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
5751: }
5752: }
5753: /* compute eigenvalues and eigenvectors of correlation matrix */
5754: PetscBLASIntCast(temp_constraints,&Blas_N);
5755: PetscBLASIntCast(temp_constraints,&Blas_LDA);
5756: #if !defined(PETSC_USE_COMPLEX)
5757: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
5758: #else
5759: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
5760: #endif
5761: PetscFPTrapPop();
5762: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
5763: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
5764: j = 0;
5765: while (j < temp_constraints && singular_vals[j] < tol) j++;
5766: total_counts = total_counts-j;
5767: valid_constraints = temp_constraints-j;
5768: /* scale and copy POD basis into used quadrature memory */
5769: PetscBLASIntCast(size_of_constraint,&Blas_M);
5770: PetscBLASIntCast(temp_constraints,&Blas_N);
5771: PetscBLASIntCast(temp_constraints,&Blas_K);
5772: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5773: PetscBLASIntCast(temp_constraints,&Blas_LDB);
5774: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
5775: if (j<temp_constraints) {
5776: PetscInt ii;
5777: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
5778: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5779: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
5780: PetscFPTrapPop();
5781: for (k=0;k<temp_constraints-j;k++) {
5782: for (ii=0;ii<size_of_constraint;ii++) {
5783: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
5784: }
5785: }
5786: }
5787: #else /* on missing GESVD */
5788: PetscBLASIntCast(size_of_constraint,&Blas_M);
5789: PetscBLASIntCast(temp_constraints,&Blas_N);
5790: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5791: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5792: #if !defined(PETSC_USE_COMPLEX)
5793: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
5794: #else
5795: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
5796: #endif
5797: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
5798: PetscFPTrapPop();
5799: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
5800: k = temp_constraints;
5801: if (k > size_of_constraint) k = size_of_constraint;
5802: j = 0;
5803: while (j < k && singular_vals[k-j-1] < tol) j++;
5804: valid_constraints = k-j;
5805: total_counts = total_counts-temp_constraints+valid_constraints;
5806: #endif /* on missing GESVD */
5807: }
5808: }
5809: /* update pointers information */
5810: if (valid_constraints) {
5811: constraints_n[total_counts_cc] = valid_constraints;
5812: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
5813: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
5814: /* set change_of_basis flag */
5815: if (boolforchange) {
5816: PetscBTSet(change_basis,total_counts_cc);
5817: }
5818: total_counts_cc++;
5819: }
5820: }
5821: /* free workspace */
5822: if (!skip_lapack) {
5823: PetscFree(work);
5824: #if defined(PETSC_USE_COMPLEX)
5825: PetscFree(rwork);
5826: #endif
5827: PetscFree(singular_vals);
5828: #if defined(PETSC_MISSING_LAPACK_GESVD)
5829: PetscFree(correlation_mat);
5830: PetscFree(temp_basis);
5831: #endif
5832: }
5833: for (k=0;k<nnsp_size;k++) {
5834: VecDestroy(&localnearnullsp[k]);
5835: }
5836: PetscFree(localnearnullsp);
5837: /* free index sets of faces, edges and vertices */
5838: for (i=0;i<n_ISForFaces;i++) {
5839: ISDestroy(&ISForFaces[i]);
5840: }
5841: if (n_ISForFaces) {
5842: PetscFree(ISForFaces);
5843: }
5844: for (i=0;i<n_ISForEdges;i++) {
5845: ISDestroy(&ISForEdges[i]);
5846: }
5847: if (n_ISForEdges) {
5848: PetscFree(ISForEdges);
5849: }
5850: ISDestroy(&ISForVertices);
5851: } else {
5852: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5854: total_counts = 0;
5855: n_vertices = 0;
5856: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
5857: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
5858: }
5859: max_constraints = 0;
5860: total_counts_cc = 0;
5861: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5862: total_counts += pcbddc->adaptive_constraints_n[i];
5863: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
5864: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
5865: }
5866: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
5867: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
5868: constraints_idxs = pcbddc->adaptive_constraints_idxs;
5869: constraints_data = pcbddc->adaptive_constraints_data;
5870: /* constraints_n differs from pcbddc->adaptive_constraints_n */
5871: PetscMalloc1(total_counts_cc,&constraints_n);
5872: total_counts_cc = 0;
5873: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5874: if (pcbddc->adaptive_constraints_n[i]) {
5875: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
5876: }
5877: }
5878: #if 0
5879: printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
5880: for (i=0;i<total_counts_cc;i++) {
5881: printf("const %d, start %d",i,constraints_idxs_ptr[i]);
5882: printf(" end %d:\n",constraints_idxs_ptr[i+1]);
5883: for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
5884: printf(" %d",constraints_idxs[j]);
5885: }
5886: printf("\n");
5887: printf("number of cc: %d\n",constraints_n[i]);
5888: }
5889: for (i=0;i<n_vertices;i++) {
5890: PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
5891: }
5892: for (i=0;i<sub_schurs->n_subs;i++) {
5893: PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
5894: }
5895: #endif
5897: max_size_of_constraint = 0;
5898: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
5899: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
5900: /* Change of basis */
5901: PetscBTCreate(total_counts_cc,&change_basis);
5902: if (pcbddc->use_change_of_basis) {
5903: for (i=0;i<sub_schurs->n_subs;i++) {
5904: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
5905: PetscBTSet(change_basis,i+n_vertices);
5906: }
5907: }
5908: }
5909: }
5910: pcbddc->local_primal_size = total_counts;
5911: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
5913: /* map constraints_idxs in boundary numbering */
5914: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
5915: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);
5917: /* Create constraint matrix */
5918: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
5919: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
5920: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
5922: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
5923: /* determine if a QR strategy is needed for change of basis */
5924: qr_needed = PETSC_FALSE;
5925: PetscBTCreate(total_counts_cc,&qr_needed_idx);
5926: total_primal_vertices=0;
5927: pcbddc->local_primal_size_cc = 0;
5928: for (i=0;i<total_counts_cc;i++) {
5929: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5930: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
5931: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
5932: pcbddc->local_primal_size_cc += 1;
5933: } else if (PetscBTLookup(change_basis,i)) {
5934: for (k=0;k<constraints_n[i];k++) {
5935: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5936: }
5937: pcbddc->local_primal_size_cc += constraints_n[i];
5938: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
5939: PetscBTSet(qr_needed_idx,i);
5940: qr_needed = PETSC_TRUE;
5941: }
5942: } else {
5943: pcbddc->local_primal_size_cc += 1;
5944: }
5945: }
5946: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
5947: pcbddc->n_vertices = total_primal_vertices;
5948: /* permute indices in order to have a sorted set of vertices */
5949: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
5950: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
5951: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
5952: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
5954: /* nonzero structure of constraint matrix */
5955: /* and get reference dof for local constraints */
5956: PetscMalloc1(pcbddc->local_primal_size,&nnz);
5957: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
5959: j = total_primal_vertices;
5960: total_counts = total_primal_vertices;
5961: cum = total_primal_vertices;
5962: for (i=n_vertices;i<total_counts_cc;i++) {
5963: if (!PetscBTLookup(change_basis,i)) {
5964: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
5965: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
5966: cum++;
5967: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5968: for (k=0;k<constraints_n[i];k++) {
5969: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5970: nnz[j+k] = size_of_constraint;
5971: }
5972: j += constraints_n[i];
5973: }
5974: }
5975: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
5976: PetscFree(nnz);
5978: /* set values in constraint matrix */
5979: for (i=0;i<total_primal_vertices;i++) {
5980: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
5981: }
5982: total_counts = total_primal_vertices;
5983: for (i=n_vertices;i<total_counts_cc;i++) {
5984: if (!PetscBTLookup(change_basis,i)) {
5985: PetscInt *cols;
5987: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5988: cols = constraints_idxs+constraints_idxs_ptr[i];
5989: for (k=0;k<constraints_n[i];k++) {
5990: PetscInt row = total_counts+k;
5991: PetscScalar *vals;
5993: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
5994: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
5995: }
5996: total_counts += constraints_n[i];
5997: }
5998: }
5999: /* assembling */
6000: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6001: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6003: /*
6004: PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6005: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6006: PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6007: */
6008: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6009: if (pcbddc->use_change_of_basis) {
6010: /* dual and primal dofs on a single cc */
6011: PetscInt dual_dofs,primal_dofs;
6012: /* working stuff for GEQRF */
6013: PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6014: PetscBLASInt lqr_work;
6015: /* working stuff for UNGQR */
6016: PetscScalar *gqr_work,lgqr_work_t;
6017: PetscBLASInt lgqr_work;
6018: /* working stuff for TRTRS */
6019: PetscScalar *trs_rhs;
6020: PetscBLASInt Blas_NRHS;
6021: /* pointers for values insertion into change of basis matrix */
6022: PetscInt *start_rows,*start_cols;
6023: PetscScalar *start_vals;
6024: /* working stuff for values insertion */
6025: PetscBT is_primal;
6026: PetscInt *aux_primal_numbering_B;
6027: /* matrix sizes */
6028: PetscInt global_size,local_size;
6029: /* temporary change of basis */
6030: Mat localChangeOfBasisMatrix;
6031: /* extra space for debugging */
6032: PetscScalar *dbg_work;
6034: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6035: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6036: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6037: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6038: /* nonzeros for local mat */
6039: PetscMalloc1(pcis->n,&nnz);
6040: if (!pcbddc->benign_change || pcbddc->fake_change) {
6041: for (i=0;i<pcis->n;i++) nnz[i]=1;
6042: } else {
6043: const PetscInt *ii;
6044: PetscInt n;
6045: PetscBool flg_row;
6046: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6047: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6048: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6049: }
6050: for (i=n_vertices;i<total_counts_cc;i++) {
6051: if (PetscBTLookup(change_basis,i)) {
6052: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6053: if (PetscBTLookup(qr_needed_idx,i)) {
6054: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6055: } else {
6056: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6057: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6058: }
6059: }
6060: }
6061: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6062: PetscFree(nnz);
6063: /* Set interior change in the matrix */
6064: if (!pcbddc->benign_change || pcbddc->fake_change) {
6065: for (i=0;i<pcis->n;i++) {
6066: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6067: }
6068: } else {
6069: const PetscInt *ii,*jj;
6070: PetscScalar *aa;
6071: PetscInt n;
6072: PetscBool flg_row;
6073: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6074: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6075: for (i=0;i<n;i++) {
6076: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6077: }
6078: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6079: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6080: }
6082: if (pcbddc->dbg_flag) {
6083: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6084: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6085: }
6088: /* Now we loop on the constraints which need a change of basis */
6089: /*
6090: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6091: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6093: Basic blocks of change of basis matrix T computed by
6095: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6097: | 1 0 ... 0 s_1/S |
6098: | 0 1 ... 0 s_2/S |
6099: | ... |
6100: | 0 ... 1 s_{n-1}/S |
6101: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6103: with S = \sum_{i=1}^n s_i^2
6104: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6105: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6107: - QR decomposition of constraints otherwise
6108: */
6109: if (qr_needed) {
6110: /* space to store Q */
6111: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6112: /* array to store scaling factors for reflectors */
6113: PetscMalloc1(max_constraints,&qr_tau);
6114: /* first we issue queries for optimal work */
6115: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6116: PetscBLASIntCast(max_constraints,&Blas_N);
6117: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6118: lqr_work = -1;
6119: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6120: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6121: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6122: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6123: lgqr_work = -1;
6124: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6125: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6126: PetscBLASIntCast(max_constraints,&Blas_K);
6127: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6128: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6129: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6130: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
6131: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6132: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6133: /* array to store rhs and solution of triangular solver */
6134: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6135: /* allocating workspace for check */
6136: if (pcbddc->dbg_flag) {
6137: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6138: }
6139: }
6140: /* array to store whether a node is primal or not */
6141: PetscBTCreate(pcis->n_B,&is_primal);
6142: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6143: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6144: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6145: for (i=0;i<total_primal_vertices;i++) {
6146: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6147: }
6148: PetscFree(aux_primal_numbering_B);
6150: /* loop on constraints and see whether or not they need a change of basis and compute it */
6151: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6152: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6153: if (PetscBTLookup(change_basis,total_counts)) {
6154: /* get constraint info */
6155: primal_dofs = constraints_n[total_counts];
6156: dual_dofs = size_of_constraint-primal_dofs;
6158: if (pcbddc->dbg_flag) {
6159: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6160: }
6162: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6164: /* copy quadrature constraints for change of basis check */
6165: if (pcbddc->dbg_flag) {
6166: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6167: }
6168: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6169: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6171: /* compute QR decomposition of constraints */
6172: PetscBLASIntCast(size_of_constraint,&Blas_M);
6173: PetscBLASIntCast(primal_dofs,&Blas_N);
6174: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6175: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6176: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6177: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6178: PetscFPTrapPop();
6180: /* explicitly compute R^-T */
6181: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6182: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6183: PetscBLASIntCast(primal_dofs,&Blas_N);
6184: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6185: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6186: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6187: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6188: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6189: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6190: PetscFPTrapPop();
6192: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6193: PetscBLASIntCast(size_of_constraint,&Blas_M);
6194: PetscBLASIntCast(size_of_constraint,&Blas_N);
6195: PetscBLASIntCast(primal_dofs,&Blas_K);
6196: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6197: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6198: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6199: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
6200: PetscFPTrapPop();
6202: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6203: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6204: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6205: PetscBLASIntCast(size_of_constraint,&Blas_M);
6206: PetscBLASIntCast(primal_dofs,&Blas_N);
6207: PetscBLASIntCast(primal_dofs,&Blas_K);
6208: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6209: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6210: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6211: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6212: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6213: PetscFPTrapPop();
6214: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6216: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6217: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6218: /* insert cols for primal dofs */
6219: for (j=0;j<primal_dofs;j++) {
6220: start_vals = &qr_basis[j*size_of_constraint];
6221: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6222: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6223: }
6224: /* insert cols for dual dofs */
6225: for (j=0,k=0;j<dual_dofs;k++) {
6226: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6227: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6228: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6229: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6230: j++;
6231: }
6232: }
6234: /* check change of basis */
6235: if (pcbddc->dbg_flag) {
6236: PetscInt ii,jj;
6237: PetscBool valid_qr=PETSC_TRUE;
6238: PetscBLASIntCast(primal_dofs,&Blas_M);
6239: PetscBLASIntCast(size_of_constraint,&Blas_N);
6240: PetscBLASIntCast(size_of_constraint,&Blas_K);
6241: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6242: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6243: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6244: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6245: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6246: PetscFPTrapPop();
6247: for (jj=0;jj<size_of_constraint;jj++) {
6248: for (ii=0;ii<primal_dofs;ii++) {
6249: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6250: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
6251: }
6252: }
6253: if (!valid_qr) {
6254: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6255: for (jj=0;jj<size_of_constraint;jj++) {
6256: for (ii=0;ii<primal_dofs;ii++) {
6257: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6258: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6259: }
6260: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
6261: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6262: }
6263: }
6264: }
6265: } else {
6266: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6267: }
6268: }
6269: } else { /* simple transformation block */
6270: PetscInt row,col;
6271: PetscScalar val,norm;
6273: PetscBLASIntCast(size_of_constraint,&Blas_N);
6274: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6275: for (j=0;j<size_of_constraint;j++) {
6276: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6277: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6278: if (!PetscBTLookup(is_primal,row_B)) {
6279: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6280: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6281: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6282: } else {
6283: for (k=0;k<size_of_constraint;k++) {
6284: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6285: if (row != col) {
6286: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6287: } else {
6288: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6289: }
6290: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6291: }
6292: }
6293: }
6294: if (pcbddc->dbg_flag) {
6295: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6296: }
6297: }
6298: } else {
6299: if (pcbddc->dbg_flag) {
6300: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6301: }
6302: }
6303: }
6305: /* free workspace */
6306: if (qr_needed) {
6307: if (pcbddc->dbg_flag) {
6308: PetscFree(dbg_work);
6309: }
6310: PetscFree(trs_rhs);
6311: PetscFree(qr_tau);
6312: PetscFree(qr_work);
6313: PetscFree(gqr_work);
6314: PetscFree(qr_basis);
6315: }
6316: PetscBTDestroy(&is_primal);
6317: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6318: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6320: /* assembling of global change of variable */
6321: if (!pcbddc->fake_change) {
6322: Mat tmat;
6323: PetscInt bs;
6325: VecGetSize(pcis->vec1_global,&global_size);
6326: VecGetLocalSize(pcis->vec1_global,&local_size);
6327: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6328: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6329: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6330: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6331: MatGetBlockSize(pc->pmat,&bs);
6332: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6333: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6334: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6335: MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6336: MatDestroy(&tmat);
6337: VecSet(pcis->vec1_global,0.0);
6338: VecSet(pcis->vec1_N,1.0);
6339: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6340: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6341: VecReciprocal(pcis->vec1_global);
6342: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6344: /* check */
6345: if (pcbddc->dbg_flag) {
6346: PetscReal error;
6347: Vec x,x_change;
6349: VecDuplicate(pcis->vec1_global,&x);
6350: VecDuplicate(pcis->vec1_global,&x_change);
6351: VecSetRandom(x,NULL);
6352: VecCopy(x,pcis->vec1_global);
6353: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6354: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6355: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6356: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6357: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6358: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6359: VecAXPY(x,-1.0,x_change);
6360: VecNorm(x,NORM_INFINITY,&error);
6361: if (error > PETSC_SMALL) {
6362: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6363: }
6364: VecDestroy(&x);
6365: VecDestroy(&x_change);
6366: }
6367: /* adapt sub_schurs computed (if any) */
6368: if (pcbddc->use_deluxe_scaling) {
6369: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6371: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6372: if (sub_schurs && sub_schurs->S_Ej_all) {
6373: Mat S_new,tmat;
6374: IS is_all_N,is_V_Sall = NULL;
6376: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6377: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6378: if (pcbddc->deluxe_zerorows) {
6379: ISLocalToGlobalMapping NtoSall;
6380: IS is_V;
6381: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6382: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6383: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6384: ISLocalToGlobalMappingDestroy(&NtoSall);
6385: ISDestroy(&is_V);
6386: }
6387: ISDestroy(&is_all_N);
6388: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6389: MatDestroy(&sub_schurs->S_Ej_all);
6390: PetscObjectReference((PetscObject)S_new);
6391: if (pcbddc->deluxe_zerorows) {
6392: const PetscScalar *array;
6393: const PetscInt *idxs_V,*idxs_all;
6394: PetscInt i,n_V;
6396: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6397: ISGetLocalSize(is_V_Sall,&n_V);
6398: ISGetIndices(is_V_Sall,&idxs_V);
6399: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6400: VecGetArrayRead(pcis->D,&array);
6401: for (i=0;i<n_V;i++) {
6402: PetscScalar val;
6403: PetscInt idx;
6405: idx = idxs_V[i];
6406: val = array[idxs_all[idxs_V[i]]];
6407: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6408: }
6409: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6410: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6411: VecRestoreArrayRead(pcis->D,&array);
6412: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6413: ISRestoreIndices(is_V_Sall,&idxs_V);
6414: }
6415: sub_schurs->S_Ej_all = S_new;
6416: MatDestroy(&S_new);
6417: if (sub_schurs->sum_S_Ej_all) {
6418: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6419: MatDestroy(&sub_schurs->sum_S_Ej_all);
6420: PetscObjectReference((PetscObject)S_new);
6421: if (pcbddc->deluxe_zerorows) {
6422: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6423: }
6424: sub_schurs->sum_S_Ej_all = S_new;
6425: MatDestroy(&S_new);
6426: }
6427: ISDestroy(&is_V_Sall);
6428: MatDestroy(&tmat);
6429: }
6430: /* destroy any change of basis context in sub_schurs */
6431: if (sub_schurs && sub_schurs->change) {
6432: PetscInt i;
6434: for (i=0;i<sub_schurs->n_subs;i++) {
6435: KSPDestroy(&sub_schurs->change[i]);
6436: }
6437: PetscFree(sub_schurs->change);
6438: }
6439: }
6440: if (pcbddc->switch_static) { /* need to save the local change */
6441: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6442: } else {
6443: MatDestroy(&localChangeOfBasisMatrix);
6444: }
6445: /* determine if any process has changed the pressures locally */
6446: pcbddc->change_interior = pcbddc->benign_have_null;
6447: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6448: MatDestroy(&pcbddc->ConstraintMatrix);
6449: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6450: pcbddc->use_qr_single = qr_needed;
6451: }
6452: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6453: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6454: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6455: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6456: } else {
6457: Mat benign_global = NULL;
6458: if (pcbddc->benign_have_null) {
6459: Mat tmat;
6461: pcbddc->change_interior = PETSC_TRUE;
6462: VecSet(pcis->vec1_global,0.0);
6463: VecSet(pcis->vec1_N,1.0);
6464: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6465: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6466: VecReciprocal(pcis->vec1_global);
6467: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6468: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6469: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6470: if (pcbddc->benign_change) {
6471: Mat M;
6473: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6474: MatDiagonalScale(M,pcis->vec1_N,NULL);
6475: MatISSetLocalMat(tmat,M);
6476: MatDestroy(&M);
6477: } else {
6478: Mat eye;
6479: PetscScalar *array;
6481: VecGetArray(pcis->vec1_N,&array);
6482: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6483: for (i=0;i<pcis->n;i++) {
6484: MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6485: }
6486: VecRestoreArray(pcis->vec1_N,&array);
6487: MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6488: MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6489: MatISSetLocalMat(tmat,eye);
6490: MatDestroy(&eye);
6491: }
6492: MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6493: MatDestroy(&tmat);
6494: }
6495: if (pcbddc->user_ChangeOfBasisMatrix) {
6496: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6497: MatDestroy(&benign_global);
6498: } else if (pcbddc->benign_have_null) {
6499: pcbddc->ChangeOfBasisMatrix = benign_global;
6500: }
6501: }
6502: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6503: IS is_global;
6504: const PetscInt *gidxs;
6506: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6507: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6508: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6509: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6510: ISDestroy(&is_global);
6511: }
6512: }
6513: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6514: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6515: }
6517: if (!pcbddc->fake_change) {
6518: /* add pressure dofs to set of primal nodes for numbering purposes */
6519: for (i=0;i<pcbddc->benign_n;i++) {
6520: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6521: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6522: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6523: pcbddc->local_primal_size_cc++;
6524: pcbddc->local_primal_size++;
6525: }
6527: /* check if a new primal space has been introduced (also take into account benign trick) */
6528: pcbddc->new_primal_space_local = PETSC_TRUE;
6529: if (olocal_primal_size == pcbddc->local_primal_size) {
6530: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6531: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6532: if (!pcbddc->new_primal_space_local) {
6533: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6534: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6535: }
6536: }
6537: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6538: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6539: }
6540: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
6542: /* flush dbg viewer */
6543: if (pcbddc->dbg_flag) {
6544: PetscViewerFlush(pcbddc->dbg_viewer);
6545: }
6547: /* free workspace */
6548: PetscBTDestroy(&qr_needed_idx);
6549: PetscBTDestroy(&change_basis);
6550: if (!pcbddc->adaptive_selection) {
6551: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6552: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6553: } else {
6554: PetscFree5(pcbddc->adaptive_constraints_n,
6555: pcbddc->adaptive_constraints_idxs_ptr,
6556: pcbddc->adaptive_constraints_data_ptr,
6557: pcbddc->adaptive_constraints_idxs,
6558: pcbddc->adaptive_constraints_data);
6559: PetscFree(constraints_n);
6560: PetscFree(constraints_idxs_B);
6561: }
6562: return(0);
6563: }
6565: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6566: {
6567: ISLocalToGlobalMapping map;
6568: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
6569: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
6570: PetscInt i,N;
6571: PetscBool rcsr = PETSC_FALSE;
6572: PetscErrorCode ierr;
6575: if (pcbddc->recompute_topography) {
6576: pcbddc->graphanalyzed = PETSC_FALSE;
6577: /* Reset previously computed graph */
6578: PCBDDCGraphReset(pcbddc->mat_graph);
6579: /* Init local Graph struct */
6580: MatGetSize(pc->pmat,&N,NULL);
6581: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6582: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
6584: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6585: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6586: }
6587: /* Check validity of the csr graph passed in by the user */
6588: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
6590: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6591: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6592: PetscInt *xadj,*adjncy;
6593: PetscInt nvtxs;
6594: PetscBool flg_row=PETSC_FALSE;
6596: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6597: if (flg_row) {
6598: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6599: pcbddc->computed_rowadj = PETSC_TRUE;
6600: }
6601: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6602: rcsr = PETSC_TRUE;
6603: }
6604: if (pcbddc->dbg_flag) {
6605: PetscViewerFlush(pcbddc->dbg_viewer);
6606: }
6608: /* Setup of Graph */
6609: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6610: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
6612: /* attach info on disconnected subdomains if present */
6613: if (pcbddc->n_local_subs) {
6614: PetscInt *local_subs;
6616: PetscMalloc1(N,&local_subs);
6617: for (i=0;i<pcbddc->n_local_subs;i++) {
6618: const PetscInt *idxs;
6619: PetscInt nl,j;
6621: ISGetLocalSize(pcbddc->local_subs[i],&nl);
6622: ISGetIndices(pcbddc->local_subs[i],&idxs);
6623: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6624: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6625: }
6626: pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6627: pcbddc->mat_graph->local_subs = local_subs;
6628: }
6629: }
6631: if (!pcbddc->graphanalyzed) {
6632: /* Graph's connected components analysis */
6633: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6634: pcbddc->graphanalyzed = PETSC_TRUE;
6635: }
6636: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6637: return(0);
6638: }
6640: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
6641: {
6642: PetscInt i,j;
6643: PetscScalar *alphas;
6647: PetscMalloc1(n,&alphas);
6648: for (i=0;i<n;i++) {
6649: VecNormalize(vecs[i],NULL);
6650: VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);
6651: for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]);
6652: VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);
6653: }
6654: PetscFree(alphas);
6655: return(0);
6656: }
/*
   PCBDDCMatISGetSubassemblingPattern - Computes, for each process of a MATIS
   matrix, the rank its local matrix should be sent to when subassembling.

   Input Parameters:
+  mat          - the matrix; an error is raised if it is not of type MATIS
.  n_subdomains - requested number of target subdomains (must be positive)
-  redprocs     - when 0 < redprocs < (number of active processes), the
                  subdomain connectivity graph is stored in an AIJ matrix whose
                  rows are owned by the first redprocs ranks before partitioning

   Output Parameters:
+  n_subdomains - the value actually used (it is clamped in several places below)
.  is_sends     - IS on the communicator of mat; it has one entry (the
                  destination rank) on processes with a non-empty local matrix
                  and zero entries otherwise
-  have_void    - optional; set to PETSC_TRUE when at least one process has an
                  empty local matrix

   Options read:
+  -matis_partitioning_use_vwgt  - use the local matrix size as vertex weight
-  -matis_partitioning_threshold - see the graph construction below (minimum 2)
*/
6658: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
6659: {
6660: Mat A;
6661: PetscInt n_neighs,*neighs,*n_shared,**shared;
6662: PetscMPIInt size,rank,color;
6663: PetscInt *xadj,*adjncy;
6664: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
6665: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
6666: PetscInt void_procs,*procs_candidates = NULL;
6667: PetscInt xadj_count,*count;
6668: PetscBool ismatis,use_vwgt=PETSC_FALSE;
6669: PetscSubcomm psubcomm;
6670: MPI_Comm subcomm;
6675: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6676: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6679: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains);
6681: if (have_void) *have_void = PETSC_FALSE;
6682: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
6683: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
6684: MatISGetLocalMat(mat,&A);
6685: MatGetLocalSize(A,&n,NULL);
/* a process is "active" when its local matrix has at least one row */
6686: im_active = !!n;
6687: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
6688: void_procs = size - active_procs;
6689: /* get ranks of non-active processes in mat communicator */
6690: if (void_procs) {
6691: PetscInt ncand;
6693: if (have_void) *have_void = PETSC_TRUE;
6694: PetscMalloc1(size,&procs_candidates);
6695: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
/* compact in place: the first ncand entries become the ranks of the non-active processes;
   the entries from ncand on still hold gathered activity flags */
6696: for (i=0,ncand=0;i<size;i++) {
6697: if (!procs_candidates[i]) {
6698: procs_candidates[ncand++] = i;
6699: }
6700: }
6701: /* force n_subdomains to be not greater than the number of non-active processes */
6702: *n_subdomains = PetscMin(void_procs,*n_subdomains);
6703: }
6705: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
6706: number of subdomains requested 1 -> send to master or first candidate in voids */
6707: MatGetSize(mat,&N,NULL);
6708: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
6709: PetscInt issize,isidx,dest;
6710: if (*n_subdomains == 1) dest = 0;
6711: else dest = rank;
6712: if (im_active) {
6713: issize = 1;
6714: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
/* NOTE(review): dest may be the calling rank here, while only the first void_procs
   entries of procs_candidates hold ranks - confirm dest is always within that range */
6715: isidx = procs_candidates[dest];
6716: } else {
6717: isidx = dest;
6718: }
6719: } else {
6720: issize = 0;
6721: isidx = -1;
6722: }
6723: if (*n_subdomains != 1) *n_subdomains = active_procs;
6724: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
6725: PetscFree(procs_candidates);
6726: return(0);
6727: }
6728: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
6729: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
6730: threshold = PetscMax(threshold,2);
6732: /* Get info on mapping */
6733: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
6735: /* build local CSR graph of subdomains' connectivity */
/* the graph has a single local row; entry 0 of the neighbor list is skipped throughout
   (presumably it is the calling process itself - confirm against ISLocalToGlobalMappingGetInfo) */
6736: PetscMalloc1(2,&xadj);
6737: xadj[0] = 0;
6738: xadj[1] = PetscMax(n_neighs-1,0);
6739: PetscMalloc1(xadj[1],&adjncy);
6740: PetscMalloc1(xadj[1],&adjncy_wgt);
/* count[k] = number of neighbors (entry 0 excluded) that share local index k */
6741: PetscCalloc1(n,&count);
6742: for (i=1;i<n_neighs;i++)
6743: for (j=0;j<n_shared[i];j++)
6744: count[shared[i][j]] += 1;
6746: xadj_count = 0;
/* keep neighbor i as an edge if at least one index shared with it is shared with fewer
   than threshold neighbors; the edge weight is the number of indices shared with i */
6747: for (i=1;i<n_neighs;i++) {
6748: for (j=0;j<n_shared[i];j++) {
6749: if (count[shared[i][j]] < threshold) {
6750: adjncy[xadj_count] = neighs[i];
6751: adjncy_wgt[xadj_count] = n_shared[i];
6752: xadj_count++;
6753: break;
6754: }
6755: }
6756: }
6757: xadj[1] = xadj_count;
6758: PetscFree(count);
6759: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
6760: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
/* single-entry buffer holding the destination rank; ownership is passed to is_sends at the end */
6762: PetscMalloc1(1,&ranks_send_to_idx);
6764: /* Restrict work on active processes only */
6765: PetscMPIIntCast(im_active,&color);
6766: if (void_procs) {
6767: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
6768: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
6769: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
6770: subcomm = PetscSubcommChild(psubcomm);
6771: } else {
6772: psubcomm = NULL;
6773: subcomm = PetscObjectComm((PetscObject)mat);
6774: }
6776: v_wgt = NULL;
6777: if (!color) {
/* non-active process: it takes no part in the partitioning, just release the graph */
6778: PetscFree(xadj);
6779: PetscFree(adjncy);
6780: PetscFree(adjncy_wgt);
6781: } else {
6782: Mat subdomain_adj;
6783: IS new_ranks,new_ranks_contig;
6784: MatPartitioning partitioner;
6785: PetscInt rstart=0,rend=0;
6786: PetscInt *is_indices,*oldranks;
/* NOTE: this size shadows the outer one; inside this branch it is the size of subcomm */
6787: PetscMPIInt size;
6788: PetscBool aggregate;
6790: MPI_Comm_size(subcomm,&size);
6791: if (void_procs) {
/* oldranks[r] = rank in the communicator of mat of the process with rank r in subcomm;
   the adjacency entries are translated to subcomm ranks by searching in oldranks */
6792: PetscInt prank = rank;
6793: PetscMalloc1(size,&oldranks);
6794: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
6795: for (i=0;i<xadj[1];i++) {
6796: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
6797: }
6798: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
6799: } else {
6800: oldranks = NULL;
6801: }
6802: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
6803: if (aggregate) { /* TODO: all this part could be made more efficient */
6804: PetscInt lrows,row,ncols,*cols;
6805: PetscMPIInt nrank;
6806: PetscScalar *vals;
6808: MPI_Comm_rank(subcomm,&nrank);
/* the size rows of the adjacency matrix are split among the first redprocs ranks of subcomm */
6809: lrows = 0;
6810: if (nrank<redprocs) {
6811: lrows = size/redprocs;
6812: if (nrank<size%redprocs) lrows++;
6813: }
6814: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
6815: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
6816: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
6817: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
/* every process inserts its own row (row index = its rank in subcomm), with the edge weights as values */
6818: row = nrank;
6819: ncols = xadj[1]-xadj[0];
6820: cols = adjncy;
6821: PetscMalloc1(ncols,&vals);
6822: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
6823: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
6824: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
6825: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
6826: PetscFree(xadj);
6827: PetscFree(adjncy);
6828: PetscFree(adjncy_wgt);
6829: PetscFree(vals);
6830: if (use_vwgt) {
/* vertex weights (local matrix sizes) are moved to the owners of the rows through a vector */
6831: Vec v;
6832: const PetscScalar *array;
6833: PetscInt nl;
6835: MatCreateVecs(subdomain_adj,&v,NULL);
6836: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
6837: VecAssemblyBegin(v);
6838: VecAssemblyEnd(v);
6839: VecGetLocalSize(v,&nl);
6840: VecGetArrayRead(v,&array);
6841: PetscMalloc1(nl,&v_wgt);
6842: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
6843: VecRestoreArrayRead(v,&array);
6844: VecDestroy(&v);
6845: }
6846: } else {
/* one graph row per process; xadj, adjncy and adjncy_wgt are not freed in this branch
   (presumably the MPIAdj matrix takes ownership of them - confirm against MatCreateMPIAdj) */
6847: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
6848: if (use_vwgt) {
6849: PetscMalloc1(1,&v_wgt);
6850: v_wgt[0] = n;
6851: }
6852: }
6853: /* MatView(subdomain_adj,0); */
6855: /* Partition */
6856: MatPartitioningCreate(subcomm,&partitioner);
6857: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
6858: if (v_wgt) {
/* v_wgt is not freed in this function (presumably the partitioner takes ownership - confirm) */
6859: MatPartitioningSetVertexWeights(partitioner,v_wgt);
6860: }
6861: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
6862: MatPartitioningSetNParts(partitioner,*n_subdomains);
6863: MatPartitioningSetFromOptions(partitioner);
6864: MatPartitioningApply(partitioner,&new_ranks);
6865: /* MatPartitioningView(partitioner,0); */
6867: /* renumber new_ranks to avoid "holes" in new set of processors */
6868: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
6869: ISDestroy(&new_ranks);
6870: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6871: if (!aggregate) {
/* one graph row per process: the first (only) local entry is the destination of this process */
6872: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6873: #if defined(PETSC_USE_DEBUG)
6874: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6875: #endif
6876: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
6877: } else if (oldranks) {
6878: ranks_send_to_idx[0] = oldranks[is_indices[0]];
6879: } else {
6880: ranks_send_to_idx[0] = is_indices[0];
6881: }
6882: } else {
6883: PetscInt idxs[1];
6884: PetscMPIInt tag;
6885: MPI_Request *reqs;
/* aggregated graph: the owner of row i sends the partition index of that row to rank i
   of subcomm, so that every process receives exactly one value, its own destination */
6887: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
6888: PetscMalloc1(rend-rstart,&reqs);
6889: for (i=rstart;i<rend;i++) {
6890: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
6891: }
6892: MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
6893: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
6894: PetscFree(reqs);
6895: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6896: #if defined(PETSC_USE_DEBUG)
6897: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6898: #endif
6899: ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]];
6900: } else if (oldranks) {
6901: ranks_send_to_idx[0] = oldranks[idxs[0]];
6902: } else {
6903: ranks_send_to_idx[0] = idxs[0];
6904: }
6905: }
6906: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6907: /* clean up */
6908: PetscFree(oldranks);
6909: ISDestroy(&new_ranks_contig);
6910: MatDestroy(&subdomain_adj);
6911: MatPartitioningDestroy(&partitioner);
6912: }
6913: PetscSubcommDestroy(&psubcomm);
6914: PetscFree(procs_candidates);
6916: /* assemble parallel IS for sends */
/* active processes contribute one entry, the others none; the IS owns ranks_send_to_idx */
6917: i = 1;
6918: if (!color) i=0;
6919: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
6920: return(0);
6921: }
/*
   MatTypePrivate - integer tag identifying the type of a local matrix.

   PCBDDCMatISSubassemble() stores it as the first entry of the index buffer
   exchanged between processes, and the receiver uses it to select the type
   of the new local matrix. The numeric values travel through MPI messages,
   so the enumerators must keep their order.
*/
typedef enum {
  MATDENSE_PRIVATE = 0, /* dense local matrix */
  MATAIJ_PRIVATE,       /* AIJ local matrix */
  MATBAIJ_PRIVATE,      /* BAIJ local matrix */
  MATSBAIJ_PRIVATE      /* SBAIJ local matrix */
} MatTypePrivate;
/*
   PCBDDCMatISSubassemble - Builds a new MATIS matrix by sending the local
   matrices of mat to the processes listed in is_sends and summing, on each
   receiver, all the local matrices it gets into a single local matrix.

   Input Parameters:
+  mat           - the MATIS matrix; its local matrix must be square and of type
                   SEQDENSE (an error is raised otherwise)
.  is_sends      - IS with the destination rank(s) of this process, or NULL; when
                   NULL it is computed by PCBDDCMatISGetSubassemblingPattern()
.  n_subdomains  - number of target subdomains, used (and required to be nonzero)
                   only when is_sends is NULL
.  restrict_comm - if true the new matrix is created on a subcommunicator
.  restrict_full - with restrict_comm: if true every process that receives nothing
                   is left out of the subcommunicator, otherwise only the processes
                   that send but do not receive are left out
.  reuse         - if true, *mat_n (when not NULL) is reused after checking its sizes
.  nis           - number of entries of isarray
-  nvecs         - number of entries of nnsp_vec (at most 1 is supported)

   Input/Output Parameters:
+  mat_n    - the subassembled matrix; set to NULL on processes left out of the
              subcommunicator
.  isarray  - each IS is destroyed and replaced by an IS holding the sorted union
              of the indices received for it
-  nnsp_vec - nnsp_vec[0] is destroyed and replaced by a vector of local size equal
              to the number of new local rows, holding the sum of the received values
*/
6925: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
6926: {
6927: Mat local_mat;
6928: IS is_sends_internal;
6929: PetscInt rows,cols,new_local_rows;
6930: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
6931: PetscBool ismatis,isdense,newisdense,destroy_mat;
6932: ISLocalToGlobalMapping l2gmap;
6933: PetscInt* l2gmap_indices;
6934: const PetscInt* is_indices;
6935: MatType new_local_type;
6936: /* buffers */
6937: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
6938: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
6939: PetscInt *recv_buffer_idxs_local;
6940: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
6941: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
6942: /* MPI */
6943: MPI_Comm comm,comm_n;
6944: PetscSubcomm subcomm;
6945: PetscMPIInt n_sends,n_recvs,commsize;
6946: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
6947: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
6948: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
6949: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
6950: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
6951: PetscErrorCode ierr;
6955: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6956: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6963: if (nvecs) {
6964: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
6966: }
6967: /* further checks */
6968: MatISGetLocalMat(mat,&local_mat);
6969: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
6970: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
6971: MatGetSize(local_mat,&rows,&cols);
6972: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
6973: if (reuse && *mat_n) {
/* a reused matrix must be a MATIS with the same global sizes as mat */
6974: PetscInt mrows,mcols,mnrows,mncols;
6976: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
6977: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
6978: MatGetSize(mat,&mrows,&mcols);
6979: MatGetSize(*mat_n,&mnrows,&mncols);
6980: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
6981: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
6982: }
6983: MatGetBlockSize(local_mat,&bs);
6986: /* prepare IS for sending if not provided */
6987: if (!is_sends) {
6988: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
6989: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
6990: } else {
/* take a reference so that is_sends_internal can be destroyed unconditionally later */
6991: PetscObjectReference((PetscObject)is_sends);
6992: is_sends_internal = is_sends;
6993: }
6995: /* get comm */
6996: PetscObjectGetComm((PetscObject)mat,&comm);
6998: /* compute number of sends */
6999: ISGetLocalSize(is_sends_internal,&i);
7000: PetscMPIIntCast(i,&n_sends);
7002: /* compute number of receives */
7003: MPI_Comm_size(comm,&commsize);
7004: PetscMalloc1(commsize,&iflags);
7005: PetscMemzero(iflags,commsize*sizeof(*iflags));
7006: ISGetIndices(is_sends_internal,&is_indices);
/* iflags[r] = 1 for every rank r this process sends to */
7007: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7008: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7009: PetscFree(iflags);
7011: /* restrict comm if requested */
7012: subcomm = 0;
7013: destroy_mat = PETSC_FALSE;
7014: if (restrict_comm) {
7015: PetscMPIInt color,subcommsize;
/* color == 1 marks the processes that will not belong to the new communicator */
7017: color = 0;
7018: if (restrict_full) {
7019: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7020: } else {
7021: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7022: }
7023: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7024: subcommsize = commsize - subcommsize;
7025: /* check if reuse has been requested */
7026: if (reuse) {
7027: if (*mat_n) {
7028: PetscMPIInt subcommsize2;
7029: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7030: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7031: comm_n = PetscObjectComm((PetscObject)*mat_n);
7032: } else {
7033: comm_n = PETSC_COMM_SELF;
7034: }
7035: } else { /* MAT_INITIAL_MATRIX */
7036: PetscMPIInt rank;
7038: MPI_Comm_rank(comm,&rank);
7039: PetscSubcommCreate(comm,&subcomm);
7040: PetscSubcommSetNumber(subcomm,2);
7041: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7042: comm_n = PetscSubcommChild(subcomm);
7043: }
7044: /* flag to destroy *mat_n if not significant */
7045: if (color) destroy_mat = PETSC_TRUE;
7046: } else {
7047: comm_n = comm;
7048: }
7050: /* prepare send/receive buffers */
/* ilengths_*[r] = length of the message this process sends to rank r (0 if none) */
7051: PetscMalloc1(commsize,&ilengths_idxs);
7052: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7053: PetscMalloc1(commsize,&ilengths_vals);
7054: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7055: if (nis) {
7056: PetscCalloc1(commsize,&ilengths_idxs_is);
7057: }
7059: /* Get data from local matrices */
/* NOTE(review): this check looks unreachable, non-dense local matrices have already been rejected above */
7060: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7061: /* TODO: See below some guidelines on how to prepare the local buffers */
7062: /*
7063: send_buffer_vals should contain the raw values of the local matrix
7064: send_buffer_idxs should contain:
7065: - MatType_PRIVATE type
7066: - PetscInt size_of_l2gmap
7067: - PetscInt global_row_indices[size_of_l2gmap]
7068: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7069: */
7070: else {
/* dense case: the values are sent straight from the array of the local matrix, which is
   restored only after the sends have completed; the index message is
   [type, l2g size, global indices] */
7071: MatDenseGetArray(local_mat,&send_buffer_vals);
7072: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7073: PetscMalloc1(i+2,&send_buffer_idxs);
7074: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7075: send_buffer_idxs[1] = i;
7076: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7077: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7078: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7079: PetscMPIIntCast(i,&len);
7080: for (i=0;i<n_sends;i++) {
7081: ilengths_vals[is_indices[i]] = len*len;
7082: ilengths_idxs[is_indices[i]] = len+2;
7083: }
7084: }
7085: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7086: /* additional is (if any) */
7087: if (nis) {
7088: PetscMPIInt psum;
7089: PetscInt j;
/* all the ISs are packed in one message: for each IS its local size followed by its indices */
7090: for (j=0,psum=0;j<nis;j++) {
7091: PetscInt plen;
7092: ISGetLocalSize(isarray[j],&plen);
7093: PetscMPIIntCast(plen,&len);
7094: psum += len+1; /* indices + length */
7095: }
7096: PetscMalloc1(psum,&send_buffer_idxs_is);
7097: for (j=0,psum=0;j<nis;j++) {
7098: PetscInt plen;
7099: const PetscInt *is_array_idxs;
7100: ISGetLocalSize(isarray[j],&plen);
7101: send_buffer_idxs_is[psum] = plen;
7102: ISGetIndices(isarray[j],&is_array_idxs);
7103: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7104: ISRestoreIndices(isarray[j],&is_array_idxs);
7105: psum += plen+1; /* indices + length */
7106: }
7107: for (i=0;i<n_sends;i++) {
7108: ilengths_idxs_is[is_indices[i]] = psum;
7109: }
7110: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7111: }
7112: MatISRestoreLocalMat(mat,&local_mat);
/* size the receive buffers as the sum of the incoming message lengths */
7114: buf_size_idxs = 0;
7115: buf_size_vals = 0;
7116: buf_size_idxs_is = 0;
7117: buf_size_vecs = 0;
7118: for (i=0;i<n_recvs;i++) {
7119: buf_size_idxs += (PetscInt)olengths_idxs[i];
7120: buf_size_vals += (PetscInt)olengths_vals[i];
7121: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
/* vector messages have length olengths_idxs[i]-2 (see the receives below), so this
   allocates two entries more per message than needed */
7122: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7123: }
7124: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7125: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7126: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7127: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7129: /* get new tags for clean communications */
7130: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7131: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7132: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7133: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7135: /* allocate for requests */
7136: PetscMalloc1(n_sends,&send_req_idxs);
7137: PetscMalloc1(n_sends,&send_req_vals);
7138: PetscMalloc1(n_sends,&send_req_idxs_is);
7139: PetscMalloc1(n_sends,&send_req_vecs);
7140: PetscMalloc1(n_recvs,&recv_req_idxs);
7141: PetscMalloc1(n_recvs,&recv_req_vals);
7142: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7143: PetscMalloc1(n_recvs,&recv_req_vecs);
7145: /* communications */
/* post all the receives first, messages are stored back to back in the receive buffers */
7146: ptr_idxs = recv_buffer_idxs;
7147: ptr_vals = recv_buffer_vals;
7148: ptr_idxs_is = recv_buffer_idxs_is;
7149: ptr_vecs = recv_buffer_vecs;
7150: for (i=0;i<n_recvs;i++) {
7151: source_dest = onodes[i];
7152: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7153: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7154: ptr_idxs += olengths_idxs[i];
7155: ptr_vals += olengths_vals[i];
7156: if (nis) {
7157: source_dest = onodes_is[i];
7158: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7159: ptr_idxs_is += olengths_idxs_is[i];
7160: }
7161: if (nvecs) {
/* one vector value per l2g index: index message length minus the two header entries */
7162: source_dest = onodes[i];
7163: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7164: ptr_vecs += olengths_idxs[i]-2;
7165: }
7166: }
7167: for (i=0;i<n_sends;i++) {
7168: PetscMPIIntCast(is_indices[i],&source_dest);
7169: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7170: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7171: if (nis) {
7172: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7173: }
7174: if (nvecs) {
/* NOTE(review): VecGetArray is called once per send while VecRestoreArray is called once
   after the waits below; with n_sends == 0 the restore has no matching get - confirm intended */
7175: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7176: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7177: }
7178: }
7179: ISRestoreIndices(is_sends_internal,&is_indices);
7180: ISDestroy(&is_sends_internal);
7182: /* assemble new l2g map */
/* first pass counts the received global indices, second pass concatenates them;
   sorting and removing duplicates gives the new local numbering */
7183: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7184: ptr_idxs = recv_buffer_idxs;
7185: new_local_rows = 0;
7186: for (i=0;i<n_recvs;i++) {
7187: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7188: ptr_idxs += olengths_idxs[i];
7189: }
7190: PetscMalloc1(new_local_rows,&l2gmap_indices);
7191: ptr_idxs = recv_buffer_idxs;
7192: new_local_rows = 0;
7193: for (i=0;i<n_recvs;i++) {
7194: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7195: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7196: ptr_idxs += olengths_idxs[i];
7197: }
7198: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7199: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7200: PetscFree(l2gmap_indices);
7202: /* infer new local matrix type from received local matrices type */
7203: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7204: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7205: if (n_recvs) {
/* start from the type of the matrix this process sends and fall back to AIJ as soon as a
   received matrix has a different type */
7206: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7207: ptr_idxs = recv_buffer_idxs;
7208: for (i=0;i<n_recvs;i++) {
7209: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7210: new_local_type_private = MATAIJ_PRIVATE;
7211: break;
7212: }
7213: ptr_idxs += olengths_idxs[i];
7214: }
7215: switch (new_local_type_private) {
7216: case MATDENSE_PRIVATE:
7217: new_local_type = MATSEQAIJ;
7218: bs = 1;
7219: break;
7220: case MATAIJ_PRIVATE:
7221: new_local_type = MATSEQAIJ;
7222: bs = 1;
7223: break;
7224: case MATBAIJ_PRIVATE:
7225: new_local_type = MATSEQBAIJ;
7226: break;
7227: case MATSBAIJ_PRIVATE:
7228: new_local_type = MATSEQSBAIJ;
7229: break;
7230: default:
7231: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7232: break;
7233: }
7234: } else { /* by default, new_local_type is seqaij */
7235: new_local_type = MATSEQAIJ;
7236: bs = 1;
7237: }
7239: /* create MATIS object if needed */
7240: if (!reuse) {
7241: MatGetSize(mat,&rows,&cols);
7242: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7243: } else {
7244: /* it also destroys the local matrices */
7245: if (*mat_n) {
7246: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7247: } else { /* this is a fake object */
/* rows and cols still hold the sizes of the local matrix of mat here */
7248: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7249: }
7250: }
7251: MatISGetLocalMat(*mat_n,&local_mat);
7252: MatSetType(local_mat,new_local_type);
7254: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7256: /* Global to local map of received indices */
/* the whole buffer is mapped, headers included; the headers are put back right below */
7257: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7258: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7259: ISLocalToGlobalMappingDestroy(&l2gmap);
7261: /* restore attributes -> type of incoming data and its size */
7262: buf_size_idxs = 0;
7263: for (i=0;i<n_recvs;i++) {
7264: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7265: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7266: buf_size_idxs += (PetscInt)olengths_idxs[i];
7267: }
7268: PetscFree(recv_buffer_idxs);
7270: /* set preallocation */
7271: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7272: if (!newisdense) {
7273: PetscInt *new_local_nnz=0;
7275: ptr_idxs = recv_buffer_idxs_local;
7276: if (n_recvs) {
7277: PetscCalloc1(new_local_rows,&new_local_nnz);
7278: }
7279: for (i=0;i<n_recvs;i++) {
7280: PetscInt j;
7281: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
/* a received dense matrix of size m adds m entries to each of the rows it touches */
7282: for (j=0;j<*(ptr_idxs+1);j++) {
7283: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7284: }
7285: } else {
7286: /* TODO */
7287: }
7288: ptr_idxs += olengths_idxs[i];
7289: }
7290: if (new_local_nnz) {
/* the row counts are capped at the number of local rows; all three preallocation routines
   are called in sequence (presumably only the one matching the type of local_mat has an
   effect - confirm) */
7291: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7292: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7293: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7294: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7295: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7296: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7297: } else {
7298: MatSetUp(local_mat);
7299: }
7300: PetscFree(new_local_nnz);
7301: } else {
7302: MatSetUp(local_mat);
7303: }
7305: /* set values */
7306: ptr_vals = recv_buffer_vals;
7307: ptr_idxs = recv_buffer_idxs_local;
7308: for (i=0;i<n_recvs;i++) {
7309: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
/* row orientation is switched off while adding the received values and switched back on afterwards */
7310: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7311: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7312: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7313: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7314: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7315: } else {
7316: /* TODO */
7317: }
7318: ptr_idxs += olengths_idxs[i];
7319: ptr_vals += olengths_vals[i];
7320: }
7321: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7322: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7323: MatISRestoreLocalMat(*mat_n,&local_mat);
7324: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7325: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7326: PetscFree(recv_buffer_vals);
7328: #if 0
7329: if (!restrict_comm) { /* check */
7330: Vec lvec,rvec;
7331: PetscReal infty_error;
7333: MatCreateVecs(mat,&rvec,&lvec);
7334: VecSetRandom(rvec,NULL);
7335: MatMult(mat,rvec,lvec);
7336: VecScale(lvec,-1.0);
7337: MatMultAdd(*mat_n,rvec,lvec,lvec);
7338: VecNorm(lvec,NORM_INFINITY,&infty_error);
7339: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7340: VecDestroy(&rvec);
7341: VecDestroy(&lvec);
7342: }
7343: #endif
7345: /* assemble new additional is (if any) */
7346: if (nis) {
7347: PetscInt **temp_idxs,*count_is,j,psum;
7349: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7350: PetscCalloc1(nis,&count_is);
7351: ptr_idxs = recv_buffer_idxs_is;
7352: psum = 0;
/* first pass: total number of indices received for each IS */
7353: for (i=0;i<n_recvs;i++) {
7354: for (j=0;j<nis;j++) {
7355: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7356: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7357: psum += plen;
7358: ptr_idxs += plen+1; /* shift pointer to received data */
7359: }
7360: }
/* one contiguous allocation, temp_idxs[j] points to the slice reserved for the j-th IS */
7361: PetscMalloc1(nis,&temp_idxs);
7362: PetscMalloc1(psum,&temp_idxs[0]);
7363: for (i=1;i<nis;i++) {
7364: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7365: }
7366: PetscMemzero(count_is,nis*sizeof(PetscInt));
7367: ptr_idxs = recv_buffer_idxs_is;
/* second pass: copy the received indices into the slice of the IS they belong to */
7368: for (i=0;i<n_recvs;i++) {
7369: for (j=0;j<nis;j++) {
7370: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7371: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7372: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7373: ptr_idxs += plen+1; /* shift pointer to received data */
7374: }
7375: }
/* replace each input IS with the sorted, duplicate-free union of what has been received */
7376: for (i=0;i<nis;i++) {
7377: ISDestroy(&isarray[i]);
7378: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7379: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7380: }
7381: PetscFree(count_is);
7382: PetscFree(temp_idxs[0]);
7383: PetscFree(temp_idxs);
7384: }
7385: /* free workspace */
/* each send buffer is released only after the corresponding sends have completed */
7386: PetscFree(recv_buffer_idxs_is);
7387: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7388: PetscFree(send_buffer_idxs);
7389: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7390: if (isdense) {
7391: MatISGetLocalMat(mat,&local_mat);
7392: MatDenseRestoreArray(local_mat,&send_buffer_vals);
7393: MatISRestoreLocalMat(mat,&local_mat);
7394: } else {
7395: /* PetscFree(send_buffer_vals); */
7396: }
7397: if (nis) {
7398: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7399: PetscFree(send_buffer_idxs_is);
7400: }
7402: if (nvecs) {
7403: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7404: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7405: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
/* the input vector is replaced by a new one with one local entry per new local row */
7406: VecDestroy(&nnsp_vec[0]);
7407: VecCreate(comm_n,&nnsp_vec[0]);
7408: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7409: VecSetType(nnsp_vec[0],VECSTANDARD);
7410: /* set values */
/* send_buffer_vecs is reused here as the array of the new vector; the received values are
   accumulated at the local positions of their indices (this relies on the new vector
   starting from zero - TODO confirm) */
7411: ptr_vals = recv_buffer_vecs;
7412: ptr_idxs = recv_buffer_idxs_local;
7413: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7414: for (i=0;i<n_recvs;i++) {
7415: PetscInt j;
7416: for (j=0;j<*(ptr_idxs+1);j++) {
7417: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7418: }
7419: ptr_idxs += olengths_idxs[i];
7420: ptr_vals += olengths_idxs[i]-2;
7421: }
7422: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7423: VecAssemblyBegin(nnsp_vec[0]);
7424: VecAssemblyEnd(nnsp_vec[0]);
7425: }
7427: PetscFree(recv_buffer_vecs);
7428: PetscFree(recv_buffer_idxs_local);
7429: PetscFree(recv_req_idxs);
7430: PetscFree(recv_req_vals);
7431: PetscFree(recv_req_vecs);
7432: PetscFree(recv_req_idxs_is);
7433: PetscFree(send_req_idxs);
7434: PetscFree(send_req_vals);
7435: PetscFree(send_req_vecs);
7436: PetscFree(send_req_idxs_is);
7437: PetscFree(ilengths_vals);
7438: PetscFree(ilengths_idxs);
7439: PetscFree(olengths_vals);
7440: PetscFree(olengths_idxs);
7441: PetscFree(onodes);
7442: if (nis) {
7443: PetscFree(ilengths_idxs_is);
7444: PetscFree(olengths_idxs_is);
7445: PetscFree(onodes_is);
7446: }
7447: PetscSubcommDestroy(&subcomm);
7448: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7449: MatDestroy(mat_n);
7450: for (i=0;i<nis;i++) {
7451: ISDestroy(&isarray[i]);
7452: }
7453: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7454: VecDestroy(&nnsp_vec[0]);
7455: }
7456: *mat_n = NULL;
7457: }
7458: return(0);
7459: }
7461: /* temporary hack into ksp private data structure */
7462: #include <petsc/private/kspimpl.h>
7464: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7465: {
7466: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7467: PC_IS *pcis = (PC_IS*)pc->data;
7468: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
7469: Mat coarsedivudotp = NULL;
7470: Mat coarseG,t_coarse_mat_is;
7471: MatNullSpace CoarseNullSpace = NULL;
7472: ISLocalToGlobalMapping coarse_islg;
7473: IS coarse_is,*isarray;
7474: PetscInt i,im_active=-1,active_procs=-1;
7475: PetscInt nis,nisdofs,nisneu,nisvert;
7476: PC pc_temp;
7477: PCType coarse_pc_type;
7478: KSPType coarse_ksp_type;
7479: PetscBool multilevel_requested,multilevel_allowed;
7480: PetscBool isredundant,isbddc,isnn,coarse_reuse;
7481: PetscInt ncoarse,nedcfield;
7482: PetscBool compute_vecs = PETSC_FALSE;
7483: PetscScalar *array;
7484: MatReuse coarse_mat_reuse;
7485: PetscBool restr, full_restr, have_void;
7486: PetscMPIInt commsize;
7487: PetscErrorCode ierr;
7490: /* Assign global numbering to coarse dofs */
7491: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7492: PetscInt ocoarse_size;
7493: compute_vecs = PETSC_TRUE;
7495: pcbddc->new_primal_space = PETSC_TRUE;
7496: ocoarse_size = pcbddc->coarse_size;
7497: PetscFree(pcbddc->global_primal_indices);
7498: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7499: /* see if we can avoid some work */
7500: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7501: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7502: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7503: KSPReset(pcbddc->coarse_ksp);
7504: coarse_reuse = PETSC_FALSE;
7505: } else { /* we can safely reuse already computed coarse matrix */
7506: coarse_reuse = PETSC_TRUE;
7507: }
7508: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7509: coarse_reuse = PETSC_FALSE;
7510: }
7511: /* reset any subassembling information */
7512: if (!coarse_reuse || pcbddc->recompute_topography) {
7513: ISDestroy(&pcbddc->coarse_subassembling);
7514: }
7515: } else { /* primal space is unchanged, so we can reuse coarse matrix */
7516: coarse_reuse = PETSC_TRUE;
7517: }
7518: /* assemble coarse matrix */
7519: if (coarse_reuse && pcbddc->coarse_ksp) {
7520: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7521: PetscObjectReference((PetscObject)coarse_mat);
7522: coarse_mat_reuse = MAT_REUSE_MATRIX;
7523: } else {
7524: coarse_mat = NULL;
7525: coarse_mat_reuse = MAT_INITIAL_MATRIX;
7526: }
7528: /* creates temporary l2gmap and IS for coarse indexes */
7529: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7530: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
7532: /* creates temporary MATIS object for coarse matrix */
7533: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7534: MatDenseGetArray(coarse_submat_dense,&array);
7535: PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7536: MatDenseRestoreArray(coarse_submat_dense,&array);
7537: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7538: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7539: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7540: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7541: MatDestroy(&coarse_submat_dense);
7543: /* count "active" (i.e. with positive local size) and "void" processes */
7544: im_active = !!(pcis->n);
7545: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7547: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7548: /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */
7549: /* full_restr : just use the receivers from the subassembling pattern */
7550: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7551: coarse_mat_is = NULL;
7552: multilevel_allowed = PETSC_FALSE;
7553: multilevel_requested = PETSC_FALSE;
7554: pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7555: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7556: if (multilevel_requested) {
7557: ncoarse = active_procs/pcbddc->coarsening_ratio;
7558: restr = PETSC_FALSE;
7559: full_restr = PETSC_FALSE;
7560: } else {
7561: ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7562: restr = PETSC_TRUE;
7563: full_restr = PETSC_TRUE;
7564: }
7565: if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7566: ncoarse = PetscMax(1,ncoarse);
7567: if (!pcbddc->coarse_subassembling) {
7568: if (pcbddc->coarsening_ratio > 1) {
7569: if (multilevel_requested) {
7570: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7571: } else {
7572: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7573: }
7574: } else {
7575: PetscMPIInt rank;
7576: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7577: have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7578: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7579: }
7580: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7581: PetscInt psum;
7582: if (pcbddc->coarse_ksp) psum = 1;
7583: else psum = 0;
7584: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7585: if (ncoarse < commsize) have_void = PETSC_TRUE;
7586: }
7587: /* determine if we can go multilevel */
7588: if (multilevel_requested) {
7589: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7590: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7591: }
7592: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7594: /* dump subassembling pattern */
7595: if (pcbddc->dbg_flag && multilevel_allowed) {
7596: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7597: }
7599: /* compute dofs splitting and neumann boundaries for coarse dofs */
7600: nedcfield = -1;
7601: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7602: PetscInt *tidxs,*tidxs2,nout,tsize,i;
7603: const PetscInt *idxs;
7604: ISLocalToGlobalMapping tmap;
7606: /* create map between primal indices (in local representative ordering) and local primal numbering */
7607: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7608: /* allocate space for temporary storage */
7609: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7610: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7611: /* allocate for IS array */
7612: nisdofs = pcbddc->n_ISForDofsLocal;
7613: if (pcbddc->nedclocal) {
7614: if (pcbddc->nedfield > -1) {
7615: nedcfield = pcbddc->nedfield;
7616: } else {
7617: nedcfield = 0;
7618: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7619: nisdofs = 1;
7620: }
7621: }
7622: nisneu = !!pcbddc->NeumannBoundariesLocal;
7623: nisvert = 0; /* nisvert is not used */
7624: nis = nisdofs + nisneu + nisvert;
7625: PetscMalloc1(nis,&isarray);
7626: /* dofs splitting */
7627: for (i=0;i<nisdofs;i++) {
7628: /* ISView(pcbddc->ISForDofsLocal[i],0); */
7629: if (nedcfield != i) {
7630: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7631: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7632: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7633: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
7634: } else {
7635: ISGetLocalSize(pcbddc->nedclocal,&tsize);
7636: ISGetIndices(pcbddc->nedclocal,&idxs);
7637: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7638: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
7639: ISRestoreIndices(pcbddc->nedclocal,&idxs);
7640: }
7641: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7642: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
7643: /* ISView(isarray[i],0); */
7644: }
7645: /* neumann boundaries */
7646: if (pcbddc->NeumannBoundariesLocal) {
7647: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7648: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
7649: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7650: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7651: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7652: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7653: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
7654: /* ISView(isarray[nisdofs],0); */
7655: }
7656: /* free memory */
7657: PetscFree(tidxs);
7658: PetscFree(tidxs2);
7659: ISLocalToGlobalMappingDestroy(&tmap);
7660: } else {
7661: nis = 0;
7662: nisdofs = 0;
7663: nisneu = 0;
7664: nisvert = 0;
7665: isarray = NULL;
7666: }
7667: /* destroy no longer needed map */
7668: ISLocalToGlobalMappingDestroy(&coarse_islg);
7670: /* subassemble */
7671: if (multilevel_allowed) {
7672: Vec vp[1];
7673: PetscInt nvecs = 0;
7674: PetscBool reuse,reuser;
7676: if (coarse_mat) reuse = PETSC_TRUE;
7677: else reuse = PETSC_FALSE;
7678: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7679: vp[0] = NULL;
7680: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7681: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
7682: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
7683: VecSetType(vp[0],VECSTANDARD);
7684: nvecs = 1;
7686: if (pcbddc->divudotp) {
7687: Mat B,loc_divudotp;
7688: Vec v,p;
7689: IS dummy;
7690: PetscInt np;
7692: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
7693: MatGetSize(loc_divudotp,&np,NULL);
7694: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
7695: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
7696: MatCreateVecs(B,&v,&p);
7697: VecSet(p,1.);
7698: MatMultTranspose(B,p,v);
7699: VecDestroy(&p);
7700: MatDestroy(&B);
7701: VecGetArray(vp[0],&array);
7702: VecPlaceArray(pcbddc->vec1_P,array);
7703: VecRestoreArray(vp[0],&array);
7704: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
7705: VecResetArray(pcbddc->vec1_P);
7706: ISDestroy(&dummy);
7707: VecDestroy(&v);
7708: }
7709: }
7710: if (reuser) {
7711: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
7712: } else {
7713: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
7714: }
7715: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
7716: PetscScalar *arraym,*arrayv;
7717: PetscInt nl;
7718: VecGetLocalSize(vp[0],&nl);
7719: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
7720: MatDenseGetArray(coarsedivudotp,&arraym);
7721: VecGetArray(vp[0],&arrayv);
7722: PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
7723: VecRestoreArray(vp[0],&arrayv);
7724: MatDenseRestoreArray(coarsedivudotp,&arraym);
7725: VecDestroy(&vp[0]);
7726: } else {
7727: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
7728: }
7729: } else {
7730: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
7731: }
7732: if (coarse_mat_is || coarse_mat) {
7733: PetscMPIInt size;
7734: MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
7735: if (!multilevel_allowed) {
7736: MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
7737: } else {
7738: Mat A;
7740: /* if this matrix is present, it means we are not reusing the coarse matrix */
7741: if (coarse_mat_is) {
7742: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
7743: PetscObjectReference((PetscObject)coarse_mat_is);
7744: coarse_mat = coarse_mat_is;
7745: }
7746: /* be sure we don't have MatSeqDENSE as local mat */
7747: MatISGetLocalMat(coarse_mat,&A);
7748: MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
7749: }
7750: }
7751: MatDestroy(&t_coarse_mat_is);
7752: MatDestroy(&coarse_mat_is);
7754: /* create local to global scatters for coarse problem */
7755: if (compute_vecs) {
7756: PetscInt lrows;
7757: VecDestroy(&pcbddc->coarse_vec);
7758: if (coarse_mat) {
7759: MatGetLocalSize(coarse_mat,&lrows,NULL);
7760: } else {
7761: lrows = 0;
7762: }
7763: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
7764: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
7765: VecSetType(pcbddc->coarse_vec,VECSTANDARD);
7766: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
7767: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
7768: }
7769: ISDestroy(&coarse_is);
7771: /* set defaults for coarse KSP and PC */
7772: if (multilevel_allowed) {
7773: coarse_ksp_type = KSPRICHARDSON;
7774: coarse_pc_type = PCBDDC;
7775: } else {
7776: coarse_ksp_type = KSPPREONLY;
7777: coarse_pc_type = PCREDUNDANT;
7778: }
7780: /* print some info if requested */
7781: if (pcbddc->dbg_flag) {
7782: if (!multilevel_allowed) {
7783: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
7784: if (multilevel_requested) {
7785: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
7786: } else if (pcbddc->max_levels) {
7787: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
7788: }
7789: PetscViewerFlush(pcbddc->dbg_viewer);
7790: }
7791: }
7793: /* communicate coarse discrete gradient */
7794: coarseG = NULL;
7795: if (pcbddc->nedcG && multilevel_allowed) {
7796: MPI_Comm ccomm;
7797: if (coarse_mat) {
7798: ccomm = PetscObjectComm((PetscObject)coarse_mat);
7799: } else {
7800: ccomm = MPI_COMM_NULL;
7801: }
7802: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
7803: }
7805: /* create the coarse KSP object only once with defaults */
7806: if (coarse_mat) {
7807: PetscViewer dbg_viewer = NULL;
7808: if (pcbddc->dbg_flag) {
7809: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
7810: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
7811: }
7812: if (!pcbddc->coarse_ksp) {
7813: char prefix[256],str_level[16];
7814: size_t len;
7816: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
7817: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
7818: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
7819: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
7820: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7821: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
7822: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
7823: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7824: /* TODO is this logic correct? should check for coarse_mat type */
7825: PCSetType(pc_temp,coarse_pc_type);
7826: /* prefix */
7827: PetscStrcpy(prefix,"");
7828: PetscStrcpy(str_level,"");
7829: if (!pcbddc->current_level) {
7830: PetscStrcpy(prefix,((PetscObject)pc)->prefix);
7831: PetscStrcat(prefix,"pc_bddc_coarse_");
7832: } else {
7833: PetscStrlen(((PetscObject)pc)->prefix,&len);
7834: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
7835: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
7836: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
7837: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
7838: PetscStrcat(prefix,str_level);
7839: }
7840: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
7841: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7842: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
7843: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
7844: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
7845: /* allow user customization */
7846: KSPSetFromOptions(pcbddc->coarse_ksp);
7847: }
7848: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7849: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7850: if (nisdofs) {
7851: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
7852: for (i=0;i<nisdofs;i++) {
7853: ISDestroy(&isarray[i]);
7854: }
7855: }
7856: if (nisneu) {
7857: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
7858: ISDestroy(&isarray[nisdofs]);
7859: }
7860: if (nisvert) {
7861: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
7862: ISDestroy(&isarray[nis-1]);
7863: }
7864: if (coarseG) {
7865: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
7866: }
7868: /* get some info after set from options */
7869: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
7870: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
7871: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
7872: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
7873: if (isbddc && !multilevel_allowed) {
7874: PCSetType(pc_temp,coarse_pc_type);
7875: isbddc = PETSC_FALSE;
7876: }
7877: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
7878: if (multilevel_requested && !isbddc && !isnn) {
7879: PCSetType(pc_temp,PCBDDC);
7880: isbddc = PETSC_TRUE;
7881: isnn = PETSC_FALSE;
7882: }
7883: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
7884: if (isredundant) {
7885: KSP inner_ksp;
7886: PC inner_pc;
7888: PCRedundantGetKSP(pc_temp,&inner_ksp);
7889: KSPGetPC(inner_ksp,&inner_pc);
7890: PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
7891: }
7893: /* parameters which miss an API */
7894: if (isbddc) {
7895: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
7896: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
7897: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
7898: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
7899: if (pcbddc_coarse->benign_saddle_point) {
7900: Mat coarsedivudotp_is;
7901: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
7902: IS row,col;
7903: const PetscInt *gidxs;
7904: PetscInt n,st,M,N;
7906: MatGetSize(coarsedivudotp,&n,NULL);
7907: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
7908: st = st-n;
7909: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
7910: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
7911: ISLocalToGlobalMappingGetSize(l2gmap,&n);
7912: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
7913: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
7914: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
7915: ISLocalToGlobalMappingCreateIS(row,&rl2g);
7916: ISLocalToGlobalMappingCreateIS(col,&cl2g);
7917: ISGetSize(row,&M);
7918: MatGetSize(coarse_mat,&N,NULL);
7919: ISDestroy(&row);
7920: ISDestroy(&col);
7921: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
7922: MatSetType(coarsedivudotp_is,MATIS);
7923: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
7924: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
7925: ISLocalToGlobalMappingDestroy(&rl2g);
7926: ISLocalToGlobalMappingDestroy(&cl2g);
7927: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
7928: MatDestroy(&coarsedivudotp);
7929: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
7930: MatDestroy(&coarsedivudotp_is);
7931: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
7932: if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
7933: }
7934: }
7936: /* propagate symmetry info of coarse matrix */
7937: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
7938: if (pc->pmat->symmetric_set) {
7939: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
7940: }
7941: if (pc->pmat->hermitian_set) {
7942: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
7943: }
7944: if (pc->pmat->spd_set) {
7945: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
7946: }
7947: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
7948: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
7949: }
7950: /* set operators */
7951: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7952: if (pcbddc->dbg_flag) {
7953: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
7954: }
7955: }
7956: MatDestroy(&coarseG);
7957: PetscFree(isarray);
7958: #if 0
7959: {
7960: PetscViewer viewer;
7961: char filename[256];
7962: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
7963: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
7964: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
7965: MatView(coarse_mat,viewer);
7966: PetscViewerPopFormat(viewer);
7967: PetscViewerDestroy(&viewer);
7968: }
7969: #endif
7971: if (pcbddc->coarse_ksp) {
7972: Vec crhs,csol;
7974: KSPGetSolution(pcbddc->coarse_ksp,&csol);
7975: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
7976: if (!csol) {
7977: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
7978: }
7979: if (!crhs) {
7980: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
7981: }
7982: }
7983: MatDestroy(&coarsedivudotp);
7985: /* compute null space for coarse solver if the benign trick has been requested */
7986: if (pcbddc->benign_null) {
7988: VecSet(pcbddc->vec1_P,0.);
7989: for (i=0;i<pcbddc->benign_n;i++) {
7990: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
7991: }
7992: VecAssemblyBegin(pcbddc->vec1_P);
7993: VecAssemblyEnd(pcbddc->vec1_P);
7994: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
7995: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
7996: if (coarse_mat) {
7997: Vec nullv;
7998: PetscScalar *array,*array2;
7999: PetscInt nl;
8001: MatCreateVecs(coarse_mat,&nullv,NULL);
8002: VecGetLocalSize(nullv,&nl);
8003: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8004: VecGetArray(nullv,&array2);
8005: PetscMemcpy(array2,array,nl*sizeof(*array));
8006: VecRestoreArray(nullv,&array2);
8007: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8008: VecNormalize(nullv,NULL);
8009: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8010: VecDestroy(&nullv);
8011: }
8012: }
8014: if (pcbddc->coarse_ksp) {
8015: PetscBool ispreonly;
8017: if (CoarseNullSpace) {
8018: PetscBool isnull;
8019: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8020: if (isnull) {
8021: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8022: }
8023: /* TODO: add local nullspaces (if any) */
8024: }
8025: /* setup coarse ksp */
8026: KSPSetUp(pcbddc->coarse_ksp);
8027: /* Check coarse problem if in debug mode or if solving with an iterative method */
8028: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8029: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8030: KSP check_ksp;
8031: KSPType check_ksp_type;
8032: PC check_pc;
8033: Vec check_vec,coarse_vec;
8034: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8035: PetscInt its;
8036: PetscBool compute_eigs;
8037: PetscReal *eigs_r,*eigs_c;
8038: PetscInt neigs;
8039: const char *prefix;
8041: /* Create ksp object suitable for estimation of extreme eigenvalues */
8042: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8043: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8044: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8045: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8046: /* prevent from setup unneeded object */
8047: KSPGetPC(check_ksp,&check_pc);
8048: PCSetType(check_pc,PCNONE);
8049: if (ispreonly) {
8050: check_ksp_type = KSPPREONLY;
8051: compute_eigs = PETSC_FALSE;
8052: } else {
8053: check_ksp_type = KSPGMRES;
8054: compute_eigs = PETSC_TRUE;
8055: }
8056: KSPSetType(check_ksp,check_ksp_type);
8057: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8058: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8059: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8060: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8061: KSPSetOptionsPrefix(check_ksp,prefix);
8062: KSPAppendOptionsPrefix(check_ksp,"check_");
8063: KSPSetFromOptions(check_ksp);
8064: KSPSetUp(check_ksp);
8065: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8066: KSPSetPC(check_ksp,check_pc);
8067: /* create random vec */
8068: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8069: VecSetRandom(check_vec,NULL);
8070: MatMult(coarse_mat,check_vec,coarse_vec);
8071: /* solve coarse problem */
8072: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8073: /* set eigenvalue estimation if preonly has not been requested */
8074: if (compute_eigs) {
8075: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8076: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8077: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8078: if (neigs) {
8079: lambda_max = eigs_r[neigs-1];
8080: lambda_min = eigs_r[0];
8081: if (pcbddc->use_coarse_estimates) {
8082: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8083: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8084: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8085: }
8086: }
8087: }
8088: }
8090: /* check coarse problem residual error */
8091: if (pcbddc->dbg_flag) {
8092: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8093: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8094: VecAXPY(check_vec,-1.0,coarse_vec);
8095: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8096: MatMult(coarse_mat,check_vec,coarse_vec);
8097: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8098: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8099: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8100: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8101: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8102: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8103: if (CoarseNullSpace) {
8104: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8105: }
8106: if (compute_eigs) {
8107: PetscReal lambda_max_s,lambda_min_s;
8108: KSPConvergedReason reason;
8109: KSPGetType(check_ksp,&check_ksp_type);
8110: KSPGetIterationNumber(check_ksp,&its);
8111: KSPGetConvergedReason(check_ksp,&reason);
8112: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8113: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8114: for (i=0;i<neigs;i++) {
8115: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8116: }
8117: }
8118: PetscViewerFlush(dbg_viewer);
8119: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8120: }
8121: VecDestroy(&check_vec);
8122: VecDestroy(&coarse_vec);
8123: KSPDestroy(&check_ksp);
8124: if (compute_eigs) {
8125: PetscFree(eigs_r);
8126: PetscFree(eigs_c);
8127: }
8128: }
8129: }
8130: MatNullSpaceDestroy(&CoarseNullSpace);
8131: /* print additional info */
8132: if (pcbddc->dbg_flag) {
8133: /* waits until all processes reaches this point */
8134: PetscBarrier((PetscObject)pc);
8135: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8136: PetscViewerFlush(pcbddc->dbg_viewer);
8137: }
8139: /* free memory */
8140: MatDestroy(&coarse_mat);
8141: return(0);
8142: }
8144: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8145: {
8146: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8147: PC_IS* pcis = (PC_IS*)pc->data;
8148: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8149: IS subset,subset_mult,subset_n;
8150: PetscInt local_size,coarse_size=0;
8151: PetscInt *local_primal_indices=NULL;
8152: const PetscInt *t_local_primal_indices;
8156: /* Compute global number of coarse dofs */
8157: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8158: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8159: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8160: ISDestroy(&subset_n);
8161: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8162: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8163: ISDestroy(&subset);
8164: ISDestroy(&subset_mult);
8165: ISGetLocalSize(subset_n,&local_size);
8166: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8167: PetscMalloc1(local_size,&local_primal_indices);
8168: ISGetIndices(subset_n,&t_local_primal_indices);
8169: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8170: ISRestoreIndices(subset_n,&t_local_primal_indices);
8171: ISDestroy(&subset_n);
8173: /* check numbering */
8174: if (pcbddc->dbg_flag) {
8175: PetscScalar coarsesum,*array,*array2;
8176: PetscInt i;
8177: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8179: PetscViewerFlush(pcbddc->dbg_viewer);
8180: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8181: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8182: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8183: /* counter */
8184: VecSet(pcis->vec1_global,0.0);
8185: VecSet(pcis->vec1_N,1.0);
8186: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8187: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8188: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8189: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8190: VecSet(pcis->vec1_N,0.0);
8191: for (i=0;i<pcbddc->local_primal_size;i++) {
8192: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8193: }
8194: VecAssemblyBegin(pcis->vec1_N);
8195: VecAssemblyEnd(pcis->vec1_N);
8196: VecSet(pcis->vec1_global,0.0);
8197: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8198: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8199: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8200: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8201: VecGetArray(pcis->vec1_N,&array);
8202: VecGetArray(pcis->vec2_N,&array2);
8203: for (i=0;i<pcis->n;i++) {
8204: if (array[i] != 0.0 && array[i] != array2[i]) {
8205: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8206: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8207: set_error = PETSC_TRUE;
8208: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8209: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);
8210: }
8211: }
8212: VecRestoreArray(pcis->vec2_N,&array2);
8213: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8214: PetscViewerFlush(pcbddc->dbg_viewer);
8215: for (i=0;i<pcis->n;i++) {
8216: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8217: }
8218: VecRestoreArray(pcis->vec1_N,&array);
8219: VecSet(pcis->vec1_global,0.0);
8220: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8221: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8222: VecSum(pcis->vec1_global,&coarsesum);
8223: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8224: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8225: PetscInt *gidxs;
8227: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8228: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8229: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8230: PetscViewerFlush(pcbddc->dbg_viewer);
8231: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8232: for (i=0;i<pcbddc->local_primal_size;i++) {
8233: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8234: }
8235: PetscViewerFlush(pcbddc->dbg_viewer);
8236: PetscFree(gidxs);
8237: }
8238: PetscViewerFlush(pcbddc->dbg_viewer);
8239: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8240: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8241: }
8242: /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8243: /* get back data */
8244: *coarse_size_n = coarse_size;
8245: *local_primal_indices_n = local_primal_indices;
8246: return(0);
8247: }
8249: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8250: {
8251: IS localis_t;
8252: PetscInt i,lsize,*idxs,n;
8253: PetscScalar *vals;
8257: /* get indices in local ordering exploiting local to global map */
8258: ISGetLocalSize(globalis,&lsize);
8259: PetscMalloc1(lsize,&vals);
8260: for (i=0;i<lsize;i++) vals[i] = 1.0;
8261: ISGetIndices(globalis,(const PetscInt**)&idxs);
8262: VecSet(gwork,0.0);
8263: VecSet(lwork,0.0);
8264: if (idxs) { /* multilevel guard */
8265: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8266: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8267: }
8268: VecAssemblyBegin(gwork);
8269: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8270: PetscFree(vals);
8271: VecAssemblyEnd(gwork);
8272: /* now compute set in local ordering */
8273: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8274: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8275: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8276: VecGetSize(lwork,&n);
8277: for (i=0,lsize=0;i<n;i++) {
8278: if (PetscRealPart(vals[i]) > 0.5) {
8279: lsize++;
8280: }
8281: }
8282: PetscMalloc1(lsize,&idxs);
8283: for (i=0,lsize=0;i<n;i++) {
8284: if (PetscRealPart(vals[i]) > 0.5) {
8285: idxs[lsize++] = i;
8286: }
8287: }
8288: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8289: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8290: *localis = localis_t;
8291: return(0);
8292: }
/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp().

   Steps performed, in order:
   1. select the adjacency arrays (used_xadj,used_adjncy) passed to the set up,
      or NULL when sub_schurs_layers is -1;
   2. create the Schur complement matrix S_j from the A_II, A_IB, A_BI, A_BB
      blocks stored in pcis;
   3. call PCBDDCSubSchursSetUp(), either in its implicit form
      (sub_schurs->schur_explicit false) or in its explicit form; in the latter
      case a change of basis may first be computed through a temporary PC;
   4. destroy S_j and free the adjacency arrays if they were copied here.

   NOTE(review): pcis and pcbddc are both obtained by casting pc->data;
   presumably PC_BDDC starts with a PC_IS member -- confirm in bddc.h.
   NOTE(review): ierr is declared, but no return code is checked in this listing.
*/
8294: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8295: {
8296: PC_IS *pcis=(PC_IS*)pc->data;
8297: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8298: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
8299: Mat S_j;
8300: PetscInt *used_xadj,*used_adjncy;
8301: PetscBool free_used_adj;
8302: PetscErrorCode ierr;
8305: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
/* free_used_adj becomes true only when the arrays are allocated in this function */
8306: free_used_adj = PETSC_FALSE;
8307: if (pcbddc->sub_schurs_layers == -1) {
8308: used_xadj = NULL;
8309: used_adjncy = NULL;
8310: } else {
/* the first two branches both borrow the arrays stored in mat_graph (no copy) */
8311: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8312: used_xadj = pcbddc->mat_graph->xadj;
8313: used_adjncy = pcbddc->mat_graph->adjncy;
8314: } else if (pcbddc->computed_rowadj) {
8315: used_xadj = pcbddc->mat_graph->xadj;
8316: used_adjncy = pcbddc->mat_graph->adjncy;
8317: } else {
8318: PetscBool flg_row=PETSC_FALSE;
8319: const PetscInt *xadj,*adjncy;
8320: PetscInt nvtxs;
/* otherwise take the row structure of local_mat and keep a private copy of it,
   since the arrays are given back with MatRestoreRowIJ() right after */
8322: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8323: if (flg_row) {
8324: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8325: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8326: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8327: free_used_adj = PETSC_TRUE;
8328: } else {
/* row structure not available: fall back to the "no adjacency" setting */
8329: pcbddc->sub_schurs_layers = -1;
8330: used_xadj = NULL;
8331: used_adjncy = NULL;
8332: }
8333: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8334: }
8335: }
8337: /* setup sub_schurs data */
8338: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8339: if (!sub_schurs->schur_explicit) {
8340: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8341: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8342: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8343: } else {
8344: Mat change = NULL;
8345: Vec scaling = NULL;
8346: IS change_primal = NULL, iP;
8347: PetscInt benign_n;
8348: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8349: PetscBool isseqaij,need_change = PETSC_FALSE;
8350: PetscBool discrete_harmonic = PETSC_FALSE;
/* when vertices are not used, reuse_solvers survives only if is_vertices is locally empty */
8352: if (!pcbddc->use_vertices && reuse_solvers) {
8353: PetscInt n_vertices;
8355: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8356: reuse_solvers = (PetscBool)!n_vertices;
8357: }
/* make sure local_mat is of type MATSEQAIJ before it is passed to the set up */
8358: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8359: if (!isseqaij) {
8360: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
/* local_mat aliases matis->A: replace it with a converted copy instead of
   converting in place */
8361: if (matis->A == pcbddc->local_mat) {
8362: MatDestroy(&pcbddc->local_mat);
8363: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8364: } else {
8365: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8366: }
8367: }
/* benign_n is forwarded only when benign_change_explicit is not set */
8368: if (!pcbddc->benign_change_explicit) {
8369: benign_n = pcbddc->benign_n;
8370: } else {
8371: benign_n = 0;
8372: }
8373: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8374: We need a global reduction to avoid possible deadlocks.
8375: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8376: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8377: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8378: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
/* the change is needed only if no process of the communicator already has one */
8379: need_change = (PetscBool)(!need_change);
8380: }
8381: /* If the user defines additional constraints, we import them here.
8382: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8383: if (need_change) {
8384: PC_IS *pcisf;
8385: PC_BDDC *pcbddcf;
8386: PC pcf;
8388: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
/* temporary PCBDDC object sharing the operators of pc */
8389: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8390: PCSetOperators(pcf,pc->mat,pc->pmat);
8391: PCSetType(pcf,PCBDDC);
8393: /* hacks */
/* the fields below are borrowed from pc (plain pointer/value copies) */
8394: pcisf = (PC_IS*)pcf->data;
8395: pcisf->is_B_local = pcis->is_B_local;
8396: pcisf->vec1_N = pcis->vec1_N;
8397: pcisf->BtoNmap = pcis->BtoNmap;
8398: pcisf->n = pcis->n;
8399: pcisf->n_B = pcis->n_B;
8400: pcbddcf = (PC_BDDC*)pcf->data;
/* the graph owned by pcf is freed and replaced by a pointer to the one of pc */
8401: PetscFree(pcbddcf->mat_graph);
8402: pcbddcf->mat_graph = pcbddc->mat_graph;
8403: pcbddcf->use_faces = PETSC_TRUE;
8404: pcbddcf->use_change_of_basis = PETSC_TRUE;
8405: pcbddcf->use_change_on_faces = PETSC_TRUE;
8406: pcbddcf->use_qr_single = PETSC_TRUE;
8407: pcbddcf->fake_change = PETSC_TRUE;
8409: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8410: PCBDDCConstraintsSetUp(pcf);
8411: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
8412: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
/* take over ConstraintMatrix from pcf; it is destroyed below through "change" */
8413: change = pcbddcf->ConstraintMatrix;
8414: pcbddcf->ConstraintMatrix = NULL;
8416: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8417: PetscFree(pcbddcf->sub_schurs);
8418: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8419: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8420: PetscFree(pcbddcf->primal_indices_local_idxs);
8421: PetscFree(pcbddcf->onearnullvecs_state);
/* pcf->data is released by hand and the destroy/reset callbacks are cleared
   before PCDestroy(); presumably so that the fields borrowed from pc are not
   freed by the type-specific teardown -- confirm */
8422: PetscFree(pcf->data);
8423: pcf->ops->destroy = NULL;
8424: pcf->ops->reset = NULL;
8425: PCDestroy(&pcf);
8426: }
/* pcis->D is passed as scaling only when deluxe scaling is not in use */
8427: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
/* the -sub_schurs_discrete_harmonic option is read only if an object named
   "__KSPFETIDP_iP" has been composed with pc */
8429: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8430: if (iP) {
8431: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8432: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8433: PetscOptionsEnd();
8434: }
8435: if (discrete_harmonic) {
8436: Mat A;
/* work on a copy of local_mat whose rows and columns listed in iP are zeroed
   (with 1.0 on the diagonal); the copy is destroyed right after the set up */
8437: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8438: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8439: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8440: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8441: MatDestroy(&A);
8442: } else {
8443: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8444: }
/* both are still NULL here when need_change was false */
8445: MatDestroy(&change);
8446: ISDestroy(&change_primal);
8447: }
8448: MatDestroy(&S_j);
8450: /* free adjacency */
8451: if (free_used_adj) {
8452: PetscFree2(used_xadj,used_adjncy);
8453: }
8454: return(0);
8455: }
8457: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8458: {
8459: PC_IS *pcis=(PC_IS*)pc->data;
8460: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8461: PCBDDCGraph graph;
8462: PetscErrorCode ierr;
8465: /* attach interface graph for determining subsets */
8466: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8467: IS verticesIS,verticescomm;
8468: PetscInt vsize,*idxs;
8470: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8471: ISGetSize(verticesIS,&vsize);
8472: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8473: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8474: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8475: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8476: PCBDDCGraphCreate(&graph);
8477: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8478: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8479: ISDestroy(&verticescomm);
8480: PCBDDCGraphComputeConnectedComponents(graph);
8481: } else {
8482: graph = pcbddc->mat_graph;
8483: }
8484: /* print some info */
8485: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8486: IS vertices;
8487: PetscInt nv,nedges,nfaces;
8488: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8489: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8490: ISGetSize(vertices,&nv);
8491: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8492: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
8493: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8494: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8495: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8496: PetscViewerFlush(pcbddc->dbg_viewer);
8497: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8498: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8499: }
8501: /* sub_schurs init */
8502: if (!pcbddc->sub_schurs) {
8503: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8504: }
8505: PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
8506: pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix;
8508: /* free graph struct */
8509: if (pcbddc->sub_schurs_rebuild) {
8510: PCBDDCGraphDestroy(&graph);
8511: }
8512: return(0);
8513: }
8515: PetscErrorCode PCBDDCCheckOperator(PC pc)
8516: {
8517: PC_IS *pcis=(PC_IS*)pc->data;
8518: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8519: PetscErrorCode ierr;
8522: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8523: IS zerodiag = NULL;
8524: Mat S_j,B0_B=NULL;
8525: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
8526: PetscScalar *p0_check,*array,*array2;
8527: PetscReal norm;
8528: PetscInt i;
8530: /* B0 and B0_B */
8531: if (zerodiag) {
8532: IS dummy;
8534: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8535: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8536: MatCreateVecs(B0_B,NULL,&dummy_vec);
8537: ISDestroy(&dummy);
8538: }
8539: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8540: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8541: VecSet(pcbddc->vec1_P,1.0);
8542: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8543: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8544: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8545: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8546: VecReciprocal(vec_scale_P);
8547: /* S_j */
8548: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8549: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8551: /* mimic vector in \widetilde{W}_\Gamma */
8552: VecSetRandom(pcis->vec1_N,NULL);
8553: /* continuous in primal space */
8554: VecSetRandom(pcbddc->coarse_vec,NULL);
8555: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8556: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8557: VecGetArray(pcbddc->vec1_P,&array);
8558: PetscCalloc1(pcbddc->benign_n,&p0_check);
8559: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8560: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8561: VecRestoreArray(pcbddc->vec1_P,&array);
8562: VecAssemblyBegin(pcis->vec1_N);
8563: VecAssemblyEnd(pcis->vec1_N);
8564: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8565: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8566: VecDuplicate(pcis->vec2_B,&vec_check_B);
8567: VecCopy(pcis->vec2_B,vec_check_B);
8569: /* assemble rhs for coarse problem */
8570: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8571: /* local with Schur */
8572: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8573: if (zerodiag) {
8574: VecGetArray(dummy_vec,&array);
8575: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8576: VecRestoreArray(dummy_vec,&array);
8577: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8578: }
8579: /* sum on primal nodes the local contributions */
8580: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8581: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8582: VecGetArray(pcis->vec1_N,&array);
8583: VecGetArray(pcbddc->vec1_P,&array2);
8584: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8585: VecRestoreArray(pcbddc->vec1_P,&array2);
8586: VecRestoreArray(pcis->vec1_N,&array);
8587: VecSet(pcbddc->coarse_vec,0.);
8588: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8589: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8590: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8591: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8592: VecGetArray(pcbddc->vec1_P,&array);
8593: /* scale primal nodes (BDDC sums contibutions) */
8594: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8595: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8596: VecRestoreArray(pcbddc->vec1_P,&array);
8597: VecAssemblyBegin(pcis->vec1_N);
8598: VecAssemblyEnd(pcis->vec1_N);
8599: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8600: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8601: /* global: \widetilde{B0}_B w_\Gamma */
8602: if (zerodiag) {
8603: MatMult(B0_B,pcis->vec2_B,dummy_vec);
8604: VecGetArray(dummy_vec,&array);
8605: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8606: VecRestoreArray(dummy_vec,&array);
8607: }
8608: /* BDDC */
8609: VecSet(pcis->vec1_D,0.);
8610: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
8612: VecCopy(pcis->vec1_B,pcis->vec2_B);
8613: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8614: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8615: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8616: for (i=0;i<pcbddc->benign_n;i++) {
8617: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8618: }
8619: PetscFree(p0_check);
8620: VecDestroy(&vec_scale_P);
8621: VecDestroy(&vec_check_B);
8622: VecDestroy(&dummy_vec);
8623: MatDestroy(&S_j);
8624: MatDestroy(&B0_B);
8625: }
8626: return(0);
8627: }
8629: #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - Builds, on the communicator ccomm, a matrix B with the
   same global rows as A.

   Input Parameters:
+  A     - the matrix to be restricted; its data is accessed as Mat_MPIAIJ after
           the row extraction
-  ccomm - the target communicator, or MPI_COMM_NULL on the processes that do
           not take part in B

   Output Parameter:
.  B     - the new matrix; it is written only where ccomm != MPI_COMM_NULL

   Notes:
   The rows are first redistributed on the communicator of A with
   MatCreateSubMatrix(), following a layout of A->rmap->N rows computed on
   ccomm (processes with MPI_COMM_NULL ask for zero rows). B is then assembled
   by hand: it references the a->A and a->B matrices and the lvec vector of the
   redistributed matrix, copies colmap and garray, and gets a newly created
   scatter.

   NOTE(review): no return code is checked in this listing.
*/
8630: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8631: {
8632: Mat At;
8633: IS rows;
8634: PetscInt rst,ren;
8636: PetscLayout rmap;
/* empty row range unless this process belongs to ccomm */
8639: rst = ren = 0;
8640: if (ccomm != MPI_COMM_NULL) {
/* rmap is created only here and it is used only inside the matching
   ccomm != MPI_COMM_NULL block below, where it is handed over to *B */
8641: PetscLayoutCreate(ccomm,&rmap);
8642: PetscLayoutSetSize(rmap,A->rmap->N);
8643: PetscLayoutSetBlockSize(rmap,1);
8644: PetscLayoutSetUp(rmap);
8645: PetscLayoutGetRange(rmap,&rst,&ren);
8646: }
/* extract the rows [rst,ren) with all the columns (NULL column IS); the call
   is made by every process of the communicator of A */
8647: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
8648: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
8649: ISDestroy(&rows);
8651: if (ccomm != MPI_COMM_NULL) {
8652: Mat_MPIAIJ *a,*b;
8653: IS from,to;
8654: Vec gvec;
8655: PetscInt lsize;
/* create B on ccomm and drop its own row layout, which is replaced by rmap
   further below */
8657: MatCreate(ccomm,B);
8658: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
8659: MatSetType(*B,MATAIJ);
8660: PetscLayoutDestroy(&((*B)->rmap));
8661: PetscLayoutSetUp((*B)->cmap);
8662: a = (Mat_MPIAIJ*)At->data;
8663: b = (Mat_MPIAIJ*)(*B)->data;
8664: MPI_Comm_size(ccomm,&b->size);
8665: MPI_Comm_rank(ccomm,&b->rank);
/* share a->A and a->B with At by taking a reference (no copy); presumably
   these are the diagonal and off-diagonal blocks -- confirm in mpiaij.h */
8666: PetscObjectReference((PetscObject)a->A);
8667: PetscObjectReference((PetscObject)a->B);
8668: b->A = a->A;
8669: b->B = a->B;
8671: b->donotstash = a->donotstash;
8672: b->roworiented = a->roworiented;
8673: b->rowindices = 0;
8674: b->rowvalues = 0;
8675: b->getrowactive = PETSC_FALSE;
/* mark B as a preallocated and assembled matrix */
8677: (*B)->rmap = rmap;
8678: (*B)->factortype = A->factortype;
8679: (*B)->assembled = PETSC_TRUE;
8680: (*B)->insertmode = NOT_SET_VALUES;
8681: (*B)->preallocated = PETSC_TRUE;
/* colmap is duplicated; its storage depends on PETSC_USE_CTABLE */
8683: if (a->colmap) {
8684: #if defined(PETSC_USE_CTABLE)
8685: PetscTableCreateCopy(a->colmap,&b->colmap);
8686: #else
8687: PetscMalloc1(At->cmap->N,&b->colmap);
8688: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
8689: PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
8690: #endif
8691: } else b->colmap = 0;
/* garray is duplicated: len+1 entries are allocated, len are copied */
8692: if (a->garray) {
8693: PetscInt len;
8694: len = a->B->cmap->n;
8695: PetscMalloc1(len+1,&b->garray);
8696: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
8697: if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
8698: } else b->garray = 0;
/* lvec is shared with At by reference */
8700: PetscObjectReference((PetscObject)a->lvec);
8701: b->lvec = a->lvec;
8702: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
8704: /* cannot use VecScatterCopy */
/* build the scatter from scratch: entries listed in b->garray of a vector
   compatible with B are sent to positions 0..lsize-1 of b->lvec;
   "from" only borrows b->garray (PETSC_USE_POINTER) */
8705: VecGetLocalSize(b->lvec,&lsize);
8706: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
8707: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
8708: MatCreateVecs(*B,&gvec,NULL);
8709: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
8710: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
8711: ISDestroy(&from);
8712: ISDestroy(&to);
8713: VecDestroy(&gvec);
8714: }
/* At is destroyed on every process; the objects referenced above are kept
   alive by the references taken for B */
8715: MatDestroy(&At);
8716: return(0);
8717: }