Actual source code: bddcprivate.c

petsc-3.8.4 2018-03-24
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <petscdmplex.h>
  5:  #include <petscblaslapack.h>
  6:  #include <petsc/private/sfimpl.h>
  7:  #include <petsc/private/dmpleximpl.h>

  9: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); /* forward declaration of a file-local helper; its definition is not visible in this part of the file */

 11: /* MatDenseOrthogonalRangeOrComplement - builds a dense matrix B from the left singular vectors of the dense matrix A.
 12:    if range is true,  it returns B s.t. span{B} = range(A)
        if range is false, it returns B s.t. range(B) _|_ range(A)

        Input parameters:
          A     - dense matrix (nr x nc); its array is handed directly to LAPACK gesvd.
                  NOTE(review): LAPACK documents that gesvd overwrites A in this mode - confirm callers do not reuse A's values.
          range - selects the range (PETSC_TRUE) or its orthogonal complement (PETSC_FALSE)
          lw    - length of work (only read when work is not NULL)
          work  - optional LAPACK workspace; if NULL a buffer is allocated and freed internally
          rwork - optional storage for the min(nr,nc) singular values; if NULL it is allocated and freed internally

        Output parameter:
          B     - new sequential dense matrix with nr rows; it has i columns (range) or nr-i columns (complement),
                  where i is the number of leading singular values that are not smaller than PETSC_SMALL

        Notes:
          - If A has zero rows or zero columns the function returns 0 immediately and *B is not set.
          - Only available for real scalars and when LAPACK gesvd is present; otherwise PETSC_ERR_SUP is raised.
          - NOTE(review): in this listing the return codes of the PETSc calls are not checked. */
 13: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 14: {
 15: #if !defined(PETSC_USE_COMPLEX)
 16:   PetscScalar    *uwork,*data,*U, ds = 0.;
 17:   PetscReal      *sing;
 18:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 19:   PetscInt       ulw,i,nr,nc,n;

 23: #if defined(PETSC_MISSING_LAPACK_GESVD)
 24:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 25: #else
 26:   MatGetSize(A,&nr,&nc);
       /* empty matrix: nothing to decompose; note that *B is left untouched on this path */
 27:   if (!nr || !nc) return(0);

 29:   /* workspace: use the caller-provided buffer of length lw, otherwise allocate one */
 30:   if (!work) {
         /* NOTE(review): this size looks like the minimum LWORK documented for gesvd - confirm against the LAPACK reference */
 31:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 32:     PetscMalloc1(ulw,&uwork);
 33:   } else {
 34:     ulw   = lw;
 35:     uwork = work;
 36:   }
       /* n = min(nr,nc) is the number of singular values stored in sing */
 37:   n = PetscMin(nr,nc);
 38:   if (!rwork) {
 39:     PetscMalloc1(n,&sing);
 40:   } else {
 41:     sing = rwork;
 42:   }

 44:   /* SVD: "A" requests the full nr x nr matrix U (leading dimension nr); "N" requests no right singular vectors,
          so ds and di are only placeholders for the VT arguments */
 45:   PetscMalloc1(nr*nr,&U);
 46:   PetscBLASIntCast(nr,&bM);
 47:   PetscBLASIntCast(nc,&bN);
 48:   PetscBLASIntCast(ulw,&lwork);
 49:   MatDenseGetArray(A,&data);
       /* floating point traps are switched off around the LAPACK call and restored right after */
 50:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 51:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 52:   PetscFPTrapPop();
       /* NOTE(review): on a LAPACK failure the function errors out before restoring A's array or freeing U/uwork/sing */
 53:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 54:   MatDenseRestoreArray(A,&data);
       /* i stops at the first singular value below the absolute threshold PETSC_SMALL (i == n if there is none);
          it is used below as the number of columns of U spanning range(A) */
 55:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
       /* release only the buffers that were allocated here, never the caller's */
 56:   if (!rwork) {
 57:     PetscFree(sing);
 58:   }
 59:   if (!work) {
 60:     PetscFree(uwork);
 61:   }
 62:   /* create B by copying a contiguous group of columns of U */
 63:   if (!range) {
         /* complement: the trailing nr-i columns of U, which start at offset nr*i */
 64:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 65:     MatDenseGetArray(*B,&data);
 66:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 67:   } else {
         /* range: the leading i columns of U */
 68:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 69:     MatDenseGetArray(*B,&data);
 70:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 71:   }
 72:   MatDenseRestoreArray(*B,&data);
 73:   PetscFree(U);
 74: #endif
 75: #else /* PETSC_USE_COMPLEX */
 77:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 78: #endif
 79:   return(0);
 80: }

 82: /* TODO REMOVE: debug-only counters, compiled in only when PRINT_GDET is defined.
        PCBDDCComputeNedelecChangeEdge uses lev and inc (post-incremented on each dump) to build
        the name of the "Gdet_l%d_r%d_cc%d.m" file it writes. */
 83: #if defined(PRINT_GDET)
 84: static int inc = 0;
 85: static int lev = 0;
 86: #endif

 /* PCBDDCComputeNedelecChangeEdge - extracts dense blocks of lG for one edge and computes the orthogonal
      complement of the range of the block restricted to the edge rows.

      Input parameters:
        lG      - matrix from which the sub-blocks are extracted
        edge    - row indices of the edge; if its size is zero the function returns 0 without setting any output
        extrow  - row indices used for the Gins block
        extcol  - column indices used for both dense blocks
        corners - optional column indices; when non-NULL, cvals is filled
        work    - scalar buffer, must not be NULL: entries [0,5*esize) are passed on as LAPACK workspace,
                  the next rsize*csize entries back *Gins, the following esize*csize entries back a temporary
        rwork   - forwarded to MatDenseOrthogonalRangeOrComplement (storage for singular values, may be NULL there)

      Output parameters:
        Gins    - dense rsize x csize copy of lG(extrow,extcol); it is created on top of the work buffer,
                  so work presumably has to outlive *Gins - TODO confirm with the caller
        GKins   - dense matrix returned by MatDenseOrthogonalRangeOrComplement with range = PETSC_FALSE
                  applied to the dense copy of lG(edge,extcol)
        cvals   - only when corners is given: the first two entries of lG(edge,corners)^T * (*GKins), divided by v

      NOTE(review): in this listing the return codes of the PETSc calls are not checked. */
 88: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 89: {
 91:   Mat            GE,GEd;
 92:   PetscInt       rsize,csize,esize;
 93:   PetscScalar    *ptr;

 96:   ISGetSize(edge,&esize);
       /* empty edge: outputs are left untouched */
 97:   if (!esize) return(0);
 98:   ISGetSize(extrow,&rsize);
 99:   ISGetSize(extcol,&csize);

101:   /* gradients: dense copy of lG(extrow,extcol), stored in work after the first 5*esize entries */
102:   ptr  = work + 5*esize;
103:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
104:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
105:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
106:   MatDestroy(&GE);

108:   /* constants: dense copy of lG(edge,extcol), stored right after the Gins block; GKins is the
          orthogonal complement of its range, computed with work[0..5*esize) as LAPACK workspace */
109:   ptr += rsize*csize;
110:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
111:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
112:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
113:   MatDestroy(&GE);
114:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
115:   MatDestroy(&GEd);

117:   if (corners) {
118:     Mat            GEc;
119:     PetscScalar    *vals,v;

         /* GEd is reused here for the product lG(edge,corners)^T * (*GKins) */
121:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
122:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
123:     MatDenseGetArray(GEd,&vals);
         /* normalization is currently disabled: v is fixed to 1, so the divisions and the MatScale below do not change the values */
124:     /* v    = PetscAbsScalar(vals[0]) */;
125:     v    = 1.;
         /* NOTE(review): reads the first two entries of the product; assumes it holds at least two values - confirm */
126:     cvals[0] = vals[0]/v;
127:     cvals[1] = vals[1]/v;
128:     MatDenseRestoreArray(GEd,&vals);
129:     MatScale(*GKins,1./v);
130: #if defined(PRINT_GDET)
         /* debug dump of GEc, *GKins ("GK") and the product ("Gproj") in MATLAB ASCII format;
            the file name is built from lev, the global rank and the running counter inc */
131:     {
132:       PetscViewer viewer;
133:       char filename[256];
134:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
135:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
136:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
137:       PetscObjectSetName((PetscObject)GEc,"GEc");
138:       MatView(GEc,viewer);
139:       PetscObjectSetName((PetscObject)(*GKins),"GK");
140:       MatView(*GKins,viewer);
141:       PetscObjectSetName((PetscObject)GEd,"Gproj");
142:       MatView(GEd,viewer);
143:       PetscViewerDestroy(&viewer);
144:     }
145: #endif
146:     MatDestroy(&GEd);
147:     MatDestroy(&GEc);
148:   }

150:   return(0);
151: }

153: PetscErrorCode PCBDDCNedelecSupport(PC pc)
154: {
155:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
156:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
157:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
158:   Vec                    tvec;
159:   PetscSF                sfv;
160:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
161:   MPI_Comm               comm;
162:   IS                     lned,primals,allprimals,nedfieldlocal;
163:   IS                     *eedges,*extrows,*extcols,*alleedges;
164:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
165:   PetscScalar            *vals,*work;
166:   PetscReal              *rwork;
167:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
168:   PetscInt               ne,nv,Lv,order,n,field;
169:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
170:   PetscInt               i,j,extmem,cum,maxsize,nee;
171:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
172:   PetscInt               *sfvleaves,*sfvroots;
173:   PetscInt               *corners,*cedges;
174:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
175: #if defined(PETSC_USE_DEBUG)
176:   PetscInt               *emarks;
177: #endif
178:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
179:   PetscErrorCode         ierr;

182:   /* If the discrete gradient is defined for a subset of dofs and global is true,
183:      it assumes G is given in global ordering for all the dofs.
184:      Otherwise, the ordering is global for the Nedelec field */
185:   order      = pcbddc->nedorder;
186:   conforming = pcbddc->conforming;
187:   field      = pcbddc->nedfield;
188:   global     = pcbddc->nedglobal;
189:   setprimal  = PETSC_FALSE;
190:   print      = PETSC_FALSE;
191:   singular   = PETSC_FALSE;

193:   /* Command line customization */
194:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
195:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
196:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
197:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
198:   /* print debug info TODO: to be removed */
199:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
200:   PetscOptionsEnd();

202:   /* Return if there are no edges in the decomposition and the problem is not singular */
203:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
204:   ISLocalToGlobalMappingGetSize(al2g,&n);
205:   PetscObjectGetComm((PetscObject)pc,&comm);
206:   if (!singular) {
207:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
208:     lrc[0] = PETSC_FALSE;
209:     for (i=0;i<n;i++) {
210:       if (PetscRealPart(vals[i]) > 2.) {
211:         lrc[0] = PETSC_TRUE;
212:         break;
213:       }
214:     }
215:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
216:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
217:     if (!lrc[1]) return(0);
218:   }

220:   /* Get Nedelec field */
221:   MatISSetUpSF(pc->pmat);
222:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
223:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
224:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
225:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
226:     ISGetLocalSize(nedfieldlocal,&ne);
227:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
228:     ne            = n;
229:     nedfieldlocal = NULL;
230:     global        = PETSC_TRUE;
231:   } else if (field == PETSC_DECIDE) {
232:     PetscInt rst,ren,*idx;

234:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
235:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
236:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
237:     for (i=rst;i<ren;i++) {
238:       PetscInt nc;

240:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
242:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:     }
244:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
245:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246:     PetscMalloc1(n,&idx);
247:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
248:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
249:   } else {
250:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
251:   }

253:   /* Sanity checks */
254:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
255:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
256:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);

258:   /* Just set primal dofs and return */
259:   if (setprimal) {
260:     IS       enedfieldlocal;
261:     PetscInt *eidxs;

263:     PetscMalloc1(ne,&eidxs);
264:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
265:     if (nedfieldlocal) {
266:       ISGetIndices(nedfieldlocal,&idxs);
267:       for (i=0,cum=0;i<ne;i++) {
268:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
269:           eidxs[cum++] = idxs[i];
270:         }
271:       }
272:       ISRestoreIndices(nedfieldlocal,&idxs);
273:     } else {
274:       for (i=0,cum=0;i<ne;i++) {
275:         if (PetscRealPart(vals[i]) > 2.) {
276:           eidxs[cum++] = i;
277:         }
278:       }
279:     }
280:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
281:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
282:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
283:     PetscFree(eidxs);
284:     ISDestroy(&nedfieldlocal);
285:     ISDestroy(&enedfieldlocal);
286:     return(0);
287:   }

289:   /* Compute some l2g maps */
290:   if (nedfieldlocal) {
291:     IS is;

293:     /* need to map from the local Nedelec field to local numbering */
294:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
295:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
296:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
297:     ISLocalToGlobalMappingCreateIS(is,&al2g);
298:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
299:     if (global) {
300:       PetscObjectReference((PetscObject)al2g);
301:       el2g = al2g;
302:     } else {
303:       IS gis;

305:       ISRenumber(is,NULL,NULL,&gis);
306:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
307:       ISDestroy(&gis);
308:     }
309:     ISDestroy(&is);
310:   } else {
311:     /* restore default */
312:     pcbddc->nedfield = -1;
313:     /* one ref for the destruction of al2g, one for el2g */
314:     PetscObjectReference((PetscObject)al2g);
315:     PetscObjectReference((PetscObject)al2g);
316:     el2g = al2g;
317:     fl2g = NULL;
318:   }

320:   /* Start communication to drop connections for interior edges (for cc analysis only) */
321:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
322:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
323:   if (nedfieldlocal) {
324:     ISGetIndices(nedfieldlocal,&idxs);
325:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
326:     ISRestoreIndices(nedfieldlocal,&idxs);
327:   } else {
328:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
329:   }
330:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

333:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
334:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
335:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
336:     if (global) {
337:       PetscInt rst;

339:       MatGetOwnershipRange(G,&rst,NULL);
340:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
341:         if (matis->sf_rootdata[i] < 2) {
342:           matis->sf_rootdata[cum++] = i + rst;
343:         }
344:       }
345:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
346:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
347:     } else {
348:       PetscInt *tbz;

350:       PetscMalloc1(ne,&tbz);
351:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
352:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353:       ISGetIndices(nedfieldlocal,&idxs);
354:       for (i=0,cum=0;i<ne;i++)
355:         if (matis->sf_leafdata[idxs[i]] == 1)
356:           tbz[cum++] = i;
357:       ISRestoreIndices(nedfieldlocal,&idxs);
358:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
359:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
360:       PetscFree(tbz);
361:     }
362:   } else { /* we need the entire G to infer the nullspace */
363:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
364:     G    = pcbddc->discretegradient;
365:   }

367:   /* Extract subdomain relevant rows of G */
368:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
369:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
370:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
371:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
372:   ISDestroy(&lned);
373:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
374:   MatDestroy(&lGall);
375:   MatISGetLocalMat(lGis,&lG);

377:   /* SF for nodal dofs communications */
378:   MatGetLocalSize(G,NULL,&Lv);
379:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
380:   PetscObjectReference((PetscObject)vl2g);
381:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
382:   PetscSFCreate(comm,&sfv);
383:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
384:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
385:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
386:   i    = singular ? 2 : 1;
387:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

389:   /* Destroy temporary G created in MATIS format and modified G */
390:   PetscObjectReference((PetscObject)lG);
391:   MatDestroy(&lGis);
392:   MatDestroy(&G);

394:   if (print) {
395:     PetscObjectSetName((PetscObject)lG,"initial_lG");
396:     MatView(lG,NULL);
397:   }

399:   /* Save lG for values insertion in change of basis */
400:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

402:   /* Analyze the edge-nodes connections (duplicate lG) */
403:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
404:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
405:   PetscBTCreate(nv,&btv);
406:   PetscBTCreate(ne,&bte);
407:   PetscBTCreate(ne,&btb);
408:   PetscBTCreate(ne,&btbd);
409:   PetscBTCreate(nv,&btvcand);
410:   /* need to import the boundary specification to ensure the
411:      proper detection of coarse edges' endpoints */
412:   if (pcbddc->DirichletBoundariesLocal) {
413:     IS is;

415:     if (fl2g) {
416:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
417:     } else {
418:       is = pcbddc->DirichletBoundariesLocal;
419:     }
420:     ISGetLocalSize(is,&cum);
421:     ISGetIndices(is,&idxs);
422:     for (i=0;i<cum;i++) {
423:       if (idxs[i] >= 0) {
424:         PetscBTSet(btb,idxs[i]);
425:         PetscBTSet(btbd,idxs[i]);
426:       }
427:     }
428:     ISRestoreIndices(is,&idxs);
429:     if (fl2g) {
430:       ISDestroy(&is);
431:     }
432:   }
433:   if (pcbddc->NeumannBoundariesLocal) {
434:     IS is;

436:     if (fl2g) {
437:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
438:     } else {
439:       is = pcbddc->NeumannBoundariesLocal;
440:     }
441:     ISGetLocalSize(is,&cum);
442:     ISGetIndices(is,&idxs);
443:     for (i=0;i<cum;i++) {
444:       if (idxs[i] >= 0) {
445:         PetscBTSet(btb,idxs[i]);
446:       }
447:     }
448:     ISRestoreIndices(is,&idxs);
449:     if (fl2g) {
450:       ISDestroy(&is);
451:     }
452:   }

454:   /* Count neighs per dof */
455:   PetscCalloc1(ne,&ecount);
456:   PetscMalloc1(ne,&eneighs);
457:   ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
458:   for (i=1,cum=0;i<n_neigh;i++) {
459:     cum += n_shared[i];
460:     for (j=0;j<n_shared[i];j++) {
461:       ecount[shared[i][j]]++;
462:     }
463:   }
464:   if (ne) {
465:     PetscMalloc1(cum,&eneighs[0]);
466:   }
467:   for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
468:   PetscMemzero(ecount,ne*sizeof(PetscInt));
469:   for (i=1;i<n_neigh;i++) {
470:     for (j=0;j<n_shared[i];j++) {
471:       PetscInt k = shared[i][j];
472:       eneighs[k][ecount[k]] = neigh[i];
473:       ecount[k]++;
474:     }
475:   }
476:   for (i=0;i<ne;i++) {
477:     PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
478:   }
479:   ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
480:   PetscCalloc1(nv,&vcount);
481:   PetscMalloc1(nv,&vneighs);
482:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
483:   for (i=1,cum=0;i<n_neigh;i++) {
484:     cum += n_shared[i];
485:     for (j=0;j<n_shared[i];j++) {
486:       vcount[shared[i][j]]++;
487:     }
488:   }
489:   if (nv) {
490:     PetscMalloc1(cum,&vneighs[0]);
491:   }
492:   for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
493:   PetscMemzero(vcount,nv*sizeof(PetscInt));
494:   for (i=1;i<n_neigh;i++) {
495:     for (j=0;j<n_shared[i];j++) {
496:       PetscInt k = shared[i][j];
497:       vneighs[k][vcount[k]] = neigh[i];
498:       vcount[k]++;
499:     }
500:   }
501:   for (i=0;i<nv;i++) {
502:     PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
503:   }
504:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

506:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
507:      for proper detection of coarse edges' endpoints */
508:   PetscBTCreate(ne,&btee);
509:   for (i=0;i<ne;i++) {
510:     if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
511:       PetscBTSet(btee,i);
512:     }
513:   }
514:   PetscMalloc1(ne,&marks);
515:   if (!conforming) {
516:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
517:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
518:   }
519:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
520:   MatSeqAIJGetArray(lGe,&vals);
521:   cum  = 0;
522:   for (i=0;i<ne;i++) {
523:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
524:     if (!PetscBTLookup(btee,i)) {
525:       marks[cum++] = i;
526:       continue;
527:     }
528:     /* set badly connected edge dofs as primal */
529:     if (!conforming) {
530:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
531:         marks[cum++] = i;
532:         PetscBTSet(bte,i);
533:         for (j=ii[i];j<ii[i+1];j++) {
534:           PetscBTSet(btv,jj[j]);
535:         }
536:       } else {
537:         /* every edge dofs should be connected trough a certain number of nodal dofs
538:            to other edge dofs belonging to coarse edges
539:            - at most 2 endpoints
540:            - order-1 interior nodal dofs
541:            - no undefined nodal dofs (nconn < order)
542:         */
543:         PetscInt ends = 0,ints = 0, undef = 0;
544:         for (j=ii[i];j<ii[i+1];j++) {
545:           PetscInt v = jj[j],k;
546:           PetscInt nconn = iit[v+1]-iit[v];
547:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
548:           if (nconn > order) ends++;
549:           else if (nconn == order) ints++;
550:           else undef++;
551:         }
552:         if (undef || ends > 2 || ints != order -1) {
553:           marks[cum++] = i;
554:           PetscBTSet(bte,i);
555:           for (j=ii[i];j<ii[i+1];j++) {
556:             PetscBTSet(btv,jj[j]);
557:           }
558:         }
559:       }
560:     }
561:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
562:     if (!order && ii[i+1] != ii[i]) {
563:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
564:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
565:     }
566:   }
567:   PetscBTDestroy(&btee);
568:   MatSeqAIJRestoreArray(lGe,&vals);
569:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
570:   if (!conforming) {
571:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
572:     MatDestroy(&lGt);
573:   }
574:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

576:   /* identify splitpoints and corner candidates */
577:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
578:   if (print) {
579:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
580:     MatView(lGe,NULL);
581:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
582:     MatView(lGt,NULL);
583:   }
584:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
585:   MatSeqAIJGetArray(lGt,&vals);
586:   for (i=0;i<nv;i++) {
587:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
588:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
589:     if (!order) { /* variable order */
590:       PetscReal vorder = 0.;

592:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
593:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
594:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
595:       ord  = 1;
596:     }
597: #if defined(PETSC_USE_DEBUG)
598:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
599: #endif
600:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
601:       if (PetscBTLookup(btbd,jj[j])) {
602:         bdir = PETSC_TRUE;
603:         break;
604:       }
605:       if (vc != ecount[jj[j]]) {
606:         sneighs = PETSC_FALSE;
607:       } else {
608:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
609:         for (k=0;k<vc;k++) {
610:           if (vn[k] != en[k]) {
611:             sneighs = PETSC_FALSE;
612:             break;
613:           }
614:         }
615:       }
616:     }
617:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
618:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
619:       PetscBTSet(btv,i);
620:     } else if (test == ord) {
621:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
622:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
623:         PetscBTSet(btv,i);
624:       } else {
625:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
626:         PetscBTSet(btvcand,i);
627:       }
628:     }
629:   }
630:   PetscFree(ecount);
631:   PetscFree(vcount);
632:   if (ne) {
633:     PetscFree(eneighs[0]);
634:   }
635:   if (nv) {
636:     PetscFree(vneighs[0]);
637:   }
638:   PetscFree(eneighs);
639:   PetscFree(vneighs);
640:   PetscBTDestroy(&btbd);

642:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
643:   if (order != 1) {
644:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
645:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
646:     for (i=0;i<nv;i++) {
647:       if (PetscBTLookup(btvcand,i)) {
648:         PetscBool found = PETSC_FALSE;
649:         for (j=ii[i];j<ii[i+1] && !found;j++) {
650:           PetscInt k,e = jj[j];
651:           if (PetscBTLookup(bte,e)) continue;
652:           for (k=iit[e];k<iit[e+1];k++) {
653:             PetscInt v = jjt[k];
654:             if (v != i && PetscBTLookup(btvcand,v)) {
655:               found = PETSC_TRUE;
656:               break;
657:             }
658:           }
659:         }
660:         if (!found) {
661:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d CLEARED\n",i);
662:           PetscBTClear(btvcand,i);
663:         } else {
664:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d ACCEPTED\n",i);
665:         }
666:       }
667:     }
668:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
669:   }
670:   MatSeqAIJRestoreArray(lGt,&vals);
671:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
672:   MatDestroy(&lGe);

674:   /* Get the local G^T explicitly */
675:   MatDestroy(&lGt);
676:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
677:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

679:   /* Mark interior nodal dofs */
680:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
681:   PetscBTCreate(nv,&btvi);
682:   for (i=1;i<n_neigh;i++) {
683:     for (j=0;j<n_shared[i];j++) {
684:       PetscBTSet(btvi,shared[i][j]);
685:     }
686:   }
687:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

689:   /* communicate corners and splitpoints */
690:   PetscMalloc1(nv,&vmarks);
691:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
692:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
693:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

695:   if (print) {
696:     IS tbz;

698:     cum = 0;
699:     for (i=0;i<nv;i++)
700:       if (sfvleaves[i])
701:         vmarks[cum++] = i;

703:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
704:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
705:     ISView(tbz,NULL);
706:     ISDestroy(&tbz);
707:   }

709:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
710:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
711:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
712:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

714:   /* Zero rows of lGt corresponding to identified corners
715:      and interior nodal dofs */
716:   cum = 0;
717:   for (i=0;i<nv;i++) {
718:     if (sfvleaves[i]) {
719:       vmarks[cum++] = i;
720:       PetscBTSet(btv,i);
721:     }
722:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
723:   }
724:   PetscBTDestroy(&btvi);
725:   if (print) {
726:     IS tbz;

728:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
729:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
730:     ISView(tbz,NULL);
731:     ISDestroy(&tbz);
732:   }
733:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
734:   PetscFree(vmarks);
735:   PetscSFDestroy(&sfv);
736:   PetscFree2(sfvleaves,sfvroots);

738:   /* Recompute G */
739:   MatDestroy(&lG);
740:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
741:   if (print) {
742:     PetscObjectSetName((PetscObject)lG,"used_lG");
743:     MatView(lG,NULL);
744:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
745:     MatView(lGt,NULL);
746:   }

748:   /* Get primal dofs (if any) */
749:   cum = 0;
750:   for (i=0;i<ne;i++) {
751:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
752:   }
753:   if (fl2g) {
754:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
755:   }
756:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
757:   if (print) {
758:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
759:     ISView(primals,NULL);
760:   }
761:   PetscBTDestroy(&bte);
762:   /* TODO: what if the user passed in some of them ?  */
763:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
764:   ISDestroy(&primals);

766:   /* Compute edge connectivity */
767:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
768:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
769:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
770:   if (fl2g) {
771:     PetscBT   btf;
772:     PetscInt  *iia,*jja,*iiu,*jju;
773:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

775:     /* create CSR for all local dofs */
776:     PetscMalloc1(n+1,&iia);
777:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
778:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
779:       iiu = pcbddc->mat_graph->xadj;
780:       jju = pcbddc->mat_graph->adjncy;
781:     } else if (pcbddc->use_local_adj) {
782:       rest = PETSC_TRUE;
783:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
784:     } else {
785:       free   = PETSC_TRUE;
786:       PetscMalloc2(n+1,&iiu,n,&jju);
787:       iiu[0] = 0;
788:       for (i=0;i<n;i++) {
789:         iiu[i+1] = i+1;
790:         jju[i]   = -1;
791:       }
792:     }

794:     /* import sizes of CSR */
795:     iia[0] = 0;
796:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

798:     /* overwrite entries corresponding to the Nedelec field */
799:     PetscBTCreate(n,&btf);
800:     ISGetIndices(nedfieldlocal,&idxs);
801:     for (i=0;i<ne;i++) {
802:       PetscBTSet(btf,idxs[i]);
803:       iia[idxs[i]+1] = ii[i+1]-ii[i];
804:     }

806:     /* iia in CSR */
807:     for (i=0;i<n;i++) iia[i+1] += iia[i];

809:     /* jja in CSR */
810:     PetscMalloc1(iia[n],&jja);
811:     for (i=0;i<n;i++)
812:       if (!PetscBTLookup(btf,i))
813:         for (j=0;j<iiu[i+1]-iiu[i];j++)
814:           jja[iia[i]+j] = jju[iiu[i]+j];

816:     /* map edge dofs connectivity */
817:     if (jj) {
818:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
819:       for (i=0;i<ne;i++) {
820:         PetscInt e = idxs[i];
821:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
822:       }
823:     }
824:     ISRestoreIndices(nedfieldlocal,&idxs);
825:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
826:     if (rest) {
827:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
828:     }
829:     if (free) {
830:       PetscFree2(iiu,jju);
831:     }
832:     PetscBTDestroy(&btf);
833:   } else {
834:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
835:   }

837:   /* Analyze interface for edge dofs */
838:   PCBDDCAnalyzeInterface(pc);
839:   pcbddc->mat_graph->twodim = PETSC_FALSE;

841:   /* Get coarse edges in the edge space */
842:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
843:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

845:   if (fl2g) {
846:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
847:     PetscMalloc1(nee,&eedges);
848:     for (i=0;i<nee;i++) {
849:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
850:     }
851:   } else {
852:     eedges  = alleedges;
853:     primals = allprimals;
854:   }

856:   /* Mark fine edge dofs with their coarse edge id */
857:   PetscMemzero(marks,ne*sizeof(PetscInt));
858:   ISGetLocalSize(primals,&cum);
859:   ISGetIndices(primals,&idxs);
860:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
861:   ISRestoreIndices(primals,&idxs);
862:   if (print) {
863:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
864:     ISView(primals,NULL);
865:   }

867:   maxsize = 0;
868:   for (i=0;i<nee;i++) {
869:     PetscInt size,mark = i+1;

871:     ISGetLocalSize(eedges[i],&size);
872:     ISGetIndices(eedges[i],&idxs);
873:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
874:     ISRestoreIndices(eedges[i],&idxs);
875:     maxsize = PetscMax(maxsize,size);
876:   }

878:   /* Find coarse edge endpoints */
879:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
880:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
881:   for (i=0;i<nee;i++) {
882:     PetscInt mark = i+1,size;

884:     ISGetLocalSize(eedges[i],&size);
885:     if (!size && nedfieldlocal) continue;
886:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
887:     ISGetIndices(eedges[i],&idxs);
888:     if (print) {
889:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
890:       ISView(eedges[i],NULL);
891:     }
892:     for (j=0;j<size;j++) {
893:       PetscInt k, ee = idxs[j];
894:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %d\n",ee);
895:       for (k=ii[ee];k<ii[ee+1];k++) {
896:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %d\n",jj[k]);
897:         if (PetscBTLookup(btv,jj[k])) {
898:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %d\n",jj[k]);
899:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
900:           PetscInt  k2;
901:           PetscBool corner = PETSC_FALSE;
902:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
903:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
904:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
905:                if the edge dof lie on the natural part of the boundary */
906:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
907:               corner = PETSC_TRUE;
908:               break;
909:             }
910:           }
911:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
912:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %d\n",jj[k]);
913:             PetscBTSet(btv,jj[k]);
914:           } else {
915:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
916:           }
917:         }
918:       }
919:     }
920:     ISRestoreIndices(eedges[i],&idxs);
921:   }
922:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
923:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
924:   PetscBTDestroy(&btb);

926:   /* Reset marked primal dofs */
927:   ISGetLocalSize(primals,&cum);
928:   ISGetIndices(primals,&idxs);
929:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
930:   ISRestoreIndices(primals,&idxs);

932:   /* Now use the initial lG */
933:   MatDestroy(&lG);
934:   MatDestroy(&lGt);
935:   lG   = lGinit;
936:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

938:   /* Compute extended cols indices */
939:   PetscBTCreate(nv,&btvc);
940:   PetscBTCreate(nee,&bter);
941:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
942:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
943:   i   *= maxsize;
944:   PetscCalloc1(nee,&extcols);
945:   PetscMalloc2(i,&extrow,i,&gidxs);
946:   eerr = PETSC_FALSE;
947:   for (i=0;i<nee;i++) {
948:     PetscInt size,found = 0;

950:     cum  = 0;
951:     ISGetLocalSize(eedges[i],&size);
952:     if (!size && nedfieldlocal) continue;
953:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
954:     ISGetIndices(eedges[i],&idxs);
955:     PetscBTMemzero(nv,btvc);
956:     for (j=0;j<size;j++) {
957:       PetscInt k,ee = idxs[j];
958:       for (k=ii[ee];k<ii[ee+1];k++) {
959:         PetscInt vv = jj[k];
960:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
961:         else if (!PetscBTLookupSet(btvc,vv)) found++;
962:       }
963:     }
964:     ISRestoreIndices(eedges[i],&idxs);
965:     PetscSortRemoveDupsInt(&cum,extrow);
966:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
967:     PetscSortIntWithArray(cum,gidxs,extrow);
968:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
969:     /* it may happen that endpoints are not defined at this point
970:        if it is the case, mark this edge for a second pass */
971:     if (cum != size -1 || found != 2) {
972:       PetscBTSet(bter,i);
973:       if (print) {
974:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
975:         ISView(eedges[i],NULL);
976:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
977:         ISView(extcols[i],NULL);
978:       }
979:       eerr = PETSC_TRUE;
980:     }
981:   }
982:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
983:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
984:   if (done) {
985:     PetscInt *newprimals;

987:     PetscMalloc1(ne,&newprimals);
988:     ISGetLocalSize(primals,&cum);
989:     ISGetIndices(primals,&idxs);
990:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
991:     ISRestoreIndices(primals,&idxs);
992:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
993:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
994:     for (i=0;i<nee;i++) {
995:       PetscBool has_candidates = PETSC_FALSE;
996:       if (PetscBTLookup(bter,i)) {
997:         PetscInt size,mark = i+1;

999:         ISGetLocalSize(eedges[i],&size);
1000:         ISGetIndices(eedges[i],&idxs);
1001:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1002:         for (j=0;j<size;j++) {
1003:           PetscInt k,ee = idxs[j];
1004:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1005:           for (k=ii[ee];k<ii[ee+1];k++) {
1006:             /* set all candidates located on the edge as corners */
1007:             if (PetscBTLookup(btvcand,jj[k])) {
1008:               PetscInt k2,vv = jj[k];
1009:               has_candidates = PETSC_TRUE;
1010:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %d\n",vv);
1011:               PetscBTSet(btv,vv);
1012:               /* set all edge dofs connected to candidate as primals */
1013:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
1014:                 if (marks[jjt[k2]] == mark) {
1015:                   PetscInt k3,ee2 = jjt[k2];
1016:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %d\n",ee2);
1017:                   newprimals[cum++] = ee2;
1018:                   /* finally set the new corners */
1019:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1020:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %d\n",jj[k3]);
1021:                     PetscBTSet(btv,jj[k3]);
1022:                   }
1023:                 }
1024:               }
1025:             } else {
1026:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %d\n",jj[k]);
1027:             }
1028:           }
1029:         }
1030:         if (!has_candidates) { /* circular edge */
1031:           PetscInt k, ee = idxs[0],*tmarks;

1033:           PetscCalloc1(ne,&tmarks);
1034:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %d\n",i);
1035:           for (k=ii[ee];k<ii[ee+1];k++) {
1036:             PetscInt k2;
1037:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %d\n",jj[k]);
1038:             PetscBTSet(btv,jj[k]);
1039:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1040:           }
1041:           for (j=0;j<size;j++) {
1042:             if (tmarks[idxs[j]] > 1) {
1043:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %d\n",idxs[j]);
1044:               newprimals[cum++] = idxs[j];
1045:             }
1046:           }
1047:           PetscFree(tmarks);
1048:         }
1049:         ISRestoreIndices(eedges[i],&idxs);
1050:       }
1051:       ISDestroy(&extcols[i]);
1052:     }
1053:     PetscFree(extcols);
1054:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1055:     PetscSortRemoveDupsInt(&cum,newprimals);
1056:     if (fl2g) {
1057:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1058:       ISDestroy(&primals);
1059:       for (i=0;i<nee;i++) {
1060:         ISDestroy(&eedges[i]);
1061:       }
1062:       PetscFree(eedges);
1063:     }
1064:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1065:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1066:     PetscFree(newprimals);
1067:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1068:     ISDestroy(&primals);
1069:     PCBDDCAnalyzeInterface(pc);
1070:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1071:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1072:     if (fl2g) {
1073:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1074:       PetscMalloc1(nee,&eedges);
1075:       for (i=0;i<nee;i++) {
1076:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1077:       }
1078:     } else {
1079:       eedges  = alleedges;
1080:       primals = allprimals;
1081:     }
1082:     PetscCalloc1(nee,&extcols);

1084:     /* Mark again */
1085:     PetscMemzero(marks,ne*sizeof(PetscInt));
1086:     for (i=0;i<nee;i++) {
1087:       PetscInt size,mark = i+1;

1089:       ISGetLocalSize(eedges[i],&size);
1090:       ISGetIndices(eedges[i],&idxs);
1091:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1092:       ISRestoreIndices(eedges[i],&idxs);
1093:     }
1094:     if (print) {
1095:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1096:       ISView(primals,NULL);
1097:     }

1099:     /* Recompute extended cols */
1100:     eerr = PETSC_FALSE;
1101:     for (i=0;i<nee;i++) {
1102:       PetscInt size;

1104:       cum  = 0;
1105:       ISGetLocalSize(eedges[i],&size);
1106:       if (!size && nedfieldlocal) continue;
1107:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1108:       ISGetIndices(eedges[i],&idxs);
1109:       for (j=0;j<size;j++) {
1110:         PetscInt k,ee = idxs[j];
1111:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1112:       }
1113:       ISRestoreIndices(eedges[i],&idxs);
1114:       PetscSortRemoveDupsInt(&cum,extrow);
1115:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1116:       PetscSortIntWithArray(cum,gidxs,extrow);
1117:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1118:       if (cum != size -1) {
1119:         if (print) {
1120:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1121:           ISView(eedges[i],NULL);
1122:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1123:           ISView(extcols[i],NULL);
1124:         }
1125:         eerr = PETSC_TRUE;
1126:       }
1127:     }
1128:   }
1129:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1130:   PetscFree2(extrow,gidxs);
1131:   PetscBTDestroy(&bter);
1132:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1133:   /* an error should not occur at this point */
1134:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1136:   /* Check the number of endpoints */
1137:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1138:   PetscMalloc1(2*nee,&corners);
1139:   PetscMalloc1(nee,&cedges);
1140:   for (i=0;i<nee;i++) {
1141:     PetscInt size, found = 0, gc[2];

1143:     /* init with defaults */
1144:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1145:     ISGetLocalSize(eedges[i],&size);
1146:     if (!size && nedfieldlocal) continue;
1147:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1148:     ISGetIndices(eedges[i],&idxs);
1149:     PetscBTMemzero(nv,btvc);
1150:     for (j=0;j<size;j++) {
1151:       PetscInt k,ee = idxs[j];
1152:       for (k=ii[ee];k<ii[ee+1];k++) {
1153:         PetscInt vv = jj[k];
1154:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1155:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1156:           corners[i*2+found++] = vv;
1157:         }
1158:       }
1159:     }
1160:     if (found != 2) {
1161:       PetscInt e;
1162:       if (fl2g) {
1163:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1164:       } else {
1165:         e = idxs[0];
1166:       }
1167:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1168:     }

1170:     /* get primal dof index on this coarse edge */
1171:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1172:     if (gc[0] > gc[1]) {
1173:       PetscInt swap  = corners[2*i];
1174:       corners[2*i]   = corners[2*i+1];
1175:       corners[2*i+1] = swap;
1176:     }
1177:     cedges[i] = idxs[size-1];
1178:     ISRestoreIndices(eedges[i],&idxs);
1179:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1180:   }
1181:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1182:   PetscBTDestroy(&btvc);

1184: #if defined(PETSC_USE_DEBUG)
1185:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1186:      not interfere with neighbouring coarse edges */
1187:   PetscMalloc1(nee+1,&emarks);
1188:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1189:   for (i=0;i<nv;i++) {
1190:     PetscInt emax = 0,eemax = 0;

1192:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1193:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1194:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1195:     for (j=1;j<nee+1;j++) {
1196:       if (emax < emarks[j]) {
1197:         emax = emarks[j];
1198:         eemax = j;
1199:       }
1200:     }
1201:     /* not relevant for edges */
1202:     if (!eemax) continue;

1204:     for (j=ii[i];j<ii[i+1];j++) {
1205:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1206:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1207:       }
1208:     }
1209:   }
1210:   PetscFree(emarks);
1211:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1212: #endif

1214:   /* Compute extended rows indices for edge blocks of the change of basis */
1215:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1216:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1217:   extmem *= maxsize;
1218:   PetscMalloc1(extmem*nee,&extrow);
1219:   PetscMalloc1(nee,&extrows);
1220:   PetscCalloc1(nee,&extrowcum);
1221:   for (i=0;i<nv;i++) {
1222:     PetscInt mark = 0,size,start;

1224:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1225:     for (j=ii[i];j<ii[i+1];j++)
1226:       if (marks[jj[j]] && !mark)
1227:         mark = marks[jj[j]];

1229:     /* not relevant */
1230:     if (!mark) continue;

1232:     /* import extended row */
1233:     mark--;
1234:     start = mark*extmem+extrowcum[mark];
1235:     size = ii[i+1]-ii[i];
1236:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1237:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1238:     extrowcum[mark] += size;
1239:   }
1240:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1241:   MatDestroy(&lGt);
1242:   PetscFree(marks);

1244:   /* Compress extrows */
1245:   cum  = 0;
1246:   for (i=0;i<nee;i++) {
1247:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1248:     PetscSortRemoveDupsInt(&size,start);
1249:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1250:     cum  = PetscMax(cum,size);
1251:   }
1252:   PetscFree(extrowcum);
1253:   PetscBTDestroy(&btv);
1254:   PetscBTDestroy(&btvcand);

1256:   /* Workspace for lapack inner calls and VecSetValues */
1257:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1259:   /* Create change of basis matrix (preallocation can be improved) */
1260:   MatCreate(comm,&T);
1261:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1262:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1263:   MatSetType(T,MATAIJ);
1264:   MatSeqAIJSetPreallocation(T,10,NULL);
1265:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1266:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1267:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1268:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1269:   ISLocalToGlobalMappingDestroy(&al2g);

1271:   /* Defaults to identity */
1272:   MatCreateVecs(pc->pmat,&tvec,NULL);
1273:   VecSet(tvec,1.0);
1274:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1275:   VecDestroy(&tvec);

1277:   /* Create discrete gradient for the coarser level if needed */
1278:   MatDestroy(&pcbddc->nedcG);
1279:   ISDestroy(&pcbddc->nedclocal);
1280:   if (pcbddc->current_level < pcbddc->max_levels) {
1281:     ISLocalToGlobalMapping cel2g,cvl2g;
1282:     IS                     wis,gwis;
1283:     PetscInt               cnv,cne;

1285:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1286:     if (fl2g) {
1287:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1288:     } else {
1289:       PetscObjectReference((PetscObject)wis);
1290:       pcbddc->nedclocal = wis;
1291:     }
1292:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1293:     ISDestroy(&wis);
1294:     ISRenumber(gwis,NULL,&cne,&wis);
1295:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1296:     ISDestroy(&wis);
1297:     ISDestroy(&gwis);

1299:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1300:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1301:     ISDestroy(&wis);
1302:     ISRenumber(gwis,NULL,&cnv,&wis);
1303:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1304:     ISDestroy(&wis);
1305:     ISDestroy(&gwis);

1307:     MatCreate(comm,&pcbddc->nedcG);
1308:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1309:     MatSetType(pcbddc->nedcG,MATAIJ);
1310:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1311:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1312:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1313:     ISLocalToGlobalMappingDestroy(&cel2g);
1314:     ISLocalToGlobalMappingDestroy(&cvl2g);
1315:   }
1316:   ISLocalToGlobalMappingDestroy(&vl2g);

1318: #if defined(PRINT_GDET)
1319:   inc = 0;
1320:   lev = pcbddc->current_level;
1321: #endif

1323:   /* Insert values in the change of basis matrix */
1324:   for (i=0;i<nee;i++) {
1325:     Mat         Gins = NULL, GKins = NULL;
1326:     IS          cornersis = NULL;
1327:     PetscScalar cvals[2];

1329:     if (pcbddc->nedcG) {
1330:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1331:     }
1332:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1333:     if (Gins && GKins) {
1334:       PetscScalar    *data;
1335:       const PetscInt *rows,*cols;
1336:       PetscInt       nrh,nch,nrc,ncc;

1338:       ISGetIndices(eedges[i],&cols);
1339:       /* H1 */
1340:       ISGetIndices(extrows[i],&rows);
1341:       MatGetSize(Gins,&nrh,&nch);
1342:       MatDenseGetArray(Gins,&data);
1343:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1344:       MatDenseRestoreArray(Gins,&data);
1345:       ISRestoreIndices(extrows[i],&rows);
1346:       /* complement */
1347:       MatGetSize(GKins,&nrc,&ncc);
1348:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1349:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1350:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1351:       MatDenseGetArray(GKins,&data);
1352:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1353:       MatDenseRestoreArray(GKins,&data);

1355:       /* coarse discrete gradient */
1356:       if (pcbddc->nedcG) {
1357:         PetscInt cols[2];

1359:         cols[0] = 2*i;
1360:         cols[1] = 2*i+1;
1361:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1362:       }
1363:       ISRestoreIndices(eedges[i],&cols);
1364:     }
1365:     ISDestroy(&extrows[i]);
1366:     ISDestroy(&extcols[i]);
1367:     ISDestroy(&cornersis);
1368:     MatDestroy(&Gins);
1369:     MatDestroy(&GKins);
1370:   }
1371:   ISLocalToGlobalMappingDestroy(&el2g);

1373:   /* Start assembling */
1374:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1375:   if (pcbddc->nedcG) {
1376:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1377:   }

1379:   /* Free */
1380:   if (fl2g) {
1381:     ISDestroy(&primals);
1382:     for (i=0;i<nee;i++) {
1383:       ISDestroy(&eedges[i]);
1384:     }
1385:     PetscFree(eedges);
1386:   }

1388:   /* hack mat_graph with primal dofs on the coarse edges */
1389:   {
1390:     PCBDDCGraph graph   = pcbddc->mat_graph;
1391:     PetscInt    *oqueue = graph->queue;
1392:     PetscInt    *ocptr  = graph->cptr;
1393:     PetscInt    ncc,*idxs;

1395:     /* find first primal edge */
1396:     if (pcbddc->nedclocal) {
1397:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1398:     } else {
1399:       if (fl2g) {
1400:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1401:       }
1402:       idxs = cedges;
1403:     }
1404:     cum = 0;
1405:     while (cum < nee && cedges[cum] < 0) cum++;

1407:     /* adapt connected components */
1408:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1409:     graph->cptr[0] = 0;
1410:     for (i=0,ncc=0;i<graph->ncc;i++) {
1411:       PetscInt lc = ocptr[i+1]-ocptr[i];
1412:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1413:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1414:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1415:         ncc++;
1416:         lc--;
1417:         cum++;
1418:         while (cum < nee && cedges[cum] < 0) cum++;
1419:       }
1420:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1421:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1422:       ncc++;
1423:     }
1424:     graph->ncc = ncc;
1425:     if (pcbddc->nedclocal) {
1426:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1427:     }
1428:     PetscFree2(ocptr,oqueue);
1429:   }
1430:   ISLocalToGlobalMappingDestroy(&fl2g);
1431:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1432:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1433:   MatDestroy(&conn);

1435:   ISDestroy(&nedfieldlocal);
1436:   PetscFree(extrow);
1437:   PetscFree2(work,rwork);
1438:   PetscFree(corners);
1439:   PetscFree(cedges);
1440:   PetscFree(extrows);
1441:   PetscFree(extcols);
1442:   MatDestroy(&lG);

1444:   /* Complete assembling */
1445:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1446:   if (pcbddc->nedcG) {
1447:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1448: #if 0
1449:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1450:     MatView(pcbddc->nedcG,NULL);
1451: #endif
1452:   }

1454:   /* set change of basis */
1455:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1456:   MatDestroy(&T);

1458:   return(0);
1459: }

1461: /* the near-null space of BDDC carries information on quadrature weights,
1462:    and these can be collinear -> so cheat with MatNullSpaceCreate
1463:    and create a suitable set of basis vectors first */
/*
   PCBDDCNullSpaceCreate - calls MatNullSpaceCreate() on quad_vecs after temporarily
   writing a few marker entries into each vector, then clears those entries again.

   Input:
     comm      - communicator passed through to MatNullSpaceCreate()
     has_const - passed through to MatNullSpaceCreate(); also selects which marker
                 entries are written (see the first loop)
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors handed to MatNullSpaceCreate(); the marker entries written
                 here are reset to zero before returning
   Output:
     nnsp      - the object produced by MatNullSpaceCreate()

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is true and the calling
   process owns fewer than 2*nvecs entries of a vector.

   NOTE(review): the embedded line numbering of this listing has gaps (e.g. 1466,
   1468-1469, 1472), so statements such as error checking may have been elided by
   the rendering -- confirm against the real source before editing the code.
*/
1464: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1465: {
1467:   PetscInt       i;

  /* First pass: give the i-th vector its own marker entries so that the set handed
     to MatNullSpaceCreate() is not collinear (see the comment preceding this function) */
1470:   for (i=0;i<nvecs;i++) {
1471:     PetscInt first,last;

1473:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
    /* with has_const, two consecutive entries per vector are used below, hence the
       requirement of at least 2*nvecs locally owned entries */
1474:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
    /* only the process whose ownership range [first,last) contains index i writes */
1475:     if (i>=first && i < last) {
1476:       PetscScalar *data;
1477:       VecGetArray(quad_vecs[i],&data);
1478:       if (!has_const) {
        /* single unit entry at global index i */
1479:         data[i-first] = 1.;
1480:       } else {
        /* pair (1/sqrt(2), -1/sqrt(2)): the two entries sum to zero; presumably chosen
           so the vector stays orthogonal to the constant vector and, if the vector is
           otherwise zero, has unit norm -- TODO confirm */
1481:         data[2*i-first] = 1./PetscSqrtReal(2.);
1482:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1483:       }
1484:       VecRestoreArray(quad_vecs[i],&data);
1485:     }
1486:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1487:   }
1488:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
  /* Second pass: undo the marker entries written above. The VecLockPop/VecLockPush
     pair brackets the write; presumably MatNullSpaceCreate() left the vectors
     read-locked -- verify against the MatNullSpaceCreate() implementation */
1489:   for (i=0;i<nvecs;i++) { /* reset vectors */
1490:     PetscInt first,last;
1491:     VecLockPop(quad_vecs[i]);
1492:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1493:     if (i>=first && i < last) {
1494:       PetscScalar *data;
1495:       VecGetArray(quad_vecs[i],&data);
1496:       if (!has_const) {
1497:         data[i-first] = 0.;
1498:       } else {
1499:         data[2*i-first] = 0.;
1500:         data[2*i-first+1] = 0.;
1501:       }
1502:       VecRestoreArray(quad_vecs[i],&data);
1503:     }
1504:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1505:     VecLockPush(quad_vecs[i]);
1506:   }
1507:   return(0);
1508: }

/*
   PCBDDCComputeNoNetFlux - builds the MatNullSpace returned in nnsp from a set of
   "quadrature" vectors filled with entries of loc_divudotp (or its transpose)
   applied to a vector of ones, restricted to the faces and edges of graph.

   Input:
     A         - matrix used to create the global vectors, to obtain the
                 local-to-global mapping and the communicator
     divudotp  - matrix whose local part (MatISGetLocalMat) is applied to ones
     transpose - PETSC_FALSE: use MatMultTranspose on the local matrix and the first
                 (row) mapping of A; PETSC_TRUE: use MatMult and the second (column)
                 mapping of A
     vl2l      - optional IS; when non-NULL the local result is scattered through it
                 into a vector shaped like the local matrix of A
     graph     - provides the face/edge candidates, count[] and neighbours_set[]
   Output:
     nnsp      - object created by PCBDDCNullSpaceCreate() (has_const = PETSC_FALSE)

   NOTE(review): the embedded line numbering of this listing has gaps, so statements
   such as error checking may have been elided by the rendering -- confirm against
   the real source before editing the code.
*/
1510: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1511: {
1512:   Mat                    loc_divudotp;
1513:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1514:   ISLocalToGlobalMapping map;
1515:   IS                     *faces,*edges;
1516:   PetscScalar            *vals;
1517:   const PetscScalar      *array;
1518:   PetscInt               i,maxneighs,lmaxneighs,maxsize,nf,ne;
1519:   PetscMPIInt            rank;
1520:   PetscErrorCode         ierr;

  /* faces and edges of the interface; released by PCBDDCGraphRestoreCandidatesIS below */
1523:   PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
  /* local bound on how many quadrature vectors are needed: 2 when graph->twodim is
     set, otherwise 1 + the largest graph->count[] found on the first dof of any edge */
1524:   if (graph->twodim) {
1525:     lmaxneighs = 2;
1526:   } else {
1527:     lmaxneighs = 1;
1528:     for (i=0;i<ne;i++) {
1529:       const PetscInt *idxs;
1530:       ISGetIndices(edges[i],&idxs);
1531:       lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]);
1532:       ISRestoreIndices(edges[i],&idxs);
1533:     }
1534:     lmaxneighs++; /* graph count does not include self */
1535:   }
  /* every process must create the same number of vectors: take the global maximum */
1536:   MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
  /* vals is a scratch buffer sized for the largest edge or face */
1537:   maxsize = 0;
1538:   for (i=0;i<ne;i++) {
1539:     PetscInt nn;
1540:     ISGetLocalSize(edges[i],&nn);
1541:     maxsize = PetscMax(maxsize,nn);
1542:   }
1543:   for (i=0;i<nf;i++) {
1544:     PetscInt nn;
1545:     ISGetLocalSize(faces[i],&nn);
1546:     maxsize = PetscMax(maxsize,nn);
1547:   }
1548:   PetscMalloc1(maxsize,&vals);
1549:   /* create vectors to hold quadrature weights */
1550:   MatCreateVecs(A,&quad_vec,NULL);
1551:   if (!transpose) {
1552:     MatGetLocalToGlobalMapping(A,&map,NULL);
1553:   } else {
1554:     MatGetLocalToGlobalMapping(A,NULL,&map);
1555:   }
1556:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1557:   VecDestroy(&quad_vec);
  /* the null space object is created first, on the still-unfilled vectors; they are
     filled in place afterwards */
1558:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
  /* PCBDDCNullSpaceCreate ends with VecLockPush on each vector; pop that lock so the
     vectors can be written, and attach the mapping needed by VecSetValuesLocal */
1559:   for (i=0;i<maxneighs;i++) {
1560:     VecLockPop(quad_vecs[i]);
1561:     VecSetLocalToGlobalMapping(quad_vecs[i],map);
1562:   }

1564:   /* compute local quad vec */
1565:   MatISGetLocalMat(divudotp,&loc_divudotp);
  /* p is the all-ones input and v the output; which side of the local matrix each one
     is compatible with depends on transpose */
1566:   if (!transpose) {
1567:     MatCreateVecs(loc_divudotp,&v,&p);
1568:   } else {
1569:     MatCreateVecs(loc_divudotp,&p,&v);
1570:   }
1571:   VecSet(p,1.);
1572:   if (!transpose) {
1573:     MatMultTranspose(loc_divudotp,p,v);
1574:   } else {
1575:     MatMult(loc_divudotp,p,v);
1576:   }
  /* optional re-indexing of v into a vector shaped like the local matrix of A;
     NOTE(review): looks like vl2l selects the entries of v to copy -- confirm the
     intended direction of this index set against the caller */
1577:   if (vl2l) {
1578:     Mat        lA;
1579:     VecScatter sc;

1581:     MatISGetLocalMat(A,&lA);
1582:     MatCreateVecs(lA,&vins,NULL);
1583:     VecScatterCreate(v,vl2l,vins,NULL,&sc);
1584:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1585:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1586:     VecScatterDestroy(&sc);
1587:   } else {
    /* no scatter: vins aliases v, so only v is destroyed later */
1588:     vins = v;
1589:   }
1590:   VecGetArrayRead(vins,&array);
1591:   VecDestroy(&p);

1593:   /* insert in global quadrature vecs */
1594:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1595:   for (i=0;i<nf;i++) {
1596:     const PetscInt    *idxs;
1597:     PetscInt          idx,nn,j;

1599:     ISGetIndices(faces[i],&idxs);
1600:     ISGetLocalSize(faces[i],&nn);
    /* gather the local values on the dofs of this face */
1601:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
    /* choose the target vector from the position of this rank relative to the
       neighbour set of the first dof; assumes rank is NOT contained in that set
       (consistent with the "does not include self" remark above), in which case
       PetscFindInt reports -(slot+1) and the next line recovers slot -- TODO confirm */
1602:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1603:     idx  = -(idx+1);
1604:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1605:     ISRestoreIndices(faces[i],&idxs);
1606:   }
  /* same insertion procedure, applied to the edges */
1607:   for (i=0;i<ne;i++) {
1608:     const PetscInt    *idxs;
1609:     PetscInt          idx,nn,j;

1611:     ISGetIndices(edges[i],&idxs);
1612:     ISGetLocalSize(edges[i],&nn);
1613:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1614:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1615:     idx  = -(idx+1);
1616:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1617:     ISRestoreIndices(edges[i],&idxs);
1618:   }
1619:   PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
1620:   VecRestoreArrayRead(vins,&array);
  /* vins is a separate object only when the scatter branch created it */
1621:   if (vl2l) {
1622:     VecDestroy(&vins);
1623:   }
1624:   VecDestroy(&v);
1625:   PetscFree(vals);

1627:   /* assemble near null space */
1628:   for (i=0;i<maxneighs;i++) {
1629:     VecAssemblyBegin(quad_vecs[i]);
1630:   }
  /* finish assembly and re-apply the lock popped earlier */
1631:   for (i=0;i<maxneighs;i++) {
1632:     VecAssemblyEnd(quad_vecs[i]);
1633:     VecLockPush(quad_vecs[i]);
1634:   }
  /* drop the local handles; presumably *nnsp keeps its own references to the
     vectors -- verify against MatNullSpaceCreate() */
1635:   VecDestroyVecs(maxneighs,&quad_vecs);
1636:   return(0);
1637: }


/*
   PCBDDCComputeLocalTopologyInfo - Brings the topology information stored in the PC
   (dofs splitting, Dirichlet and Neumann boundaries, user-defined primal vertices)
   into local index sets.

   Input Parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Notes:
   The option -pc_bddc_monolithic skips the whole dofs-splitting section.
   Index sets for the dofs splitting that are converted with PCBDDCGlobalToLocal() are
   destroyed right after the conversion, and the array holding them is freed.
   Index sets that are already available in local form are passed through
   PCBDDCConsistencyCheckIS() instead of being converted.
*/
1640: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1641: {
1643:   Vec            local,global;
1644:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1645:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1646:   PetscBool      monolithic = PETSC_FALSE;

1649:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1650:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1651:   PetscOptionsEnd();
1652:   /* need to convert from global to local topology information and remove references to information in global ordering */
        /* work vectors handed to every PCBDDCGlobalToLocal() call below; destroyed before returning */
1653:   MatCreateVecs(pc->pmat,&global,NULL);
1654:   MatCreateVecs(matis->A,&local,NULL);
        /* monolithic: skip the dofs splitting and handle the boundaries only */
1655:   if (monolithic) goto boundary;

1657:   if (pcbddc->user_provided_isfordofs) {
          /* convert each IS of ISForDofs to ISForDofsLocal and drop the converted originals */
1658:     if (pcbddc->n_ISForDofs) {
1659:       PetscInt i;
1660:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1661:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1662:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1663:         ISDestroy(&pcbddc->ISForDofs[i]);
1664:       }
1665:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1666:       pcbddc->n_ISForDofs = 0;
1667:       PetscFree(pcbddc->ISForDofs);
1668:     }
1669:   } else {
1670:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1671:       DM dm;

            /* look for a DM first on the PC, then on the preconditioning matrix */
1673:       PCGetDM(pc, &dm);
1674:       if (!dm) {
1675:         MatGetDM(pc->pmat, &dm);
1676:       }
1677:       if (dm) {
              /* one local IS per field returned by DMCreateFieldDecomposition() */
1678:         IS      *fields;
1679:         PetscInt nf,i;
1680:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1681:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1682:         for (i=0;i<nf;i++) {
1683:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1684:           ISDestroy(&fields[i]);
1685:         }
1686:         PetscFree(fields);
1687:         pcbddc->n_ISForDofsLocal = nf;
1688:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1689:         PetscContainer   c;

1691:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1692:         if (c) {
1693:           MatISLocalFields lf;
1694:           PetscContainerGetPointer(c,(void**)&lf);
1695:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1696:         } else { /* fallback, create the default fields if bs > 1 */
1697:           PetscInt i, n = matis->A->rmap->n;
                /* i first receives the block size of pc->pmat, then is reused as the loop counter */
1698:           MatGetBlockSize(pc->pmat,&i);
1699:           if (i > 1) {
1700:             pcbddc->n_ISForDofsLocal = i;
1701:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
                  /* field i: n/bs indices starting at i with stride bs */
1702:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1703:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1704:             }
1705:           }
1706:         }
1707:       }
1708:     } else {
            /* a local splitting is already present: run the MPI_LAND consistency check on each IS */
1709:       PetscInt i;
1710:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1711:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1712:       }
1713:     }
1714:   }

1716: boundary:
        /* Dirichlet: convert the global IS if no local one exists, otherwise check the local one with MPI_LAND */
1717:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1718:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1719:   } else if (pcbddc->DirichletBoundariesLocal) {
1720:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1721:   }
        /* Neumann: same scheme, but the local IS is checked with MPI_LOR */
1722:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1723:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1724:   } else if (pcbddc->NeumannBoundariesLocal) {
1725:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1726:   }
        /* user primal vertices are only converted; no consistency check is run on an existing local IS */
1727:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1728:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1729:   }
1730:   VecDestroy(&global);
1731:   VecDestroy(&local);

1733:   return(0);
1734: }

/*
   PCBDDCConsistencyCheckIS - Rebuilds a local index set after combining, through the
   star forest of the MATIS matrix, the membership flags contributed by the processes.

   Input Parameters:
+  pc  - the preconditioner (pc->pmat->data is read as Mat_IS)
.  mop - reduction applied from leaves to roots; only MPI_LAND and MPI_LOR are accepted
-  is  - on input, the index set to check

   Output Parameter:
.  is - the input IS is destroyed and replaced by a new IS holding, in increasing order,
        the local indices whose flag is true after the reduce/broadcast round trip

   Notes:
   Entries of the input IS outside [0,n), n being the local row size of matis->A, are ignored.
   Any other value of mop raises PETSC_ERR_SUP.
   The buffers matis->sf_rootdata and matis->sf_leafdata are overwritten.
*/
1736: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1737: {
1738:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1739:   PetscErrorCode  ierr;
1740:   IS              nis;
1741:   const PetscInt  *idxs;
1742:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1743:   PetscBool       *ld;

1746:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1747:   MatISSetUpSF(pc->pmat);
        /* start the roots from the neutral element of the reduction: true for LAND, false for LOR */
1748:   if (mop == MPI_LAND) {
1749:     /* init rootdata with true */
1750:     ld   = (PetscBool*) matis->sf_rootdata;
1751:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1752:   } else {
1753:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1754:   }
        /* leaf flags: true exactly on the (valid) entries of the input IS */
1755:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1756:   ISGetLocalSize(*is,&nd);
1757:   ISGetIndices(*is,&idxs);
1758:   ld   = (PetscBool*) matis->sf_leafdata;
1759:   for (i=0;i<nd;i++)
1760:     if (-1 < idxs[i] && idxs[i] < n)
1761:       ld[idxs[i]] = PETSC_TRUE;
1762:   ISRestoreIndices(*is,&idxs);
        /* combine on the roots with mop, then send the combined flags back to the leaves (ld still points to leafdata) */
1763:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1764:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1765:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1766:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
        /* NOTE(review): nd entries are allocated for LAND and n for LOR, presumably because LAND can
           only drop entries of the input IS while LOR may add any local index -- confirm */
1767:   if (mop == MPI_LAND) {
1768:     PetscMalloc1(nd,&nidxs);
1769:   } else {
1770:     PetscMalloc1(n,&nidxs);
1771:   }
1772:   for (i=0,nnd=0;i<n;i++)
1773:     if (ld[i])
1774:       nidxs[nnd++] = i;
        /* nidxs is handed over to the new IS (PETSC_OWN_POINTER), so it is not freed here */
1775:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1776:   ISDestroy(is);
1777:   *is  = nis;
1778:   return(0);
1779: }

/*
   PCBDDCBenignRemoveInterior - Solves with pcbddc->ksp_D on the part of r selected by the
   pcis->global_to_D scatter and returns the result in z, which is zero elsewhere.

   Input Parameters:
+  pc - the preconditioner (pc->data is read both as PC_IS and as PC_BDDC)
-  r  - the input vector

   Output Parameter:
.  z - the output vector

   Notes:
   Returns immediately, leaving z untouched, when pcbddc->benign_have_null is false.
   When pcbddc->ChangeOfBasisMatrix is set, its transpose is applied to r before the solve
   and the matrix itself is applied to the result before returning.
   pcis->vec1_D, pcis->vec2_D and pcbddc->work_change are used as work space.
*/
1781: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1782: {
1783:   PC_IS             *pcis = (PC_IS*)(pc->data);
1784:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1785:   PetscErrorCode    ierr;

1788:   if (!pcbddc->benign_have_null) {
1789:     return(0);
1790:   }
1791:   if (pcbddc->ChangeOfBasisMatrix) {
1792:     Vec swap;

          /* work_change <- ChangeOfBasisMatrix^T r; afterwards the local name r refers to the transformed
             vector while pcbddc->work_change temporarily points to the caller's r (undone below) */
1794:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1795:     swap = pcbddc->work_change;
1796:     pcbddc->work_change = r;
1797:     r = swap;
1798:   }
1799:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1800:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1801:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
        /* entries of z not written by the reverse scatter stay zero */
1802:   VecSet(z,0.);
1803:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1804:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1805:   if (pcbddc->ChangeOfBasisMatrix) {
          /* put the original work vector back in place, then z <- ChangeOfBasisMatrix z via a copy */
1806:     pcbddc->work_change = r;
1807:     VecCopy(z,pcbddc->work_change);
1808:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1809:   }
1810:   return(0);
1811: }

/*
   PCBDDCBenignMatMult_Private_Private - Common kernel of the MatMult and MatMultTranspose
   operations of the shell matrices whose context is a PCBDDCBenignMatMult_ctx.

   Input Parameters:
+  A         - the shell matrix; its context provides the wrapped matrix ctx->A, the flags
               apply_left, apply_right, apply_p0, the index sets benign_zerodiag_subs and a work array
.  x         - the input vector
-  transpose - if true, ctx->A is applied transposed and the roles of ctx->apply_left
               and ctx->apply_right are exchanged

   Output Parameter:
.  y - the result

   Notes:
   In the formulas below, idxs[0..nz-1] are the entries of one index set of ctx->benign_zerodiag_subs,
   "first" denotes idxs[0..nz-2] and "last" denotes idxs[nz-1].
   Right transformation (on a copy of x held in ctx->work): last -= sum(first); in addition, when
   ctx->apply_p0 is true, the old value of last is added to every entry of first.
   Left transformation (in place on y): the old value of last is subtracted from every entry of first;
   then last += sum of the old values of first when ctx->apply_p0 is true, last = 0 otherwise.
   x itself is not modified: ctx->work is temporarily installed as its array with VecPlaceArray().
*/
1813: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1814: {
1815:   PCBDDCBenignMatMult_ctx ctx;
1816:   PetscErrorCode          ierr;
1817:   PetscBool               apply_right,apply_left,reset_x;

1820:   MatShellGetContext(A,&ctx);
        /* for the transposed product the left and right transformations swap their roles */
1821:   if (transpose) {
1822:     apply_right = ctx->apply_left;
1823:     apply_left = ctx->apply_right;
1824:   } else {
1825:     apply_right = ctx->apply_right;
1826:     apply_left = ctx->apply_left;
1827:   }
1828:   reset_x = PETSC_FALSE;
1829:   if (apply_right) {
1830:     const PetscScalar *ax;
1831:     PetscInt          nl,i;

          /* work on a copy of x so that the caller's values are preserved */
1833:     VecGetLocalSize(x,&nl);
1834:     VecGetArrayRead(x,&ax);
1835:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1836:     VecRestoreArrayRead(x,&ax);
1837:     for (i=0;i<ctx->benign_n;i++) {
1838:       PetscScalar    sum,val;
1839:       const PetscInt *idxs;
1840:       PetscInt       nz,j;
1841:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1842:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1843:       sum = 0.;
1844:       if (ctx->apply_p0) {
              /* val is read before the loop; sum accumulates the values prior to their update */
1845:         val = ctx->work[idxs[nz-1]];
1846:         for (j=0;j<nz-1;j++) {
1847:           sum += ctx->work[idxs[j]];
1848:           ctx->work[idxs[j]] += val;
1849:         }
1850:       } else {
1851:         for (j=0;j<nz-1;j++) {
1852:           sum += ctx->work[idxs[j]];
1853:         }
1854:       }
1855:       ctx->work[idxs[nz-1]] -= sum;
1856:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1857:     }
          /* let x expose the transformed copy for the product below; undone by VecResetArray() */
1858:     VecPlaceArray(x,ctx->work);
1859:     reset_x = PETSC_TRUE;
1860:   }
1861:   if (transpose) {
1862:     MatMultTranspose(ctx->A,x,y);
1863:   } else {
1864:     MatMult(ctx->A,x,y);
1865:   }
1866:   if (reset_x) {
1867:     VecResetArray(x);
1868:   }
1869:   if (apply_left) {
1870:     PetscScalar *ay;
1871:     PetscInt    i;

1873:     VecGetArray(y,&ay);
1874:     for (i=0;i<ctx->benign_n;i++) {
1875:       PetscScalar    sum,val;
1876:       const PetscInt *idxs;
1877:       PetscInt       nz,j;
1878:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1879:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* minus the value of the last entry, added to all the other entries of the set */
1880:       val = -ay[idxs[nz-1]];
1881:       if (ctx->apply_p0) {
1882:         sum = 0.;
1883:         for (j=0;j<nz-1;j++) {
1884:           sum += ay[idxs[j]];
1885:           ay[idxs[j]] += val;
1886:         }
1887:         ay[idxs[nz-1]] += sum;
1888:       } else {
1889:         for (j=0;j<nz-1;j++) {
1890:           ay[idxs[j]] += val;
1891:         }
1892:         ay[idxs[nz-1]] = 0.;
1893:       }
1894:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1895:     }
1896:     VecRestoreArray(y,&ay);
1897:   }
1898:   return(0);
1899: }

/*
   PCBDDCBenignMatMultTranspose_Private - Transposed product of the benign shell matrix:
   forwards to PCBDDCBenignMatMult_Private_Private() with transpose = PETSC_TRUE.

   Input Parameters:
+  A - the shell matrix
-  x - the input vector

   Output Parameter:
.  y - the result
*/
1901: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1902: {

1906:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
1907:   return(0);
1908: }

/*
   PCBDDCBenignMatMult_Private - Product of the benign shell matrix:
   forwards to PCBDDCBenignMatMult_Private_Private() with transpose = PETSC_FALSE.

   Input Parameters:
+  A - the shell matrix
-  x - the input vector

   Output Parameter:
.  y - the result
*/
1910: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1911: {

1915:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
1916:   return(0);
1917: }

/*
   PCBDDCBenignShellMat - Installs, or removes, the shell replacements of pcis->A_IB and pcis->A_BI.

   Input Parameters:
+  pc      - the preconditioner (pc->data is read both as PC_IS and as PC_BDDC)
-  restore - if false the shell matrices are installed, if true the original matrices are put back

   Notes:
   Install: pcis->A_IB becomes a MATSHELL whose context keeps the original A_IB in ctx->A and whose
   products are PCBDDCBenignMatMult_Private()/PCBDDCBenignMatMultTranspose_Private(); pcis->A_BI becomes
   the MatCreateTranspose() of that shell, and the original A_BI is parked in pcbddc->benign_original_mat.
   Nothing is done when pcbddc->benign_change or pcbddc->benign_n is zero, or when
   pcbddc->benign_change_explicit is set. It is an error (PETSC_ERR_PLIB) to install twice without restoring.
   Restore: nothing is done when pcbddc->benign_original_mat is NULL; otherwise both replacements are
   destroyed, the originals are put back and the context is released.
*/
1919: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1920: {
1921:   PC_IS                   *pcis = (PC_IS*)pc->data;
1922:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
1923:   PCBDDCBenignMatMult_ctx ctx;
1924:   PetscErrorCode          ierr;

1927:   if (!restore) {
1928:     Mat                A_IB,A_BI;
1929:     PetscScalar        *work;
1930:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

1932:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
1933:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
          /* scratch array of the shell product; owned by the context and freed on restore */
1934:     PetscMalloc1(pcis->n,&work);
1935:     MatCreate(PETSC_COMM_SELF,&A_IB);
1936:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
1937:     MatSetType(A_IB,MATSHELL);
1938:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
1939:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1940:     PetscNew(&ctx);
1941:     MatShellSetContext(A_IB,ctx);
          /* only the left transformation is requested for this shell, without the p0 contribution */
1942:     ctx->apply_left = PETSC_TRUE;
1943:     ctx->apply_right = PETSC_FALSE;
1944:     ctx->apply_p0 = PETSC_FALSE;
1945:     ctx->benign_n = pcbddc->benign_n;
1946:     if (reuse) {
            /* borrow the index sets stored in the reuse object; they must not be freed on restore */
1947:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1948:       ctx->free = PETSC_FALSE;
1949:     } else { /* TODO: could be optimized for successive solves */
1950:       ISLocalToGlobalMapping N_to_D;
1951:       PetscInt               i;

            /* renumber each set of pcbddc->benign_zerodiag_subs with respect to pcis->is_I_local,
               dropping the entries that do not belong to it (IS_GTOLM_DROP) */
1953:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
1954:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
1955:       for (i=0;i<pcbddc->benign_n;i++) {
1956:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
1957:       }
1958:       ISLocalToGlobalMappingDestroy(&N_to_D);
1959:       ctx->free = PETSC_TRUE;
1960:     }
          /* the original A_IB is kept (not destroyed) inside the context until restore */
1961:     ctx->A = pcis->A_IB;
1962:     ctx->work = work;
1963:     MatSetUp(A_IB);
1964:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
1965:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
1966:     pcis->A_IB = A_IB;

1968:     /* A_BI as A_IB^T */
1969:     MatCreateTranspose(A_IB,&A_BI);
1970:     pcbddc->benign_original_mat = pcis->A_BI;
1971:     pcis->A_BI = A_BI;
1972:   } else {
1973:     if (!pcbddc->benign_original_mat) {
1974:       return(0);
1975:     }
          /* fetch the context before destroying the shell: it still holds the original A_IB */
1976:     MatShellGetContext(pcis->A_IB,&ctx);
1977:     MatDestroy(&pcis->A_IB);
1978:     pcis->A_IB = ctx->A;
1979:     ctx->A = NULL;
1980:     MatDestroy(&pcis->A_BI);
1981:     pcis->A_BI = pcbddc->benign_original_mat;
1982:     pcbddc->benign_original_mat = NULL;
          /* the index sets are released only if they were created here at install time */
1983:     if (ctx->free) {
1984:       PetscInt i;
1985:       for (i=0;i<ctx->benign_n;i++) {
1986:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
1987:       }
1988:       PetscFree(ctx->benign_zerodiag_subs);
1989:     }
1990:     PetscFree(ctx->work);
1991:     PetscFree(ctx);
1992:   }
1993:   return(0);
1994: }

1996: /* used just in bddc debug mode */
/*
   PCBDDCBenignProject - Computes P^T A P, with A the local matrix of pc->pmat and
   P = pcbddc->benign_change, zeroes the rows and columns listed in pcbddc->benign_p0_lidx
   (placing 1.0 on their diagonal) and optionally extracts a submatrix of the result.

   Input Parameters:
+  pc  - the preconditioner
.  is1 - rows to extract, or NULL to return the whole matrix
-  is2 - columns to extract; used only when is1 is not NULL

   Output Parameter:
.  B - the newly created matrix; it is not destroyed by this routine
*/
1997: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
1998: {
1999:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2000:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2001:   Mat            An;

2005:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2006:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2007:   if (is1) {
          /* the full product is only a temporary when a submatrix is requested */
2008:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2009:     MatDestroy(&An);
2010:   } else {
2011:     *B = An;
2012:   }
2013:   return(0);
2014: }

2016: /* TODO: add reuse flag */
/*
   MatSeqAIJCompress - Creates a copy of a SeqAIJ matrix without its numerically negligible entries.

   Input Parameter:
.  A - the matrix; its values are accessed with MatSeqAIJGetArray() and its pattern with MatGetRowIJ()

   Output Parameter:
.  B - a new SeqAIJ matrix with the same sizes as A, holding the entries of A whose magnitude
       is larger than PETSC_SMALL, plus every stored entry whose column index equals its row index

   Notes:
   The arrays of the new matrix are allocated here and handed over to it: free_a and free_ij are
   set on the Mat_SeqAIJ data so that the matrix releases them.
*/
2017: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2018: {
2019:   Mat            Bt;
2020:   PetscScalar    *a,*bdata;
2021:   const PetscInt *ii,*ij;
2022:   PetscInt       m,n,i,nnz,*bii,*bij;
2023:   PetscBool      flg_row;

        /* n = number of rows, m = number of columns */
2027:   MatGetSize(A,&n,&m);
2028:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2029:   MatSeqAIJGetArray(A,&a);
        /* upper bound on the kept entries: the non-negligible ones plus at most one
           (possibly negligible) diagonal entry per row */
2030:   nnz = n;
2031:   for (i=0;i<ii[n];i++) {
2032:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2033:   }
2034:   PetscMalloc1(n+1,&bii);
2035:   PetscMalloc1(nnz,&bij);
2036:   PetscMalloc1(nnz,&bdata);
        /* second pass: nnz now counts the entries actually copied and bii collects the row offsets */
2037:   nnz = 0;
2038:   bii[0] = 0;
2039:   for (i=0;i<n;i++) {
2040:     PetscInt j;
2041:     for (j=ii[i];j<ii[i+1];j++) {
2042:       PetscScalar entry = a[j];
2043:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
2044:         bij[nnz] = ij[j];
2045:         bdata[nnz] = entry;
2046:         nnz++;
2047:       }
2048:     }
2049:     bii[i+1] = nnz;
2050:   }
2051:   MatSeqAIJRestoreArray(A,&a);
2052:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2053:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2054:   {
          /* transfer the ownership of bii, bij and bdata to the new matrix */
2055:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2056:     b->free_a = PETSC_TRUE;
2057:     b->free_ij = PETSC_TRUE;
2058:   }
2059:   *B = Bt;
2060:   return(0);
2061: }

/*
   PCBDDCDetectDisconnectedComponents - Computes the connected components of the local subdomain
   by means of a PCBDDCGraph.

   Input Parameter:
.  pc - the preconditioner

   Output Parameters (each of them may be NULL):
+  ncc     - number of connected components
.  cc      - array with one sequential IS per connected component; the array and the index sets
             are created here and are not released by this routine
-  primalv - DMPlex case: IS with the dofs found in more than one component ("cross-vertex" dofs);
             NULL in all the other cases

   Notes:
   If a DMPLEX is attached to the PC (or to pc->pmat), the graph is the cell adjacency graph of the
   mesh and the components, computed on the cells, are mapped to dofs through the transitive closure
   of each cell and the subdomain section. Otherwise the graph is the adjacency structure of the
   local matrix of pc->pmat.
   When the local matrix is empty, the routine returns zero components and NULL for cc and primalv.
*/
2063: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2064: {
2065:   Mat                    B = NULL;
2066:   DM                     dm;
2067:   IS                     is_dummy,*cc_n;
2068:   ISLocalToGlobalMapping l2gmap_dummy;
2069:   PCBDDCGraph            graph;
2070:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2071:   PetscInt               i,n;
2072:   PetscInt               *xadj,*adjncy;
2073:   PetscBool              isplex = PETSC_FALSE;
2074:   PetscErrorCode         ierr;

2077:   PCBDDCGraphCreate(&graph);
2078:   PCGetDM(pc,&dm);
2079:   if (!dm) {
2080:     MatGetDM(pc->pmat,&dm);
2081:   }
2082:   if (dm) {
2083:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2084:   }
2085:   if (isplex) { /* this code has been modified from plexpartition.c */
2086:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2087:     PetscInt      *adj = NULL;
2088:     IS             cellNumbering;
2089:     const PetscInt *cellNum;
2090:     PetscBool      useCone, useClosure;
2091:     PetscSection   section;
2092:     PetscSegBuffer adjBuffer;
2093:     PetscSF        sfPoint;

2097:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2098:     DMGetPointSF(dm, &sfPoint);
2099:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2100:     /* Build adjacency graph via a section/segbuffer */
2101:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2102:     PetscSectionSetChart(section, pStart, pEnd);
2103:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2104:     /* Always use FVM adjacency to create partitioner graph */
          /* the adjacency flags of the DM are saved here and put back after the loop */
2105:     DMPlexGetAdjacencyUseCone(dm, &useCone);
2106:     DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2107:     DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2108:     DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2109:     DMPlexGetCellNumbering(dm, &cellNumbering);
2110:     ISGetIndices(cellNumbering, &cellNum);
          /* n counts the cells that are kept, i.e. the vertices of the graph */
2111:     for (n = 0, p = pStart; p < pEnd; p++) {
2112:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2113:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2114:       adjSize = PETSC_DETERMINE;
2115:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2116:       for (a = 0; a < adjSize; ++a) {
2117:         const PetscInt point = adj[a];
              /* keep only the adjacent points that are cells */
2118:         if (pStart <= point && point < pEnd) {
2119:           PetscInt *PETSC_RESTRICT pBuf;
2120:           PetscSectionAddDof(section, p, 1);
2121:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2122:           *pBuf = point;
2123:         }
2124:       }
2125:       n++;
2126:     }
2127:     DMPlexSetAdjacencyUseCone(dm, useCone);
2128:     DMPlexSetAdjacencyUseClosure(dm, useClosure);
2129:     /* Derive CSR graph from section/segbuffer */
2130:     PetscSectionSetUp(section);
2131:     PetscSectionGetStorageSize(section, &size);
2132:     PetscMalloc1(n+1, &xadj);
2133:     for (idx = 0, p = pStart; p < pEnd; p++) {
2134:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2135:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2136:     }
          /* cellNum is not used past this point: match the ISGetIndices() call above */
          ISRestoreIndices(cellNumbering, &cellNum);
2137:     xadj[n] = size;
2138:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2139:     /* Clean up */
2140:     PetscSegBufferDestroy(&adjBuffer);
2141:     PetscSectionDestroy(&section);
2142:     PetscFree(adj);
2143:     graph->xadj = xadj;
2144:     graph->adjncy = adjncy;
2145:   } else {
2146:     Mat       A;
2147:     PetscBool filter = PETSC_FALSE, isseqaij, flg_row;

2149:     MatISGetLocalMat(pc->pmat,&A);
2150:     if (!A->rmap->N || !A->cmap->N) {
            /* empty local problem: report no components through the outputs that were requested
               and release the graph created at entry before leaving */
2151:       if (ncc) *ncc = 0;
2152:       if (cc) *cc = NULL;
            if (primalv) *primalv = NULL;
            PCBDDCGraphDestroy(&graph);
2153:       return(0);
2154:     }
2155:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
          /* filter is hard-wired to false, so the conversion branch below is currently never taken */
2156:     if (!isseqaij && filter) {
2157:       PetscBool isseqdense;

2159:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2160:       if (!isseqdense) {
2161:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2162:       } else { /* TODO: rectangular case and LDA */
2163:         PetscScalar *array;
2164:         PetscReal   chop=1.e-6;

              /* zero the off-diagonal entries that are small relative to the two diagonal entries
                 involved; the indexing assumes a square array with leading dimension n (see TODO) */
2166:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2167:         MatDenseGetArray(B,&array);
2168:         MatGetSize(B,&n,NULL);
2169:         for (i=0;i<n;i++) {
2170:           PetscInt j;
2171:           for (j=i+1;j<n;j++) {
2172:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2173:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2174:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2175:           }
2176:         }
2177:         MatDenseRestoreArray(B,&array);
2178:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2179:       }
2180:     } else {
            /* work directly on the local matrix; the extra reference is dropped by MatDestroy(&B) below */
2181:       PetscObjectReference((PetscObject)A);
2182:       B = A;
2183:     }
2184:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2186:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2187:     if (filter) {
2188:       PetscScalar *data;
2189:       PetscInt    j,cum;

2191:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2192:       MatSeqAIJGetArray(B,&data);
2193:       cum = 0;
2194:       for (i=0;i<n;i++) {
2195:         PetscInt t;

              /* xadj_filtered[i] is used as the counter of the kept entries of row i,
                 then overwritten with the offset of the row */
2197:         for (j=xadj[i];j<xadj[i+1];j++) {
2198:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2199:             continue;
2200:           }
2201:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2202:         }
2203:         t = xadj_filtered[i];
2204:         xadj_filtered[i] = cum;
2205:         cum += t;
2206:       }
2207:       MatSeqAIJRestoreArray(B,&data);
2208:       graph->xadj = xadj_filtered;
2209:       graph->adjncy = adjncy_filtered;
2210:     } else {
2211:       graph->xadj = xadj;
2212:       graph->adjncy = adjncy;
2213:     }
2214:   }
2215:   /* compute local connected components using PCBDDCGraph */
2216:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2217:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2218:   ISDestroy(&is_dummy);
2219:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2220:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2221:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2222:   PCBDDCGraphComputeConnectedComponents(graph);

2224:   /* partial clean up */
2225:   PetscFree2(xadj_filtered,adjncy_filtered);
2226:   if (B) {
2227:     PetscBool flg_row;
2228:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2229:     MatDestroy(&B);
2230:   }
2231:   if (isplex) {
2232:     PetscFree(xadj);
2233:     PetscFree(adjncy);
2234:   }

2236:   /* get back data */
2237:   if (isplex) {
2238:     if (ncc) *ncc = graph->ncc;
2239:     if (cc || primalv) {
2240:       Mat          A;
2241:       PetscBT      btv,btvt;
2242:       PetscSection subSection;
2243:       PetscInt     *ids,cum,cump,*cids,*pids;

            /* ids:  dofs of all the components, one after the other; cids: start of each component in ids
               pids: dofs found again in a later component
               btv:  dofs already assigned to a previous component
               btvt: dofs already visited while processing the current component */
2245:       DMPlexGetSubdomainSection(dm,&subSection);
2246:       MatISGetLocalMat(pc->pmat,&A);
2247:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2248:       PetscBTCreate(A->rmap->n,&btv);
2249:       PetscBTCreate(A->rmap->n,&btvt);

2251:       cids[0] = 0;
2252:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2253:         PetscInt j;

2255:         PetscBTMemzero(A->rmap->n,btvt);
2256:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2257:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2259:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
                /* the closure array is read with stride 2: only the even positions are used as points */
2260:           for (k = 0; k < 2*size; k += 2) {
2261:             PetscInt s, p = closure[k], off, dof, cdof;

2263:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2264:             PetscSectionGetOffset(subSection,p,&off);
2265:             PetscSectionGetDof(subSection,p,&dof);
2266:             for (s = 0; s < dof-cdof; s++) {
2267:               if (PetscBTLookupSet(btvt,off+s)) continue;
2268:               if (!PetscBTLookup(btv,off+s)) {
2269:                 ids[cum++] = off+s;
2270:               } else { /* cross-vertex */
2271:                 pids[cump++] = off+s;
2272:               }
2273:             }
2274:           }
2275:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2276:         }
2277:         cids[i+1] = cum;
2278:         /* mark dofs as already assigned */
2279:         for (j = cids[i]; j < cids[i+1]; j++) {
2280:           PetscBTSet(btv,ids[j]);
2281:         }
2282:       }
2283:       if (cc) {
2284:         PetscMalloc1(graph->ncc,&cc_n);
2285:         for (i = 0; i < graph->ncc; i++) {
2286:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2287:         }
2288:         *cc = cc_n;
2289:       }
2290:       if (primalv) {
2291:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2292:       }
2293:       PetscFree3(ids,cids,pids);
2294:       PetscBTDestroy(&btv);
2295:       PetscBTDestroy(&btvt);
2296:     }
2297:   } else {
2298:     if (ncc) *ncc = graph->ncc;
2299:     if (cc) {
2300:       PetscMalloc1(graph->ncc,&cc_n);
2301:       for (i=0;i<graph->ncc;i++) {
2302:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2303:       }
2304:       *cc = cc_n;
2305:     }
2306:     if (primalv) *primalv = NULL;
2307:   }
2308:   /* clean up graph */
        /* NOTE(review): the CSR pointers are cleared first, presumably so that PCBDDCGraphDestroy()
           does not touch arrays that have already been released or restored above -- confirm */
2309:   graph->xadj = 0;
2310:   graph->adjncy = 0;
2311:   PCBDDCGraphDestroy(&graph);
2312:   return(0);
2313: }

/*
   PCBDDCBenignCheck - Sanity checks for the benign trick.

   Input Parameters:
+  pc       - the preconditioner (pc->data is read both as PC_BDDC and as PC_IS)
-  zerodiag - local dofs to be tested, or NULL to skip the first two checks

   Notes:
   The following checks are performed, each of them raising an error on failure:
   1) with p_0 equal to one on the dofs of zerodiag (zero elsewhere) and v_I a random vector zeroed
      on the dofs of zerodiag, on the dofs of pcis->is_B_local and on the Dirichlet dofs of the graph,
      the dot product of A p_0 with v_I must not exceed 1.e-1 in magnitude (PETSC_ERR_SUP);
   2) no dof of zerodiag may belong to pcis->is_B_local (PETSC_ERR_SUP);
   3) the values -PetscGlobalRank-i stored in pcbddc->benign_p0 must be found again after the two
      calls to PCBDDCBenignGetOrSetP0() with the flag set to PETSC_FALSE and then to PETSC_TRUE,
      the array being overwritten with ones in between (PETSC_ERR_PLIB).
   pcis->vec1_N, pcis->vec2_N, pcis->vec1_global and pcbddc->benign_p0 are overwritten.
*/
2315: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2316: {
2317:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2318:   PC_IS*         pcis = (PC_IS*)(pc->data);
2319:   IS             dirIS = NULL;
2320:   PetscInt       i;

2324:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2325:   if (zerodiag) {
2326:     Mat            A;
2327:     Vec            vec3_N;
2328:     PetscScalar    *vals;
2329:     const PetscInt *idxs;
2330:     PetscInt       nz,*count;

2332:     /* p0 */
2333:     VecSet(pcis->vec1_N,0.);
          /* vals is a scratch buffer of pcis->n entries, refilled before every VecSetValues() call */
2334:     PetscMalloc1(pcis->n,&vals);
2335:     ISGetLocalSize(zerodiag,&nz);
2336:     ISGetIndices(zerodiag,&idxs);
2337:     for (i=0;i<nz;i++) vals[i] = 1.;
2338:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2339:     VecAssemblyBegin(pcis->vec1_N);
2340:     VecAssemblyEnd(pcis->vec1_N);
2341:     /* v_I */
2342:     VecSetRandom(pcis->vec2_N,NULL);
2343:     for (i=0;i<nz;i++) vals[i] = 0.;
2344:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2345:     ISRestoreIndices(zerodiag,&idxs);
2346:     ISGetIndices(pcis->is_B_local,&idxs);
2347:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2348:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2349:     ISRestoreIndices(pcis->is_B_local,&idxs);
2350:     if (dirIS) {
2351:       PetscInt n;

2353:       ISGetLocalSize(dirIS,&n);
2354:       ISGetIndices(dirIS,&idxs);
2355:       for (i=0;i<n;i++) vals[i] = 0.;
2356:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2357:       ISRestoreIndices(dirIS,&idxs);
2358:     }
2359:     VecAssemblyBegin(pcis->vec2_N);
2360:     VecAssemblyEnd(pcis->vec2_N);
2361:     VecDuplicate(pcis->vec1_N,&vec3_N);
2362:     VecSet(vec3_N,0.);
2363:     MatISGetLocalMat(pc->pmat,&A);
2364:     MatMult(A,pcis->vec1_N,vec3_N);
          /* vals[0] is reused to hold the result of the dot product */
2365:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
          /* the magnitude is a PetscReal: cast it to double to match the %e conversion */
2366:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2367:     PetscFree(vals);
2368:     VecDestroy(&vec3_N);

2370:     /* there should not be any pressure dofs lying on the interface */
2371:     PetscCalloc1(pcis->n,&count);
2372:     ISGetIndices(pcis->is_B_local,&idxs);
2373:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2374:     ISRestoreIndices(pcis->is_B_local,&idxs);
2375:     ISGetIndices(zerodiag,&idxs);
          /* idxs[i] is a PetscInt: it must be printed with %D */
2376:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2377:     ISRestoreIndices(zerodiag,&idxs);
2378:     PetscFree(count);
2379:   }
2380:   ISDestroy(&dirIS);

2382:   /* check PCBDDCBenignGetOrSetP0 */
        /* NOTE(review): presumably the call with PETSC_FALSE stores benign_p0 into the vector and the
           call with PETSC_TRUE reads it back -- confirm against PCBDDCBenignGetOrSetP0() */
2383:   VecSetRandom(pcis->vec1_global,NULL);
2384:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2385:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2386:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2387:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2388:   for (i=0;i<pcbddc->benign_n;i++) {
2389:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
          /* both %g arguments are cast to double (the expected value is an integer expression)
             and the PetscInt position is printed with %D */
2390:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g\n",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2391:   }
2392:   return(0);
2393: }

2395: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2396: {
2397:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2398:   IS             pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2399:   PetscInt       nz,n;
2400:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2401:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

2405:   PetscSFDestroy(&pcbddc->benign_sf);
2406:   MatDestroy(&pcbddc->benign_B0);
2407:   for (n=0;n<pcbddc->benign_n;n++) {
2408:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2409:   }
2410:   PetscFree(pcbddc->benign_zerodiag_subs);
2411:   pcbddc->benign_n = 0;

2413:   /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2414:      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2415:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2416:      If not, a change of basis on pressures is not needed
2417:      since the local Schur complements are already SPD
2418:   */
2419:   has_null_pressures = PETSC_TRUE;
2420:   have_null = PETSC_TRUE;
2421:   if (pcbddc->n_ISForDofsLocal) {
2422:     IS       iP = NULL;
2423:     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;

2425:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2426:     PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2427:     PetscOptionsEnd();
2428:     if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2429:     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2430:     ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2431:     ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2432:     ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2433:     ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2434:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2435:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2436:     if (iP) {
2437:       IS newpressures;

2439:       ISDifference(pressures,iP,&newpressures);
2440:       ISDestroy(&pressures);
2441:       pressures = newpressures;
2442:     }
2443:     ISSorted(pressures,&sorted);
2444:     if (!sorted) {
2445:       ISSort(pressures);
2446:     }
2447:   } else {
2448:     pressures = NULL;
2449:   }
2450:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2451:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2452:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2453:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2454:   ISSorted(zerodiag,&sorted);
2455:   if (!sorted) {
2456:     ISSort(zerodiag);
2457:   }
2458:   PetscObjectReference((PetscObject)zerodiag);
2459:   zerodiag_save = zerodiag;
2460:   ISGetLocalSize(zerodiag,&nz);
2461:   if (!nz) {
2462:     if (n) have_null = PETSC_FALSE;
2463:     has_null_pressures = PETSC_FALSE;
2464:     ISDestroy(&zerodiag);
2465:   }
2466:   recompute_zerodiag = PETSC_FALSE;
2467:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2468:   zerodiag_subs    = NULL;
2469:   pcbddc->benign_n = 0;
2470:   n_interior_dofs  = 0;
2471:   interior_dofs    = NULL;
2472:   nneu             = 0;
2473:   if (pcbddc->NeumannBoundariesLocal) {
2474:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2475:   }
2476:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2477:   if (checkb) { /* need to compute interior nodes */
2478:     PetscInt n,i,j;
2479:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2480:     PetscInt *iwork;

2482:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2483:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2484:     PetscCalloc1(n,&iwork);
2485:     PetscMalloc1(n,&interior_dofs);
2486:     for (i=1;i<n_neigh;i++)
2487:       for (j=0;j<n_shared[i];j++)
2488:           iwork[shared[i][j]] += 1;
2489:     for (i=0;i<n;i++)
2490:       if (!iwork[i])
2491:         interior_dofs[n_interior_dofs++] = i;
2492:     PetscFree(iwork);
2493:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2494:   }
2495:   if (has_null_pressures) {
2496:     IS             *subs;
2497:     PetscInt       nsubs,i,j,nl;
2498:     const PetscInt *idxs;
2499:     PetscScalar    *array;
2500:     Vec            *work;
2501:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2503:     subs  = pcbddc->local_subs;
2504:     nsubs = pcbddc->n_local_subs;
2505:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2506:     if (checkb) {
2507:       VecDuplicateVecs(matis->y,2,&work);
2508:       ISGetLocalSize(zerodiag,&nl);
2509:       ISGetIndices(zerodiag,&idxs);
2510:       /* work[0] = 1_p */
2511:       VecSet(work[0],0.);
2512:       VecGetArray(work[0],&array);
2513:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2514:       VecRestoreArray(work[0],&array);
2515:       /* work[1] = 1_v */
2516:       VecSet(work[1],1.);
2517:       VecGetArray(work[1],&array);
2518:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2519:       VecRestoreArray(work[1],&array);
2520:       ISRestoreIndices(zerodiag,&idxs);
2521:     }
2522:     if (nsubs > 1) {
2523:       PetscCalloc1(nsubs,&zerodiag_subs);
2524:       for (i=0;i<nsubs;i++) {
2525:         ISLocalToGlobalMapping l2g;
2526:         IS                     t_zerodiag_subs;
2527:         PetscInt               nl;

2529:         ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2530:         ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2531:         ISGetLocalSize(t_zerodiag_subs,&nl);
2532:         if (nl) {
2533:           PetscBool valid = PETSC_TRUE;

2535:           if (checkb) {
2536:             VecSet(matis->x,0);
2537:             ISGetLocalSize(subs[i],&nl);
2538:             ISGetIndices(subs[i],&idxs);
2539:             VecGetArray(matis->x,&array);
2540:             for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2541:             VecRestoreArray(matis->x,&array);
2542:             ISRestoreIndices(subs[i],&idxs);
2543:             VecPointwiseMult(matis->x,work[0],matis->x);
2544:             MatMult(matis->A,matis->x,matis->y);
2545:             VecPointwiseMult(matis->y,work[1],matis->y);
2546:             VecGetArray(matis->y,&array);
2547:             for (j=0;j<n_interior_dofs;j++) {
2548:               if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2549:                 valid = PETSC_FALSE;
2550:                 break;
2551:               }
2552:             }
2553:             VecRestoreArray(matis->y,&array);
2554:           }
2555:           if (valid && nneu) {
2556:             const PetscInt *idxs;
2557:             PetscInt       nzb;

2559:             ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2560:             ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2561:             ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2562:             if (nzb) valid = PETSC_FALSE;
2563:           }
2564:           if (valid && pressures) {
2565:             IS t_pressure_subs;
2566:             ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2567:             ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2568:             ISDestroy(&t_pressure_subs);
2569:           }
2570:           if (valid) {
2571:             ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2572:             pcbddc->benign_n++;
2573:           } else {
2574:             recompute_zerodiag = PETSC_TRUE;
2575:           }
2576:         }
2577:         ISDestroy(&t_zerodiag_subs);
2578:         ISLocalToGlobalMappingDestroy(&l2g);
2579:       }
2580:     } else { /* there's just one subdomain (or zero if they have not been detected */
2581:       PetscBool valid = PETSC_TRUE;

2583:       if (nneu) valid = PETSC_FALSE;
2584:       if (valid && pressures) {
2585:         ISEqual(pressures,zerodiag,&valid);
2586:       }
2587:       if (valid && checkb) {
2588:         MatMult(matis->A,work[0],matis->x);
2589:         VecPointwiseMult(matis->x,work[1],matis->x);
2590:         VecGetArray(matis->x,&array);
2591:         for (j=0;j<n_interior_dofs;j++) {
2592:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2593:             valid = PETSC_FALSE;
2594:             break;
2595:           }
2596:         }
2597:         VecRestoreArray(matis->x,&array);
2598:       }
2599:       if (valid) {
2600:         pcbddc->benign_n = 1;
2601:         PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2602:         PetscObjectReference((PetscObject)zerodiag);
2603:         zerodiag_subs[0] = zerodiag;
2604:       }
2605:     }
2606:     if (checkb) {
2607:       VecDestroyVecs(2,&work);
2608:     }
2609:   }
2610:   PetscFree(interior_dofs);

2612:   if (!pcbddc->benign_n) {
2613:     PetscInt n;

2615:     ISDestroy(&zerodiag);
2616:     recompute_zerodiag = PETSC_FALSE;
2617:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2618:     if (n) {
2619:       has_null_pressures = PETSC_FALSE;
2620:       have_null = PETSC_FALSE;
2621:     }
2622:   }

2624:   /* final check for null pressures */
2625:   if (zerodiag && pressures) {
2626:     PetscInt nz,np;
2627:     ISGetLocalSize(zerodiag,&nz);
2628:     ISGetLocalSize(pressures,&np);
2629:     if (nz != np) have_null = PETSC_FALSE;
2630:   }

2632:   if (recompute_zerodiag) {
2633:     ISDestroy(&zerodiag);
2634:     if (pcbddc->benign_n == 1) {
2635:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2636:       zerodiag = zerodiag_subs[0];
2637:     } else {
2638:       PetscInt i,nzn,*new_idxs;

2640:       nzn = 0;
2641:       for (i=0;i<pcbddc->benign_n;i++) {
2642:         PetscInt ns;
2643:         ISGetLocalSize(zerodiag_subs[i],&ns);
2644:         nzn += ns;
2645:       }
2646:       PetscMalloc1(nzn,&new_idxs);
2647:       nzn = 0;
2648:       for (i=0;i<pcbddc->benign_n;i++) {
2649:         PetscInt ns,*idxs;
2650:         ISGetLocalSize(zerodiag_subs[i],&ns);
2651:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2652:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2653:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2654:         nzn += ns;
2655:       }
2656:       PetscSortInt(nzn,new_idxs);
2657:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2658:     }
2659:     have_null = PETSC_FALSE;
2660:   }

2662:   /* Prepare matrix to compute no-net-flux */
2663:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2664:     Mat                    A,loc_divudotp;
2665:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2666:     IS                     row,col,isused = NULL;
2667:     PetscInt               M,N,n,st,n_isused;

2669:     if (pressures) {
2670:       isused = pressures;
2671:     } else {
2672:       isused = zerodiag_save;
2673:     }
2674:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2675:     MatISGetLocalMat(pc->pmat,&A);
2676:     MatGetLocalSize(A,&n,NULL);
2677:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2678:     n_isused = 0;
2679:     if (isused) {
2680:       ISGetLocalSize(isused,&n_isused);
2681:     }
2682:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2683:     st = st-n_isused;
2684:     if (n) {
2685:       const PetscInt *gidxs;

2687:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2688:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2689:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2690:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2691:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2692:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2693:     } else {
2694:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2695:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2696:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2697:     }
2698:     MatGetSize(pc->pmat,NULL,&N);
2699:     ISGetSize(row,&M);
2700:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2701:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2702:     ISDestroy(&row);
2703:     ISDestroy(&col);
2704:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2705:     MatSetType(pcbddc->divudotp,MATIS);
2706:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2707:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2708:     ISLocalToGlobalMappingDestroy(&rl2g);
2709:     ISLocalToGlobalMappingDestroy(&cl2g);
2710:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2711:     MatDestroy(&loc_divudotp);
2712:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2713:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2714:   }
2715:   ISDestroy(&zerodiag_save);

2717:   /* change of basis and p0 dofs */
2718:   if (has_null_pressures) {
2719:     IS             zerodiagc;
2720:     const PetscInt *idxs,*idxsc;
2721:     PetscInt       i,s,*nnz;

2723:     ISGetLocalSize(zerodiag,&nz);
2724:     ISComplement(zerodiag,0,n,&zerodiagc);
2725:     ISGetIndices(zerodiagc,&idxsc);
2726:     /* local change of basis for pressures */
2727:     MatDestroy(&pcbddc->benign_change);
2728:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2729:     MatSetType(pcbddc->benign_change,MATAIJ);
2730:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2731:     PetscMalloc1(n,&nnz);
2732:     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2733:     for (i=0;i<pcbddc->benign_n;i++) {
2734:       PetscInt nzs,j;

2736:       ISGetLocalSize(zerodiag_subs[i],&nzs);
2737:       ISGetIndices(zerodiag_subs[i],&idxs);
2738:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2739:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2740:       ISRestoreIndices(zerodiag_subs[i],&idxs);
2741:     }
2742:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2743:     PetscFree(nnz);
2744:     /* set identity on velocities */
2745:     for (i=0;i<n-nz;i++) {
2746:       MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2747:     }
2748:     ISRestoreIndices(zerodiagc,&idxsc);
2749:     ISDestroy(&zerodiagc);
2750:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2751:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2752:     /* set change on pressures */
2753:     for (s=0;s<pcbddc->benign_n;s++) {
2754:       PetscScalar *array;
2755:       PetscInt    nzs;

2757:       ISGetLocalSize(zerodiag_subs[s],&nzs);
2758:       ISGetIndices(zerodiag_subs[s],&idxs);
2759:       for (i=0;i<nzs-1;i++) {
2760:         PetscScalar vals[2];
2761:         PetscInt    cols[2];

2763:         cols[0] = idxs[i];
2764:         cols[1] = idxs[nzs-1];
2765:         vals[0] = 1.;
2766:         vals[1] = 1.;
2767:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2768:       }
2769:       PetscMalloc1(nzs,&array);
2770:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2771:       array[nzs-1] = 1.;
2772:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2773:       /* store local idxs for p0 */
2774:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2775:       ISRestoreIndices(zerodiag_subs[s],&idxs);
2776:       PetscFree(array);
2777:     }
2778:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2779:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2780:     /* project if needed */
2781:     if (pcbddc->benign_change_explicit) {
2782:       Mat M;

2784:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2785:       MatDestroy(&pcbddc->local_mat);
2786:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2787:       MatDestroy(&M);
2788:     }
2789:     /* store global idxs for p0 */
2790:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2791:   }
2792:   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2793:   ISDestroy(&pressures);

2795:   /* determines if the coarse solver will be singular or not */
2796:   MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2797:   /* determines if the problem has subdomains with 0 pressure block */
2798:   MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2799:   *zerodiaglocal = zerodiag;
2800:   return(0);
2801: }

2803: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2804: {
2805:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2806:   PetscScalar    *array;

2810:   if (!pcbddc->benign_sf) {
2811:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2812:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2813:   }
2814:   if (get) {
2815:     VecGetArrayRead(v,(const PetscScalar**)&array);
2816:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2817:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2818:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2819:   } else {
2820:     VecGetArray(v,&array);
2821:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2822:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2823:     VecRestoreArray(v,&array);
2824:   }
2825:   return(0);
2826: }

2828: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2829: {
2830:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

2834:   /* TODO: add error checking
2835:     - avoid nested pop (or push) calls.
2836:     - cannot push before pop.
2837:     - cannot call this if pcbddc->local_mat is NULL
2838:   */
2839:   if (!pcbddc->benign_n) {
2840:     return(0);
2841:   }
2842:   if (pop) {
2843:     if (pcbddc->benign_change_explicit) {
2844:       IS       is_p0;
2845:       MatReuse reuse;

2847:       /* extract B_0 */
2848:       reuse = MAT_INITIAL_MATRIX;
2849:       if (pcbddc->benign_B0) {
2850:         reuse = MAT_REUSE_MATRIX;
2851:       }
2852:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2853:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2854:       /* remove rows and cols from local problem */
2855:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2856:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2857:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2858:       ISDestroy(&is_p0);
2859:     } else {
2860:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
2861:       PetscScalar *vals;
2862:       PetscInt    i,n,*idxs_ins;

2864:       VecGetLocalSize(matis->y,&n);
2865:       PetscMalloc2(n,&idxs_ins,n,&vals);
2866:       if (!pcbddc->benign_B0) {
2867:         PetscInt *nnz;
2868:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2869:         MatSetType(pcbddc->benign_B0,MATAIJ);
2870:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2871:         PetscMalloc1(pcbddc->benign_n,&nnz);
2872:         for (i=0;i<pcbddc->benign_n;i++) {
2873:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2874:           nnz[i] = n - nnz[i];
2875:         }
2876:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2877:         PetscFree(nnz);
2878:       }

2880:       for (i=0;i<pcbddc->benign_n;i++) {
2881:         PetscScalar *array;
2882:         PetscInt    *idxs,j,nz,cum;

2884:         VecSet(matis->x,0.);
2885:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2886:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2887:         for (j=0;j<nz;j++) vals[j] = 1.;
2888:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2889:         VecAssemblyBegin(matis->x);
2890:         VecAssemblyEnd(matis->x);
2891:         VecSet(matis->y,0.);
2892:         MatMult(matis->A,matis->x,matis->y);
2893:         VecGetArray(matis->y,&array);
2894:         cum = 0;
2895:         for (j=0;j<n;j++) {
2896:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2897:             vals[cum] = array[j];
2898:             idxs_ins[cum] = j;
2899:             cum++;
2900:           }
2901:         }
2902:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
2903:         VecRestoreArray(matis->y,&array);
2904:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2905:       }
2906:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2907:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2908:       PetscFree2(idxs_ins,vals);
2909:     }
2910:   } else { /* push */
2911:     if (pcbddc->benign_change_explicit) {
2912:       PetscInt i;

2914:       for (i=0;i<pcbddc->benign_n;i++) {
2915:         PetscScalar *B0_vals;
2916:         PetscInt    *B0_cols,B0_ncol;

2918:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2919:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
2920:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
2921:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
2922:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2923:       }
2924:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2925:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2926:     } else {
2927:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
2928:     }
2929:   }
2930:   return(0);
2931: }

2933: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2934: {
2935:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2936:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2937:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
2938:   PetscBLASInt    *B_iwork,*B_ifail;
2939:   PetscScalar     *work,lwork;
2940:   PetscScalar     *St,*S,*eigv;
2941:   PetscScalar     *Sarray,*Starray;
2942:   PetscReal       *eigs,thresh;
2943:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
2944:   PetscBool       allocated_S_St;
2945: #if defined(PETSC_USE_COMPLEX)
2946:   PetscReal       *rwork;
2947: #endif
2948:   PetscErrorCode  ierr;

2951:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
2952:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
2953:   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef);

2955:   if (pcbddc->dbg_flag) {
2956:     PetscViewerFlush(pcbddc->dbg_viewer);
2957:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
2958:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
2959:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
2960:   }

2962:   if (pcbddc->dbg_flag) {
2963:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
2964:   }

2966:   /* max size of subsets */
2967:   mss = 0;
2968:   for (i=0;i<sub_schurs->n_subs;i++) {
2969:     PetscInt subset_size;

2971:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2972:     mss = PetscMax(mss,subset_size);
2973:   }

2975:   /* min/max and threshold */
2976:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
2977:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
2978:   nmax = PetscMax(nmin,nmax);
2979:   allocated_S_St = PETSC_FALSE;
2980:   if (nmin) {
2981:     allocated_S_St = PETSC_TRUE;
2982:   }

2984:   /* allocate lapack workspace */
2985:   cum = cum2 = 0;
2986:   maxneigs = 0;
2987:   for (i=0;i<sub_schurs->n_subs;i++) {
2988:     PetscInt n,subset_size;

2990:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2991:     n = PetscMin(subset_size,nmax);
2992:     cum += subset_size;
2993:     cum2 += subset_size*n;
2994:     maxneigs = PetscMax(maxneigs,n);
2995:   }
2996:   if (mss) {
2997:     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
2998:       PetscBLASInt B_itype = 1;
2999:       PetscBLASInt B_N = mss;
3000:       PetscReal    zero = 0.0;
3001:       PetscReal    eps = 0.0; /* dlamch? */

3003:       B_lwork = -1;
3004:       S = NULL;
3005:       St = NULL;
3006:       eigs = NULL;
3007:       eigv = NULL;
3008:       B_iwork = NULL;
3009:       B_ifail = NULL;
3010: #if defined(PETSC_USE_COMPLEX)
3011:       rwork = NULL;
3012: #endif
3013:       thresh = 1.0;
3014:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3015: #if defined(PETSC_USE_COMPLEX)
3016:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3017: #else
3018:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3019: #endif
3020:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3021:       PetscFPTrapPop();
3022:     } else {
3023:         /* TODO */
3024:     }
3025:   } else {
3026:     lwork = 0;
3027:   }

3029:   nv = 0;
3030:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3031:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3032:   }
3033:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3034:   if (allocated_S_St) {
3035:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3036:   }
3037:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3038: #if defined(PETSC_USE_COMPLEX)
3039:   PetscMalloc1(7*mss,&rwork);
3040: #endif
3041:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3042:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3043:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3044:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3045:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3046:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3048:   maxneigs = 0;
3049:   cum = cumarray = 0;
3050:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3051:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3052:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3053:     const PetscInt *idxs;

3055:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3056:     for (cum=0;cum<nv;cum++) {
3057:       pcbddc->adaptive_constraints_n[cum] = 1;
3058:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3059:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3060:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3061:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3062:     }
3063:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3064:   }

3066:   if (mss) { /* multilevel */
3067:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3068:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3069:   }

3071:   thresh = pcbddc->adaptive_threshold;
3072:   for (i=0;i<sub_schurs->n_subs;i++) {
3073:     const PetscInt *idxs;
3074:     PetscReal      upper,lower;
3075:     PetscInt       j,subset_size,eigs_start = 0;
3076:     PetscBLASInt   B_N;
3077:     PetscBool      same_data = PETSC_FALSE;

3079:     if (pcbddc->use_deluxe_scaling) {
3080:       upper = PETSC_MAX_REAL;
3081:       lower = thresh;
3082:     } else {
3083:       upper = 1./thresh;
3084:       lower = 0.;
3085:     }
3086:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3087:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3088:     PetscBLASIntCast(subset_size,&B_N);
3089:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3090:       if (sub_schurs->is_hermitian) {
3091:         PetscInt j,k;
3092:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3093:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3094:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3095:         }
3096:         for (j=0;j<subset_size;j++) {
3097:           for (k=j;k<subset_size;k++) {
3098:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3099:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3100:           }
3101:         }
3102:       } else {
3103:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3104:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3105:       }
3106:     } else {
3107:       S = Sarray + cumarray;
3108:       St = Starray + cumarray;
3109:     }
3110:     /* see if we can save some work */
3111:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3112:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3113:     }

3115:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3116:       B_neigs = 0;
3117:     } else {
3118:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
3119:         PetscBLASInt B_itype = 1;
3120:         PetscBLASInt B_IL, B_IU;
3121:         PetscReal    eps = -1.0; /* dlamch? */
3122:         PetscInt     nmin_s;
3123:         PetscBool    compute_range = PETSC_FALSE;

3125:         if (pcbddc->dbg_flag) {
3126:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]);
3127:         }

3129:         compute_range = PETSC_FALSE;
3130:         if (thresh > 1.+PETSC_SMALL && !same_data) {
3131:           compute_range = PETSC_TRUE;
3132:         }

3134:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3135:         if (compute_range) {

3137:           /* ask for eigenvalues larger than thresh */
3138: #if defined(PETSC_USE_COMPLEX)
3139:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3140: #else
3141:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3142: #endif
3143:         } else if (!same_data) {
3144:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3145:           B_IL = 1;
3146: #if defined(PETSC_USE_COMPLEX)
3147:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3148: #else
3149:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3150: #endif
3151:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3152:           PetscInt k;
3153:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3154:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3155:           PetscBLASIntCast(nmax,&B_neigs);
3156:           nmin = nmax;
3157:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3158:           for (k=0;k<nmax;k++) {
3159:             eigs[k] = 1./PETSC_SMALL;
3160:             eigv[k*(subset_size+1)] = 1.0;
3161:           }
3162:         }
3163:         PetscFPTrapPop();
3164:         if (B_ierr) {
3165:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3166:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3167:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3168:         }

3170:         if (B_neigs > nmax) {
3171:           if (pcbddc->dbg_flag) {
3172:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3173:           }
3174:           if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax;
3175:           B_neigs = nmax;
3176:         }

3178:         nmin_s = PetscMin(nmin,B_N);
3179:         if (B_neigs < nmin_s) {
3180:           PetscBLASInt B_neigs2;

3182:           if (pcbddc->use_deluxe_scaling) {
3183:             B_IL = B_N - nmin_s + 1;
3184:             B_IU = B_N - B_neigs;
3185:           } else {
3186:             B_IL = B_neigs + 1;
3187:             B_IU = nmin_s;
3188:           }
3189:           if (pcbddc->dbg_flag) {
3190:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3191:           }
3192:           if (sub_schurs->is_hermitian) {
3193:             PetscInt j,k;
3194:             for (j=0;j<subset_size;j++) {
3195:               for (k=j;k<subset_size;k++) {
3196:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3197:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3198:               }
3199:             }
3200:           } else {
3201:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3202:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3203:           }
3204:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3205: #if defined(PETSC_USE_COMPLEX)
3206:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3207: #else
3208:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3209: #endif
3210:           PetscFPTrapPop();
3211:           B_neigs += B_neigs2;
3212:         }
3213:         if (B_ierr) {
3214:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3215:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3216:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3217:         }
3218:         if (pcbddc->dbg_flag) {
3219:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3220:           for (j=0;j<B_neigs;j++) {
3221:             if (eigs[j] == 0.0) {
3222:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3223:             } else {
3224:               if (pcbddc->use_deluxe_scaling) {
3225:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3226:               } else {
3227:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3228:               }
3229:             }
3230:           }
3231:         }
3232:       } else {
3233:           /* TODO */
3234:       }
3235:     }
3236:     /* change the basis back to the original one */
3237:     if (sub_schurs->change) {
3238:       Mat change,phi,phit;

3240:       if (pcbddc->dbg_flag > 2) {
3241:         PetscInt ii;
3242:         for (ii=0;ii<B_neigs;ii++) {
3243:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3244:           for (j=0;j<B_N;j++) {
3245: #if defined(PETSC_USE_COMPLEX)
3246:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3247:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3248:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3249: #else
3250:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3251: #endif
3252:           }
3253:         }
3254:       }
3255:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3256:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3257:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3258:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3259:       MatDestroy(&phit);
3260:       MatDestroy(&phi);
3261:     }
3262:     maxneigs = PetscMax(B_neigs,maxneigs);
3263:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3264:     if (B_neigs) {
3265:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3267:       if (pcbddc->dbg_flag > 1) {
3268:         PetscInt ii;
3269:         for (ii=0;ii<B_neigs;ii++) {
3270:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3271:           for (j=0;j<B_N;j++) {
3272: #if defined(PETSC_USE_COMPLEX)
3273:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3274:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3275:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3276: #else
3277:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3278: #endif
3279:           }
3280:         }
3281:       }
3282:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3283:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3284:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3285:       cum++;
3286:     }
3287:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3288:     /* shift for next computation */
3289:     cumarray += subset_size*subset_size;
3290:   }
3291:   if (pcbddc->dbg_flag) {
3292:     PetscViewerFlush(pcbddc->dbg_viewer);
3293:   }

3295:   if (mss) {
3296:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3297:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3298:     /* destroy matrices (junk) */
3299:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3300:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3301:   }
3302:   if (allocated_S_St) {
3303:     PetscFree2(S,St);
3304:   }
3305:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3306: #if defined(PETSC_USE_COMPLEX)
3307:   PetscFree(rwork);
3308: #endif
3309:   if (pcbddc->dbg_flag) {
3310:     PetscInt maxneigs_r;
3311:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3312:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3313:   }
3314:   return(0);
3315: }

3317: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3318: {
3319:   PetscScalar    *coarse_submat_vals;

3323:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3324:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3325:   PCBDDCSetUpLocalScatters(pc);

3327:   /* Setup local neumann solver ksp_R */
3328:   /* PCBDDCSetUpLocalScatters should be called first! */
3329:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3331:   /*
3332:      Setup local correction and local part of coarse basis.
3333:      Gives back the dense local part of the coarse matrix in column major ordering
3334:   */
3335:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3337:   /* Compute total number of coarse nodes and setup coarse solver */
3338:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3340:   /* free */
3341:   PetscFree(coarse_submat_vals);
3342:   return(0);
3343: }

3345: PetscErrorCode PCBDDCResetCustomization(PC pc)
3346: {
3347:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3351:   ISDestroy(&pcbddc->user_primal_vertices);
3352:   ISDestroy(&pcbddc->user_primal_vertices_local);
3353:   ISDestroy(&pcbddc->NeumannBoundaries);
3354:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3355:   ISDestroy(&pcbddc->DirichletBoundaries);
3356:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3357:   PetscFree(pcbddc->onearnullvecs_state);
3358:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3359:   PCBDDCSetDofsSplitting(pc,0,NULL);
3360:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3361:   return(0);
3362: }

3364: PetscErrorCode PCBDDCResetTopography(PC pc)
3365: {
3366:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3367:   PetscInt       i;

3371:   MatDestroy(&pcbddc->nedcG);
3372:   ISDestroy(&pcbddc->nedclocal);
3373:   MatDestroy(&pcbddc->discretegradient);
3374:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3375:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3376:   MatDestroy(&pcbddc->switch_static_change);
3377:   VecDestroy(&pcbddc->work_change);
3378:   MatDestroy(&pcbddc->ConstraintMatrix);
3379:   MatDestroy(&pcbddc->divudotp);
3380:   ISDestroy(&pcbddc->divudotp_vl2l);
3381:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3382:   for (i=0;i<pcbddc->n_local_subs;i++) {
3383:     ISDestroy(&pcbddc->local_subs[i]);
3384:   }
3385:   pcbddc->n_local_subs = 0;
3386:   PetscFree(pcbddc->local_subs);
3387:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3388:   pcbddc->graphanalyzed        = PETSC_FALSE;
3389:   pcbddc->recompute_topography = PETSC_TRUE;
3390:   return(0);
3391: }

3393: PetscErrorCode PCBDDCResetSolvers(PC pc)
3394: {
3395:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3399:   VecDestroy(&pcbddc->coarse_vec);
3400:   if (pcbddc->coarse_phi_B) {
3401:     PetscScalar *array;
3402:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3403:     PetscFree(array);
3404:   }
3405:   MatDestroy(&pcbddc->coarse_phi_B);
3406:   MatDestroy(&pcbddc->coarse_phi_D);
3407:   MatDestroy(&pcbddc->coarse_psi_B);
3408:   MatDestroy(&pcbddc->coarse_psi_D);
3409:   VecDestroy(&pcbddc->vec1_P);
3410:   VecDestroy(&pcbddc->vec1_C);
3411:   MatDestroy(&pcbddc->local_auxmat2);
3412:   MatDestroy(&pcbddc->local_auxmat1);
3413:   VecDestroy(&pcbddc->vec1_R);
3414:   VecDestroy(&pcbddc->vec2_R);
3415:   ISDestroy(&pcbddc->is_R_local);
3416:   VecScatterDestroy(&pcbddc->R_to_B);
3417:   VecScatterDestroy(&pcbddc->R_to_D);
3418:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3419:   KSPReset(pcbddc->ksp_D);
3420:   KSPReset(pcbddc->ksp_R);
3421:   KSPReset(pcbddc->coarse_ksp);
3422:   MatDestroy(&pcbddc->local_mat);
3423:   PetscFree(pcbddc->primal_indices_local_idxs);
3424:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3425:   PetscFree(pcbddc->global_primal_indices);
3426:   ISDestroy(&pcbddc->coarse_subassembling);
3427:   MatDestroy(&pcbddc->benign_change);
3428:   VecDestroy(&pcbddc->benign_vec);
3429:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3430:   MatDestroy(&pcbddc->benign_B0);
3431:   PetscSFDestroy(&pcbddc->benign_sf);
3432:   if (pcbddc->benign_zerodiag_subs) {
3433:     PetscInt i;
3434:     for (i=0;i<pcbddc->benign_n;i++) {
3435:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3436:     }
3437:     PetscFree(pcbddc->benign_zerodiag_subs);
3438:   }
3439:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3440:   return(0);
3441: }

3443: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3444: {
3445:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3446:   PC_IS          *pcis = (PC_IS*)pc->data;
3447:   VecType        impVecType;
3448:   PetscInt       n_constraints,n_R,old_size;

3452:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3453:   n_R = pcis->n - pcbddc->n_vertices;
3454:   VecGetType(pcis->vec1_N,&impVecType);
3455:   /* local work vectors (try to avoid unneeded work)*/
3456:   /* R nodes */
3457:   old_size = -1;
3458:   if (pcbddc->vec1_R) {
3459:     VecGetSize(pcbddc->vec1_R,&old_size);
3460:   }
3461:   if (n_R != old_size) {
3462:     VecDestroy(&pcbddc->vec1_R);
3463:     VecDestroy(&pcbddc->vec2_R);
3464:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3465:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3466:     VecSetType(pcbddc->vec1_R,impVecType);
3467:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3468:   }
3469:   /* local primal dofs */
3470:   old_size = -1;
3471:   if (pcbddc->vec1_P) {
3472:     VecGetSize(pcbddc->vec1_P,&old_size);
3473:   }
3474:   if (pcbddc->local_primal_size != old_size) {
3475:     VecDestroy(&pcbddc->vec1_P);
3476:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3477:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3478:     VecSetType(pcbddc->vec1_P,impVecType);
3479:   }
3480:   /* local explicit constraints */
3481:   old_size = -1;
3482:   if (pcbddc->vec1_C) {
3483:     VecGetSize(pcbddc->vec1_C,&old_size);
3484:   }
3485:   if (n_constraints && n_constraints != old_size) {
3486:     VecDestroy(&pcbddc->vec1_C);
3487:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3488:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3489:     VecSetType(pcbddc->vec1_C,impVecType);
3490:   }
3491:   return(0);
3492: }

3494: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3495: {
3496:   PetscErrorCode  ierr;
3497:   /* pointers to pcis and pcbddc */
3498:   PC_IS*          pcis = (PC_IS*)pc->data;
3499:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3500:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3501:   /* submatrices of local problem */
3502:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3503:   /* submatrices of local coarse problem */
3504:   Mat             S_VV,S_CV,S_VC,S_CC;
3505:   /* working matrices */
3506:   Mat             C_CR;
3507:   /* additional working stuff */
3508:   PC              pc_R;
3509:   Mat             F,Brhs = NULL;
3510:   Vec             dummy_vec;
3511:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3512:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3513:   PetscScalar     *work;
3514:   PetscInt        *idx_V_B;
3515:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3516:   PetscInt        i,n_R,n_D,n_B;

3518:   /* some shortcuts to scalars */
3519:   PetscScalar     one=1.0,m_one=-1.0;

3522:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");

3524:   /* Set Non-overlapping dimensions */
3525:   n_vertices = pcbddc->n_vertices;
3526:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3527:   n_B = pcis->n_B;
3528:   n_D = pcis->n - n_B;
3529:   n_R = pcis->n - n_vertices;

3531:   /* vertices in boundary numbering */
3532:   PetscMalloc1(n_vertices,&idx_V_B);
3533:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3534:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);

3536:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3537:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3538:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3539:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3540:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3541:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3542:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3543:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3544:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3545:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3547:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3548:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3549:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3550:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3551:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3552:   lda_rhs = n_R;
3553:   need_benign_correction = PETSC_FALSE;
3554:   if (isLU || isILU || isCHOL) {
3555:     PCFactorGetMatrix(pc_R,&F);
3556:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3557:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3558:     MatFactorType      type;

3560:     F = reuse_solver->F;
3561:     MatGetFactorType(F,&type);
3562:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3563:     MatGetSize(F,&lda_rhs,NULL);
3564:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3565:   } else {
3566:     F = NULL;
3567:   }

3569:   /* determine if we can use a sparse right-hand side */
3570:   sparserhs = PETSC_FALSE;
3571:   if (F) {
3572:     const MatSolverPackage solver;

3574:     MatFactorGetSolverPackage(F,&solver);
3575:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3576:   }

3578:   /* allocate workspace */
3579:   n = 0;
3580:   if (n_constraints) {
3581:     n += lda_rhs*n_constraints;
3582:   }
3583:   if (n_vertices) {
3584:     n = PetscMax(2*lda_rhs*n_vertices,n);
3585:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3586:   }
3587:   if (!pcbddc->symmetric_primal) {
3588:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3589:   }
3590:   PetscMalloc1(n,&work);

3592:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3593:   dummy_vec = NULL;
3594:   if (need_benign_correction && lda_rhs != n_R && F) {
3595:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3596:   }

3598:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3599:   if (n_constraints) {
3600:     Mat         M1,M2,M3,C_B;
3601:     IS          is_aux;
3602:     PetscScalar *array,*array2;

3604:     MatDestroy(&pcbddc->local_auxmat1);
3605:     MatDestroy(&pcbddc->local_auxmat2);

3607:     /* Extract constraints on R nodes: C_{CR}  */
3608:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3609:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3610:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3612:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3613:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3614:     if (!sparserhs) {
3615:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3616:       for (i=0;i<n_constraints;i++) {
3617:         const PetscScalar *row_cmat_values;
3618:         const PetscInt    *row_cmat_indices;
3619:         PetscInt          size_of_constraint,j;

3621:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3622:         for (j=0;j<size_of_constraint;j++) {
3623:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3624:         }
3625:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3626:       }
3627:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3628:     } else {
3629:       Mat tC_CR;

3631:       MatScale(C_CR,-1.0);
3632:       if (lda_rhs != n_R) {
3633:         PetscScalar *aa;
3634:         PetscInt    r,*ii,*jj;
3635:         PetscBool   done;

3637:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3638:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3639:         MatSeqAIJGetArray(C_CR,&aa);
3640:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3641:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3642:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3643:       } else {
3644:         PetscObjectReference((PetscObject)C_CR);
3645:         tC_CR = C_CR;
3646:       }
3647:       MatCreateTranspose(tC_CR,&Brhs);
3648:       MatDestroy(&tC_CR);
3649:     }
3650:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3651:     if (F) {
3652:       if (need_benign_correction) {
3653:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3655:         /* rhs is already zero on interior dofs, no need to change the rhs */
3656:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3657:       }
3658:       MatMatSolve(F,Brhs,local_auxmat2_R);
3659:       if (need_benign_correction) {
3660:         PetscScalar        *marr;
3661:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3663:         MatDenseGetArray(local_auxmat2_R,&marr);
3664:         if (lda_rhs != n_R) {
3665:           for (i=0;i<n_constraints;i++) {
3666:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3667:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3668:             VecResetArray(dummy_vec);
3669:           }
3670:         } else {
3671:           for (i=0;i<n_constraints;i++) {
3672:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3673:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3674:             VecResetArray(pcbddc->vec1_R);
3675:           }
3676:         }
3677:         MatDenseRestoreArray(local_auxmat2_R,&marr);
3678:       }
3679:     } else {
3680:       PetscScalar *marr;

3682:       MatDenseGetArray(local_auxmat2_R,&marr);
3683:       for (i=0;i<n_constraints;i++) {
3684:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3685:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3686:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3687:         VecResetArray(pcbddc->vec1_R);
3688:         VecResetArray(pcbddc->vec2_R);
3689:       }
3690:       MatDenseRestoreArray(local_auxmat2_R,&marr);
3691:     }
3692:     if (sparserhs) {
3693:       MatScale(C_CR,-1.0);
3694:     }
3695:     MatDestroy(&Brhs);
3696:     if (!pcbddc->switch_static) {
3697:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3698:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
3699:       MatDenseGetArray(local_auxmat2_R,&array2);
3700:       for (i=0;i<n_constraints;i++) {
3701:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
3702:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
3703:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3704:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3705:         VecResetArray(pcis->vec1_B);
3706:         VecResetArray(pcbddc->vec1_R);
3707:       }
3708:       MatDenseRestoreArray(local_auxmat2_R,&array2);
3709:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
3710:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3711:     } else {
3712:       if (lda_rhs != n_R) {
3713:         IS dummy;

3715:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
3716:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
3717:         ISDestroy(&dummy);
3718:       } else {
3719:         PetscObjectReference((PetscObject)local_auxmat2_R);
3720:         pcbddc->local_auxmat2 = local_auxmat2_R;
3721:       }
3722:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3723:     }
3724:     ISDestroy(&is_aux);
3725:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
3726:     MatScale(M3,m_one);
3727:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
3728:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
3729:     if (isCHOL) {
3730:       MatCholeskyFactor(M3,NULL,NULL);
3731:     } else {
3732:       MatLUFactor(M3,NULL,NULL,NULL);
3733:     }
3734:     VecSet(pcbddc->vec1_C,one);
3735:     MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
3736:     MatMatSolve(M3,M2,M1);
3737:     MatDestroy(&M2);
3738:     MatDestroy(&M3);
3739:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3740:     MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
3741:     MatDestroy(&C_B);
3742:     MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3743:     MatDestroy(&M1);
3744:   }

3746:   /* Get submatrices from subdomain matrix */
3747:   if (n_vertices) {
3748:     IS        is_aux;
3749:     PetscBool isseqaij;

3751:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3752:       IS tis;

3754:       ISDuplicate(pcbddc->is_R_local,&tis);
3755:       ISSort(tis);
3756:       ISComplement(tis,0,pcis->n,&is_aux);
3757:       ISDestroy(&tis);
3758:     } else {
3759:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
3760:     }
3761:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
3762:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
3763:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
3764:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
3765:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
3766:     }
3767:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
3768:     ISDestroy(&is_aux);
3769:   }

3771:   /* Matrix of coarse basis functions (local) */
3772:   if (pcbddc->coarse_phi_B) {
3773:     PetscInt on_B,on_primal,on_D=n_D;
3774:     if (pcbddc->coarse_phi_D) {
3775:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
3776:     }
3777:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
3778:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
3779:       PetscScalar *marray;

3781:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
3782:       PetscFree(marray);
3783:       MatDestroy(&pcbddc->coarse_phi_B);
3784:       MatDestroy(&pcbddc->coarse_psi_B);
3785:       MatDestroy(&pcbddc->coarse_phi_D);
3786:       MatDestroy(&pcbddc->coarse_psi_D);
3787:     }
3788:   }

3790:   if (!pcbddc->coarse_phi_B) {
3791:     PetscScalar *marr;

3793:     /* memory size */
3794:     n = n_B*pcbddc->local_primal_size;
3795:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
3796:     if (!pcbddc->symmetric_primal) n *= 2;
3797:     PetscCalloc1(n,&marr);
3798:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
3799:     marr += n_B*pcbddc->local_primal_size;
3800:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
3801:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
3802:       marr += n_D*pcbddc->local_primal_size;
3803:     }
3804:     if (!pcbddc->symmetric_primal) {
3805:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
3806:       marr += n_B*pcbddc->local_primal_size;
3807:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
3808:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
3809:       }
3810:     } else {
3811:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
3812:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
3813:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
3814:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
3815:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
3816:       }
3817:     }
3818:   }

3820:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
3821:   p0_lidx_I = NULL;
3822:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
3823:     const PetscInt *idxs;

3825:     ISGetIndices(pcis->is_I_local,&idxs);
3826:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
3827:     for (i=0;i<pcbddc->benign_n;i++) {
3828:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
3829:     }
3830:     ISRestoreIndices(pcis->is_I_local,&idxs);
3831:   }

3833:   /* vertices */
3834:   if (n_vertices) {
3835:     PetscBool restoreavr = PETSC_FALSE;

3837:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

3839:     if (n_R) {
3840:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
3841:       PetscBLASInt B_N,B_one = 1;
3842:       PetscScalar  *x,*y;

3844:       MatScale(A_RV,m_one);
3845:       if (need_benign_correction) {
3846:         ISLocalToGlobalMapping RtoN;
3847:         IS                     is_p0;
3848:         PetscInt               *idxs_p0,n;

3850:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
3851:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
3852:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
3853:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
3854:         ISLocalToGlobalMappingDestroy(&RtoN);
3855:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
3856:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
3857:         ISDestroy(&is_p0);
3858:       }

3860:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
3861:       if (!sparserhs || need_benign_correction) {
3862:         if (lda_rhs == n_R) {
3863:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
3864:         } else {
3865:           PetscScalar    *av,*array;
3866:           const PetscInt *xadj,*adjncy;
3867:           PetscInt       n;
3868:           PetscBool      flg_row;

3870:           array = work+lda_rhs*n_vertices;
3871:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
3872:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
3873:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3874:           MatSeqAIJGetArray(A_RV,&av);
3875:           for (i=0;i<n;i++) {
3876:             PetscInt j;
3877:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
3878:           }
3879:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3880:           MatDestroy(&A_RV);
3881:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
3882:         }
3883:         if (need_benign_correction) {
3884:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3885:           PetscScalar        *marr;

3887:           MatDenseGetArray(A_RV,&marr);
3888:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

3890:                  | 0 0  0 | (V)
3891:              L = | 0 0 -1 | (P-p0)
3892:                  | 0 0 -1 | (p0)

3894:           */
3895:           for (i=0;i<reuse_solver->benign_n;i++) {
3896:             const PetscScalar *vals;
3897:             const PetscInt    *idxs,*idxs_zero;
3898:             PetscInt          n,j,nz;

3900:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
3901:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3902:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
3903:             for (j=0;j<n;j++) {
3904:               PetscScalar val = vals[j];
3905:               PetscInt    k,col = idxs[j];
3906:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
3907:             }
3908:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
3909:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3910:           }
3911:           MatDenseRestoreArray(A_RV,&marr);
3912:         }
3913:         PetscObjectReference((PetscObject)A_RV);
3914:         Brhs = A_RV;
3915:       } else {
3916:         Mat tA_RVT,A_RVT;

3918:         if (!pcbddc->symmetric_primal) {
3919:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
3920:         } else {
3921:           restoreavr = PETSC_TRUE;
3922:           MatScale(A_VR,-1.0);
3923:           PetscObjectReference((PetscObject)A_VR);
3924:           A_RVT = A_VR;
3925:         }
3926:         if (lda_rhs != n_R) {
3927:           PetscScalar *aa;
3928:           PetscInt    r,*ii,*jj;
3929:           PetscBool   done;

3931:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3932:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3933:           MatSeqAIJGetArray(A_RVT,&aa);
3934:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
3935:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3936:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3937:         } else {
3938:           PetscObjectReference((PetscObject)A_RVT);
3939:           tA_RVT = A_RVT;
3940:         }
3941:         MatCreateTranspose(tA_RVT,&Brhs);
3942:         MatDestroy(&tA_RVT);
3943:         MatDestroy(&A_RVT);
3944:       }
3945:       if (F) {
3946:         /* need to correct the rhs */
3947:         if (need_benign_correction) {
3948:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3949:           PetscScalar        *marr;

3951:           MatDenseGetArray(Brhs,&marr);
3952:           if (lda_rhs != n_R) {
3953:             for (i=0;i<n_vertices;i++) {
3954:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3955:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
3956:               VecResetArray(dummy_vec);
3957:             }
3958:           } else {
3959:             for (i=0;i<n_vertices;i++) {
3960:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3961:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
3962:               VecResetArray(pcbddc->vec1_R);
3963:             }
3964:           }
3965:           MatDenseRestoreArray(Brhs,&marr);
3966:         }
3967:         MatMatSolve(F,Brhs,A_RRmA_RV);
3968:         if (restoreavr) {
3969:           MatScale(A_VR,-1.0);
3970:         }
3971:         /* need to correct the solution */
3972:         if (need_benign_correction) {
3973:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3974:           PetscScalar        *marr;

3976:           MatDenseGetArray(A_RRmA_RV,&marr);
3977:           if (lda_rhs != n_R) {
3978:             for (i=0;i<n_vertices;i++) {
3979:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3980:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3981:               VecResetArray(dummy_vec);
3982:             }
3983:           } else {
3984:             for (i=0;i<n_vertices;i++) {
3985:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3986:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3987:               VecResetArray(pcbddc->vec1_R);
3988:             }
3989:           }
3990:           MatDenseRestoreArray(A_RRmA_RV,&marr);
3991:         }
3992:       } else {
3993:         MatDenseGetArray(Brhs,&y);
3994:         for (i=0;i<n_vertices;i++) {
3995:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
3996:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
3997:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3998:           VecResetArray(pcbddc->vec1_R);
3999:           VecResetArray(pcbddc->vec2_R);
4000:         }
4001:         MatDenseRestoreArray(Brhs,&y);
4002:       }
4003:       MatDestroy(&A_RV);
4004:       MatDestroy(&Brhs);
4005:       /* S_VV and S_CV */
4006:       if (n_constraints) {
4007:         Mat B;

4009:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4010:         for (i=0;i<n_vertices;i++) {
4011:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4012:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4013:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4014:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4015:           VecResetArray(pcis->vec1_B);
4016:           VecResetArray(pcbddc->vec1_R);
4017:         }
4018:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4019:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4020:         MatDestroy(&B);
4021:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4022:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4023:         MatScale(S_CV,m_one);
4024:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4025:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4026:         MatDestroy(&B);
4027:       }
4028:       if (lda_rhs != n_R) {
4029:         MatDestroy(&A_RRmA_RV);
4030:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4031:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4032:       }
4033:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4034:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4035:       if (need_benign_correction) {
4036:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4037:         PetscScalar      *marr,*sums;

4039:         PetscMalloc1(n_vertices,&sums);
4040:         MatDenseGetArray(S_VVt,&marr);
4041:         for (i=0;i<reuse_solver->benign_n;i++) {
4042:           const PetscScalar *vals;
4043:           const PetscInt    *idxs,*idxs_zero;
4044:           PetscInt          n,j,nz;

4046:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4047:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4048:           for (j=0;j<n_vertices;j++) {
4049:             PetscInt k;
4050:             sums[j] = 0.;
4051:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4052:           }
4053:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4054:           for (j=0;j<n;j++) {
4055:             PetscScalar val = vals[j];
4056:             PetscInt k;
4057:             for (k=0;k<n_vertices;k++) {
4058:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4059:             }
4060:           }
4061:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4062:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4063:         }
4064:         PetscFree(sums);
4065:         MatDenseRestoreArray(S_VVt,&marr);
4066:         MatDestroy(&A_RV_bcorr);
4067:       }
4068:       MatDestroy(&A_RRmA_RV);
4069:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4070:       MatDenseGetArray(A_VV,&x);
4071:       MatDenseGetArray(S_VVt,&y);
4072:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4073:       MatDenseRestoreArray(A_VV,&x);
4074:       MatDenseRestoreArray(S_VVt,&y);
4075:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4076:       MatDestroy(&S_VVt);
4077:     } else {
4078:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4079:     }
4080:     MatDestroy(&A_VV);

4082:     /* coarse basis functions */
4083:     for (i=0;i<n_vertices;i++) {
4084:       PetscScalar *y;

4086:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4087:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4088:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4089:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4090:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4091:       y[n_B*i+idx_V_B[i]] = 1.0;
4092:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4093:       VecResetArray(pcis->vec1_B);

4095:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4096:         PetscInt j;

4098:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4099:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4100:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4101:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4102:         VecResetArray(pcis->vec1_D);
4103:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4104:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4105:       }
4106:       VecResetArray(pcbddc->vec1_R);
4107:     }
4108:     /* if n_R == 0 the object is not destroyed */
4109:     MatDestroy(&A_RV);
4110:   }
4111:   VecDestroy(&dummy_vec);

4113:   if (n_constraints) {
4114:     Mat B;

4116:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4117:     MatScale(S_CC,m_one);
4118:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4119:     MatScale(S_CC,m_one);
4120:     if (n_vertices) {
4121:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4122:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4123:       } else {
4124:         Mat S_VCt;

4126:         if (lda_rhs != n_R) {
4127:           MatDestroy(&B);
4128:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4129:           MatSeqDenseSetLDA(B,lda_rhs);
4130:         }
4131:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4132:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4133:         MatDestroy(&S_VCt);
4134:       }
4135:     }
4136:     MatDestroy(&B);
4137:     /* coarse basis functions */
4138:     for (i=0;i<n_constraints;i++) {
4139:       PetscScalar *y;

4141:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4142:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4143:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4144:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4145:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4146:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4147:       VecResetArray(pcis->vec1_B);
4148:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4149:         PetscInt j;

4151:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4152:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4153:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4154:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4155:         VecResetArray(pcis->vec1_D);
4156:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4157:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4158:       }
4159:       VecResetArray(pcbddc->vec1_R);
4160:     }
4161:   }
4162:   if (n_constraints) {
4163:     MatDestroy(&local_auxmat2_R);
4164:   }
4165:   PetscFree(p0_lidx_I);

4167:   /* coarse matrix entries relative to B_0 */
4168:   if (pcbddc->benign_n) {
4169:     Mat         B0_B,B0_BPHI;
4170:     IS          is_dummy;
4171:     PetscScalar *data;
4172:     PetscInt    j;

4174:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4175:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4176:     ISDestroy(&is_dummy);
4177:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4178:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4179:     MatDenseGetArray(B0_BPHI,&data);
4180:     for (j=0;j<pcbddc->benign_n;j++) {
4181:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4182:       for (i=0;i<pcbddc->local_primal_size;i++) {
4183:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4184:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4185:       }
4186:     }
4187:     MatDenseRestoreArray(B0_BPHI,&data);
4188:     MatDestroy(&B0_B);
4189:     MatDestroy(&B0_BPHI);
4190:   }

4192:   /* compute other basis functions for non-symmetric problems */
4193:   if (!pcbddc->symmetric_primal) {
4194:     Mat         B_V=NULL,B_C=NULL;
4195:     PetscScalar *marray;

4197:     if (n_constraints) {
4198:       Mat S_CCT,C_CRT;

4200:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4201:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4202:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4203:       MatDestroy(&S_CCT);
4204:       if (n_vertices) {
4205:         Mat S_VCT;

4207:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4208:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4209:         MatDestroy(&S_VCT);
4210:       }
4211:       MatDestroy(&C_CRT);
4212:     } else {
4213:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4214:     }
4215:     if (n_vertices && n_R) {
4216:       PetscScalar    *av,*marray;
4217:       const PetscInt *xadj,*adjncy;
4218:       PetscInt       n;
4219:       PetscBool      flg_row;

4221:       /* B_V = B_V - A_VR^T */
4222:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4223:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4224:       MatSeqAIJGetArray(A_VR,&av);
4225:       MatDenseGetArray(B_V,&marray);
4226:       for (i=0;i<n;i++) {
4227:         PetscInt j;
4228:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4229:       }
4230:       MatDenseRestoreArray(B_V,&marray);
4231:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4232:       MatDestroy(&A_VR);
4233:     }

4235:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4236:     if (n_vertices) {
4237:       MatDenseGetArray(B_V,&marray);
4238:       for (i=0;i<n_vertices;i++) {
4239:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4240:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4241:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4242:         VecResetArray(pcbddc->vec1_R);
4243:         VecResetArray(pcbddc->vec2_R);
4244:       }
4245:       MatDenseRestoreArray(B_V,&marray);
4246:     }
4247:     if (B_C) {
4248:       MatDenseGetArray(B_C,&marray);
4249:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4250:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4251:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4252:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4253:         VecResetArray(pcbddc->vec1_R);
4254:         VecResetArray(pcbddc->vec2_R);
4255:       }
4256:       MatDenseRestoreArray(B_C,&marray);
4257:     }
4258:     /* coarse basis functions */
4259:     for (i=0;i<pcbddc->local_primal_size;i++) {
4260:       PetscScalar *y;

4262:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4263:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4264:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4265:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4266:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4267:       if (i<n_vertices) {
4268:         y[n_B*i+idx_V_B[i]] = 1.0;
4269:       }
4270:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4271:       VecResetArray(pcis->vec1_B);

4273:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4274:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4275:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4276:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4277:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4278:         VecResetArray(pcis->vec1_D);
4279:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4280:       }
4281:       VecResetArray(pcbddc->vec1_R);
4282:     }
4283:     MatDestroy(&B_V);
4284:     MatDestroy(&B_C);
4285:   }

4287:   /* free memory */
4288:   PetscFree(idx_V_B);
4289:   MatDestroy(&S_VV);
4290:   MatDestroy(&S_CV);
4291:   MatDestroy(&S_VC);
4292:   MatDestroy(&S_CC);
4293:   PetscFree(work);
4294:   if (n_vertices) {
4295:     MatDestroy(&A_VR);
4296:   }
4297:   if (n_constraints) {
4298:     MatDestroy(&C_CR);
4299:   }
4300:   /* Checking coarse_sub_mat and coarse basis functions */
4301:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4302:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4303:   if (pcbddc->dbg_flag) {
4304:     Mat         coarse_sub_mat;
4305:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4306:     Mat         coarse_phi_D,coarse_phi_B;
4307:     Mat         coarse_psi_D,coarse_psi_B;
4308:     Mat         A_II,A_BB,A_IB,A_BI;
4309:     Mat         C_B,CPHI;
4310:     IS          is_dummy;
4311:     Vec         mones;
4312:     MatType     checkmattype=MATSEQAIJ;
4313:     PetscReal   real_value;

4315:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4316:       Mat A;
4317:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4318:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4319:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4320:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4321:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4322:       MatDestroy(&A);
4323:     } else {
4324:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4325:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4326:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4327:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4328:     }
4329:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4330:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4331:     if (!pcbddc->symmetric_primal) {
4332:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4333:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4334:     }
4335:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4337:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4338:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4339:     PetscViewerFlush(pcbddc->dbg_viewer);
4340:     if (!pcbddc->symmetric_primal) {
4341:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4342:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4343:       MatDestroy(&AUXMAT);
4344:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4345:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4346:       MatDestroy(&AUXMAT);
4347:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4348:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4349:       MatDestroy(&AUXMAT);
4350:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4351:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4352:       MatDestroy(&AUXMAT);
4353:     } else {
4354:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4355:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4356:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4357:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4358:       MatDestroy(&AUXMAT);
4359:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4360:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4361:       MatDestroy(&AUXMAT);
4362:     }
4363:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4364:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4365:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4366:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4367:     if (pcbddc->benign_n) {
4368:       Mat         B0_B,B0_BPHI;
4369:       PetscScalar *data,*data2;
4370:       PetscInt    j;

4372:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4373:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4374:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4375:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4376:       MatDenseGetArray(TM1,&data);
4377:       MatDenseGetArray(B0_BPHI,&data2);
4378:       for (j=0;j<pcbddc->benign_n;j++) {
4379:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4380:         for (i=0;i<pcbddc->local_primal_size;i++) {
4381:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4382:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4383:         }
4384:       }
4385:       MatDenseRestoreArray(TM1,&data);
4386:       MatDenseRestoreArray(B0_BPHI,&data2);
4387:       MatDestroy(&B0_B);
4388:       ISDestroy(&is_dummy);
4389:       MatDestroy(&B0_BPHI);
4390:     }
4391: #if 0
4392:   {
4393:     PetscViewer viewer;
4394:     char filename[256];
4395:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4396:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4397:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4398:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4399:     MatView(coarse_sub_mat,viewer);
4400:     PetscObjectSetName((PetscObject)TM1,"projected");
4401:     MatView(TM1,viewer);
4402:     if (pcbddc->coarse_phi_B) {
4403:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4404:       MatView(pcbddc->coarse_phi_B,viewer);
4405:     }
4406:     if (pcbddc->coarse_phi_D) {
4407:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4408:       MatView(pcbddc->coarse_phi_D,viewer);
4409:     }
4410:     if (pcbddc->coarse_psi_B) {
4411:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4412:       MatView(pcbddc->coarse_psi_B,viewer);
4413:     }
4414:     if (pcbddc->coarse_psi_D) {
4415:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4416:       MatView(pcbddc->coarse_psi_D,viewer);
4417:     }
4418:     PetscViewerDestroy(&viewer);
4419:   }
4420: #endif
4421:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4422:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4423:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4424:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4426:     /* check constraints */
4427:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4428:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4429:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4430:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4431:     } else {
4432:       PetscScalar *data;
4433:       Mat         tmat;
4434:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4435:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4436:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4437:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4438:       MatDestroy(&tmat);
4439:     }
4440:     MatCreateVecs(CPHI,&mones,NULL);
4441:     VecSet(mones,-1.0);
4442:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4443:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4444:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4445:     if (!pcbddc->symmetric_primal) {
4446:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4447:       VecSet(mones,-1.0);
4448:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4449:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4450:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4451:     }
4452:     MatDestroy(&C_B);
4453:     MatDestroy(&CPHI);
4454:     ISDestroy(&is_dummy);
4455:     VecDestroy(&mones);
4456:     PetscViewerFlush(pcbddc->dbg_viewer);
4457:     MatDestroy(&A_II);
4458:     MatDestroy(&A_BB);
4459:     MatDestroy(&A_IB);
4460:     MatDestroy(&A_BI);
4461:     MatDestroy(&TM1);
4462:     MatDestroy(&TM2);
4463:     MatDestroy(&TM3);
4464:     MatDestroy(&TM4);
4465:     MatDestroy(&coarse_phi_D);
4466:     MatDestroy(&coarse_phi_B);
4467:     if (!pcbddc->symmetric_primal) {
4468:       MatDestroy(&coarse_psi_D);
4469:       MatDestroy(&coarse_psi_B);
4470:     }
4471:     MatDestroy(&coarse_sub_mat);
4472:   }
4473:   /* get back data */
4474:   *coarse_submat_vals_n = coarse_submat_vals;
4475:   return(0);
4476: }

4478: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4479: {
4480:   Mat            *work_mat;
4481:   IS             isrow_s,iscol_s;
4482:   PetscBool      rsorted,csorted;
4483:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4487:   ISSorted(isrow,&rsorted);
4488:   ISSorted(iscol,&csorted);
4489:   ISGetLocalSize(isrow,&rsize);
4490:   ISGetLocalSize(iscol,&csize);

4492:   if (!rsorted) {
4493:     const PetscInt *idxs;
4494:     PetscInt *idxs_sorted,i;

4496:     PetscMalloc1(rsize,&idxs_perm_r);
4497:     PetscMalloc1(rsize,&idxs_sorted);
4498:     for (i=0;i<rsize;i++) {
4499:       idxs_perm_r[i] = i;
4500:     }
4501:     ISGetIndices(isrow,&idxs);
4502:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4503:     for (i=0;i<rsize;i++) {
4504:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4505:     }
4506:     ISRestoreIndices(isrow,&idxs);
4507:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4508:   } else {
4509:     PetscObjectReference((PetscObject)isrow);
4510:     isrow_s = isrow;
4511:   }

4513:   if (!csorted) {
4514:     if (isrow == iscol) {
4515:       PetscObjectReference((PetscObject)isrow_s);
4516:       iscol_s = isrow_s;
4517:     } else {
4518:       const PetscInt *idxs;
4519:       PetscInt       *idxs_sorted,i;

4521:       PetscMalloc1(csize,&idxs_perm_c);
4522:       PetscMalloc1(csize,&idxs_sorted);
4523:       for (i=0;i<csize;i++) {
4524:         idxs_perm_c[i] = i;
4525:       }
4526:       ISGetIndices(iscol,&idxs);
4527:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4528:       for (i=0;i<csize;i++) {
4529:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4530:       }
4531:       ISRestoreIndices(iscol,&idxs);
4532:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4533:     }
4534:   } else {
4535:     PetscObjectReference((PetscObject)iscol);
4536:     iscol_s = iscol;
4537:   }

4539:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

4541:   if (!rsorted || !csorted) {
4542:     Mat      new_mat;
4543:     IS       is_perm_r,is_perm_c;

4545:     if (!rsorted) {
4546:       PetscInt *idxs_r,i;
4547:       PetscMalloc1(rsize,&idxs_r);
4548:       for (i=0;i<rsize;i++) {
4549:         idxs_r[idxs_perm_r[i]] = i;
4550:       }
4551:       PetscFree(idxs_perm_r);
4552:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4553:     } else {
4554:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4555:     }
4556:     ISSetPermutation(is_perm_r);

4558:     if (!csorted) {
4559:       if (isrow_s == iscol_s) {
4560:         PetscObjectReference((PetscObject)is_perm_r);
4561:         is_perm_c = is_perm_r;
4562:       } else {
4563:         PetscInt *idxs_c,i;
4564:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4565:         PetscMalloc1(csize,&idxs_c);
4566:         for (i=0;i<csize;i++) {
4567:           idxs_c[idxs_perm_c[i]] = i;
4568:         }
4569:         PetscFree(idxs_perm_c);
4570:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4571:       }
4572:     } else {
4573:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4574:     }
4575:     ISSetPermutation(is_perm_c);

4577:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4578:     MatDestroy(&work_mat[0]);
4579:     work_mat[0] = new_mat;
4580:     ISDestroy(&is_perm_r);
4581:     ISDestroy(&is_perm_c);
4582:   }

4584:   PetscObjectReference((PetscObject)work_mat[0]);
4585:   *B = work_mat[0];
4586:   MatDestroyMatrices(1,&work_mat);
4587:   ISDestroy(&isrow_s);
4588:   ISDestroy(&iscol_s);
4589:   return(0);
4590: }

4592: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4593: {
4594:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4595:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4596:   Mat            new_mat,lA;
4597:   IS             is_local,is_global;
4598:   PetscInt       local_size;
4599:   PetscBool      isseqaij;

4603:   MatDestroy(&pcbddc->local_mat);
4604:   MatGetSize(matis->A,&local_size,NULL);
4605:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4606:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4607:   ISDestroy(&is_local);
4608:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4609:   ISDestroy(&is_global);

4611:   /* check */
4612:   if (pcbddc->dbg_flag) {
4613:     Vec       x,x_change;
4614:     PetscReal error;

4616:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4617:     VecSetRandom(x,NULL);
4618:     MatMult(ChangeOfBasisMatrix,x,x_change);
4619:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4620:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4621:     MatMult(new_mat,matis->x,matis->y);
4622:     if (!pcbddc->change_interior) {
4623:       const PetscScalar *x,*y,*v;
4624:       PetscReal         lerror = 0.;
4625:       PetscInt          i;

4627:       VecGetArrayRead(matis->x,&x);
4628:       VecGetArrayRead(matis->y,&y);
4629:       VecGetArrayRead(matis->counter,&v);
4630:       for (i=0;i<local_size;i++)
4631:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4632:           lerror = PetscAbsScalar(x[i]-y[i]);
4633:       VecRestoreArrayRead(matis->x,&x);
4634:       VecRestoreArrayRead(matis->y,&y);
4635:       VecRestoreArrayRead(matis->counter,&v);
4636:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4637:       if (error > PETSC_SMALL) {
4638:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4639:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4640:         } else {
4641:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4642:         }
4643:       }
4644:     }
4645:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4646:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4647:     VecAXPY(x,-1.0,x_change);
4648:     VecNorm(x,NORM_INFINITY,&error);
4649:     if (error > PETSC_SMALL) {
4650:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4651:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4652:       } else {
4653:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4654:       }
4655:     }
4656:     VecDestroy(&x);
4657:     VecDestroy(&x_change);
4658:   }

4660:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4661:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

4663:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4664:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4665:   if (isseqaij) {
4666:     MatDestroy(&pcbddc->local_mat);
4667:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4668:     if (lA) {
4669:       Mat work;
4670:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4671:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4672:       MatDestroy(&work);
4673:     }
4674:   } else {
4675:     Mat work_mat;

4677:     MatDestroy(&pcbddc->local_mat);
4678:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4679:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4680:     MatDestroy(&work_mat);
4681:     if (lA) {
4682:       Mat work;
4683:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4684:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4685:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4686:       MatDestroy(&work);
4687:     }
4688:   }
4689:   if (matis->A->symmetric_set) {
4690:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4691: #if !defined(PETSC_USE_COMPLEX)
4692:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4693: #endif
4694:   }
4695:   MatDestroy(&new_mat);
4696:   return(0);
4697: }

/*
   PCBDDCSetUpLocalScatters - (re)builds pcbddc->is_R_local and the scatters pcbddc->R_to_B and pcbddc->R_to_D

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   R denotes the local dofs that are not primal vertices.
   The routine returns immediately when nothing has to be rebuilt (see the first test below);
   otherwise any previous is_R_local, R_to_B and R_to_D are destroyed and recreated.
   When sub_schurs->reuse_solver is present, the ordering stored in reuse_solver->is_R is used for R and,
   before returning, reuse_solver->is_R is replaced by a new reference to pcbddc->is_R_local.
   R_to_D is created only if pcbddc->switch_static or pcbddc->dbg_flag is set.
   In debugging mode the local dimensions are printed on pcbddc->dbg_viewer.
*/
4699: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4700: {
4701:   PC_IS*          pcis = (PC_IS*)(pc->data);
4702:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
4703:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4704:   PetscInt        *idx_R_local=NULL;
4705:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
4706:   PetscInt        vbs,bs;
4707:   PetscBT         bitmask=NULL;
4708:   PetscErrorCode  ierr;

4711:   /*
4712:     No need to setup local scatters if
4713:       - primal space is unchanged
4714:         AND
4715:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4716:         AND
4717:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
4718:   */
4719:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
4720:     return(0);
4721:   }
4722:   /* destroy old objects */
4723:   ISDestroy(&pcbddc->is_R_local);
4724:   VecScatterDestroy(&pcbddc->R_to_B);
4725:   VecScatterDestroy(&pcbddc->R_to_D);
4726:   /* Set Non-overlapping dimensions */
4727:   n_B = pcis->n_B;
4728:   n_D = pcis->n - n_B;
4729:   n_vertices = pcbddc->n_vertices;

4731:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

4733:   /* create auxiliary bitmask and allocate workspace */
4734:   if (!sub_schurs || !sub_schurs->reuse_solver) {
4735:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
4736:     PetscBTCreate(pcis->n,&bitmask);
          /* mark the primal vertices ... */
4737:     for (i=0;i<n_vertices;i++) {
4738:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
4739:     }

          /* ... and collect every unmarked local index, in increasing order, as R */
4741:     for (i=0, n_R=0; i<pcis->n; i++) {
4742:       if (!PetscBTLookup(bitmask,i)) {
4743:         idx_R_local[n_R++] = i;
4744:       }
4745:     }
4746:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
4747:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* idx_R_local now aliases the indices of reuse_solver->is_R (const is cast away);
             it is handed back with ISRestoreIndices below and must not be freed */
4749:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4750:     ISGetLocalSize(reuse_solver->is_R,&n_R);
4751:   }

4753:   /* Block code */
        /* vbs is the block size used to create is_R_local: it stays 1 unless the matrix block size bs
           is larger than one, divides n_vertices, and the checks below find only whole blocks */
4754:   vbs = 1;
4755:   MatGetBlockSize(pcbddc->local_mat,&bs);
4756:   if (bs>1 && !(n_vertices%bs)) {
4757:     PetscBool is_blocked = PETSC_TRUE;
4758:     PetscInt  *vary;
4759:     if (!sub_schurs || !sub_schurs->reuse_solver) {
4760:       PetscMalloc1(pcis->n/bs,&vary);
4761:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
4762:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
4763:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
            /* vary[b] counts the vertices falling in block b: each block must hold either 0 or bs of them */
4764:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
4765:       for (i=0; i<pcis->n/bs; i++) {
4766:         if (vary[i]!=0 && vary[i]!=bs) {
4767:           is_blocked = PETSC_FALSE;
4768:           break;
4769:         }
4770:       }
4771:       PetscFree(vary);
4772:     } else {
4773:       /* Verify directly the R set */
            /* each group of bs consecutive entries must be node, node+1, ..., node+bs-1;
               the break only leaves the inner loop, so the outer loop keeps scanning after a failure
               (is_blocked is never set back to PETSC_TRUE, the result is unaffected) */
4774:       for (i=0; i<n_R/bs; i++) {
4775:         PetscInt j,node=idx_R_local[bs*i];
4776:         for (j=1; j<bs; j++) {
4777:           if (node != idx_R_local[bs*i+j]-j) {
4778:             is_blocked = PETSC_FALSE;
4779:             break;
4780:           }
4781:         }
4782:       }
4783:     }
4784:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
            /* the first n_R/vbs entries of idx_R_local are overwritten in place with block indices */
            /* NOTE(review): when reusing the solver this writes through the pointer obtained from ISGetIndices
               on reuse_solver->is_R; that IS is released right below - confirm nobody else holds a reference */
4785:       vbs = bs;
4786:       for (i=0;i<n_R/vbs;i++) {
4787:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
4788:       }
4789:     }
4790:   }
        /* indices are copied (PETSC_COPY_VALUES), so idx_R_local can be released/restored afterwards */
4791:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
4792:   if (sub_schurs && sub_schurs->reuse_solver) {
4793:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* from now on reuse_solver->is_R and pcbddc->is_R_local are the same object (extra reference taken) */
4795:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4796:     ISDestroy(&reuse_solver->is_R);
4797:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
4798:     reuse_solver->is_R = pcbddc->is_R_local;
4799:   } else {
4800:     PetscFree(idx_R_local);
4801:   }

4803:   /* print some info if requested */
4804:   if (pcbddc->dbg_flag) {
4805:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4806:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* NOTE(review): no matching PetscViewerASCIIPopSynchronized is visible in this routine - confirm it is popped by the caller */
4807:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4808:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
          /* sizes are PetscInt (n_D, n_B, n_R, n_vertices are declared so above; pcis->n and local_primal_size are
             assumed to be PetscInt too): print them with PETSc's %D conversion, since %d reads an int and is
             wrong when PetscInt is 64 bit */
4809:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
4810:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
4811:     PetscViewerFlush(pcbddc->dbg_viewer);
4812:   }

4814:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
4815:   if (!sub_schurs || !sub_schurs->reuse_solver) {
4816:     IS       is_aux1,is_aux2;
4817:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

4819:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4820:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
4821:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
          /* add the interior dofs to the bitmask: marked = vertices + interior, unmarked = interface dofs that are not vertices */
4822:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
4823:     for (i=0; i<n_D; i++) {
4824:       PetscBTSet(bitmask,is_indices[i]);
4825:     }
4826:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
          /* aux_array1: positions inside R of the unmarked dofs */
4827:     for (i=0, j=0; i<n_R; i++) {
4828:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
4829:         aux_array1[j++] = i;
4830:       }
4831:     }
          /* PETSC_OWN_POINTER: the IS takes ownership of the array, which is therefore not freed here */
4832:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
          /* aux_array2: positions inside B of the same unmarked dofs */
          /* NOTE(review): pairing the two lists entry by entry presumes is_B_local lists the dofs in the same relative order as R - confirm */
4833:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4834:     for (i=0, j=0; i<n_B; i++) {
4835:       if (!PetscBTLookup(bitmask,is_indices[i])) {
4836:         aux_array2[j++] = i;
4837:       }
4838:     }
4839:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4840:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
4841:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
4842:     ISDestroy(&is_aux1);
4843:     ISDestroy(&is_aux2);

4845:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* R holds no vertices, so the marked entries of R are the interior dofs; they are scattered onto the whole vec1_D */
4846:       PetscMalloc1(n_D,&aux_array1);
4847:       for (i=0, j=0; i<n_R; i++) {
4848:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
4849:           aux_array1[j++] = i;
4850:         }
4851:       }
4852:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
4853:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4854:       ISDestroy(&is_aux1);
4855:     }
4856:     PetscBTDestroy(&bitmask);
4857:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4858:   } else {
4859:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4860:     IS                 tis;
4861:     PetscInt           schur_size;

          /* the strides below take entries [n_D, n_D+schur_size) of vec1_R for B and entries [0, n_D) for D,
             i.e. the reused ordering of R is relied upon to list the interior dofs first */
4863:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
4864:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
4865:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
4866:     ISDestroy(&tis);
4867:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4868:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
4869:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4870:       ISDestroy(&tis);
4871:     }
4872:   }
4873:   return(0);
4874: }


/*
   PCBDDCSetUpLocalSolvers - sets up the local Dirichlet solver (pcbddc->ksp_D) and/or the local Neumann solver (pcbddc->ksp_R)

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - if true, set up ksp_D with operator pcis->A_II
-  neumann   - if true, set up ksp_R with operator A_RR, the part of pcbddc->local_mat on the rows/columns of pcbddc->is_R_local

   Notes:
   Each KSP is created only once, with KSPPREONLY and PCLU (PCCHOLESKY for MATSEQSBAIJ operators) as defaults,
   and then customized from the options database using the prefixes computed at the beginning of the routine.
   When sub_schurs->reuse_solver is present, the PCs stored there are attached to the KSPs instead.
   The symmetry flag of pcbddc->local_mat is propagated to A_II and A_RR using its value (->symmetric),
   not the "has been set" indicator (->symmetric_set).
   In debugging mode both solvers are tested against a random vector and the error is printed on pcbddc->dbg_viewer.
*/
4877: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
4878: {
4879:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
4880:   PC_IS          *pcis = (PC_IS*)pc->data;
4881:   PC             pc_temp;
4882:   Mat            A_RR;
4883:   MatReuse       reuse;
4884:   PetscScalar    m_one = -1.0;
4885:   PetscReal      value;
4886:   PetscInt       n_D,n_R;
4887:   PetscBool      check_corr[2],issbaij;
4889:   /* prefixes stuff */
4890:   char           dir_prefix[256],neu_prefix[256],str_level[16];
4891:   size_t         len;


4895:   /* compute prefixes */
        /* level 0:  <pc prefix>pc_bddc_dirichlet_  and  <pc prefix>pc_bddc_neumann_
           level >0: the trailing "pc_bddc_coarse_" (and "lX_") of the pc prefix is dropped, then
                     pc_bddc_dirichlet_l<level>_ / pc_bddc_neumann_l<level>_ is appended */
4896:   PetscStrcpy(dir_prefix,"");
4897:   PetscStrcpy(neu_prefix,"");
4898:   if (!pcbddc->current_level) {
4899:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
4900:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
4901:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4902:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4903:   } else {
4904:     PetscStrcpy(str_level,"");
4905:     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4906:     PetscStrlen(((PetscObject)pc)->prefix,&len);
          /* NOTE(review): len is a size_t; the subtractions assume the prefix really ends with the coarse suffix, otherwise len wraps around */
4907:     len -= 15; /* remove "pc_bddc_coarse_" */
4908:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4909:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
          /* presumably len+1 accounts for the terminating null character - confirm against PetscStrncpy */
4910:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
4911:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
4912:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4913:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4914:     PetscStrcat(dir_prefix,str_level);
4915:     PetscStrcat(neu_prefix,str_level);
4916:   }

4918:   /* DIRICHLET PROBLEM */
4919:   if (dirichlet) {
4920:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4921:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4922:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
4923:       if (pcbddc->dbg_flag) {
4924:         Mat    A_IIn;

              /* replace A_II with the matrix returned by PCBDDCBenignProject on the interior dofs (used by the check at the end) */
4926:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
4927:         MatDestroy(&pcis->A_II);
4928:         pcis->A_II = A_IIn;
4929:       }
4930:     }
          /* propagate the symmetry flag: symmetric_set only tells that the flag is known, its value is in symmetric */
4931:     if (pcbddc->local_mat->symmetric_set) {
4932:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
4933:     }
4934:     /* Matrix for Dirichlet problem is pcis->A_II */
4935:     n_D = pcis->n - pcis->n_B;
4936:     if (!pcbddc->ksp_D) { /* create object if not yet build */
4937:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
4938:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
4939:       /* default */
4940:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
4941:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
4942:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
4943:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
4944:       if (issbaij) {
4945:         PCSetType(pc_temp,PCCHOLESKY);
4946:       } else {
4947:         PCSetType(pc_temp,PCLU);
4948:       }
4949:       /* Allow user's customization */
4950:       KSPSetFromOptions(pcbddc->ksp_D);
4951:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4952:     }
4953:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
4954:     if (sub_schurs && sub_schurs->reuse_solver) {
4955:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* use the interior solver stored in reuse_solver as the PC of ksp_D */
4957:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
4958:     }
4959:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
4960:     if (!n_D) {
4961:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
4962:       PCSetType(pc_temp,PCNONE);
4963:     }
4964:     /* Set Up KSP for Dirichlet problem of BDDC */
4965:     KSPSetUp(pcbddc->ksp_D);
4966:     /* set ksp_D into pcis data */
          /* pcis->ksp_D becomes a second reference to the same KSP object */
4967:     KSPDestroy(&pcis->ksp_D);
4968:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
4969:     pcis->ksp_D = pcbddc->ksp_D;
4970:   }

4972:   /* NEUMANN PROBLEM */
        /* A_RR stays NULL when neumann is false, so the final MatDestroy is always safe */
4973:   A_RR = 0;
4974:   if (neumann) {
4975:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4976:     PetscInt        ibs,mbs;
4977:     PetscBool       issbaij, reuse_neumann_solver;
4978:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

          /* the stored correction solver is reused only if reuse_solver exists and no "__KSPFETIDP_iP" object is attached to sub_schurs->A */
4980:     reuse_neumann_solver = PETSC_FALSE;
4981:     if (sub_schurs && sub_schurs->reuse_solver) {
4982:       IS iP;

4984:       reuse_neumann_solver = PETSC_TRUE;
4985:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
4986:       if (iP) reuse_neumann_solver = PETSC_FALSE;
4987:     }
4988:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
4989:     ISGetSize(pcbddc->is_R_local,&n_R);
4990:     if (pcbddc->ksp_R) { /* already created ksp */
4991:       PetscInt nn_R;
            /* take an own reference on the current operator; it is dropped again whenever the matrix cannot be reused */
4992:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
4993:       PetscObjectReference((PetscObject)A_RR);
4994:       MatGetSize(A_RR,&nn_R,NULL);
4995:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
4996:         KSPReset(pcbddc->ksp_R);
4997:         MatDestroy(&A_RR);
4998:         reuse = MAT_INITIAL_MATRIX;
4999:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5000:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5001:           MatDestroy(&A_RR);
5002:           reuse = MAT_INITIAL_MATRIX;
5003:         } else { /* safe to reuse the matrix */
5004:           reuse = MAT_REUSE_MATRIX;
5005:         }
5006:       }
5007:       /* last check */
5008:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5009:         MatDestroy(&A_RR);
5010:         reuse = MAT_INITIAL_MATRIX;
5011:       }
5012:     } else { /* first time, so we need to create the matrix */
5013:       reuse = MAT_INITIAL_MATRIX;
5014:     }
5015:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5016:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5017:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5018:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
          /* when local_mat is the very same object as matis->A, a converted copy is created instead of converting in place */
5019:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5020:       if (matis->A == pcbddc->local_mat) {
5021:         MatDestroy(&pcbddc->local_mat);
5022:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5023:       } else {
5024:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5025:       }
5026:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5027:       if (matis->A == pcbddc->local_mat) {
5028:         MatDestroy(&pcbddc->local_mat);
5029:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5030:       } else {
5031:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5032:       }
5033:     }
5034:     /* extract A_RR */
5035:     if (reuse_neumann_solver) {
5036:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5038:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5039:         MatDestroy(&A_RR);
5040:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5041:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5042:         } else {
5043:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5044:         }
5045:       } else {
              /* no test requested: take (and reference) the operator already attached to the stored correction solver */
5046:         MatDestroy(&A_RR);
5047:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5048:         PetscObjectReference((PetscObject)A_RR);
5049:       }
5050:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5051:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5052:     }
          /* same propagation as for A_II: pass the value of the flag, not the fact that it has been set */
5053:     if (pcbddc->local_mat->symmetric_set) {
5054:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5055:     }
5056:     if (!pcbddc->ksp_R) { /* create object if not present */
5057:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5058:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5059:       /* default */
5060:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5061:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5062:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5063:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5064:       if (issbaij) {
5065:         PCSetType(pc_temp,PCCHOLESKY);
5066:       } else {
5067:         PCSetType(pc_temp,PCLU);
5068:       }
5069:       /* Allow user's customization */
5070:       KSPSetFromOptions(pcbddc->ksp_R);
5071:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
5072:     }
5073:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5074:     if (!n_R) {
5075:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5076:       PCSetType(pc_temp,PCNONE);
5077:     }
5078:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5079:     /* Reuse solver if it is present */
5080:     if (reuse_neumann_solver) {
5081:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5083:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5084:     }
5085:     /* Set Up KSP for Neumann problem of BDDC */
5086:     KSPSetUp(pcbddc->ksp_R);
5087:   }

5089:   if (pcbddc->dbg_flag) {
5090:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* NOTE(review): no matching PetscViewerASCIIPopSynchronized is visible in this routine - confirm it is popped elsewhere */
5091:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5092:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5093:   }

5095:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
        /* check_corr[0]/check_corr[1] remember whether a correction was assembled for the Dirichlet/Neumann solver,
           so that it can be verified in the debugging section below */
5096:   check_corr[0] = check_corr[1] = PETSC_FALSE;
5097:   if (pcbddc->NullSpace_corr[0]) {
5098:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5099:   }
5100:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5101:     check_corr[0] = PETSC_TRUE;
5102:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5103:   }
5104:   if (neumann && pcbddc->NullSpace_corr[2]) {
5105:     check_corr[1] = PETSC_TRUE;
5106:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5107:   }

5109:   /* check Dirichlet and Neumann solvers */
        /* for a random x: y = A*x, solve A*z = y in place, then print the infinity norm of x - z */
5110:   if (pcbddc->dbg_flag) {
5111:     if (dirichlet) { /* Dirichlet */
5112:       VecSetRandom(pcis->vec1_D,NULL);
5113:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5114:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5115:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5116:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5117:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5118:       if (check_corr[0]) {
5119:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5120:       }
5121:       PetscViewerFlush(pcbddc->dbg_viewer);
5122:     }
5123:     if (neumann) { /* Neumann */
5124:       VecSetRandom(pcbddc->vec1_R,NULL);
5125:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5126:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5127:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5128:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5129:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5130:       if (check_corr[1]) {
5131:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5132:       }
5133:       PetscViewerFlush(pcbddc->dbg_viewer);
5134:     }
5135:   }
5136:   /* free Neumann problem's matrix */
5137:   MatDestroy(&A_RR);
5138:   return(0);
5139: }

/*
   PCBDDCSolveSubstructureCorrection - local solve on the R dofs, with input and output stored in interface/interior vectors

   Input Parameters:
+  pc             - the preconditioner (pc->data is read as PC_BDDC)
.  inout_B        - on entry the right-hand side on the interface dofs, on exit the computed correction
.  inout_D        - same as inout_B for the interior dofs; accessed only when pcbddc->switch_static is set
-  applytranspose - if true, the transposed solve (and the transposed auxiliary operators) are applied

   Notes:
   The data are moved to/from pcbddc->vec1_R with the scatters R_to_B and R_to_D and the system is solved in place
   with pcbddc->ksp_R. When sub_schurs->reuse_solver is present and switch_static is not set, the solve is instead
   performed by MatFactorSolveSchurComplement(Transpose) on reuse_solver->F, using reuse_solver->rhs_B and
   reuse_solver->sol_B together with reuse_solver->correction_scatter_B.
   If pcbddc->local_auxmat1 is present, local_auxmat1 and local_auxmat2 are applied before the solve in the transposed
   case and after it otherwise.
*/
5141: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5142: {
5143:   PetscErrorCode  ierr;
5144:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5145:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* boolean summary of "a reusable solver exists"; the inner blocks shadow this name with the solver object itself */
5146:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* step 1: load the right-hand side */
5149:   if (!reuse_solver) {
5150:     VecSet(pcbddc->vec1_R,0.);
5151:   }
5152:   if (!pcbddc->switch_static) {
5153:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B) */
5154:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5155:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5156:     }
5157:     if (!reuse_solver) {
5158:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5159:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5160:     } else {
5161:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5163:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5164:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5165:     }
5166:   } else {
          /* switch_static: both the interface and the interior parts are copied into vec1_R */
5167:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5168:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5169:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5170:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5171:     if (applytranspose && pcbddc->local_auxmat1) {
            /* here local_auxmat2^T is applied to vec1_R; the updated inout_B is then copied into vec1_R again */
5172:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5173:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5174:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5175:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5176:     }
5177:   }
        /* step 2: solve (in place on vec1_R, or rhs_B -> sol_B with the reused factorization) */
5178:   if (!reuse_solver || pcbddc->switch_static) {
5179:     if (applytranspose) {
5180:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5181:     } else {
5182:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5183:     }
5184:   } else {
5185:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5187:     if (applytranspose) {
5188:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5189:     } else {
5190:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5191:     }
5192:   }
        /* step 3: bring the solution back; inout_B is zeroed first, the scatters below then insert the solution entries */
5193:   VecSet(inout_B,0.);
5194:   if (!pcbddc->switch_static) {
5195:     if (!reuse_solver) {
5196:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5197:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5198:     } else {
5199:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5201:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5202:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5203:     }
5204:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat2 * (local_auxmat1 * inout_B) */
5205:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5206:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5207:     }
5208:   } else {
5209:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5210:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5211:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5212:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5213:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* the correction is added to vec1_R, which is why the scatters are repeated right after */
5214:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5215:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5216:     }
5217:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5218:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5219:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5220:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5221:   }
5222:   return(0);
5223: }

/*
   PCBDDCApplyInterfacePreconditioner - applies the coarse correction and the local correction to the data stored in pcis->vec1_B (and pcis->vec1_D)

   Input Parameters:
+  pc             - the preconditioner (pc->data is read both as PC_BDDC and as PC_IS)
-  applytranspose - selects the transposed application (the roles of coarse_phi_* and coarse_psi_* are swapped and transposed solves are used)

   Notes:
   Input and output are pcis->vec1_B and, if pcbddc->switch_static is set, pcis->vec1_D.
   Steps: restrict to the primal space (pcbddc->vec1_P), add pcbddc->benign_p0 if benign_n is nonzero, scatter to the coarse
   right-hand side, solve with pcbddc->coarse_ksp where it exists, perform the local correction through
   PCBDDCSolveSubstructureCorrection, scatter the coarse solution back and sum the two contributions.
   If pcbddc->benign_apply_coarse_only is set, only the coarse part is computed and expanded into pcis->vec1_B.
*/
5225: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5226: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5227: {
5229:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5230:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5231:   const PetscScalar zero = 0.0;

5234:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5235:   if (!pcbddc->benign_apply_coarse_only) {
5236:     if (applytranspose) {
5237:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5238:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5239:     } else {
5240:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5241:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5242:     }
5243:   } else {
5244:     VecSet(pcbddc->vec1_P,zero);
5245:   }

5247:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
        /* the last benign_n entries of vec1_P are the ones updated */
5248:   if (pcbddc->benign_n) {
5249:     PetscScalar *array;
5250:     PetscInt    j;

5252:     VecGetArray(pcbddc->vec1_P,&array);
5253:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5254:     VecRestoreArray(pcbddc->vec1_P,&array);
5255:   }

5257:   /* start communications from local primal nodes to rhs of coarse solver */
5258:   VecSet(pcbddc->coarse_vec,zero);
5259:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5260:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5262:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
        /* only the processes that own a coarse_ksp take part in the coarse solve */
5263:   if (pcbddc->coarse_ksp) {
5264:     Mat          coarse_mat;
5265:     Vec          rhs,sol;
5266:     MatNullSpace nullsp;
          /* isbddc can become true only inside the benign_have_null test below */
5267:     PetscBool    isbddc = PETSC_FALSE;

5269:     if (pcbddc->benign_have_null) {
5270:       PC        coarse_pc;

5272:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5273:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5274:       /* we need to propagate to coarser levels the need for a possible benign correction */
5275:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5276:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5277:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5278:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5279:       }
5280:     }
5281:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5282:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5283:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
          /* if the coarse matrix has a null space attached, it is removed from the rhs before the solve and from the solution after it */
5284:     MatGetNullSpace(coarse_mat,&nullsp);
5285:     if (nullsp) {
5286:       MatNullSpaceRemove(nullsp,rhs);
5287:     }
5288:     if (applytranspose) {
5289:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5290:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5291:     } else {
5292:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5293:         PC        coarse_pc;

5295:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5296:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5297:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5298:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5299:       } else {
5300:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5301:       }
5302:     }
5303:     /* we don't need the benign correction at coarser levels anymore */
          /* the two flags of the coarse PC are set to fixed values here, whatever they were before the solve */
5304:     if (pcbddc->benign_have_null && isbddc) {
5305:       PC        coarse_pc;
5306:       PC_BDDC*  coarsepcbddc;

5308:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5309:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5310:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5311:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5312:     }
5313:     if (nullsp) {
5314:       MatNullSpaceRemove(nullsp,sol);
5315:     }
5316:   }

5318:   /* Local solution on R nodes */
        /* skipped on processes without local dofs (pcis->n == 0) and when only the coarse part is requested */
5319:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5320:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5321:   }
5322:   /* communications from coarse sol to local primal nodes */
5323:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5324:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5326:   /* Sum contributions from the two levels */
        /* the operator used here is the one not used in the restriction above (psi when transposed, phi otherwise) */
5327:   if (!pcbddc->benign_apply_coarse_only) {
5328:     if (applytranspose) {
5329:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5330:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5331:     } else {
5332:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5333:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5334:     }
5335:     /* store p0 */
          /* benign_p0 is overwritten with the last benign_n entries of the coarse solution held in vec1_P */
5336:     if (pcbddc->benign_n) {
5337:       PetscScalar *array;
5338:       PetscInt    j;

5340:       VecGetArray(pcbddc->vec1_P,&array);
5341:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5342:       VecRestoreArray(pcbddc->vec1_P,&array);
5343:     }
5344:   } else { /* expand the coarse solution */
5345:     if (applytranspose) {
5346:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5347:     } else {
5348:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5349:     }
5350:   }
5351:   return(0);
5352: }

5354: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5355: {
5357:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5358:   PetscScalar    *array;
5359:   Vec            from,to;

5362:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5363:     from = pcbddc->coarse_vec;
5364:     to = pcbddc->vec1_P;
5365:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5366:       Vec tvec;

5368:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5369:       VecResetArray(tvec);
5370:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5371:       VecGetArray(tvec,&array);
5372:       VecPlaceArray(from,array);
5373:       VecRestoreArray(tvec,&array);
5374:     }
5375:   } else { /* from local to global -> put data in coarse right hand side */
5376:     from = pcbddc->vec1_P;
5377:     to = pcbddc->coarse_vec;
5378:   }
5379:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5380:   return(0);
5381: }

5383: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5384: {
5386:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5387:   PetscScalar    *array;
5388:   Vec            from,to;

5391:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5392:     from = pcbddc->coarse_vec;
5393:     to = pcbddc->vec1_P;
5394:   } else { /* from local to global -> put data in coarse right hand side */
5395:     from = pcbddc->vec1_P;
5396:     to = pcbddc->coarse_vec;
5397:   }
5398:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5399:   if (smode == SCATTER_FORWARD) {
5400:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5401:       Vec tvec;

5403:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5404:       VecGetArray(to,&array);
5405:       VecPlaceArray(tvec,array);
5406:       VecRestoreArray(to,&array);
5407:     }
5408:   } else {
5409:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5410:      VecResetArray(from);
5411:     }
5412:   }
5413:   return(0);
5414: }

5416: /* uncomment for testing purposes */
5417: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5418: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5419: {
5420:   PetscErrorCode    ierr;
5421:   PC_IS*            pcis = (PC_IS*)(pc->data);
5422:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5423:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5424:   /* one and zero */
5425:   PetscScalar       one=1.0,zero=0.0;
5426:   /* space to store constraints and their local indices */
5427:   PetscScalar       *constraints_data;
5428:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5429:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5430:   PetscInt          *constraints_n;
5431:   /* iterators */
5432:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5433:   /* BLAS integers */
5434:   PetscBLASInt      lwork,lierr;
5435:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5436:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5437:   /* reuse */
5438:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5439:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5440:   /* change of basis */
5441:   PetscBool         qr_needed;
5442:   PetscBT           change_basis,qr_needed_idx;
5443:   /* auxiliary stuff */
5444:   PetscInt          *nnz,*is_indices;
5445:   PetscInt          ncc;
5446:   /* some quantities */
5447:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5448:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;

5451:   /* Destroy Mat objects computed previously */
5452:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5453:   MatDestroy(&pcbddc->ConstraintMatrix);
5454:   MatDestroy(&pcbddc->switch_static_change);
5455:   /* save info on constraints from previous setup (if any) */
5456:   olocal_primal_size = pcbddc->local_primal_size;
5457:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5458:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5459:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5460:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5461:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5462:   PetscFree(pcbddc->primal_indices_local_idxs);

5464:   if (!pcbddc->adaptive_selection) {
5465:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5466:     MatNullSpace nearnullsp;
5467:     const Vec    *nearnullvecs;
5468:     Vec          *localnearnullsp;
5469:     PetscScalar  *array;
5470:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5471:     PetscBool    nnsp_has_cnst;
5472:     /* LAPACK working arrays for SVD or POD */
5473:     PetscBool    skip_lapack,boolforchange;
5474:     PetscScalar  *work;
5475:     PetscReal    *singular_vals;
5476: #if defined(PETSC_USE_COMPLEX)
5477:     PetscReal    *rwork;
5478: #endif
5479: #if defined(PETSC_MISSING_LAPACK_GESVD)
5480:     PetscScalar  *temp_basis,*correlation_mat;
5481: #else
5482:     PetscBLASInt dummy_int=1;
5483:     PetscScalar  dummy_scalar=1.;
5484: #endif

5486:     /* Get index sets for faces, edges and vertices from graph */
5487:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5488:     /* print some info */
5489:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5490:       PetscInt nv;

5492:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5493:       ISGetSize(ISForVertices,&nv);
5494:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5495:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5496:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5497:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5498:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5499:       PetscViewerFlush(pcbddc->dbg_viewer);
5500:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5501:     }

5503:     /* free unneeded index sets */
5504:     if (!pcbddc->use_vertices) {
5505:       ISDestroy(&ISForVertices);
5506:     }
5507:     if (!pcbddc->use_edges) {
5508:       for (i=0;i<n_ISForEdges;i++) {
5509:         ISDestroy(&ISForEdges[i]);
5510:       }
5511:       PetscFree(ISForEdges);
5512:       n_ISForEdges = 0;
5513:     }
5514:     if (!pcbddc->use_faces) {
5515:       for (i=0;i<n_ISForFaces;i++) {
5516:         ISDestroy(&ISForFaces[i]);
5517:       }
5518:       PetscFree(ISForFaces);
5519:       n_ISForFaces = 0;
5520:     }

5522:     /* check if near null space is attached to global mat */
5523:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
5524:     if (nearnullsp) {
5525:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5526:       /* remove any stored info */
5527:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5528:       PetscFree(pcbddc->onearnullvecs_state);
5529:       /* store information for BDDC solver reuse */
5530:       PetscObjectReference((PetscObject)nearnullsp);
5531:       pcbddc->onearnullspace = nearnullsp;
5532:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5533:       for (i=0;i<nnsp_size;i++) {
5534:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5535:       }
5536:     } else { /* if near null space is not provided BDDC uses constants by default */
5537:       nnsp_size = 0;
5538:       nnsp_has_cnst = PETSC_TRUE;
5539:     }
5540:     /* get max number of constraints on a single cc */
5541:     max_constraints = nnsp_size;
5542:     if (nnsp_has_cnst) max_constraints++;

5544:     /*
5545:          Evaluate maximum storage size needed by the procedure
5546:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5547:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5548:          There can be multiple constraints per connected component
5549:                                                                                                                                                            */
5550:     n_vertices = 0;
5551:     if (ISForVertices) {
5552:       ISGetSize(ISForVertices,&n_vertices);
5553:     }
5554:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5555:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

5557:     total_counts = n_ISForFaces+n_ISForEdges;
5558:     total_counts *= max_constraints;
5559:     total_counts += n_vertices;
5560:     PetscBTCreate(total_counts,&change_basis);

5562:     total_counts = 0;
5563:     max_size_of_constraint = 0;
5564:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5565:       IS used_is;
5566:       if (i<n_ISForEdges) {
5567:         used_is = ISForEdges[i];
5568:       } else {
5569:         used_is = ISForFaces[i-n_ISForEdges];
5570:       }
5571:       ISGetSize(used_is,&j);
5572:       total_counts += j;
5573:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5574:     }
5575:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

5577:     /* get local part of global near null space vectors */
5578:     PetscMalloc1(nnsp_size,&localnearnullsp);
5579:     for (k=0;k<nnsp_size;k++) {
5580:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5581:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5582:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5583:     }

5585:     /* whether or not to skip lapack calls */
5586:     skip_lapack = PETSC_TRUE;
5587:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5589:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5590:     if (!skip_lapack) {
5591:       PetscScalar temp_work;

5593: #if defined(PETSC_MISSING_LAPACK_GESVD)
5594:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5595:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5596:       PetscMalloc1(max_constraints,&singular_vals);
5597:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5598: #if defined(PETSC_USE_COMPLEX)
5599:       PetscMalloc1(3*max_constraints,&rwork);
5600: #endif
5601:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5602:       PetscBLASIntCast(max_constraints,&Blas_N);
5603:       PetscBLASIntCast(max_constraints,&Blas_LDA);
5604:       lwork = -1;
5605:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5606: #if !defined(PETSC_USE_COMPLEX)
5607:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5608: #else
5609:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5610: #endif
5611:       PetscFPTrapPop();
5612:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5613: #else /* on missing GESVD */
5614:       /* SVD */
5615:       PetscInt max_n,min_n;
5616:       max_n = max_size_of_constraint;
5617:       min_n = max_constraints;
5618:       if (max_size_of_constraint < max_constraints) {
5619:         min_n = max_size_of_constraint;
5620:         max_n = max_constraints;
5621:       }
5622:       PetscMalloc1(min_n,&singular_vals);
5623: #if defined(PETSC_USE_COMPLEX)
5624:       PetscMalloc1(5*min_n,&rwork);
5625: #endif
5626:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5627:       lwork = -1;
5628:       PetscBLASIntCast(max_n,&Blas_M);
5629:       PetscBLASIntCast(min_n,&Blas_N);
5630:       PetscBLASIntCast(max_n,&Blas_LDA);
5631:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5632: #if !defined(PETSC_USE_COMPLEX)
5633:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5634: #else
5635:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5636: #endif
5637:       PetscFPTrapPop();
5638:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5639: #endif /* on missing GESVD */
5640:       /* Allocate optimal workspace */
5641:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5642:       PetscMalloc1(lwork,&work);
5643:     }
5644:     /* Now we can loop on constraining sets */
5645:     total_counts = 0;
5646:     constraints_idxs_ptr[0] = 0;
5647:     constraints_data_ptr[0] = 0;
5648:     /* vertices */
5649:     if (n_vertices) {
5650:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5651:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5652:       for (i=0;i<n_vertices;i++) {
5653:         constraints_n[total_counts] = 1;
5654:         constraints_data[total_counts] = 1.0;
5655:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5656:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5657:         total_counts++;
5658:       }
5659:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5660:       n_vertices = total_counts;
5661:     }

5663:     /* edges and faces */
5664:     total_counts_cc = total_counts;
5665:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
5666:       IS        used_is;
5667:       PetscBool idxs_copied = PETSC_FALSE;

5669:       if (ncc<n_ISForEdges) {
5670:         used_is = ISForEdges[ncc];
5671:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
5672:       } else {
5673:         used_is = ISForFaces[ncc-n_ISForEdges];
5674:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
5675:       }
5676:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

5678:       ISGetSize(used_is,&size_of_constraint);
5679:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
5680:       /* change of basis should not be performed on local periodic nodes */
5681:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
5682:       if (nnsp_has_cnst) {
5683:         PetscScalar quad_value;

5685:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5686:         idxs_copied = PETSC_TRUE;

5688:         if (!pcbddc->use_nnsp_true) {
5689:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
5690:         } else {
5691:           quad_value = 1.0;
5692:         }
5693:         for (j=0;j<size_of_constraint;j++) {
5694:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
5695:         }
5696:         temp_constraints++;
5697:         total_counts++;
5698:       }
5699:       for (k=0;k<nnsp_size;k++) {
5700:         PetscReal real_value;
5701:         PetscScalar *ptr_to_data;

5703:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5704:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
5705:         for (j=0;j<size_of_constraint;j++) {
5706:           ptr_to_data[j] = array[is_indices[j]];
5707:         }
5708:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5709:         /* check if array is null on the connected component */
5710:         PetscBLASIntCast(size_of_constraint,&Blas_N);
5711:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
5712:         if (real_value > 0.0) { /* keep indices and values */
5713:           temp_constraints++;
5714:           total_counts++;
5715:           if (!idxs_copied) {
5716:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5717:             idxs_copied = PETSC_TRUE;
5718:           }
5719:         }
5720:       }
5721:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
5722:       valid_constraints = temp_constraints;
5723:       if (!pcbddc->use_nnsp_true && temp_constraints) {
5724:         if (temp_constraints == 1) { /* just normalize the constraint */
5725:           PetscScalar norm,*ptr_to_data;

5727:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
5728:           PetscBLASIntCast(size_of_constraint,&Blas_N);
5729:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
5730:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
5731:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
5732:         } else { /* perform SVD */
5733:           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
5734:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

5736: #if defined(PETSC_MISSING_LAPACK_GESVD)
5737:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
5738:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
5739:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
5740:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
5741:                 from that computed using LAPACKgesvd
5742:              -> This is due to a different computation of eigenvectors in LAPACKheev
5743:              -> The quality of the POD-computed basis will be the same */
5744:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
5745:           /* Store upper triangular part of correlation matrix */
5746:           PetscBLASIntCast(size_of_constraint,&Blas_N);
5747:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5748:           for (j=0;j<temp_constraints;j++) {
5749:             for (k=0;k<j+1;k++) {
5750:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
5751:             }
5752:           }
5753:           /* compute eigenvalues and eigenvectors of correlation matrix */
5754:           PetscBLASIntCast(temp_constraints,&Blas_N);
5755:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
5756: #if !defined(PETSC_USE_COMPLEX)
5757:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
5758: #else
5759:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
5760: #endif
5761:           PetscFPTrapPop();
5762:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
5763:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
5764:           j = 0;
5765:           while (j < temp_constraints && singular_vals[j] < tol) j++;
5766:           total_counts = total_counts-j;
5767:           valid_constraints = temp_constraints-j;
5768:           /* scale and copy POD basis into used quadrature memory */
5769:           PetscBLASIntCast(size_of_constraint,&Blas_M);
5770:           PetscBLASIntCast(temp_constraints,&Blas_N);
5771:           PetscBLASIntCast(temp_constraints,&Blas_K);
5772:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5773:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
5774:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
5775:           if (j<temp_constraints) {
5776:             PetscInt ii;
5777:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
5778:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5779:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
5780:             PetscFPTrapPop();
5781:             for (k=0;k<temp_constraints-j;k++) {
5782:               for (ii=0;ii<size_of_constraint;ii++) {
5783:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
5784:               }
5785:             }
5786:           }
5787: #else  /* on missing GESVD */
5788:           PetscBLASIntCast(size_of_constraint,&Blas_M);
5789:           PetscBLASIntCast(temp_constraints,&Blas_N);
5790:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5791:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5792: #if !defined(PETSC_USE_COMPLEX)
5793:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
5794: #else
5795:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
5796: #endif
5797:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
5798:           PetscFPTrapPop();
5799:           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
5800:           k = temp_constraints;
5801:           if (k > size_of_constraint) k = size_of_constraint;
5802:           j = 0;
5803:           while (j < k && singular_vals[k-j-1] < tol) j++;
5804:           valid_constraints = k-j;
5805:           total_counts = total_counts-temp_constraints+valid_constraints;
5806: #endif /* on missing GESVD */
5807:         }
5808:       }
5809:       /* update pointers information */
5810:       if (valid_constraints) {
5811:         constraints_n[total_counts_cc] = valid_constraints;
5812:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
5813:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
5814:         /* set change_of_basis flag */
5815:         if (boolforchange) {
5816:           PetscBTSet(change_basis,total_counts_cc);
5817:         }
5818:         total_counts_cc++;
5819:       }
5820:     }
5821:     /* free workspace */
5822:     if (!skip_lapack) {
5823:       PetscFree(work);
5824: #if defined(PETSC_USE_COMPLEX)
5825:       PetscFree(rwork);
5826: #endif
5827:       PetscFree(singular_vals);
5828: #if defined(PETSC_MISSING_LAPACK_GESVD)
5829:       PetscFree(correlation_mat);
5830:       PetscFree(temp_basis);
5831: #endif
5832:     }
5833:     for (k=0;k<nnsp_size;k++) {
5834:       VecDestroy(&localnearnullsp[k]);
5835:     }
5836:     PetscFree(localnearnullsp);
5837:     /* free index sets of faces, edges and vertices */
5838:     for (i=0;i<n_ISForFaces;i++) {
5839:       ISDestroy(&ISForFaces[i]);
5840:     }
5841:     if (n_ISForFaces) {
5842:       PetscFree(ISForFaces);
5843:     }
5844:     for (i=0;i<n_ISForEdges;i++) {
5845:       ISDestroy(&ISForEdges[i]);
5846:     }
5847:     if (n_ISForEdges) {
5848:       PetscFree(ISForEdges);
5849:     }
5850:     ISDestroy(&ISForVertices);
5851:   } else {
5852:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

5854:     total_counts = 0;
5855:     n_vertices = 0;
5856:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
5857:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
5858:     }
5859:     max_constraints = 0;
5860:     total_counts_cc = 0;
5861:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5862:       total_counts += pcbddc->adaptive_constraints_n[i];
5863:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
5864:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
5865:     }
5866:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
5867:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
5868:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
5869:     constraints_data = pcbddc->adaptive_constraints_data;
5870:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
5871:     PetscMalloc1(total_counts_cc,&constraints_n);
5872:     total_counts_cc = 0;
5873:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5874:       if (pcbddc->adaptive_constraints_n[i]) {
5875:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
5876:       }
5877:     }
5878: #if 0
5879:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
5880:     for (i=0;i<total_counts_cc;i++) {
5881:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
5882:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
5883:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
5884:         printf(" %d",constraints_idxs[j]);
5885:       }
5886:       printf("\n");
5887:       printf("number of cc: %d\n",constraints_n[i]);
5888:     }
5889:     for (i=0;i<n_vertices;i++) {
5890:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
5891:     }
5892:     for (i=0;i<sub_schurs->n_subs;i++) {
5893:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
5894:     }
5895: #endif

5897:     max_size_of_constraint = 0;
5898:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
5899:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
5900:     /* Change of basis */
5901:     PetscBTCreate(total_counts_cc,&change_basis);
5902:     if (pcbddc->use_change_of_basis) {
5903:       for (i=0;i<sub_schurs->n_subs;i++) {
5904:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
5905:           PetscBTSet(change_basis,i+n_vertices);
5906:         }
5907:       }
5908:     }
5909:   }
5910:   pcbddc->local_primal_size = total_counts;
5911:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

5913:   /* map constraints_idxs in boundary numbering */
5914:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
5915:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);

5917:   /* Create constraint matrix */
5918:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
5919:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
5920:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

5922:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
5923:   /* determine if a QR strategy is needed for change of basis */
5924:   qr_needed = PETSC_FALSE;
5925:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
5926:   total_primal_vertices=0;
5927:   pcbddc->local_primal_size_cc = 0;
5928:   for (i=0;i<total_counts_cc;i++) {
5929:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5930:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
5931:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
5932:       pcbddc->local_primal_size_cc += 1;
5933:     } else if (PetscBTLookup(change_basis,i)) {
5934:       for (k=0;k<constraints_n[i];k++) {
5935:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5936:       }
5937:       pcbddc->local_primal_size_cc += constraints_n[i];
5938:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
5939:         PetscBTSet(qr_needed_idx,i);
5940:         qr_needed = PETSC_TRUE;
5941:       }
5942:     } else {
5943:       pcbddc->local_primal_size_cc += 1;
5944:     }
5945:   }
5946:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
5947:   pcbddc->n_vertices = total_primal_vertices;
5948:   /* permute indices in order to have a sorted set of vertices */
5949:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
5950:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
5951:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
5952:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

5954:   /* nonzero structure of constraint matrix */
5955:   /* and get reference dof for local constraints */
5956:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
5957:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

5959:   j = total_primal_vertices;
5960:   total_counts = total_primal_vertices;
5961:   cum = total_primal_vertices;
5962:   for (i=n_vertices;i<total_counts_cc;i++) {
5963:     if (!PetscBTLookup(change_basis,i)) {
5964:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
5965:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
5966:       cum++;
5967:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5968:       for (k=0;k<constraints_n[i];k++) {
5969:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5970:         nnz[j+k] = size_of_constraint;
5971:       }
5972:       j += constraints_n[i];
5973:     }
5974:   }
5975:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
5976:   PetscFree(nnz);

5978:   /* set values in constraint matrix */
5979:   for (i=0;i<total_primal_vertices;i++) {
5980:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
5981:   }
5982:   total_counts = total_primal_vertices;
5983:   for (i=n_vertices;i<total_counts_cc;i++) {
5984:     if (!PetscBTLookup(change_basis,i)) {
5985:       PetscInt *cols;

5987:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5988:       cols = constraints_idxs+constraints_idxs_ptr[i];
5989:       for (k=0;k<constraints_n[i];k++) {
5990:         PetscInt    row = total_counts+k;
5991:         PetscScalar *vals;

5993:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
5994:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
5995:       }
5996:       total_counts += constraints_n[i];
5997:     }
5998:   }
5999:   /* assembling */
6000:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6001:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);

6003:   /*
6004:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6005:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6006:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6007:   */
6008:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6009:   if (pcbddc->use_change_of_basis) {
6010:     /* dual and primal dofs on a single cc */
6011:     PetscInt     dual_dofs,primal_dofs;
6012:     /* working stuff for GEQRF */
6013:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6014:     PetscBLASInt lqr_work;
6015:     /* working stuff for UNGQR */
6016:     PetscScalar  *gqr_work,lgqr_work_t;
6017:     PetscBLASInt lgqr_work;
6018:     /* working stuff for TRTRS */
6019:     PetscScalar  *trs_rhs;
6020:     PetscBLASInt Blas_NRHS;
6021:     /* pointers for values insertion into change of basis matrix */
6022:     PetscInt     *start_rows,*start_cols;
6023:     PetscScalar  *start_vals;
6024:     /* working stuff for values insertion */
6025:     PetscBT      is_primal;
6026:     PetscInt     *aux_primal_numbering_B;
6027:     /* matrix sizes */
6028:     PetscInt     global_size,local_size;
6029:     /* temporary change of basis */
6030:     Mat          localChangeOfBasisMatrix;
6031:     /* extra space for debugging */
6032:     PetscScalar  *dbg_work;

6034:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6035:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6036:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6037:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6038:     /* nonzeros for local mat */
6039:     PetscMalloc1(pcis->n,&nnz);
6040:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6041:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6042:     } else {
6043:       const PetscInt *ii;
6044:       PetscInt       n;
6045:       PetscBool      flg_row;
6046:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6047:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6048:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6049:     }
6050:     for (i=n_vertices;i<total_counts_cc;i++) {
6051:       if (PetscBTLookup(change_basis,i)) {
6052:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6053:         if (PetscBTLookup(qr_needed_idx,i)) {
6054:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6055:         } else {
6056:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6057:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6058:         }
6059:       }
6060:     }
6061:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6062:     PetscFree(nnz);
6063:     /* Set interior change in the matrix */
6064:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6065:       for (i=0;i<pcis->n;i++) {
6066:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6067:       }
6068:     } else {
6069:       const PetscInt *ii,*jj;
6070:       PetscScalar    *aa;
6071:       PetscInt       n;
6072:       PetscBool      flg_row;
6073:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6074:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6075:       for (i=0;i<n;i++) {
6076:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6077:       }
6078:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6079:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6080:     }

6082:     if (pcbddc->dbg_flag) {
6083:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6084:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6085:     }


6088:     /* Now we loop on the constraints which need a change of basis */
6089:     /*
6090:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6091:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6093:        Basic blocks of change of basis matrix T computed by

6095:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6097:             | 1        0   ...        0         s_1/S |
6098:             | 0        1   ...        0         s_2/S |
6099:             |              ...                        |
6100:             | 0        ...            1     s_{n-1}/S |
6101:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6103:             with S = \sum_{i=1}^n s_i^2
6104:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6105:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6107:           - QR decomposition of constraints otherwise
6108:     */
6109:     if (qr_needed) {
6110:       /* space to store Q */
6111:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6112:       /* array to store scaling factors for reflectors */
6113:       PetscMalloc1(max_constraints,&qr_tau);
6114:       /* first we issue queries for optimal work */
6115:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6116:       PetscBLASIntCast(max_constraints,&Blas_N);
6117:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6118:       lqr_work = -1;
6119:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6120:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6121:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6122:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6123:       lgqr_work = -1;
6124:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6125:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6126:       PetscBLASIntCast(max_constraints,&Blas_K);
6127:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6128:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6129:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6130:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
6131:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6132:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6133:       /* array to store rhs and solution of triangular solver */
6134:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6135:       /* allocating workspace for check */
6136:       if (pcbddc->dbg_flag) {
6137:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6138:       }
6139:     }
6140:     /* array to store whether a node is primal or not */
6141:     PetscBTCreate(pcis->n_B,&is_primal);
6142:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6143:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6144:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6145:     for (i=0;i<total_primal_vertices;i++) {
6146:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6147:     }
6148:     PetscFree(aux_primal_numbering_B);

6150:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6151:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6152:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6153:       if (PetscBTLookup(change_basis,total_counts)) {
6154:         /* get constraint info */
6155:         primal_dofs = constraints_n[total_counts];
6156:         dual_dofs = size_of_constraint-primal_dofs;

6158:         if (pcbddc->dbg_flag) {
6159:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6160:         }

6162:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6164:           /* copy quadrature constraints for change of basis check */
6165:           if (pcbddc->dbg_flag) {
6166:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6167:           }
6168:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6169:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6171:           /* compute QR decomposition of constraints */
6172:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6173:           PetscBLASIntCast(primal_dofs,&Blas_N);
6174:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6175:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6176:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6177:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6178:           PetscFPTrapPop();

6180:           /* explicitly compute R^-T */
6181:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6182:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6183:           PetscBLASIntCast(primal_dofs,&Blas_N);
6184:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6185:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6186:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6187:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6188:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6189:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6190:           PetscFPTrapPop();

6192:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6193:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6194:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6195:           PetscBLASIntCast(primal_dofs,&Blas_K);
6196:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6197:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6198:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6199:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
6200:           PetscFPTrapPop();

6202:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6203:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6204:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6205:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6206:           PetscBLASIntCast(primal_dofs,&Blas_N);
6207:           PetscBLASIntCast(primal_dofs,&Blas_K);
6208:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6209:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6210:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6211:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6212:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6213:           PetscFPTrapPop();
6214:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6216:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6217:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6218:           /* insert cols for primal dofs */
6219:           for (j=0;j<primal_dofs;j++) {
6220:             start_vals = &qr_basis[j*size_of_constraint];
6221:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6222:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6223:           }
6224:           /* insert cols for dual dofs */
6225:           for (j=0,k=0;j<dual_dofs;k++) {
6226:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6227:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6228:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6229:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6230:               j++;
6231:             }
6232:           }

6234:           /* check change of basis */
6235:           if (pcbddc->dbg_flag) {
6236:             PetscInt   ii,jj;
6237:             PetscBool valid_qr=PETSC_TRUE;
6238:             PetscBLASIntCast(primal_dofs,&Blas_M);
6239:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6240:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6241:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6242:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6243:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6244:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6245:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6246:             PetscFPTrapPop();
6247:             for (jj=0;jj<size_of_constraint;jj++) {
6248:               for (ii=0;ii<primal_dofs;ii++) {
6249:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6250:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
6251:               }
6252:             }
6253:             if (!valid_qr) {
6254:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6255:               for (jj=0;jj<size_of_constraint;jj++) {
6256:                 for (ii=0;ii<primal_dofs;ii++) {
6257:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6258:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6259:                   }
6260:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
6261:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6262:                   }
6263:                 }
6264:               }
6265:             } else {
6266:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6267:             }
6268:           }
6269:         } else { /* simple transformation block */
6270:           PetscInt    row,col;
6271:           PetscScalar val,norm;

6273:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6274:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6275:           for (j=0;j<size_of_constraint;j++) {
6276:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6277:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6278:             if (!PetscBTLookup(is_primal,row_B)) {
6279:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6280:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6281:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6282:             } else {
6283:               for (k=0;k<size_of_constraint;k++) {
6284:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6285:                 if (row != col) {
6286:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6287:                 } else {
6288:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6289:                 }
6290:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6291:               }
6292:             }
6293:           }
6294:           if (pcbddc->dbg_flag) {
6295:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6296:           }
6297:         }
6298:       } else {
6299:         if (pcbddc->dbg_flag) {
6300:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6301:         }
6302:       }
6303:     }

6305:     /* free workspace */
6306:     if (qr_needed) {
6307:       if (pcbddc->dbg_flag) {
6308:         PetscFree(dbg_work);
6309:       }
6310:       PetscFree(trs_rhs);
6311:       PetscFree(qr_tau);
6312:       PetscFree(qr_work);
6313:       PetscFree(gqr_work);
6314:       PetscFree(qr_basis);
6315:     }
6316:     PetscBTDestroy(&is_primal);
6317:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6318:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6320:     /* assembling of global change of variable */
6321:     if (!pcbddc->fake_change) {
6322:       Mat      tmat;
6323:       PetscInt bs;

6325:       VecGetSize(pcis->vec1_global,&global_size);
6326:       VecGetLocalSize(pcis->vec1_global,&local_size);
6327:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6328:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6329:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6330:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6331:       MatGetBlockSize(pc->pmat,&bs);
6332:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6333:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6334:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6335:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6336:       MatDestroy(&tmat);
6337:       VecSet(pcis->vec1_global,0.0);
6338:       VecSet(pcis->vec1_N,1.0);
6339:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6340:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6341:       VecReciprocal(pcis->vec1_global);
6342:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6344:       /* check */
6345:       if (pcbddc->dbg_flag) {
6346:         PetscReal error;
6347:         Vec       x,x_change;

6349:         VecDuplicate(pcis->vec1_global,&x);
6350:         VecDuplicate(pcis->vec1_global,&x_change);
6351:         VecSetRandom(x,NULL);
6352:         VecCopy(x,pcis->vec1_global);
6353:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6354:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6355:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6356:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6357:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6358:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6359:         VecAXPY(x,-1.0,x_change);
6360:         VecNorm(x,NORM_INFINITY,&error);
6361:         if (error > PETSC_SMALL) {
6362:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6363:         }
6364:         VecDestroy(&x);
6365:         VecDestroy(&x_change);
6366:       }
6367:       /* adapt sub_schurs computed (if any) */
6368:       if (pcbddc->use_deluxe_scaling) {
6369:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6371:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6372:         if (sub_schurs && sub_schurs->S_Ej_all) {
6373:           Mat                    S_new,tmat;
6374:           IS                     is_all_N,is_V_Sall = NULL;

6376:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6377:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6378:           if (pcbddc->deluxe_zerorows) {
6379:             ISLocalToGlobalMapping NtoSall;
6380:             IS                     is_V;
6381:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6382:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6383:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6384:             ISLocalToGlobalMappingDestroy(&NtoSall);
6385:             ISDestroy(&is_V);
6386:           }
6387:           ISDestroy(&is_all_N);
6388:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6389:           MatDestroy(&sub_schurs->S_Ej_all);
6390:           PetscObjectReference((PetscObject)S_new);
6391:           if (pcbddc->deluxe_zerorows) {
6392:             const PetscScalar *array;
6393:             const PetscInt    *idxs_V,*idxs_all;
6394:             PetscInt          i,n_V;

6396:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6397:             ISGetLocalSize(is_V_Sall,&n_V);
6398:             ISGetIndices(is_V_Sall,&idxs_V);
6399:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6400:             VecGetArrayRead(pcis->D,&array);
6401:             for (i=0;i<n_V;i++) {
6402:               PetscScalar val;
6403:               PetscInt    idx;

6405:               idx = idxs_V[i];
6406:               val = array[idxs_all[idxs_V[i]]];
6407:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6408:             }
6409:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6410:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6411:             VecRestoreArrayRead(pcis->D,&array);
6412:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6413:             ISRestoreIndices(is_V_Sall,&idxs_V);
6414:           }
6415:           sub_schurs->S_Ej_all = S_new;
6416:           MatDestroy(&S_new);
6417:           if (sub_schurs->sum_S_Ej_all) {
6418:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6419:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6420:             PetscObjectReference((PetscObject)S_new);
6421:             if (pcbddc->deluxe_zerorows) {
6422:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6423:             }
6424:             sub_schurs->sum_S_Ej_all = S_new;
6425:             MatDestroy(&S_new);
6426:           }
6427:           ISDestroy(&is_V_Sall);
6428:           MatDestroy(&tmat);
6429:         }
6430:         /* destroy any change of basis context in sub_schurs */
6431:         if (sub_schurs && sub_schurs->change) {
6432:           PetscInt i;

6434:           for (i=0;i<sub_schurs->n_subs;i++) {
6435:             KSPDestroy(&sub_schurs->change[i]);
6436:           }
6437:           PetscFree(sub_schurs->change);
6438:         }
6439:       }
6440:       if (pcbddc->switch_static) { /* need to save the local change */
6441:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6442:       } else {
6443:         MatDestroy(&localChangeOfBasisMatrix);
6444:       }
6445:       /* determine if any process has changed the pressures locally */
6446:       pcbddc->change_interior = pcbddc->benign_have_null;
6447:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6448:       MatDestroy(&pcbddc->ConstraintMatrix);
6449:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6450:       pcbddc->use_qr_single = qr_needed;
6451:     }
6452:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6453:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6454:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6455:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6456:     } else {
6457:       Mat benign_global = NULL;
6458:       if (pcbddc->benign_have_null) {
6459:         Mat tmat;

6461:         pcbddc->change_interior = PETSC_TRUE;
6462:         VecSet(pcis->vec1_global,0.0);
6463:         VecSet(pcis->vec1_N,1.0);
6464:         VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6465:         VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6466:         VecReciprocal(pcis->vec1_global);
6467:         VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6468:         VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6469:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6470:         if (pcbddc->benign_change) {
6471:           Mat M;

6473:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6474:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6475:           MatISSetLocalMat(tmat,M);
6476:           MatDestroy(&M);
6477:         } else {
6478:           Mat         eye;
6479:           PetscScalar *array;

6481:           VecGetArray(pcis->vec1_N,&array);
6482:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6483:           for (i=0;i<pcis->n;i++) {
6484:             MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6485:           }
6486:           VecRestoreArray(pcis->vec1_N,&array);
6487:           MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6488:           MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6489:           MatISSetLocalMat(tmat,eye);
6490:           MatDestroy(&eye);
6491:         }
6492:         MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6493:         MatDestroy(&tmat);
6494:       }
6495:       if (pcbddc->user_ChangeOfBasisMatrix) {
6496:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6497:         MatDestroy(&benign_global);
6498:       } else if (pcbddc->benign_have_null) {
6499:         pcbddc->ChangeOfBasisMatrix = benign_global;
6500:       }
6501:     }
6502:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6503:       IS             is_global;
6504:       const PetscInt *gidxs;

6506:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6507:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6508:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6509:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6510:       ISDestroy(&is_global);
6511:     }
6512:   }
6513:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6514:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6515:   }

6517:   if (!pcbddc->fake_change) {
6518:     /* add pressure dofs to set of primal nodes for numbering purposes */
6519:     for (i=0;i<pcbddc->benign_n;i++) {
6520:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6521:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6522:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6523:       pcbddc->local_primal_size_cc++;
6524:       pcbddc->local_primal_size++;
6525:     }

6527:     /* check if a new primal space has been introduced (also take into account benign trick) */
6528:     pcbddc->new_primal_space_local = PETSC_TRUE;
6529:     if (olocal_primal_size == pcbddc->local_primal_size) {
6530:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6531:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6532:       if (!pcbddc->new_primal_space_local) {
6533:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6534:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6535:       }
6536:     }
6537:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6538:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6539:   }
6540:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

6542:   /* flush dbg viewer */
6543:   if (pcbddc->dbg_flag) {
6544:     PetscViewerFlush(pcbddc->dbg_viewer);
6545:   }

6547:   /* free workspace */
6548:   PetscBTDestroy(&qr_needed_idx);
6549:   PetscBTDestroy(&change_basis);
6550:   if (!pcbddc->adaptive_selection) {
6551:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6552:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6553:   } else {
6554:     PetscFree5(pcbddc->adaptive_constraints_n,
6555:                       pcbddc->adaptive_constraints_idxs_ptr,
6556:                       pcbddc->adaptive_constraints_data_ptr,
6557:                       pcbddc->adaptive_constraints_idxs,
6558:                       pcbddc->adaptive_constraints_data);
6559:     PetscFree(constraints_n);
6560:     PetscFree(constraints_idxs_B);
6561:   }
6562:   return(0);
6563: }

/*
   PCBDDCAnalyzeInterface - (Re)builds the local dof connectivity graph stored in
   pcbddc->mat_graph and runs the connected components analysis on it.

   Input Parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Notes:
   The graph is rebuilt only when pcbddc->recompute_topography is set; the connected
   components analysis is run whenever pcbddc->graphanalyzed is false (rebuilding the
   graph clears that flag).

   If the user did not provide a CSR adjacency and pcbddc->use_local_adj is set, the
   adjacency is taken from the local matrix matis->A via MatGetRowIJ(). In that case
   mat_graph->nvtxs_csr is reset to 0 before returning.

   Raises PETSC_ERR_ARG_WRONG when a CSR graph is present but its size differs from the
   number of graph vertices.
*/
6565: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6566: {
6567:   ISLocalToGlobalMapping map;
6568:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
6569:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
6570:   PetscInt               i,N;
        /* set when the CSR graph has been requested from the local matrix in this call */
6571:   PetscBool              rcsr = PETSC_FALSE;
6572:   PetscErrorCode         ierr;

6575:   if (pcbddc->recompute_topography) {
6576:     pcbddc->graphanalyzed = PETSC_FALSE;
6577:     /* Reset previously computed graph */
6578:     PCBDDCGraphReset(pcbddc->mat_graph);
6579:     /* Init local Graph struct */
6580:     MatGetSize(pc->pmat,&N,NULL);
6581:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6582:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* only the local list of user primal vertices is checked, and only when no
             user_primal_vertices IS has been set */
6584:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6585:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6586:     }
6587:     /* Check validity of the csr graph passed in by the user */
          /* both sizes are PetscInt, hence the %D specifier */
6588:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

6590:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6591:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6592:       PetscInt  *xadj,*adjncy;
6593:       PetscInt  nvtxs;
6594:       PetscBool flg_row=PETSC_FALSE;

6596:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* the arrays are copied (PETSC_COPY_VALUES) since they are given back with MatRestoreRowIJ below */
6597:       if (flg_row) {
6598:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6599:         pcbddc->computed_rowadj = PETSC_TRUE;
6600:       }
6601:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* note: set regardless of flg_row */
6602:       rcsr = PETSC_TRUE;
6603:     }
6604:     if (pcbddc->dbg_flag) {
6605:       PetscViewerFlush(pcbddc->dbg_viewer);
6606:     }

6608:     /* Setup of Graph */
6609:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6610:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

6612:     /* attach info on disconnected subdomains if present */
6613:     if (pcbddc->n_local_subs) {
6614:       PetscInt *local_subs;

            /* local_subs[k] = index of the local subdomain IS listing k; entries for
               indices not listed in any IS are not written here. The array is handed
               over to mat_graph (the pointer is stored, not copied) */
6616:       PetscMalloc1(N,&local_subs);
6617:       for (i=0;i<pcbddc->n_local_subs;i++) {
6618:         const PetscInt *idxs;
6619:         PetscInt       nl,j;

6621:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
6622:         ISGetIndices(pcbddc->local_subs[i],&idxs);
6623:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6624:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6625:       }
6626:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6627:       pcbddc->mat_graph->local_subs = local_subs;
6628:     }
6629:   }

6631:   if (!pcbddc->graphanalyzed) {
6632:     /* Graph's connected components analysis */
6633:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6634:     pcbddc->graphanalyzed = PETSC_TRUE;
6635:   }
        /* the CSR graph was requested from the local matrix in this call: reset its recorded size */
6636:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6637:   return(0);
6638: }

/*
   PCBDDCOrthonormalizeVecs - Orthonormalizes a set of vectors in place.

   Input Parameters:
+  n    - number of vectors
-  vecs - the vectors; overwritten with the orthonormalized ones

   Notes:
   Gram-Schmidt sweep: at step i, vecs[i] is normalized and its component is then
   removed from every later vector vecs[i+1],...,vecs[n-1].

   No check is made for (numerically) linearly dependent input vectors.
*/
6640: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
6641: {
6642:   PetscInt       i,j;
6643:   PetscScalar    *alphas;

6647:   PetscMalloc1(n,&alphas);
6648:   for (i=0;i<n;i++) {
6649:     VecNormalize(vecs[i],NULL);
          /* alphas[j] = dot product of vecs[i] with vecs[i+1+j], for the n-i-1 vectors still to be processed */
6650:     VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);
          /* negate (and conjugate, in complex arithmetic) to get the coefficient of vecs[i] to be added to vecs[i+1+j] */
6651:     for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]);
          /* each later vector gets its own correction along vecs[i]; a single VecMAXPY on
             one vector would combine the wrong vectors and leave the others untouched */
6652:     for (j=0;j<n-i-1;j++) VecAXPY(vecs[i+1+j],alphas[j],vecs[i]);
6653:   }
6654:   PetscFree(alphas);
6655:   return(0);
6656: }

/*
   PCBDDCMatISGetSubassemblingPattern - Computes, for each process holding a non-empty
   local matrix, the rank its local matrix should be sent to when subassembling a MATIS.

   Input Parameters:
+  mat          - the matrix; must be of type MATIS (PETSC_ERR_SUP otherwise)
.  n_subdomains - on input, the requested number of subdomains; must be positive
                  (PETSC_ERR_ARG_WRONG otherwise)
-  redprocs     - if positive and smaller than the number of active processes, the
                  subdomain connectivity is assembled as an AIJ matrix whose rows live
                  on the first redprocs active processes instead of one row per process

   Output Parameters:
+  n_subdomains - may be modified: limited by the number of processes with empty local
                  matrix (if any) and by the number of active processes, or set to the
                  number of active processes when the pattern is a plain shift
.  is_sends     - IS on the communicator of mat; it has one entry (the destination rank)
                  on processes with a non-empty local matrix and no entries elsewhere
-  have_void    - (optional, can be NULL) PETSC_TRUE if at least one process has an
                  empty local matrix

   Options Database Keys (read here):
+  -matis_partitioning_use_vwgt  - pass the local matrix size as vertex weight to the partitioner
-  -matis_partitioning_threshold - threshold used to select the neighbours kept in the
                                   connectivity graph (values smaller than 2 are raised to 2)

   Notes:
   Reductions and gathers are performed on the communicator of mat, so every process of
   that communicator has to call this routine.
*/
6658: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
6659: {
6660:   Mat            A;
6661:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
6662:   PetscMPIInt    size,rank,color;
6663:   PetscInt       *xadj,*adjncy;
6664:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
6665:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
6666:   PetscInt       void_procs,*procs_candidates = NULL;
6667:   PetscInt       xadj_count,*count;
6668:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
6669:   PetscSubcomm   psubcomm;
6670:   MPI_Comm       subcomm;

6675:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6676:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
        /* *n_subdomains is a PetscInt, hence the %D specifier */
6679:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D\n",*n_subdomains);

6681:   if (have_void) *have_void = PETSC_FALSE;
6682:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
6683:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
6684:   MatISGetLocalMat(mat,&A);
6685:   MatGetLocalSize(A,&n,NULL);
        /* a process is active if its local matrix has at least one row */
6686:   im_active = !!n;
6687:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
6688:   void_procs = size - active_procs;
6689:   /* get ranks of non-active processes in mat communicator */
6690:   if (void_procs) {
6691:     PetscInt ncand;

6693:     if (have_void) *have_void = PETSC_TRUE;
6694:     PetscMalloc1(size,&procs_candidates);
6695:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
          /* compress in place: the gathered activity flags are overwritten, from the front,
             with the ranks of the non-active processes (only the first ncand entries are ranks) */
6696:     for (i=0,ncand=0;i<size;i++) {
6697:       if (!procs_candidates[i]) {
6698:         procs_candidates[ncand++] = i;
6699:       }
6700:     }
6701:     /* force n_subdomains to be not greater than the number of non-active processes */
6702:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
6703:   }

6705:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
6706:      number of subdomains requested 1 -> send to master or first candidate in voids  */
6707:   MatGetSize(mat,&N,NULL);
6708:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
6709:     PetscInt issize,isidx,dest;
6710:     if (*n_subdomains == 1) dest = 0;
6711:     else dest = rank;
6712:     if (im_active) {
6713:       issize = 1;
6714:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6715:         isidx = procs_candidates[dest];
6716:       } else {
6717:         isidx = dest;
6718:       }
6719:     } else {
            /* non-active processes contribute no entries; isidx is only a placeholder */
6720:       issize = 0;
6721:       isidx = -1;
6722:     }
6723:     if (*n_subdomains != 1) *n_subdomains = active_procs;
6724:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
6725:     PetscFree(procs_candidates);
6726:     return(0);
6727:   }
6728:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
6729:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
6730:   threshold = PetscMax(threshold,2);

6732:   /* Get info on mapping */
6733:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

6735:   /* build local CSR graph of subdomains' connectivity */
        /* one graph vertex per process: xadj has two entries and xadj[1] is the number of edges.
           Entry 0 of the neighbours info is skipped everywhere below (presumably it refers
           to the process itself - confirm against ISLocalToGlobalMappingGetInfo) */
6736:   PetscMalloc1(2,&xadj);
6737:   xadj[0] = 0;
6738:   xadj[1] = PetscMax(n_neighs-1,0);
6739:   PetscMalloc1(xadj[1],&adjncy);
6740:   PetscMalloc1(xadj[1],&adjncy_wgt);
        /* count[k] = number of neighbours (entry 0 excluded) sharing local index k */
6741:   PetscCalloc1(n,&count);
6742:   for (i=1;i<n_neighs;i++)
6743:     for (j=0;j<n_shared[i];j++)
6744:       count[shared[i][j]] += 1;

        /* a neighbour becomes an edge only if at least one index shared with it has a
           count below the threshold; the edge weight is the number of shared indices */
6746:   xadj_count = 0;
6747:   for (i=1;i<n_neighs;i++) {
6748:     for (j=0;j<n_shared[i];j++) {
6749:       if (count[shared[i][j]] < threshold) {
6750:         adjncy[xadj_count] = neighs[i];
6751:         adjncy_wgt[xadj_count] = n_shared[i];
6752:         xadj_count++;
6753:         break;
6754:       }
6755:     }
6756:   }
6757:   xadj[1] = xadj_count;
6758:   PetscFree(count);
6759:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
6760:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

        /* allocated on every process since it is handed over to is_sends with PETSC_OWN_POINTER */
6762:   PetscMalloc1(1,&ranks_send_to_idx);

6764:   /* Restrict work on active processes only */
6765:   PetscMPIIntCast(im_active,&color);
6766:   if (void_procs) {
6767:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
6768:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
6769:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
6770:     subcomm = PetscSubcommChild(psubcomm);
6771:   } else {
6772:     psubcomm = NULL;
6773:     subcomm = PetscObjectComm((PetscObject)mat);
6774:   }

6776:   v_wgt = NULL;
6777:   if (!color) {
          /* non-active process: nothing to partition, just release the local graph */
6778:     PetscFree(xadj);
6779:     PetscFree(adjncy);
6780:     PetscFree(adjncy_wgt);
6781:   } else {
6782:     Mat             subdomain_adj;
6783:     IS              new_ranks,new_ranks_contig;
6784:     MatPartitioning partitioner;
6785:     PetscInt        rstart=0,rend=0;
6786:     PetscInt        *is_indices,*oldranks;
          /* note: shadows the outer size; from here on size is the size of subcomm */
6787:     PetscMPIInt     size;
6788:     PetscBool       aggregate;

6790:     MPI_Comm_size(subcomm,&size);
6791:     if (void_procs) {
6792:       PetscInt prank = rank;
            /* oldranks[r] = rank in the communicator of mat of the process with rank r in subcomm;
               the neighbours' ranks are translated to subcomm ranks by searching in it */
6793:       PetscMalloc1(size,&oldranks);
6794:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
6795:       for (i=0;i<xadj[1];i++) {
6796:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
6797:       }
6798:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
6799:     } else {
6800:       oldranks = NULL;
6801:     }
6802:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
6803:     if (aggregate) { /* TODO: all this part could be made more efficient */
6804:       PetscInt    lrows,row,ncols,*cols;
6805:       PetscMPIInt nrank;
6806:       PetscScalar *vals;

6808:       MPI_Comm_rank(subcomm,&nrank);
            /* the size rows are distributed among the first redprocs processes only */
6809:       lrows = 0;
6810:       if (nrank<redprocs) {
6811:         lrows = size/redprocs;
6812:         if (nrank<size%redprocs) lrows++;
6813:       }
6814:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
6815:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
6816:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
6817:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
            /* each process inserts its own row (row index = its rank in subcomm) */
6818:       row = nrank;
6819:       ncols = xadj[1]-xadj[0];
6820:       cols = adjncy;
6821:       PetscMalloc1(ncols,&vals);
6822:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
6823:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
6824:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
6825:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
6826:       PetscFree(xadj);
6827:       PetscFree(adjncy);
6828:       PetscFree(adjncy_wgt);
6829:       PetscFree(vals);
6830:       if (use_vwgt) {
6831:         Vec               v;
6832:         const PetscScalar *array;
6833:         PetscInt          nl;

              /* a vector with the layout of subdomain_adj is used to move each local size n
                 to the process owning the corresponding row */
6835:         MatCreateVecs(subdomain_adj,&v,NULL);
6836:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
6837:         VecAssemblyBegin(v);
6838:         VecAssemblyEnd(v);
6839:         VecGetLocalSize(v,&nl);
6840:         VecGetArrayRead(v,&array);
6841:         PetscMalloc1(nl,&v_wgt);
6842:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
6843:         VecRestoreArrayRead(v,&array);
6844:         VecDestroy(&v);
6845:       }
6846:     } else {
            /* xadj, adjncy and adjncy_wgt are not freed in this function on this path;
               presumably MatCreateMPIAdj takes ownership of them - confirm against its manual page */
6847:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
6848:       if (use_vwgt) {
6849:         PetscMalloc1(1,&v_wgt);
6850:         v_wgt[0] = n;
6851:       }
6852:     }
6853:     /* MatView(subdomain_adj,0); */

6855:     /* Partition */
6856:     MatPartitioningCreate(subcomm,&partitioner);
6857:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
          /* v_wgt is not freed in this function; presumably the partitioner takes ownership -
             confirm against MatPartitioningSetVertexWeights */
6858:     if (v_wgt) {
6859:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
6860:     }
6861:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
6862:     MatPartitioningSetNParts(partitioner,*n_subdomains);
6863:     MatPartitioningSetFromOptions(partitioner);
6864:     MatPartitioningApply(partitioner,&new_ranks);
6865:     /* MatPartitioningView(partitioner,0); */

6867:     /* renumber new_ranks to avoid "holes" in new set of processors */
6868:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
6869:     ISDestroy(&new_ranks);
6870:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6871:     if (!aggregate) {
            /* one graph vertex per process: the first local entry is the part assigned to this process */
6872:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6873: #if defined(PETSC_USE_DEBUG)
6874:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6875: #endif
6876:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
6877:       } else if (oldranks) {
6878:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
6879:       } else {
6880:         ranks_send_to_idx[0] = is_indices[0];
6881:       }
6882:     } else {
6883:       PetscInt    idxs[1];
6884:       PetscMPIInt tag;
6885:       MPI_Request *reqs;

            /* the owner of row i sends the part computed for that row to the process with
               rank i in subcomm; every process receives exactly one value (its own row) */
6887:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
6888:       PetscMalloc1(rend-rstart,&reqs);
6889:       for (i=rstart;i<rend;i++) {
6890:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
6891:       }
6892:       MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
6893:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
6894:       PetscFree(reqs);
6895:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6896: #if defined(PETSC_USE_DEBUG)
6897:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6898: #endif
6899:         ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]];
6900:       } else if (oldranks) {
6901:         ranks_send_to_idx[0] = oldranks[idxs[0]];
6902:       } else {
6903:         ranks_send_to_idx[0] = idxs[0];
6904:       }
6905:     }
6906:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6907:     /* clean up */
6908:     PetscFree(oldranks);
6909:     ISDestroy(&new_ranks_contig);
6910:     MatDestroy(&subdomain_adj);
6911:     MatPartitioningDestroy(&partitioner);
6912:   }
6913:   PetscSubcommDestroy(&psubcomm);
6914:   PetscFree(procs_candidates);

6916:   /* assemble parallel IS for sends */
        /* one entry on active processes, none on the others; the IS takes ownership of ranks_send_to_idx */
6917:   i = 1;
6918:   if (!color) i=0;
6919:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
6920:   return(0);
6921: }

/* Tags identifying the type of a local matrix inside the index buffers exchanged by
   PCBDDCMatISSubassemble: the tag is stored, cast to PetscInt, as the first entry of
   the buffer, so the numeric values must be the same on sender and receiver */
6923: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

6925: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
6926: {
6927:   Mat                    local_mat;
6928:   IS                     is_sends_internal;
6929:   PetscInt               rows,cols,new_local_rows;
6930:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
6931:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
6932:   ISLocalToGlobalMapping l2gmap;
6933:   PetscInt*              l2gmap_indices;
6934:   const PetscInt*        is_indices;
6935:   MatType                new_local_type;
6936:   /* buffers */
6937:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
6938:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
6939:   PetscInt               *recv_buffer_idxs_local;
6940:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
6941:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
6942:   /* MPI */
6943:   MPI_Comm               comm,comm_n;
6944:   PetscSubcomm           subcomm;
6945:   PetscMPIInt            n_sends,n_recvs,commsize;
6946:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
6947:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
6948:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
6949:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
6950:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
6951:   PetscErrorCode         ierr;

6955:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6956:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6963:   if (nvecs) {
6964:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
6966:   }
6967:   /* further checks */
6968:   MatISGetLocalMat(mat,&local_mat);
6969:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
6970:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
6971:   MatGetSize(local_mat,&rows,&cols);
6972:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
6973:   if (reuse && *mat_n) {
6974:     PetscInt mrows,mcols,mnrows,mncols;
6976:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
6977:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
6978:     MatGetSize(mat,&mrows,&mcols);
6979:     MatGetSize(*mat_n,&mnrows,&mncols);
6980:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
6981:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
6982:   }
6983:   MatGetBlockSize(local_mat,&bs);

6986:   /* prepare IS for sending if not provided */
6987:   if (!is_sends) {
6988:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
6989:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
6990:   } else {
6991:     PetscObjectReference((PetscObject)is_sends);
6992:     is_sends_internal = is_sends;
6993:   }

6995:   /* get comm */
6996:   PetscObjectGetComm((PetscObject)mat,&comm);

6998:   /* compute number of sends */
6999:   ISGetLocalSize(is_sends_internal,&i);
7000:   PetscMPIIntCast(i,&n_sends);

7002:   /* compute number of receives */
7003:   MPI_Comm_size(comm,&commsize);
7004:   PetscMalloc1(commsize,&iflags);
7005:   PetscMemzero(iflags,commsize*sizeof(*iflags));
7006:   ISGetIndices(is_sends_internal,&is_indices);
7007:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7008:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7009:   PetscFree(iflags);

7011:   /* restrict comm if requested */
7012:   subcomm = 0;
7013:   destroy_mat = PETSC_FALSE;
7014:   if (restrict_comm) {
7015:     PetscMPIInt color,subcommsize;

7017:     color = 0;
7018:     if (restrict_full) {
7019:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7020:     } else {
7021:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7022:     }
7023:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7024:     subcommsize = commsize - subcommsize;
7025:     /* check if reuse has been requested */
7026:     if (reuse) {
7027:       if (*mat_n) {
7028:         PetscMPIInt subcommsize2;
7029:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7030:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7031:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7032:       } else {
7033:         comm_n = PETSC_COMM_SELF;
7034:       }
7035:     } else { /* MAT_INITIAL_MATRIX */
7036:       PetscMPIInt rank;

7038:       MPI_Comm_rank(comm,&rank);
7039:       PetscSubcommCreate(comm,&subcomm);
7040:       PetscSubcommSetNumber(subcomm,2);
7041:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7042:       comm_n = PetscSubcommChild(subcomm);
7043:     }
7044:     /* flag to destroy *mat_n if not significant on this process */
7045:     if (color) destroy_mat = PETSC_TRUE;
7046:   } else {
7047:     comm_n = comm;
7048:   }

7050:   /* prepare send/receive buffers */
7051:   PetscMalloc1(commsize,&ilengths_idxs);
7052:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7053:   PetscMalloc1(commsize,&ilengths_vals);
7054:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7055:   if (nis) {
7056:     PetscCalloc1(commsize,&ilengths_idxs_is);
7057:   }

7059:   /* Get data from local matrices */
7060:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7061:     /* TODO: See below some guidelines on how to prepare the local buffers */
7062:     /*
7063:        send_buffer_vals should contain the raw values of the local matrix
7064:        send_buffer_idxs should contain:
7065:        - MatType_PRIVATE type
7066:        - PetscInt        size_of_l2gmap
7067:        - PetscInt        global_row_indices[size_of_l2gmap]
7068:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7069:     */
7070:   else {
7071:     MatDenseGetArray(local_mat,&send_buffer_vals);
7072:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7073:     PetscMalloc1(i+2,&send_buffer_idxs);
7074:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7075:     send_buffer_idxs[1] = i;
7076:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7077:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7078:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7079:     PetscMPIIntCast(i,&len);
7080:     for (i=0;i<n_sends;i++) {
7081:       ilengths_vals[is_indices[i]] = len*len;
7082:       ilengths_idxs[is_indices[i]] = len+2;
7083:     }
7084:   }
7085:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7086:   /* additional is (if any) */
7087:   if (nis) {
7088:     PetscMPIInt psum;
7089:     PetscInt j;
7090:     for (j=0,psum=0;j<nis;j++) {
7091:       PetscInt plen;
7092:       ISGetLocalSize(isarray[j],&plen);
7093:       PetscMPIIntCast(plen,&len);
7094:       psum += len+1; /* indices + lenght */
7095:     }
7096:     PetscMalloc1(psum,&send_buffer_idxs_is);
7097:     for (j=0,psum=0;j<nis;j++) {
7098:       PetscInt plen;
7099:       const PetscInt *is_array_idxs;
7100:       ISGetLocalSize(isarray[j],&plen);
7101:       send_buffer_idxs_is[psum] = plen;
7102:       ISGetIndices(isarray[j],&is_array_idxs);
7103:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7104:       ISRestoreIndices(isarray[j],&is_array_idxs);
7105:       psum += plen+1; /* indices + lenght */
7106:     }
7107:     for (i=0;i<n_sends;i++) {
7108:       ilengths_idxs_is[is_indices[i]] = psum;
7109:     }
7110:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7111:   }
7112:   MatISRestoreLocalMat(mat,&local_mat);

7114:   buf_size_idxs = 0;
7115:   buf_size_vals = 0;
7116:   buf_size_idxs_is = 0;
7117:   buf_size_vecs = 0;
7118:   for (i=0;i<n_recvs;i++) {
7119:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7120:     buf_size_vals += (PetscInt)olengths_vals[i];
7121:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7122:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7123:   }
7124:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7125:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7126:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7127:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7129:   /* get new tags for clean communications */
7130:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7131:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7132:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7133:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7135:   /* allocate for requests */
7136:   PetscMalloc1(n_sends,&send_req_idxs);
7137:   PetscMalloc1(n_sends,&send_req_vals);
7138:   PetscMalloc1(n_sends,&send_req_idxs_is);
7139:   PetscMalloc1(n_sends,&send_req_vecs);
7140:   PetscMalloc1(n_recvs,&recv_req_idxs);
7141:   PetscMalloc1(n_recvs,&recv_req_vals);
7142:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7143:   PetscMalloc1(n_recvs,&recv_req_vecs);

7145:   /* communications */
7146:   ptr_idxs = recv_buffer_idxs;
7147:   ptr_vals = recv_buffer_vals;
7148:   ptr_idxs_is = recv_buffer_idxs_is;
7149:   ptr_vecs = recv_buffer_vecs;
7150:   for (i=0;i<n_recvs;i++) {
7151:     source_dest = onodes[i];
7152:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7153:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7154:     ptr_idxs += olengths_idxs[i];
7155:     ptr_vals += olengths_vals[i];
7156:     if (nis) {
7157:       source_dest = onodes_is[i];
7158:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7159:       ptr_idxs_is += olengths_idxs_is[i];
7160:     }
7161:     if (nvecs) {
7162:       source_dest = onodes[i];
7163:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7164:       ptr_vecs += olengths_idxs[i]-2;
7165:     }
7166:   }
7167:   for (i=0;i<n_sends;i++) {
7168:     PetscMPIIntCast(is_indices[i],&source_dest);
7169:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7170:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7171:     if (nis) {
7172:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7173:     }
7174:     if (nvecs) {
7175:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7176:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7177:     }
7178:   }
7179:   ISRestoreIndices(is_sends_internal,&is_indices);
7180:   ISDestroy(&is_sends_internal);

7182:   /* assemble new l2g map */
7183:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7184:   ptr_idxs = recv_buffer_idxs;
7185:   new_local_rows = 0;
7186:   for (i=0;i<n_recvs;i++) {
7187:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7188:     ptr_idxs += olengths_idxs[i];
7189:   }
7190:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7191:   ptr_idxs = recv_buffer_idxs;
7192:   new_local_rows = 0;
7193:   for (i=0;i<n_recvs;i++) {
7194:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7195:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7196:     ptr_idxs += olengths_idxs[i];
7197:   }
7198:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7199:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7200:   PetscFree(l2gmap_indices);

7202:   /* infer new local matrix type from received local matrices type */
7203:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7204:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7205:   if (n_recvs) {
7206:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7207:     ptr_idxs = recv_buffer_idxs;
7208:     for (i=0;i<n_recvs;i++) {
7209:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7210:         new_local_type_private = MATAIJ_PRIVATE;
7211:         break;
7212:       }
7213:       ptr_idxs += olengths_idxs[i];
7214:     }
7215:     switch (new_local_type_private) {
7216:       case MATDENSE_PRIVATE:
7217:         new_local_type = MATSEQAIJ;
7218:         bs = 1;
7219:         break;
7220:       case MATAIJ_PRIVATE:
7221:         new_local_type = MATSEQAIJ;
7222:         bs = 1;
7223:         break;
7224:       case MATBAIJ_PRIVATE:
7225:         new_local_type = MATSEQBAIJ;
7226:         break;
7227:       case MATSBAIJ_PRIVATE:
7228:         new_local_type = MATSEQSBAIJ;
7229:         break;
7230:       default:
7231:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7232:         break;
7233:     }
7234:   } else { /* by default, new_local_type is seqaij */
7235:     new_local_type = MATSEQAIJ;
7236:     bs = 1;
7237:   }

7239:   /* create MATIS object if needed */
7240:   if (!reuse) {
7241:     MatGetSize(mat,&rows,&cols);
7242:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7243:   } else {
7244:     /* it also destroys the local matrices */
7245:     if (*mat_n) {
7246:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7247:     } else { /* this is a fake object */
7248:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7249:     }
7250:   }
7251:   MatISGetLocalMat(*mat_n,&local_mat);
7252:   MatSetType(local_mat,new_local_type);

7254:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7256:   /* Global to local map of received indices */
7257:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7258:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7259:   ISLocalToGlobalMappingDestroy(&l2gmap);

7261:   /* restore attributes -> type of incoming data and its size */
7262:   buf_size_idxs = 0;
7263:   for (i=0;i<n_recvs;i++) {
7264:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7265:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7266:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7267:   }
7268:   PetscFree(recv_buffer_idxs);

7270:   /* set preallocation */
7271:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7272:   if (!newisdense) {
7273:     PetscInt *new_local_nnz=0;

7275:     ptr_idxs = recv_buffer_idxs_local;
7276:     if (n_recvs) {
7277:       PetscCalloc1(new_local_rows,&new_local_nnz);
7278:     }
7279:     for (i=0;i<n_recvs;i++) {
7280:       PetscInt j;
7281:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7282:         for (j=0;j<*(ptr_idxs+1);j++) {
7283:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7284:         }
7285:       } else {
7286:         /* TODO */
7287:       }
7288:       ptr_idxs += olengths_idxs[i];
7289:     }
7290:     if (new_local_nnz) {
7291:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7292:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7293:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7294:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7295:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7296:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7297:     } else {
7298:       MatSetUp(local_mat);
7299:     }
7300:     PetscFree(new_local_nnz);
7301:   } else {
7302:     MatSetUp(local_mat);
7303:   }

7305:   /* set values */
7306:   ptr_vals = recv_buffer_vals;
7307:   ptr_idxs = recv_buffer_idxs_local;
7308:   for (i=0;i<n_recvs;i++) {
7309:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7310:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7311:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7312:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7313:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7314:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7315:     } else {
7316:       /* TODO */
7317:     }
7318:     ptr_idxs += olengths_idxs[i];
7319:     ptr_vals += olengths_vals[i];
7320:   }
7321:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7322:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7323:   MatISRestoreLocalMat(*mat_n,&local_mat);
7324:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7325:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7326:   PetscFree(recv_buffer_vals);

7328: #if 0
7329:   if (!restrict_comm) { /* check */
7330:     Vec       lvec,rvec;
7331:     PetscReal infty_error;

7333:     MatCreateVecs(mat,&rvec,&lvec);
7334:     VecSetRandom(rvec,NULL);
7335:     MatMult(mat,rvec,lvec);
7336:     VecScale(lvec,-1.0);
7337:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7338:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7339:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7340:     VecDestroy(&rvec);
7341:     VecDestroy(&lvec);
7342:   }
7343: #endif

7345:   /* assemble new additional is (if any) */
7346:   if (nis) {
7347:     PetscInt **temp_idxs,*count_is,j,psum;

7349:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7350:     PetscCalloc1(nis,&count_is);
7351:     ptr_idxs = recv_buffer_idxs_is;
7352:     psum = 0;
7353:     for (i=0;i<n_recvs;i++) {
7354:       for (j=0;j<nis;j++) {
7355:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7356:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7357:         psum += plen;
7358:         ptr_idxs += plen+1; /* shift pointer to received data */
7359:       }
7360:     }
7361:     PetscMalloc1(nis,&temp_idxs);
7362:     PetscMalloc1(psum,&temp_idxs[0]);
7363:     for (i=1;i<nis;i++) {
7364:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7365:     }
7366:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7367:     ptr_idxs = recv_buffer_idxs_is;
7368:     for (i=0;i<n_recvs;i++) {
7369:       for (j=0;j<nis;j++) {
7370:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7371:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7372:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7373:         ptr_idxs += plen+1; /* shift pointer to received data */
7374:       }
7375:     }
7376:     for (i=0;i<nis;i++) {
7377:       ISDestroy(&isarray[i]);
7378:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7379:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7380:     }
7381:     PetscFree(count_is);
7382:     PetscFree(temp_idxs[0]);
7383:     PetscFree(temp_idxs);
7384:   }
7385:   /* free workspace */
7386:   PetscFree(recv_buffer_idxs_is);
7387:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7388:   PetscFree(send_buffer_idxs);
7389:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7390:   if (isdense) {
7391:     MatISGetLocalMat(mat,&local_mat);
7392:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7393:     MatISRestoreLocalMat(mat,&local_mat);
7394:   } else {
7395:     /* PetscFree(send_buffer_vals); */
7396:   }
7397:   if (nis) {
7398:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7399:     PetscFree(send_buffer_idxs_is);
7400:   }

7402:   if (nvecs) {
7403:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7404:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7405:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7406:     VecDestroy(&nnsp_vec[0]);
7407:     VecCreate(comm_n,&nnsp_vec[0]);
7408:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7409:     VecSetType(nnsp_vec[0],VECSTANDARD);
7410:     /* set values */
7411:     ptr_vals = recv_buffer_vecs;
7412:     ptr_idxs = recv_buffer_idxs_local;
7413:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7414:     for (i=0;i<n_recvs;i++) {
7415:       PetscInt j;
7416:       for (j=0;j<*(ptr_idxs+1);j++) {
7417:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7418:       }
7419:       ptr_idxs += olengths_idxs[i];
7420:       ptr_vals += olengths_idxs[i]-2;
7421:     }
7422:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7423:     VecAssemblyBegin(nnsp_vec[0]);
7424:     VecAssemblyEnd(nnsp_vec[0]);
7425:   }

7427:   PetscFree(recv_buffer_vecs);
7428:   PetscFree(recv_buffer_idxs_local);
7429:   PetscFree(recv_req_idxs);
7430:   PetscFree(recv_req_vals);
7431:   PetscFree(recv_req_vecs);
7432:   PetscFree(recv_req_idxs_is);
7433:   PetscFree(send_req_idxs);
7434:   PetscFree(send_req_vals);
7435:   PetscFree(send_req_vecs);
7436:   PetscFree(send_req_idxs_is);
7437:   PetscFree(ilengths_vals);
7438:   PetscFree(ilengths_idxs);
7439:   PetscFree(olengths_vals);
7440:   PetscFree(olengths_idxs);
7441:   PetscFree(onodes);
7442:   if (nis) {
7443:     PetscFree(ilengths_idxs_is);
7444:     PetscFree(olengths_idxs_is);
7445:     PetscFree(onodes_is);
7446:   }
7447:   PetscSubcommDestroy(&subcomm);
7448:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
7449:     MatDestroy(mat_n);
7450:     for (i=0;i<nis;i++) {
7451:       ISDestroy(&isarray[i]);
7452:     }
7453:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7454:       VecDestroy(&nnsp_vec[0]);
7455:     }
7456:     *mat_n = NULL;
7457:   }
7458:   return(0);
7459: }

7461: /* temporary hack into ksp private data structure */
7462:  #include <petsc/private/kspimpl.h>

7464: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7465: {
7466:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7467:   PC_IS                  *pcis = (PC_IS*)pc->data;
7468:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7469:   Mat                    coarsedivudotp = NULL;
7470:   Mat                    coarseG,t_coarse_mat_is;
7471:   MatNullSpace           CoarseNullSpace = NULL;
7472:   ISLocalToGlobalMapping coarse_islg;
7473:   IS                     coarse_is,*isarray;
7474:   PetscInt               i,im_active=-1,active_procs=-1;
7475:   PetscInt               nis,nisdofs,nisneu,nisvert;
7476:   PC                     pc_temp;
7477:   PCType                 coarse_pc_type;
7478:   KSPType                coarse_ksp_type;
7479:   PetscBool              multilevel_requested,multilevel_allowed;
7480:   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
7481:   PetscInt               ncoarse,nedcfield;
7482:   PetscBool              compute_vecs = PETSC_FALSE;
7483:   PetscScalar            *array;
7484:   MatReuse               coarse_mat_reuse;
7485:   PetscBool              restr, full_restr, have_void;
7486:   PetscMPIInt            commsize;
7487:   PetscErrorCode         ierr;

7490:   /* Assign global numbering to coarse dofs */
7491:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7492:     PetscInt ocoarse_size;
7493:     compute_vecs = PETSC_TRUE;

7495:     pcbddc->new_primal_space = PETSC_TRUE;
7496:     ocoarse_size = pcbddc->coarse_size;
7497:     PetscFree(pcbddc->global_primal_indices);
7498:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7499:     /* see if we can avoid some work */
7500:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7501:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7502:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7503:         KSPReset(pcbddc->coarse_ksp);
7504:         coarse_reuse = PETSC_FALSE;
7505:       } else { /* we can safely reuse already computed coarse matrix */
7506:         coarse_reuse = PETSC_TRUE;
7507:       }
7508:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7509:       coarse_reuse = PETSC_FALSE;
7510:     }
7511:     /* reset any subassembling information */
7512:     if (!coarse_reuse || pcbddc->recompute_topography) {
7513:       ISDestroy(&pcbddc->coarse_subassembling);
7514:     }
7515:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7516:     coarse_reuse = PETSC_TRUE;
7517:   }
7518:   /* assemble coarse matrix */
7519:   if (coarse_reuse && pcbddc->coarse_ksp) {
7520:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7521:     PetscObjectReference((PetscObject)coarse_mat);
7522:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7523:   } else {
7524:     coarse_mat = NULL;
7525:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7526:   }

7528:   /* creates temporary l2gmap and IS for coarse indexes */
7529:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7530:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

7532:   /* creates temporary MATIS object for coarse matrix */
7533:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7534:   MatDenseGetArray(coarse_submat_dense,&array);
7535:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7536:   MatDenseRestoreArray(coarse_submat_dense,&array);
7537:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7538:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7539:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7540:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7541:   MatDestroy(&coarse_submat_dense);

7543:   /* count "active" (i.e. with positive local size) and "void" processes */
7544:   im_active = !!(pcis->n);
7545:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

7547:   /* determine the number of processes participating in the coarse solver and compute the subassembling pattern */
7548:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7549:   /* full_restr : just use the receivers from the subassembling pattern */
7550:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7551:   coarse_mat_is = NULL;
7552:   multilevel_allowed = PETSC_FALSE;
7553:   multilevel_requested = PETSC_FALSE;
7554:   pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7555:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7556:   if (multilevel_requested) {
7557:     ncoarse = active_procs/pcbddc->coarsening_ratio;
7558:     restr = PETSC_FALSE;
7559:     full_restr = PETSC_FALSE;
7560:   } else {
7561:     ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7562:     restr = PETSC_TRUE;
7563:     full_restr = PETSC_TRUE;
7564:   }
7565:   if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7566:   ncoarse = PetscMax(1,ncoarse);
7567:   if (!pcbddc->coarse_subassembling) {
7568:     if (pcbddc->coarsening_ratio > 1) {
7569:       if (multilevel_requested) {
7570:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7571:       } else {
7572:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7573:       }
7574:     } else {
7575:       PetscMPIInt rank;
7576:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7577:       have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7578:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7579:     }
7580:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7581:     PetscInt    psum;
7582:     if (pcbddc->coarse_ksp) psum = 1;
7583:     else psum = 0;
7584:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7585:     if (ncoarse < commsize) have_void = PETSC_TRUE;
7586:   }
7587:   /* determine if we can go multilevel */
7588:   if (multilevel_requested) {
7589:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7590:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7591:   }
7592:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7594:   /* dump subassembling pattern */
7595:   if (pcbddc->dbg_flag && multilevel_allowed) {
7596:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7597:   }

7599:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7600:   nedcfield = -1;
7601:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7602:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
7603:     const PetscInt         *idxs;
7604:     ISLocalToGlobalMapping tmap;

7606:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7607:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7608:     /* allocate space for temporary storage */
7609:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7610:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7611:     /* allocate for IS array */
7612:     nisdofs = pcbddc->n_ISForDofsLocal;
7613:     if (pcbddc->nedclocal) {
7614:       if (pcbddc->nedfield > -1) {
7615:         nedcfield = pcbddc->nedfield;
7616:       } else {
7617:         nedcfield = 0;
7618:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7619:         nisdofs = 1;
7620:       }
7621:     }
7622:     nisneu = !!pcbddc->NeumannBoundariesLocal;
7623:     nisvert = 0; /* nisvert is not used */
7624:     nis = nisdofs + nisneu + nisvert;
7625:     PetscMalloc1(nis,&isarray);
7626:     /* dofs splitting */
7627:     for (i=0;i<nisdofs;i++) {
7628:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7629:       if (nedcfield != i) {
7630:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7631:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7632:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7633:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
7634:       } else {
7635:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
7636:         ISGetIndices(pcbddc->nedclocal,&idxs);
7637:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7638:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
7639:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
7640:       }
7641:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7642:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
7643:       /* ISView(isarray[i],0); */
7644:     }
7645:     /* neumann boundaries */
7646:     if (pcbddc->NeumannBoundariesLocal) {
7647:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7648:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
7649:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7650:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7651:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7652:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7653:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
7654:       /* ISView(isarray[nisdofs],0); */
7655:     }
7656:     /* free memory */
7657:     PetscFree(tidxs);
7658:     PetscFree(tidxs2);
7659:     ISLocalToGlobalMappingDestroy(&tmap);
7660:   } else {
7661:     nis = 0;
7662:     nisdofs = 0;
7663:     nisneu = 0;
7664:     nisvert = 0;
7665:     isarray = NULL;
7666:   }
7667:   /* destroy no longer needed map */
7668:   ISLocalToGlobalMappingDestroy(&coarse_islg);

7670:   /* subassemble */
7671:   if (multilevel_allowed) {
7672:     Vec       vp[1];
7673:     PetscInt  nvecs = 0;
7674:     PetscBool reuse,reuser;

7676:     if (coarse_mat) reuse = PETSC_TRUE;
7677:     else reuse = PETSC_FALSE;
7678:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7679:     vp[0] = NULL;
7680:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7681:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
7682:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
7683:       VecSetType(vp[0],VECSTANDARD);
7684:       nvecs = 1;

7686:       if (pcbddc->divudotp) {
7687:         Mat      B,loc_divudotp;
7688:         Vec      v,p;
7689:         IS       dummy;
7690:         PetscInt np;

7692:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
7693:         MatGetSize(loc_divudotp,&np,NULL);
7694:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
7695:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
7696:         MatCreateVecs(B,&v,&p);
7697:         VecSet(p,1.);
7698:         MatMultTranspose(B,p,v);
7699:         VecDestroy(&p);
7700:         MatDestroy(&B);
7701:         VecGetArray(vp[0],&array);
7702:         VecPlaceArray(pcbddc->vec1_P,array);
7703:         VecRestoreArray(vp[0],&array);
7704:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
7705:         VecResetArray(pcbddc->vec1_P);
7706:         ISDestroy(&dummy);
7707:         VecDestroy(&v);
7708:       }
7709:     }
7710:     if (reuser) {
7711:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
7712:     } else {
7713:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
7714:     }
7715:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
7716:       PetscScalar *arraym,*arrayv;
7717:       PetscInt    nl;
7718:       VecGetLocalSize(vp[0],&nl);
7719:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
7720:       MatDenseGetArray(coarsedivudotp,&arraym);
7721:       VecGetArray(vp[0],&arrayv);
7722:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
7723:       VecRestoreArray(vp[0],&arrayv);
7724:       MatDenseRestoreArray(coarsedivudotp,&arraym);
7725:       VecDestroy(&vp[0]);
7726:     } else {
7727:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
7728:     }
7729:   } else {
7730:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
7731:   }
7732:   if (coarse_mat_is || coarse_mat) {
7733:     PetscMPIInt size;
7734:     MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
7735:     if (!multilevel_allowed) {
7736:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
7737:     } else {
7738:       Mat A;

7740:       /* if this matrix is present, it means we are not reusing the coarse matrix */
7741:       if (coarse_mat_is) {
7742:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
7743:         PetscObjectReference((PetscObject)coarse_mat_is);
7744:         coarse_mat = coarse_mat_is;
7745:       }
7746:       /* be sure we don't have MatSeqDENSE as local mat */
7747:       MatISGetLocalMat(coarse_mat,&A);
7748:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
7749:     }
7750:   }
7751:   MatDestroy(&t_coarse_mat_is);
7752:   MatDestroy(&coarse_mat_is);

7754:   /* create local to global scatters for coarse problem */
7755:   if (compute_vecs) {
7756:     PetscInt lrows;
7757:     VecDestroy(&pcbddc->coarse_vec);
7758:     if (coarse_mat) {
7759:       MatGetLocalSize(coarse_mat,&lrows,NULL);
7760:     } else {
7761:       lrows = 0;
7762:     }
7763:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
7764:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
7765:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
7766:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
7767:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
7768:   }
7769:   ISDestroy(&coarse_is);

7771:   /* set defaults for coarse KSP and PC */
7772:   if (multilevel_allowed) {
7773:     coarse_ksp_type = KSPRICHARDSON;
7774:     coarse_pc_type = PCBDDC;
7775:   } else {
7776:     coarse_ksp_type = KSPPREONLY;
7777:     coarse_pc_type = PCREDUNDANT;
7778:   }

7780:   /* print some info if requested */
7781:   if (pcbddc->dbg_flag) {
7782:     if (!multilevel_allowed) {
7783:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
7784:       if (multilevel_requested) {
7785:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
7786:       } else if (pcbddc->max_levels) {
7787:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
7788:       }
7789:       PetscViewerFlush(pcbddc->dbg_viewer);
7790:     }
7791:   }

7793:   /* communicate coarse discrete gradient */
7794:   coarseG = NULL;
7795:   if (pcbddc->nedcG && multilevel_allowed) {
7796:     MPI_Comm ccomm;
7797:     if (coarse_mat) {
7798:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
7799:     } else {
7800:       ccomm = MPI_COMM_NULL;
7801:     }
7802:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
7803:   }

7805:   /* create the coarse KSP object only once with defaults */
7806:   if (coarse_mat) {
7807:     PetscViewer dbg_viewer = NULL;
7808:     if (pcbddc->dbg_flag) {
7809:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
7810:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
7811:     }
7812:     if (!pcbddc->coarse_ksp) {
7813:       char prefix[256],str_level[16];
7814:       size_t len;

7816:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
7817:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
7818:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
7819:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
7820:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7821:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
7822:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
7823:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7824:       /* TODO is this logic correct? should check for coarse_mat type */
7825:       PCSetType(pc_temp,coarse_pc_type);
7826:       /* prefix */
7827:       PetscStrcpy(prefix,"");
7828:       PetscStrcpy(str_level,"");
7829:       if (!pcbddc->current_level) {
7830:         PetscStrcpy(prefix,((PetscObject)pc)->prefix);
7831:         PetscStrcat(prefix,"pc_bddc_coarse_");
7832:       } else {
7833:         PetscStrlen(((PetscObject)pc)->prefix,&len);
7834:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
7835:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
7836:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
7837:         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
7838:         PetscStrcat(prefix,str_level);
7839:       }
7840:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
7841:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7842:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
7843:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
7844:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
7845:       /* allow user customization */
7846:       KSPSetFromOptions(pcbddc->coarse_ksp);
7847:     }
7848:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7849:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7850:     if (nisdofs) {
7851:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
7852:       for (i=0;i<nisdofs;i++) {
7853:         ISDestroy(&isarray[i]);
7854:       }
7855:     }
7856:     if (nisneu) {
7857:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
7858:       ISDestroy(&isarray[nisdofs]);
7859:     }
7860:     if (nisvert) {
7861:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
7862:       ISDestroy(&isarray[nis-1]);
7863:     }
7864:     if (coarseG) {
7865:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
7866:     }

7868:     /* get some info after set from options */
7869:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
7870:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
7871:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
7872:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
7873:     if (isbddc && !multilevel_allowed) {
7874:       PCSetType(pc_temp,coarse_pc_type);
7875:       isbddc = PETSC_FALSE;
7876:     }
7877:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
7878:     if (multilevel_requested && !isbddc && !isnn) {
7879:       PCSetType(pc_temp,PCBDDC);
7880:       isbddc = PETSC_TRUE;
7881:       isnn   = PETSC_FALSE;
7882:     }
7883:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
7884:     if (isredundant) {
7885:       KSP inner_ksp;
7886:       PC  inner_pc;

7888:       PCRedundantGetKSP(pc_temp,&inner_ksp);
7889:       KSPGetPC(inner_ksp,&inner_pc);
7890:       PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
7891:     }

7893:     /* parameters which miss an API */
7894:     if (isbddc) {
7895:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
7896:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
7897:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
7898:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
7899:       if (pcbddc_coarse->benign_saddle_point) {
7900:         Mat                    coarsedivudotp_is;
7901:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
7902:         IS                     row,col;
7903:         const PetscInt         *gidxs;
7904:         PetscInt               n,st,M,N;

7906:         MatGetSize(coarsedivudotp,&n,NULL);
7907:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
7908:         st   = st-n;
7909:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
7910:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
7911:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
7912:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
7913:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
7914:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
7915:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
7916:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
7917:         ISGetSize(row,&M);
7918:         MatGetSize(coarse_mat,&N,NULL);
7919:         ISDestroy(&row);
7920:         ISDestroy(&col);
7921:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
7922:         MatSetType(coarsedivudotp_is,MATIS);
7923:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
7924:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
7925:         ISLocalToGlobalMappingDestroy(&rl2g);
7926:         ISLocalToGlobalMappingDestroy(&cl2g);
7927:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
7928:         MatDestroy(&coarsedivudotp);
7929:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
7930:         MatDestroy(&coarsedivudotp_is);
7931:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
7932:         if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
7933:       }
7934:     }

7936:     /* propagate symmetry info of coarse matrix */
7937:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
7938:     if (pc->pmat->symmetric_set) {
7939:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
7940:     }
7941:     if (pc->pmat->hermitian_set) {
7942:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
7943:     }
7944:     if (pc->pmat->spd_set) {
7945:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
7946:     }
7947:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
7948:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
7949:     }
7950:     /* set operators */
7951:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7952:     if (pcbddc->dbg_flag) {
7953:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
7954:     }
7955:   }
7956:   MatDestroy(&coarseG);
7957:   PetscFree(isarray);
7958: #if 0
7959:   {
7960:     PetscViewer viewer;
7961:     char filename[256];
7962:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
7963:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
7964:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
7965:     MatView(coarse_mat,viewer);
7966:     PetscViewerPopFormat(viewer);
7967:     PetscViewerDestroy(&viewer);
7968:   }
7969: #endif

7971:   if (pcbddc->coarse_ksp) {
7972:     Vec crhs,csol;

7974:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
7975:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
7976:     if (!csol) {
7977:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
7978:     }
7979:     if (!crhs) {
7980:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
7981:     }
7982:   }
7983:   MatDestroy(&coarsedivudotp);

7985:   /* compute null space for coarse solver if the benign trick has been requested */
7986:   if (pcbddc->benign_null) {

7988:     VecSet(pcbddc->vec1_P,0.);
7989:     for (i=0;i<pcbddc->benign_n;i++) {
7990:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
7991:     }
7992:     VecAssemblyBegin(pcbddc->vec1_P);
7993:     VecAssemblyEnd(pcbddc->vec1_P);
7994:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
7995:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
7996:     if (coarse_mat) {
7997:       Vec         nullv;
7998:       PetscScalar *array,*array2;
7999:       PetscInt    nl;

8001:       MatCreateVecs(coarse_mat,&nullv,NULL);
8002:       VecGetLocalSize(nullv,&nl);
8003:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8004:       VecGetArray(nullv,&array2);
8005:       PetscMemcpy(array2,array,nl*sizeof(*array));
8006:       VecRestoreArray(nullv,&array2);
8007:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8008:       VecNormalize(nullv,NULL);
8009:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8010:       VecDestroy(&nullv);
8011:     }
8012:   }

8014:   if (pcbddc->coarse_ksp) {
8015:     PetscBool ispreonly;

8017:     if (CoarseNullSpace) {
8018:       PetscBool isnull;
8019:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8020:       if (isnull) {
8021:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8022:       }
8023:       /* TODO: add local nullspaces (if any) */
8024:     }
8025:     /* setup coarse ksp */
8026:     KSPSetUp(pcbddc->coarse_ksp);
8027:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8028:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8029:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8030:       KSP       check_ksp;
8031:       KSPType   check_ksp_type;
8032:       PC        check_pc;
8033:       Vec       check_vec,coarse_vec;
8034:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8035:       PetscInt  its;
8036:       PetscBool compute_eigs;
8037:       PetscReal *eigs_r,*eigs_c;
8038:       PetscInt  neigs;
8039:       const char *prefix;

8041:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8042:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8043:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8044:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8045:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8046:       /* prevent from setup unneeded object */
8047:       KSPGetPC(check_ksp,&check_pc);
8048:       PCSetType(check_pc,PCNONE);
8049:       if (ispreonly) {
8050:         check_ksp_type = KSPPREONLY;
8051:         compute_eigs = PETSC_FALSE;
8052:       } else {
8053:         check_ksp_type = KSPGMRES;
8054:         compute_eigs = PETSC_TRUE;
8055:       }
8056:       KSPSetType(check_ksp,check_ksp_type);
8057:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8058:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8059:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8060:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8061:       KSPSetOptionsPrefix(check_ksp,prefix);
8062:       KSPAppendOptionsPrefix(check_ksp,"check_");
8063:       KSPSetFromOptions(check_ksp);
8064:       KSPSetUp(check_ksp);
8065:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8066:       KSPSetPC(check_ksp,check_pc);
8067:       /* create random vec */
8068:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8069:       VecSetRandom(check_vec,NULL);
8070:       MatMult(coarse_mat,check_vec,coarse_vec);
8071:       /* solve coarse problem */
8072:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8073:       /* set eigenvalue estimation if preonly has not been requested */
8074:       if (compute_eigs) {
8075:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8076:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8077:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8078:         if (neigs) {
8079:           lambda_max = eigs_r[neigs-1];
8080:           lambda_min = eigs_r[0];
8081:           if (pcbddc->use_coarse_estimates) {
8082:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8083:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8084:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8085:             }
8086:           }
8087:         }
8088:       }

8090:       /* check coarse problem residual error */
8091:       if (pcbddc->dbg_flag) {
8092:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8093:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8094:         VecAXPY(check_vec,-1.0,coarse_vec);
8095:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8096:         MatMult(coarse_mat,check_vec,coarse_vec);
8097:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8098:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8099:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8100:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8101:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8102:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8103:         if (CoarseNullSpace) {
8104:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8105:         }
8106:         if (compute_eigs) {
8107:           PetscReal          lambda_max_s,lambda_min_s;
8108:           KSPConvergedReason reason;
8109:           KSPGetType(check_ksp,&check_ksp_type);
8110:           KSPGetIterationNumber(check_ksp,&its);
8111:           KSPGetConvergedReason(check_ksp,&reason);
8112:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8113:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8114:           for (i=0;i<neigs;i++) {
8115:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8116:           }
8117:         }
8118:         PetscViewerFlush(dbg_viewer);
8119:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8120:       }
8121:       VecDestroy(&check_vec);
8122:       VecDestroy(&coarse_vec);
8123:       KSPDestroy(&check_ksp);
8124:       if (compute_eigs) {
8125:         PetscFree(eigs_r);
8126:         PetscFree(eigs_c);
8127:       }
8128:     }
8129:   }
8130:   MatNullSpaceDestroy(&CoarseNullSpace);
8131:   /* print additional info */
8132:   if (pcbddc->dbg_flag) {
8133:     /* waits until all processes reaches this point */
8134:     PetscBarrier((PetscObject)pc);
8135:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8136:     PetscViewerFlush(pcbddc->dbg_viewer);
8137:   }

8139:   /* free memory */
8140:   MatDestroy(&coarse_mat);
8141:   return(0);
8142: }

8144: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8145: {
8146:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8147:   PC_IS*         pcis = (PC_IS*)pc->data;
8148:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8149:   IS             subset,subset_mult,subset_n;
8150:   PetscInt       local_size,coarse_size=0;
8151:   PetscInt       *local_primal_indices=NULL;
8152:   const PetscInt *t_local_primal_indices;

8156:   /* Compute global number of coarse dofs */
8157:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8158:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8159:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8160:   ISDestroy(&subset_n);
8161:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8162:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8163:   ISDestroy(&subset);
8164:   ISDestroy(&subset_mult);
8165:   ISGetLocalSize(subset_n,&local_size);
8166:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8167:   PetscMalloc1(local_size,&local_primal_indices);
8168:   ISGetIndices(subset_n,&t_local_primal_indices);
8169:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8170:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8171:   ISDestroy(&subset_n);

8173:   /* check numbering */
8174:   if (pcbddc->dbg_flag) {
8175:     PetscScalar coarsesum,*array,*array2;
8176:     PetscInt    i;
8177:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8179:     PetscViewerFlush(pcbddc->dbg_viewer);
8180:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8181:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8182:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8183:     /* counter */
8184:     VecSet(pcis->vec1_global,0.0);
8185:     VecSet(pcis->vec1_N,1.0);
8186:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8187:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8188:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8189:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8190:     VecSet(pcis->vec1_N,0.0);
8191:     for (i=0;i<pcbddc->local_primal_size;i++) {
8192:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8193:     }
8194:     VecAssemblyBegin(pcis->vec1_N);
8195:     VecAssemblyEnd(pcis->vec1_N);
8196:     VecSet(pcis->vec1_global,0.0);
8197:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8198:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8199:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8200:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8201:     VecGetArray(pcis->vec1_N,&array);
8202:     VecGetArray(pcis->vec2_N,&array2);
8203:     for (i=0;i<pcis->n;i++) {
8204:       if (array[i] != 0.0 && array[i] != array2[i]) {
8205:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8206:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8207:         set_error = PETSC_TRUE;
8208:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8209:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);
8210:       }
8211:     }
8212:     VecRestoreArray(pcis->vec2_N,&array2);
8213:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8214:     PetscViewerFlush(pcbddc->dbg_viewer);
8215:     for (i=0;i<pcis->n;i++) {
8216:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8217:     }
8218:     VecRestoreArray(pcis->vec1_N,&array);
8219:     VecSet(pcis->vec1_global,0.0);
8220:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8221:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8222:     VecSum(pcis->vec1_global,&coarsesum);
8223:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8224:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8225:       PetscInt *gidxs;

8227:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8228:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8229:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8230:       PetscViewerFlush(pcbddc->dbg_viewer);
8231:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8232:       for (i=0;i<pcbddc->local_primal_size;i++) {
8233:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8234:       }
8235:       PetscViewerFlush(pcbddc->dbg_viewer);
8236:       PetscFree(gidxs);
8237:     }
8238:     PetscViewerFlush(pcbddc->dbg_viewer);
8239:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8240:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8241:   }
8242:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8243:   /* get back data */
8244:   *coarse_size_n = coarse_size;
8245:   *local_primal_indices_n = local_primal_indices;
8246:   return(0);
8247: }

8249: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8250: {
8251:   IS             localis_t;
8252:   PetscInt       i,lsize,*idxs,n;
8253:   PetscScalar    *vals;

8257:   /* get indices in local ordering exploiting local to global map */
8258:   ISGetLocalSize(globalis,&lsize);
8259:   PetscMalloc1(lsize,&vals);
8260:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8261:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8262:   VecSet(gwork,0.0);
8263:   VecSet(lwork,0.0);
8264:   if (idxs) { /* multilevel guard */
8265:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8266:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8267:   }
8268:   VecAssemblyBegin(gwork);
8269:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8270:   PetscFree(vals);
8271:   VecAssemblyEnd(gwork);
8272:   /* now compute set in local ordering */
8273:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8274:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8275:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8276:   VecGetSize(lwork,&n);
8277:   for (i=0,lsize=0;i<n;i++) {
8278:     if (PetscRealPart(vals[i]) > 0.5) {
8279:       lsize++;
8280:     }
8281:   }
8282:   PetscMalloc1(lsize,&idxs);
8283:   for (i=0,lsize=0;i<n;i++) {
8284:     if (PetscRealPart(vals[i]) > 0.5) {
8285:       idxs[lsize++] = i;
8286:     }
8287:   }
8288:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8289:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8290:   *localis = localis_t;
8291:   return(0);
8292: }

8294: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8295: {
8296:   PC_IS               *pcis=(PC_IS*)pc->data;
8297:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8298:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8299:   Mat                 S_j;
8300:   PetscInt            *used_xadj,*used_adjncy;
8301:   PetscBool           free_used_adj;
8302:   PetscErrorCode      ierr;

8305:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8306:   free_used_adj = PETSC_FALSE;
8307:   if (pcbddc->sub_schurs_layers == -1) {
8308:     used_xadj = NULL;
8309:     used_adjncy = NULL;
8310:   } else {
8311:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8312:       used_xadj = pcbddc->mat_graph->xadj;
8313:       used_adjncy = pcbddc->mat_graph->adjncy;
8314:     } else if (pcbddc->computed_rowadj) {
8315:       used_xadj = pcbddc->mat_graph->xadj;
8316:       used_adjncy = pcbddc->mat_graph->adjncy;
8317:     } else {
8318:       PetscBool      flg_row=PETSC_FALSE;
8319:       const PetscInt *xadj,*adjncy;
8320:       PetscInt       nvtxs;

8322:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8323:       if (flg_row) {
8324:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8325:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8326:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8327:         free_used_adj = PETSC_TRUE;
8328:       } else {
8329:         pcbddc->sub_schurs_layers = -1;
8330:         used_xadj = NULL;
8331:         used_adjncy = NULL;
8332:       }
8333:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8334:     }
8335:   }

8337:   /* setup sub_schurs data */
8338:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8339:   if (!sub_schurs->schur_explicit) {
8340:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8341:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8342:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8343:   } else {
8344:     Mat       change = NULL;
8345:     Vec       scaling = NULL;
8346:     IS        change_primal = NULL, iP;
8347:     PetscInt  benign_n;
8348:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8349:     PetscBool isseqaij,need_change = PETSC_FALSE;
8350:     PetscBool discrete_harmonic = PETSC_FALSE;

8352:     if (!pcbddc->use_vertices && reuse_solvers) {
8353:       PetscInt n_vertices;

8355:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8356:       reuse_solvers = (PetscBool)!n_vertices;
8357:     }
8358:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8359:     if (!isseqaij) {
8360:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8361:       if (matis->A == pcbddc->local_mat) {
8362:         MatDestroy(&pcbddc->local_mat);
8363:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8364:       } else {
8365:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8366:       }
8367:     }
8368:     if (!pcbddc->benign_change_explicit) {
8369:       benign_n = pcbddc->benign_n;
8370:     } else {
8371:       benign_n = 0;
8372:     }
8373:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8374:        We need a global reduction to avoid possible deadlocks.
8375:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8376:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8377:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8378:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8379:       need_change = (PetscBool)(!need_change);
8380:     }
8381:     /* If the user defines additional constraints, we import them here.
8382:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8383:     if (need_change) {
8384:       PC_IS   *pcisf;
8385:       PC_BDDC *pcbddcf;
8386:       PC      pcf;

8388:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
8389:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8390:       PCSetOperators(pcf,pc->mat,pc->pmat);
8391:       PCSetType(pcf,PCBDDC);

8393:       /* hacks */
8394:       pcisf                        = (PC_IS*)pcf->data;
8395:       pcisf->is_B_local            = pcis->is_B_local;
8396:       pcisf->vec1_N                = pcis->vec1_N;
8397:       pcisf->BtoNmap               = pcis->BtoNmap;
8398:       pcisf->n                     = pcis->n;
8399:       pcisf->n_B                   = pcis->n_B;
8400:       pcbddcf                      = (PC_BDDC*)pcf->data;
8401:       PetscFree(pcbddcf->mat_graph);
8402:       pcbddcf->mat_graph           = pcbddc->mat_graph;
8403:       pcbddcf->use_faces           = PETSC_TRUE;
8404:       pcbddcf->use_change_of_basis = PETSC_TRUE;
8405:       pcbddcf->use_change_on_faces = PETSC_TRUE;
8406:       pcbddcf->use_qr_single       = PETSC_TRUE;
8407:       pcbddcf->fake_change         = PETSC_TRUE;

8409:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8410:       PCBDDCConstraintsSetUp(pcf);
8411:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
8412:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
8413:       change = pcbddcf->ConstraintMatrix;
8414:       pcbddcf->ConstraintMatrix = NULL;

8416:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8417:       PetscFree(pcbddcf->sub_schurs);
8418:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8419:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8420:       PetscFree(pcbddcf->primal_indices_local_idxs);
8421:       PetscFree(pcbddcf->onearnullvecs_state);
8422:       PetscFree(pcf->data);
8423:       pcf->ops->destroy = NULL;
8424:       pcf->ops->reset   = NULL;
8425:       PCDestroy(&pcf);
8426:     }
8427:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

8429:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8430:     if (iP) {
8431:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8432:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8433:       PetscOptionsEnd();
8434:     }
8435:     if (discrete_harmonic) {
8436:       Mat A;
8437:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8438:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8439:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8440:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8441:       MatDestroy(&A);
8442:     } else {
8443:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8444:     }
8445:     MatDestroy(&change);
8446:     ISDestroy(&change_primal);
8447:   }
8448:   MatDestroy(&S_j);

8450:   /* free adjacency */
8451:   if (free_used_adj) {
8452:     PetscFree2(used_xadj,used_adjncy);
8453:   }
8454:   return(0);
8455: }

8457: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8458: {
8459:   PC_IS               *pcis=(PC_IS*)pc->data;
8460:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8461:   PCBDDCGraph         graph;
8462:   PetscErrorCode      ierr;

8465:   /* attach interface graph for determining subsets */
8466:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8467:     IS       verticesIS,verticescomm;
8468:     PetscInt vsize,*idxs;

8470:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8471:     ISGetSize(verticesIS,&vsize);
8472:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8473:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8474:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8475:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8476:     PCBDDCGraphCreate(&graph);
8477:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8478:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8479:     ISDestroy(&verticescomm);
8480:     PCBDDCGraphComputeConnectedComponents(graph);
8481:   } else {
8482:     graph = pcbddc->mat_graph;
8483:   }
8484:   /* print some info */
8485:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8486:     IS       vertices;
8487:     PetscInt nv,nedges,nfaces;
8488:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8489:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8490:     ISGetSize(vertices,&nv);
8491:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8492:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
8493:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8494:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8495:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8496:     PetscViewerFlush(pcbddc->dbg_viewer);
8497:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8498:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8499:   }

8501:   /* sub_schurs init */
8502:   if (!pcbddc->sub_schurs) {
8503:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8504:   }
8505:   PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
8506:   pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix;

8508:   /* free graph struct */
8509:   if (pcbddc->sub_schurs_rebuild) {
8510:     PCBDDCGraphDestroy(&graph);
8511:   }
8512:   return(0);
8513: }

8515: PetscErrorCode PCBDDCCheckOperator(PC pc)
8516: {
8517:   PC_IS               *pcis=(PC_IS*)pc->data;
8518:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8519:   PetscErrorCode      ierr;

8522:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8523:     IS             zerodiag = NULL;
8524:     Mat            S_j,B0_B=NULL;
8525:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
8526:     PetscScalar    *p0_check,*array,*array2;
8527:     PetscReal      norm;
8528:     PetscInt       i;

8530:     /* B0 and B0_B */
8531:     if (zerodiag) {
8532:       IS       dummy;

8534:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8535:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8536:       MatCreateVecs(B0_B,NULL,&dummy_vec);
8537:       ISDestroy(&dummy);
8538:     }
8539:     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8540:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8541:     VecSet(pcbddc->vec1_P,1.0);
8542:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8543:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8544:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8545:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8546:     VecReciprocal(vec_scale_P);
8547:     /* S_j */
8548:     MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8549:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

8551:     /* mimic vector in \widetilde{W}_\Gamma */
8552:     VecSetRandom(pcis->vec1_N,NULL);
8553:     /* continuous in primal space */
8554:     VecSetRandom(pcbddc->coarse_vec,NULL);
8555:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8556:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8557:     VecGetArray(pcbddc->vec1_P,&array);
8558:     PetscCalloc1(pcbddc->benign_n,&p0_check);
8559:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8560:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8561:     VecRestoreArray(pcbddc->vec1_P,&array);
8562:     VecAssemblyBegin(pcis->vec1_N);
8563:     VecAssemblyEnd(pcis->vec1_N);
8564:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8565:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8566:     VecDuplicate(pcis->vec2_B,&vec_check_B);
8567:     VecCopy(pcis->vec2_B,vec_check_B);

8569:     /* assemble rhs for coarse problem */
8570:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8571:     /* local with Schur */
8572:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8573:     if (zerodiag) {
8574:       VecGetArray(dummy_vec,&array);
8575:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8576:       VecRestoreArray(dummy_vec,&array);
8577:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8578:     }
8579:     /* sum on primal nodes the local contributions */
8580:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8581:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8582:     VecGetArray(pcis->vec1_N,&array);
8583:     VecGetArray(pcbddc->vec1_P,&array2);
8584:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8585:     VecRestoreArray(pcbddc->vec1_P,&array2);
8586:     VecRestoreArray(pcis->vec1_N,&array);
8587:     VecSet(pcbddc->coarse_vec,0.);
8588:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8589:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8590:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8591:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8592:     VecGetArray(pcbddc->vec1_P,&array);
8593:     /* scale primal nodes (BDDC sums contibutions) */
8594:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8595:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8596:     VecRestoreArray(pcbddc->vec1_P,&array);
8597:     VecAssemblyBegin(pcis->vec1_N);
8598:     VecAssemblyEnd(pcis->vec1_N);
8599:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8600:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8601:     /* global: \widetilde{B0}_B w_\Gamma */
8602:     if (zerodiag) {
8603:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
8604:       VecGetArray(dummy_vec,&array);
8605:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8606:       VecRestoreArray(dummy_vec,&array);
8607:     }
8608:     /* BDDC */
8609:     VecSet(pcis->vec1_D,0.);
8610:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

8612:     VecCopy(pcis->vec1_B,pcis->vec2_B);
8613:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8614:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8615:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8616:     for (i=0;i<pcbddc->benign_n;i++) {
8617:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8618:     }
8619:     PetscFree(p0_check);
8620:     VecDestroy(&vec_scale_P);
8621:     VecDestroy(&vec_check_B);
8622:     VecDestroy(&dummy_vec);
8623:     MatDestroy(&S_j);
8624:     MatDestroy(&B0_B);
8625:   }
8626:   return(0);
8627: }

8629:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
8630: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8631: {
8632:   Mat            At;
8633:   IS             rows;
8634:   PetscInt       rst,ren;
8636:   PetscLayout    rmap;

8639:   rst = ren = 0;
8640:   if (ccomm != MPI_COMM_NULL) {
8641:     PetscLayoutCreate(ccomm,&rmap);
8642:     PetscLayoutSetSize(rmap,A->rmap->N);
8643:     PetscLayoutSetBlockSize(rmap,1);
8644:     PetscLayoutSetUp(rmap);
8645:     PetscLayoutGetRange(rmap,&rst,&ren);
8646:   }
8647:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
8648:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
8649:   ISDestroy(&rows);

8651:   if (ccomm != MPI_COMM_NULL) {
8652:     Mat_MPIAIJ *a,*b;
8653:     IS         from,to;
8654:     Vec        gvec;
8655:     PetscInt   lsize;

8657:     MatCreate(ccomm,B);
8658:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
8659:     MatSetType(*B,MATAIJ);
8660:     PetscLayoutDestroy(&((*B)->rmap));
8661:     PetscLayoutSetUp((*B)->cmap);
8662:     a    = (Mat_MPIAIJ*)At->data;
8663:     b    = (Mat_MPIAIJ*)(*B)->data;
8664:     MPI_Comm_size(ccomm,&b->size);
8665:     MPI_Comm_rank(ccomm,&b->rank);
8666:     PetscObjectReference((PetscObject)a->A);
8667:     PetscObjectReference((PetscObject)a->B);
8668:     b->A = a->A;
8669:     b->B = a->B;

8671:     b->donotstash      = a->donotstash;
8672:     b->roworiented     = a->roworiented;
8673:     b->rowindices      = 0;
8674:     b->rowvalues       = 0;
8675:     b->getrowactive    = PETSC_FALSE;

8677:     (*B)->rmap         = rmap;
8678:     (*B)->factortype   = A->factortype;
8679:     (*B)->assembled    = PETSC_TRUE;
8680:     (*B)->insertmode   = NOT_SET_VALUES;
8681:     (*B)->preallocated = PETSC_TRUE;

8683:     if (a->colmap) {
8684: #if defined(PETSC_USE_CTABLE)
8685:       PetscTableCreateCopy(a->colmap,&b->colmap);
8686: #else
8687:       PetscMalloc1(At->cmap->N,&b->colmap);
8688:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
8689:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
8690: #endif
8691:     } else b->colmap = 0;
8692:     if (a->garray) {
8693:       PetscInt len;
8694:       len  = a->B->cmap->n;
8695:       PetscMalloc1(len+1,&b->garray);
8696:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
8697:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
8698:     } else b->garray = 0;

8700:     PetscObjectReference((PetscObject)a->lvec);
8701:     b->lvec = a->lvec;
8702:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

8704:     /* cannot use VecScatterCopy */
8705:     VecGetLocalSize(b->lvec,&lsize);
8706:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
8707:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
8708:     MatCreateVecs(*B,&gvec,NULL);
8709:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
8710:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
8711:     ISDestroy(&from);
8712:     ISDestroy(&to);
8713:     VecDestroy(&gvec);
8714:   }
8715:   MatDestroy(&At);
8716:   return(0);
8717: }