Actual source code: bddcprivate.c

petsc-3.9.3 2018-07-02
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>
  5:  #include <petscdmplex.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/sfimpl.h>
  8:  #include <petsc/private/dmpleximpl.h>
  9:  #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscScalar    *uwork,*data,*U, ds = 0.;
 19:   PetscReal      *sing;
 20:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 21:   PetscInt       ulw,i,nr,nc,n;

 25: #if defined(PETSC_MISSING_LAPACK_GESVD)
 26:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 27: #else
 28:   MatGetSize(A,&nr,&nc);
 29:   if (!nr || !nc) return(0);

 31:   /* workspace */
 32:   if (!work) {
 33:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 34:     PetscMalloc1(ulw,&uwork);
 35:   } else {
 36:     ulw   = lw;
 37:     uwork = work;
 38:   }
 39:   n = PetscMin(nr,nc);
 40:   if (!rwork) {
 41:     PetscMalloc1(n,&sing);
 42:   } else {
 43:     sing = rwork;
 44:   }

 46:   /* SVD */
 47:   PetscMalloc1(nr*nr,&U);
 48:   PetscBLASIntCast(nr,&bM);
 49:   PetscBLASIntCast(nc,&bN);
 50:   PetscBLASIntCast(ulw,&lwork);
 51:   MatDenseGetArray(A,&data);
 52:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54:   PetscFPTrapPop();
 55:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 56:   MatDenseRestoreArray(A,&data);
 57:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 58:   if (!rwork) {
 59:     PetscFree(sing);
 60:   }
 61:   if (!work) {
 62:     PetscFree(uwork);
 63:   }
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 67:     MatDenseGetArray(*B,&data);
 68:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 71:     MatDenseGetArray(*B,&data);
 72:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 73:   }
 74:   MatDenseRestoreArray(*B,&data);
 75:   PetscFree(U);
 76: #endif
 77: #else /* PETSC_USE_COMPLEX */
 79:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 80: #endif
 81:   return(0);
 82: }

 84: /* TODO REMOVE */
 85: #if defined(PRINT_GDET)
 86: static int inc = 0;
 87: static int lev = 0;
 88: #endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat            GEc;
121:     PetscScalar    *vals,v;

123:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125:     MatDenseGetArray(GEd,&vals);
126:     /* v    = PetscAbsScalar(vals[0]) */;
127:     v    = 1.;
128:     cvals[0] = vals[0]/v;
129:     cvals[1] = vals[1]/v;
130:     MatDenseRestoreArray(GEd,&vals);
131:     MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133:     {
134:       PetscViewer viewer;
135:       char filename[256];
136:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139:       PetscObjectSetName((PetscObject)GEc,"GEc");
140:       MatView(GEc,viewer);
141:       PetscObjectSetName((PetscObject)(*GKins),"GK");
142:       MatView(*GKins,viewer);
143:       PetscObjectSetName((PetscObject)GEd,"Gproj");
144:       MatView(GEd,viewer);
145:       PetscViewerDestroy(&viewer);
146:     }
147: #endif
148:     MatDestroy(&GEd);
149:     MatDestroy(&GEc);
150:   }

152:   return(0);
153: }

155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
158:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
159:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160:   Vec                    tvec;
161:   PetscSF                sfv;
162:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163:   MPI_Comm               comm;
164:   IS                     lned,primals,allprimals,nedfieldlocal;
165:   IS                     *eedges,*extrows,*extcols,*alleedges;
166:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167:   PetscScalar            *vals,*work;
168:   PetscReal              *rwork;
169:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
170:   PetscInt               ne,nv,Lv,order,n,field;
171:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
172:   PetscInt               i,j,extmem,cum,maxsize,nee;
173:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174:   PetscInt               *sfvleaves,*sfvroots;
175:   PetscInt               *corners,*cedges;
176:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178:   PetscInt               *emarks;
179: #endif
180:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181:   PetscErrorCode         ierr;

184:   /* If the discrete gradient is defined for a subset of dofs and global is true,
185:      it assumes G is given in global ordering for all the dofs.
186:      Otherwise, the ordering is global for the Nedelec field */
187:   order      = pcbddc->nedorder;
188:   conforming = pcbddc->conforming;
189:   field      = pcbddc->nedfield;
190:   global     = pcbddc->nedglobal;
191:   setprimal  = PETSC_FALSE;
192:   print      = PETSC_FALSE;
193:   singular   = PETSC_FALSE;

195:   /* Command line customization */
196:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200:   /* print debug info TODO: to be removed */
201:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202:   PetscOptionsEnd();

204:   /* Return if there are no edges in the decomposition and the problem is not singular */
205:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206:   ISLocalToGlobalMappingGetSize(al2g,&n);
207:   PetscObjectGetComm((PetscObject)pc,&comm);
208:   if (!singular) {
209:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210:     lrc[0] = PETSC_FALSE;
211:     for (i=0;i<n;i++) {
212:       if (PetscRealPart(vals[i]) > 2.) {
213:         lrc[0] = PETSC_TRUE;
214:         break;
215:       }
216:     }
217:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219:     if (!lrc[1]) return(0);
220:   }

222:   /* Get Nedelec field */
223:   MatISSetUpSF(pc->pmat);
224:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
225:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
226:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
228:     ISGetLocalSize(nedfieldlocal,&ne);
229:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230:     ne            = n;
231:     nedfieldlocal = NULL;
232:     global        = PETSC_TRUE;
233:   } else if (field == PETSC_DECIDE) {
234:     PetscInt rst,ren,*idx;

236:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
237:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
238:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239:     for (i=rst;i<ren;i++) {
240:       PetscInt nc;

242:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245:     }
246:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248:     PetscMalloc1(n,&idx);
249:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251:   } else {
252:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253:   }

255:   /* Sanity checks */
256:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);

260:   /* Just set primal dofs and return */
261:   if (setprimal) {
262:     IS       enedfieldlocal;
263:     PetscInt *eidxs;

265:     PetscMalloc1(ne,&eidxs);
266:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267:     if (nedfieldlocal) {
268:       ISGetIndices(nedfieldlocal,&idxs);
269:       for (i=0,cum=0;i<ne;i++) {
270:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
271:           eidxs[cum++] = idxs[i];
272:         }
273:       }
274:       ISRestoreIndices(nedfieldlocal,&idxs);
275:     } else {
276:       for (i=0,cum=0;i<ne;i++) {
277:         if (PetscRealPart(vals[i]) > 2.) {
278:           eidxs[cum++] = i;
279:         }
280:       }
281:     }
282:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285:     PetscFree(eidxs);
286:     ISDestroy(&nedfieldlocal);
287:     ISDestroy(&enedfieldlocal);
288:     return(0);
289:   }

291:   /* Compute some l2g maps */
292:   if (nedfieldlocal) {
293:     IS is;

295:     /* need to map from the local Nedelec field to local numbering */
296:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299:     ISLocalToGlobalMappingCreateIS(is,&al2g);
300:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301:     if (global) {
302:       PetscObjectReference((PetscObject)al2g);
303:       el2g = al2g;
304:     } else {
305:       IS gis;

307:       ISRenumber(is,NULL,NULL,&gis);
308:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
309:       ISDestroy(&gis);
310:     }
311:     ISDestroy(&is);
312:   } else {
313:     /* restore default */
314:     pcbddc->nedfield = -1;
315:     /* one ref for the destruction of al2g, one for el2g */
316:     PetscObjectReference((PetscObject)al2g);
317:     PetscObjectReference((PetscObject)al2g);
318:     el2g = al2g;
319:     fl2g = NULL;
320:   }

322:   /* Start communication to drop connections for interior edges (for cc analysis only) */
323:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
324:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
325:   if (nedfieldlocal) {
326:     ISGetIndices(nedfieldlocal,&idxs);
327:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328:     ISRestoreIndices(nedfieldlocal,&idxs);
329:   } else {
330:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331:   }
332:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

335:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338:     if (global) {
339:       PetscInt rst;

341:       MatGetOwnershipRange(G,&rst,NULL);
342:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343:         if (matis->sf_rootdata[i] < 2) {
344:           matis->sf_rootdata[cum++] = i + rst;
345:         }
346:       }
347:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349:     } else {
350:       PetscInt *tbz;

352:       PetscMalloc1(ne,&tbz);
353:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355:       ISGetIndices(nedfieldlocal,&idxs);
356:       for (i=0,cum=0;i<ne;i++)
357:         if (matis->sf_leafdata[idxs[i]] == 1)
358:           tbz[cum++] = i;
359:       ISRestoreIndices(nedfieldlocal,&idxs);
360:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362:       PetscFree(tbz);
363:     }
364:   } else { /* we need the entire G to infer the nullspace */
365:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
366:     G    = pcbddc->discretegradient;
367:   }

369:   /* Extract subdomain relevant rows of G */
370:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374:   ISDestroy(&lned);
375:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376:   MatDestroy(&lGall);
377:   MatISGetLocalMat(lGis,&lG);

379:   /* SF for nodal dofs communications */
380:   MatGetLocalSize(G,NULL,&Lv);
381:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382:   PetscObjectReference((PetscObject)vl2g);
383:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
384:   PetscSFCreate(comm,&sfv);
385:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388:   i    = singular ? 2 : 1;
389:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

391:   /* Destroy temporary G created in MATIS format and modified G */
392:   PetscObjectReference((PetscObject)lG);
393:   MatDestroy(&lGis);
394:   MatDestroy(&G);

396:   if (print) {
397:     PetscObjectSetName((PetscObject)lG,"initial_lG");
398:     MatView(lG,NULL);
399:   }

401:   /* Save lG for values insertion in change of basis */
402:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

404:   /* Analyze the edge-nodes connections (duplicate lG) */
405:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407:   PetscBTCreate(nv,&btv);
408:   PetscBTCreate(ne,&bte);
409:   PetscBTCreate(ne,&btb);
410:   PetscBTCreate(ne,&btbd);
411:   PetscBTCreate(nv,&btvcand);
412:   /* need to import the boundary specification to ensure the
413:      proper detection of coarse edges' endpoints */
414:   if (pcbddc->DirichletBoundariesLocal) {
415:     IS is;

417:     if (fl2g) {
418:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419:     } else {
420:       is = pcbddc->DirichletBoundariesLocal;
421:     }
422:     ISGetLocalSize(is,&cum);
423:     ISGetIndices(is,&idxs);
424:     for (i=0;i<cum;i++) {
425:       if (idxs[i] >= 0) {
426:         PetscBTSet(btb,idxs[i]);
427:         PetscBTSet(btbd,idxs[i]);
428:       }
429:     }
430:     ISRestoreIndices(is,&idxs);
431:     if (fl2g) {
432:       ISDestroy(&is);
433:     }
434:   }
435:   if (pcbddc->NeumannBoundariesLocal) {
436:     IS is;

438:     if (fl2g) {
439:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440:     } else {
441:       is = pcbddc->NeumannBoundariesLocal;
442:     }
443:     ISGetLocalSize(is,&cum);
444:     ISGetIndices(is,&idxs);
445:     for (i=0;i<cum;i++) {
446:       if (idxs[i] >= 0) {
447:         PetscBTSet(btb,idxs[i]);
448:       }
449:     }
450:     ISRestoreIndices(is,&idxs);
451:     if (fl2g) {
452:       ISDestroy(&is);
453:     }
454:   }

456:   /* Count neighs per dof */
457:   PetscCalloc1(ne,&ecount);
458:   PetscMalloc1(ne,&eneighs);
459:   ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
460:   for (i=1,cum=0;i<n_neigh;i++) {
461:     cum += n_shared[i];
462:     for (j=0;j<n_shared[i];j++) {
463:       ecount[shared[i][j]]++;
464:     }
465:   }
466:   if (ne) {
467:     PetscMalloc1(cum,&eneighs[0]);
468:   }
469:   for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
470:   PetscMemzero(ecount,ne*sizeof(PetscInt));
471:   for (i=1;i<n_neigh;i++) {
472:     for (j=0;j<n_shared[i];j++) {
473:       PetscInt k = shared[i][j];
474:       eneighs[k][ecount[k]] = neigh[i];
475:       ecount[k]++;
476:     }
477:   }
478:   for (i=0;i<ne;i++) {
479:     PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
480:   }
481:   ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
482:   PetscCalloc1(nv,&vcount);
483:   PetscMalloc1(nv,&vneighs);
484:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
485:   for (i=1,cum=0;i<n_neigh;i++) {
486:     cum += n_shared[i];
487:     for (j=0;j<n_shared[i];j++) {
488:       vcount[shared[i][j]]++;
489:     }
490:   }
491:   if (nv) {
492:     PetscMalloc1(cum,&vneighs[0]);
493:   }
494:   for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
495:   PetscMemzero(vcount,nv*sizeof(PetscInt));
496:   for (i=1;i<n_neigh;i++) {
497:     for (j=0;j<n_shared[i];j++) {
498:       PetscInt k = shared[i][j];
499:       vneighs[k][vcount[k]] = neigh[i];
500:       vcount[k]++;
501:     }
502:   }
503:   for (i=0;i<nv;i++) {
504:     PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
505:   }
506:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

508:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
509:      for proper detection of coarse edges' endpoints */
510:   PetscBTCreate(ne,&btee);
511:   for (i=0;i<ne;i++) {
512:     if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
513:       PetscBTSet(btee,i);
514:     }
515:   }
516:   PetscMalloc1(ne,&marks);
517:   if (!conforming) {
518:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
519:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
520:   }
521:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
522:   MatSeqAIJGetArray(lGe,&vals);
523:   cum  = 0;
524:   for (i=0;i<ne;i++) {
525:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
526:     if (!PetscBTLookup(btee,i)) {
527:       marks[cum++] = i;
528:       continue;
529:     }
530:     /* set badly connected edge dofs as primal */
531:     if (!conforming) {
532:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
533:         marks[cum++] = i;
534:         PetscBTSet(bte,i);
535:         for (j=ii[i];j<ii[i+1];j++) {
536:           PetscBTSet(btv,jj[j]);
537:         }
538:       } else {
539:         /* every edge dofs should be connected trough a certain number of nodal dofs
540:            to other edge dofs belonging to coarse edges
541:            - at most 2 endpoints
542:            - order-1 interior nodal dofs
543:            - no undefined nodal dofs (nconn < order)
544:         */
545:         PetscInt ends = 0,ints = 0, undef = 0;
546:         for (j=ii[i];j<ii[i+1];j++) {
547:           PetscInt v = jj[j],k;
548:           PetscInt nconn = iit[v+1]-iit[v];
549:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
550:           if (nconn > order) ends++;
551:           else if (nconn == order) ints++;
552:           else undef++;
553:         }
554:         if (undef || ends > 2 || ints != order -1) {
555:           marks[cum++] = i;
556:           PetscBTSet(bte,i);
557:           for (j=ii[i];j<ii[i+1];j++) {
558:             PetscBTSet(btv,jj[j]);
559:           }
560:         }
561:       }
562:     }
563:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
564:     if (!order && ii[i+1] != ii[i]) {
565:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
566:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
567:     }
568:   }
569:   PetscBTDestroy(&btee);
570:   MatSeqAIJRestoreArray(lGe,&vals);
571:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
572:   if (!conforming) {
573:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
574:     MatDestroy(&lGt);
575:   }
576:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

578:   /* identify splitpoints and corner candidates */
579:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
580:   if (print) {
581:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
582:     MatView(lGe,NULL);
583:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
584:     MatView(lGt,NULL);
585:   }
586:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
587:   MatSeqAIJGetArray(lGt,&vals);
588:   for (i=0;i<nv;i++) {
589:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
590:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
591:     if (!order) { /* variable order */
592:       PetscReal vorder = 0.;

594:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
595:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
596:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
597:       ord  = 1;
598:     }
599: #if defined(PETSC_USE_DEBUG)
600:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
601: #endif
602:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
603:       if (PetscBTLookup(btbd,jj[j])) {
604:         bdir = PETSC_TRUE;
605:         break;
606:       }
607:       if (vc != ecount[jj[j]]) {
608:         sneighs = PETSC_FALSE;
609:       } else {
610:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
611:         for (k=0;k<vc;k++) {
612:           if (vn[k] != en[k]) {
613:             sneighs = PETSC_FALSE;
614:             break;
615:           }
616:         }
617:       }
618:     }
619:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
620:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
621:       PetscBTSet(btv,i);
622:     } else if (test == ord) {
623:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
624:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
625:         PetscBTSet(btv,i);
626:       } else {
627:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
628:         PetscBTSet(btvcand,i);
629:       }
630:     }
631:   }
632:   PetscFree(ecount);
633:   PetscFree(vcount);
634:   if (ne) {
635:     PetscFree(eneighs[0]);
636:   }
637:   if (nv) {
638:     PetscFree(vneighs[0]);
639:   }
640:   PetscFree(eneighs);
641:   PetscFree(vneighs);
642:   PetscBTDestroy(&btbd);

644:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
645:   if (order != 1) {
646:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
647:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
648:     for (i=0;i<nv;i++) {
649:       if (PetscBTLookup(btvcand,i)) {
650:         PetscBool found = PETSC_FALSE;
651:         for (j=ii[i];j<ii[i+1] && !found;j++) {
652:           PetscInt k,e = jj[j];
653:           if (PetscBTLookup(bte,e)) continue;
654:           for (k=iit[e];k<iit[e+1];k++) {
655:             PetscInt v = jjt[k];
656:             if (v != i && PetscBTLookup(btvcand,v)) {
657:               found = PETSC_TRUE;
658:               break;
659:             }
660:           }
661:         }
662:         if (!found) {
663:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d CLEARED\n",i);
664:           PetscBTClear(btvcand,i);
665:         } else {
666:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d ACCEPTED\n",i);
667:         }
668:       }
669:     }
670:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
671:   }
672:   MatSeqAIJRestoreArray(lGt,&vals);
673:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
674:   MatDestroy(&lGe);

676:   /* Get the local G^T explicitly */
677:   MatDestroy(&lGt);
678:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
679:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

681:   /* Mark interior nodal dofs */
682:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
683:   PetscBTCreate(nv,&btvi);
684:   for (i=1;i<n_neigh;i++) {
685:     for (j=0;j<n_shared[i];j++) {
686:       PetscBTSet(btvi,shared[i][j]);
687:     }
688:   }
689:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

691:   /* communicate corners and splitpoints */
692:   PetscMalloc1(nv,&vmarks);
693:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
694:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
695:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

697:   if (print) {
698:     IS tbz;

700:     cum = 0;
701:     for (i=0;i<nv;i++)
702:       if (sfvleaves[i])
703:         vmarks[cum++] = i;

705:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
706:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
707:     ISView(tbz,NULL);
708:     ISDestroy(&tbz);
709:   }

711:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
712:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
713:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
714:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

716:   /* Zero rows of lGt corresponding to identified corners
717:      and interior nodal dofs */
718:   cum = 0;
719:   for (i=0;i<nv;i++) {
720:     if (sfvleaves[i]) {
721:       vmarks[cum++] = i;
722:       PetscBTSet(btv,i);
723:     }
724:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
725:   }
726:   PetscBTDestroy(&btvi);
727:   if (print) {
728:     IS tbz;

730:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
731:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
732:     ISView(tbz,NULL);
733:     ISDestroy(&tbz);
734:   }
735:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
736:   PetscFree(vmarks);
737:   PetscSFDestroy(&sfv);
738:   PetscFree2(sfvleaves,sfvroots);

740:   /* Recompute G */
741:   MatDestroy(&lG);
742:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
743:   if (print) {
744:     PetscObjectSetName((PetscObject)lG,"used_lG");
745:     MatView(lG,NULL);
746:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
747:     MatView(lGt,NULL);
748:   }

750:   /* Get primal dofs (if any) */
751:   cum = 0;
752:   for (i=0;i<ne;i++) {
753:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
754:   }
755:   if (fl2g) {
756:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
757:   }
758:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
759:   if (print) {
760:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
761:     ISView(primals,NULL);
762:   }
763:   PetscBTDestroy(&bte);
764:   /* TODO: what if the user passed in some of them ?  */
765:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
766:   ISDestroy(&primals);

768:   /* Compute edge connectivity */
769:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
770:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
771:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
772:   if (fl2g) {
773:     PetscBT   btf;
774:     PetscInt  *iia,*jja,*iiu,*jju;
775:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

777:     /* create CSR for all local dofs */
778:     PetscMalloc1(n+1,&iia);
779:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
780:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
781:       iiu = pcbddc->mat_graph->xadj;
782:       jju = pcbddc->mat_graph->adjncy;
783:     } else if (pcbddc->use_local_adj) {
784:       rest = PETSC_TRUE;
785:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
786:     } else {
787:       free   = PETSC_TRUE;
788:       PetscMalloc2(n+1,&iiu,n,&jju);
789:       iiu[0] = 0;
790:       for (i=0;i<n;i++) {
791:         iiu[i+1] = i+1;
792:         jju[i]   = -1;
793:       }
794:     }

796:     /* import sizes of CSR */
797:     iia[0] = 0;
798:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

800:     /* overwrite entries corresponding to the Nedelec field */
801:     PetscBTCreate(n,&btf);
802:     ISGetIndices(nedfieldlocal,&idxs);
803:     for (i=0;i<ne;i++) {
804:       PetscBTSet(btf,idxs[i]);
805:       iia[idxs[i]+1] = ii[i+1]-ii[i];
806:     }

808:     /* iia in CSR */
809:     for (i=0;i<n;i++) iia[i+1] += iia[i];

811:     /* jja in CSR */
812:     PetscMalloc1(iia[n],&jja);
813:     for (i=0;i<n;i++)
814:       if (!PetscBTLookup(btf,i))
815:         for (j=0;j<iiu[i+1]-iiu[i];j++)
816:           jja[iia[i]+j] = jju[iiu[i]+j];

818:     /* map edge dofs connectivity */
819:     if (jj) {
820:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
821:       for (i=0;i<ne;i++) {
822:         PetscInt e = idxs[i];
823:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
824:       }
825:     }
826:     ISRestoreIndices(nedfieldlocal,&idxs);
827:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
828:     if (rest) {
829:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
830:     }
831:     if (free) {
832:       PetscFree2(iiu,jju);
833:     }
834:     PetscBTDestroy(&btf);
835:   } else {
836:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
837:   }

839:   /* Analyze interface for edge dofs */
840:   PCBDDCAnalyzeInterface(pc);
841:   pcbddc->mat_graph->twodim = PETSC_FALSE;

843:   /* Get coarse edges in the edge space */
844:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
845:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

847:   if (fl2g) {
848:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
849:     PetscMalloc1(nee,&eedges);
850:     for (i=0;i<nee;i++) {
851:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
852:     }
853:   } else {
854:     eedges  = alleedges;
855:     primals = allprimals;
856:   }

858:   /* Mark fine edge dofs with their coarse edge id */
859:   PetscMemzero(marks,ne*sizeof(PetscInt));
860:   ISGetLocalSize(primals,&cum);
861:   ISGetIndices(primals,&idxs);
862:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
863:   ISRestoreIndices(primals,&idxs);
864:   if (print) {
865:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
866:     ISView(primals,NULL);
867:   }

869:   maxsize = 0;
870:   for (i=0;i<nee;i++) {
871:     PetscInt size,mark = i+1;

873:     ISGetLocalSize(eedges[i],&size);
874:     ISGetIndices(eedges[i],&idxs);
875:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
876:     ISRestoreIndices(eedges[i],&idxs);
877:     maxsize = PetscMax(maxsize,size);
878:   }

880:   /* Find coarse edge endpoints */
881:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
882:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
883:   for (i=0;i<nee;i++) {
884:     PetscInt mark = i+1,size;

886:     ISGetLocalSize(eedges[i],&size);
887:     if (!size && nedfieldlocal) continue;
888:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
889:     ISGetIndices(eedges[i],&idxs);
890:     if (print) {
891:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
892:       ISView(eedges[i],NULL);
893:     }
894:     for (j=0;j<size;j++) {
895:       PetscInt k, ee = idxs[j];
896:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %d\n",ee);
897:       for (k=ii[ee];k<ii[ee+1];k++) {
898:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %d\n",jj[k]);
899:         if (PetscBTLookup(btv,jj[k])) {
900:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %d\n",jj[k]);
901:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
902:           PetscInt  k2;
903:           PetscBool corner = PETSC_FALSE;
904:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
905:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
906:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
907:                if the edge dof lie on the natural part of the boundary */
908:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
909:               corner = PETSC_TRUE;
910:               break;
911:             }
912:           }
913:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
914:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %d\n",jj[k]);
915:             PetscBTSet(btv,jj[k]);
916:           } else {
917:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
918:           }
919:         }
920:       }
921:     }
922:     ISRestoreIndices(eedges[i],&idxs);
923:   }
924:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
925:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
926:   PetscBTDestroy(&btb);

928:   /* Reset marked primal dofs */
929:   ISGetLocalSize(primals,&cum);
930:   ISGetIndices(primals,&idxs);
931:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
932:   ISRestoreIndices(primals,&idxs);

934:   /* Now use the initial lG */
935:   MatDestroy(&lG);
936:   MatDestroy(&lGt);
937:   lG   = lGinit;
938:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

940:   /* Compute extended cols indices */
941:   PetscBTCreate(nv,&btvc);
942:   PetscBTCreate(nee,&bter);
943:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
944:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
945:   i   *= maxsize;
946:   PetscCalloc1(nee,&extcols);
947:   PetscMalloc2(i,&extrow,i,&gidxs);
948:   eerr = PETSC_FALSE;
949:   for (i=0;i<nee;i++) {
950:     PetscInt size,found = 0;

952:     cum  = 0;
953:     ISGetLocalSize(eedges[i],&size);
954:     if (!size && nedfieldlocal) continue;
955:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
956:     ISGetIndices(eedges[i],&idxs);
957:     PetscBTMemzero(nv,btvc);
958:     for (j=0;j<size;j++) {
959:       PetscInt k,ee = idxs[j];
960:       for (k=ii[ee];k<ii[ee+1];k++) {
961:         PetscInt vv = jj[k];
962:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
963:         else if (!PetscBTLookupSet(btvc,vv)) found++;
964:       }
965:     }
966:     ISRestoreIndices(eedges[i],&idxs);
967:     PetscSortRemoveDupsInt(&cum,extrow);
968:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
969:     PetscSortIntWithArray(cum,gidxs,extrow);
970:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
971:     /* it may happen that endpoints are not defined at this point
972:        if it is the case, mark this edge for a second pass */
973:     if (cum != size -1 || found != 2) {
974:       PetscBTSet(bter,i);
975:       if (print) {
976:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
977:         ISView(eedges[i],NULL);
978:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
979:         ISView(extcols[i],NULL);
980:       }
981:       eerr = PETSC_TRUE;
982:     }
983:   }
984:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
985:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
986:   if (done) {
987:     PetscInt *newprimals;

989:     PetscMalloc1(ne,&newprimals);
990:     ISGetLocalSize(primals,&cum);
991:     ISGetIndices(primals,&idxs);
992:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
993:     ISRestoreIndices(primals,&idxs);
994:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
995:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
996:     for (i=0;i<nee;i++) {
997:       PetscBool has_candidates = PETSC_FALSE;
998:       if (PetscBTLookup(bter,i)) {
999:         PetscInt size,mark = i+1;

1001:         ISGetLocalSize(eedges[i],&size);
1002:         ISGetIndices(eedges[i],&idxs);
1003:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1004:         for (j=0;j<size;j++) {
1005:           PetscInt k,ee = idxs[j];
1006:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1007:           for (k=ii[ee];k<ii[ee+1];k++) {
1008:             /* set all candidates located on the edge as corners */
1009:             if (PetscBTLookup(btvcand,jj[k])) {
1010:               PetscInt k2,vv = jj[k];
1011:               has_candidates = PETSC_TRUE;
1012:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %d\n",vv);
1013:               PetscBTSet(btv,vv);
1014:               /* set all edge dofs connected to candidate as primals */
1015:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
1016:                 if (marks[jjt[k2]] == mark) {
1017:                   PetscInt k3,ee2 = jjt[k2];
1018:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %d\n",ee2);
1019:                   newprimals[cum++] = ee2;
1020:                   /* finally set the new corners */
1021:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1022:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %d\n",jj[k3]);
1023:                     PetscBTSet(btv,jj[k3]);
1024:                   }
1025:                 }
1026:               }
1027:             } else {
1028:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %d\n",jj[k]);
1029:             }
1030:           }
1031:         }
1032:         if (!has_candidates) { /* circular edge */
1033:           PetscInt k, ee = idxs[0],*tmarks;

1035:           PetscCalloc1(ne,&tmarks);
1036:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %d\n",i);
1037:           for (k=ii[ee];k<ii[ee+1];k++) {
1038:             PetscInt k2;
1039:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %d\n",jj[k]);
1040:             PetscBTSet(btv,jj[k]);
1041:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1042:           }
1043:           for (j=0;j<size;j++) {
1044:             if (tmarks[idxs[j]] > 1) {
1045:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %d\n",idxs[j]);
1046:               newprimals[cum++] = idxs[j];
1047:             }
1048:           }
1049:           PetscFree(tmarks);
1050:         }
1051:         ISRestoreIndices(eedges[i],&idxs);
1052:       }
1053:       ISDestroy(&extcols[i]);
1054:     }
1055:     PetscFree(extcols);
1056:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1057:     PetscSortRemoveDupsInt(&cum,newprimals);
1058:     if (fl2g) {
1059:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1060:       ISDestroy(&primals);
1061:       for (i=0;i<nee;i++) {
1062:         ISDestroy(&eedges[i]);
1063:       }
1064:       PetscFree(eedges);
1065:     }
1066:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1067:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1068:     PetscFree(newprimals);
1069:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1070:     ISDestroy(&primals);
1071:     PCBDDCAnalyzeInterface(pc);
1072:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1073:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1074:     if (fl2g) {
1075:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1076:       PetscMalloc1(nee,&eedges);
1077:       for (i=0;i<nee;i++) {
1078:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1079:       }
1080:     } else {
1081:       eedges  = alleedges;
1082:       primals = allprimals;
1083:     }
1084:     PetscCalloc1(nee,&extcols);

1086:     /* Mark again */
1087:     PetscMemzero(marks,ne*sizeof(PetscInt));
1088:     for (i=0;i<nee;i++) {
1089:       PetscInt size,mark = i+1;

1091:       ISGetLocalSize(eedges[i],&size);
1092:       ISGetIndices(eedges[i],&idxs);
1093:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1094:       ISRestoreIndices(eedges[i],&idxs);
1095:     }
1096:     if (print) {
1097:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1098:       ISView(primals,NULL);
1099:     }

1101:     /* Recompute extended cols */
1102:     eerr = PETSC_FALSE;
1103:     for (i=0;i<nee;i++) {
1104:       PetscInt size;

1106:       cum  = 0;
1107:       ISGetLocalSize(eedges[i],&size);
1108:       if (!size && nedfieldlocal) continue;
1109:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1110:       ISGetIndices(eedges[i],&idxs);
1111:       for (j=0;j<size;j++) {
1112:         PetscInt k,ee = idxs[j];
1113:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1114:       }
1115:       ISRestoreIndices(eedges[i],&idxs);
1116:       PetscSortRemoveDupsInt(&cum,extrow);
1117:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1118:       PetscSortIntWithArray(cum,gidxs,extrow);
1119:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1120:       if (cum != size -1) {
1121:         if (print) {
1122:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1123:           ISView(eedges[i],NULL);
1124:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1125:           ISView(extcols[i],NULL);
1126:         }
1127:         eerr = PETSC_TRUE;
1128:       }
1129:     }
1130:   }
1131:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1132:   PetscFree2(extrow,gidxs);
1133:   PetscBTDestroy(&bter);
1134:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1135:   /* an error should not occur at this point */
1136:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1138:   /* Check the number of endpoints */
1139:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140:   PetscMalloc1(2*nee,&corners);
1141:   PetscMalloc1(nee,&cedges);
1142:   for (i=0;i<nee;i++) {
1143:     PetscInt size, found = 0, gc[2];

1145:     /* init with defaults */
1146:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1147:     ISGetLocalSize(eedges[i],&size);
1148:     if (!size && nedfieldlocal) continue;
1149:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1150:     ISGetIndices(eedges[i],&idxs);
1151:     PetscBTMemzero(nv,btvc);
1152:     for (j=0;j<size;j++) {
1153:       PetscInt k,ee = idxs[j];
1154:       for (k=ii[ee];k<ii[ee+1];k++) {
1155:         PetscInt vv = jj[k];
1156:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1157:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1158:           corners[i*2+found++] = vv;
1159:         }
1160:       }
1161:     }
1162:     if (found != 2) {
1163:       PetscInt e;
1164:       if (fl2g) {
1165:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1166:       } else {
1167:         e = idxs[0];
1168:       }
1169:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1170:     }

1172:     /* get primal dof index on this coarse edge */
1173:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1174:     if (gc[0] > gc[1]) {
1175:       PetscInt swap  = corners[2*i];
1176:       corners[2*i]   = corners[2*i+1];
1177:       corners[2*i+1] = swap;
1178:     }
1179:     cedges[i] = idxs[size-1];
1180:     ISRestoreIndices(eedges[i],&idxs);
1181:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1182:   }
1183:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1184:   PetscBTDestroy(&btvc);

1186: #if defined(PETSC_USE_DEBUG)
1187:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1188:      not interfere with neighbouring coarse edges */
1189:   PetscMalloc1(nee+1,&emarks);
1190:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1191:   for (i=0;i<nv;i++) {
1192:     PetscInt emax = 0,eemax = 0;

1194:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1195:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1196:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1197:     for (j=1;j<nee+1;j++) {
1198:       if (emax < emarks[j]) {
1199:         emax = emarks[j];
1200:         eemax = j;
1201:       }
1202:     }
1203:     /* not relevant for edges */
1204:     if (!eemax) continue;

1206:     for (j=ii[i];j<ii[i+1];j++) {
1207:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1208:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1209:       }
1210:     }
1211:   }
1212:   PetscFree(emarks);
1213:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1214: #endif

1216:   /* Compute extended rows indices for edge blocks of the change of basis */
1217:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1218:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1219:   extmem *= maxsize;
1220:   PetscMalloc1(extmem*nee,&extrow);
1221:   PetscMalloc1(nee,&extrows);
1222:   PetscCalloc1(nee,&extrowcum);
1223:   for (i=0;i<nv;i++) {
1224:     PetscInt mark = 0,size,start;

1226:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1227:     for (j=ii[i];j<ii[i+1];j++)
1228:       if (marks[jj[j]] && !mark)
1229:         mark = marks[jj[j]];

1231:     /* not relevant */
1232:     if (!mark) continue;

1234:     /* import extended row */
1235:     mark--;
1236:     start = mark*extmem+extrowcum[mark];
1237:     size = ii[i+1]-ii[i];
1238:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1239:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1240:     extrowcum[mark] += size;
1241:   }
1242:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1243:   MatDestroy(&lGt);
1244:   PetscFree(marks);

1246:   /* Compress extrows */
1247:   cum  = 0;
1248:   for (i=0;i<nee;i++) {
1249:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1250:     PetscSortRemoveDupsInt(&size,start);
1251:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1252:     cum  = PetscMax(cum,size);
1253:   }
1254:   PetscFree(extrowcum);
1255:   PetscBTDestroy(&btv);
1256:   PetscBTDestroy(&btvcand);

1258:   /* Workspace for lapack inner calls and VecSetValues */
1259:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1261:   /* Create change of basis matrix (preallocation can be improved) */
1262:   MatCreate(comm,&T);
1263:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1264:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1265:   MatSetType(T,MATAIJ);
1266:   MatSeqAIJSetPreallocation(T,10,NULL);
1267:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1268:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1269:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1270:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1271:   ISLocalToGlobalMappingDestroy(&al2g);

1273:   /* Defaults to identity */
1274:   MatCreateVecs(pc->pmat,&tvec,NULL);
1275:   VecSet(tvec,1.0);
1276:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1277:   VecDestroy(&tvec);

1279:   /* Create discrete gradient for the coarser level if needed */
1280:   MatDestroy(&pcbddc->nedcG);
1281:   ISDestroy(&pcbddc->nedclocal);
1282:   if (pcbddc->current_level < pcbddc->max_levels) {
1283:     ISLocalToGlobalMapping cel2g,cvl2g;
1284:     IS                     wis,gwis;
1285:     PetscInt               cnv,cne;

1287:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1288:     if (fl2g) {
1289:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1290:     } else {
1291:       PetscObjectReference((PetscObject)wis);
1292:       pcbddc->nedclocal = wis;
1293:     }
1294:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1295:     ISDestroy(&wis);
1296:     ISRenumber(gwis,NULL,&cne,&wis);
1297:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1298:     ISDestroy(&wis);
1299:     ISDestroy(&gwis);

1301:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1302:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1303:     ISDestroy(&wis);
1304:     ISRenumber(gwis,NULL,&cnv,&wis);
1305:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1306:     ISDestroy(&wis);
1307:     ISDestroy(&gwis);

1309:     MatCreate(comm,&pcbddc->nedcG);
1310:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1311:     MatSetType(pcbddc->nedcG,MATAIJ);
1312:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1313:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1314:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1315:     ISLocalToGlobalMappingDestroy(&cel2g);
1316:     ISLocalToGlobalMappingDestroy(&cvl2g);
1317:   }
1318:   ISLocalToGlobalMappingDestroy(&vl2g);

1320: #if defined(PRINT_GDET)
1321:   inc = 0;
1322:   lev = pcbddc->current_level;
1323: #endif

1325:   /* Insert values in the change of basis matrix */
1326:   for (i=0;i<nee;i++) {
1327:     Mat         Gins = NULL, GKins = NULL;
1328:     IS          cornersis = NULL;
1329:     PetscScalar cvals[2];

1331:     if (pcbddc->nedcG) {
1332:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1333:     }
1334:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1335:     if (Gins && GKins) {
1336:       PetscScalar    *data;
1337:       const PetscInt *rows,*cols;
1338:       PetscInt       nrh,nch,nrc,ncc;

1340:       ISGetIndices(eedges[i],&cols);
1341:       /* H1 */
1342:       ISGetIndices(extrows[i],&rows);
1343:       MatGetSize(Gins,&nrh,&nch);
1344:       MatDenseGetArray(Gins,&data);
1345:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1346:       MatDenseRestoreArray(Gins,&data);
1347:       ISRestoreIndices(extrows[i],&rows);
1348:       /* complement */
1349:       MatGetSize(GKins,&nrc,&ncc);
1350:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1351:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1352:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1353:       MatDenseGetArray(GKins,&data);
1354:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1355:       MatDenseRestoreArray(GKins,&data);

1357:       /* coarse discrete gradient */
1358:       if (pcbddc->nedcG) {
1359:         PetscInt cols[2];

1361:         cols[0] = 2*i;
1362:         cols[1] = 2*i+1;
1363:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1364:       }
1365:       ISRestoreIndices(eedges[i],&cols);
1366:     }
1367:     ISDestroy(&extrows[i]);
1368:     ISDestroy(&extcols[i]);
1369:     ISDestroy(&cornersis);
1370:     MatDestroy(&Gins);
1371:     MatDestroy(&GKins);
1372:   }
1373:   ISLocalToGlobalMappingDestroy(&el2g);

1375:   /* Start assembling */
1376:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1377:   if (pcbddc->nedcG) {
1378:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1379:   }

1381:   /* Free */
1382:   if (fl2g) {
1383:     ISDestroy(&primals);
1384:     for (i=0;i<nee;i++) {
1385:       ISDestroy(&eedges[i]);
1386:     }
1387:     PetscFree(eedges);
1388:   }

1390:   /* hack mat_graph with primal dofs on the coarse edges */
1391:   {
1392:     PCBDDCGraph graph   = pcbddc->mat_graph;
1393:     PetscInt    *oqueue = graph->queue;
1394:     PetscInt    *ocptr  = graph->cptr;
1395:     PetscInt    ncc,*idxs;

1397:     /* find first primal edge */
1398:     if (pcbddc->nedclocal) {
1399:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1400:     } else {
1401:       if (fl2g) {
1402:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1403:       }
1404:       idxs = cedges;
1405:     }
1406:     cum = 0;
1407:     while (cum < nee && cedges[cum] < 0) cum++;

1409:     /* adapt connected components */
1410:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1411:     graph->cptr[0] = 0;
1412:     for (i=0,ncc=0;i<graph->ncc;i++) {
1413:       PetscInt lc = ocptr[i+1]-ocptr[i];
1414:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1415:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1416:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1417:         ncc++;
1418:         lc--;
1419:         cum++;
1420:         while (cum < nee && cedges[cum] < 0) cum++;
1421:       }
1422:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1423:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1424:       ncc++;
1425:     }
1426:     graph->ncc = ncc;
1427:     if (pcbddc->nedclocal) {
1428:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1429:     }
1430:     PetscFree2(ocptr,oqueue);
1431:   }
1432:   ISLocalToGlobalMappingDestroy(&fl2g);
1433:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1434:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1435:   MatDestroy(&conn);

1437:   ISDestroy(&nedfieldlocal);
1438:   PetscFree(extrow);
1439:   PetscFree2(work,rwork);
1440:   PetscFree(corners);
1441:   PetscFree(cedges);
1442:   PetscFree(extrows);
1443:   PetscFree(extcols);
1444:   MatDestroy(&lG);

1446:   /* Complete assembling */
1447:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1448:   if (pcbddc->nedcG) {
1449:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1450: #if 0
1451:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1452:     MatView(pcbddc->nedcG,NULL);
1453: #endif
1454:   }

1456:   /* set change of basis */
1457:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1458:   MatDestroy(&T);

1460:   return(0);
1461: }

1463: /* the near-null space of BDDC carries information on quadrature weights,
1464:    and these can be collinear -> so cheat with MatNullSpaceCreate
1465:    and create a suitable set of basis vectors first */
/*
   PCBDDCNullSpaceCreate - calls MatNullSpaceCreate() on quad_vecs after temporarily
   writing placeholder values into them, then resets the written entries to zero.

   Input:
     comm      - communicator forwarded to MatNullSpaceCreate()
     has_const - forwarded to MatNullSpaceCreate(); also selects which entries are written
     nvecs     - number of vectors in quad_vecs
     quad_vecs - the vectors; modified in place (see below)

   Output:
     nnsp      - the object produced by MatNullSpaceCreate()

   Placeholder pattern for vector i (global indices):
     has_const false: entry i set to 1
     has_const true : entries 2*i and 2*i+1 set to +1/sqrt(2) and -1/sqrt(2)
   With has_const true an error (PETSC_ERR_SUP) is raised if the local size of a vector
   is smaller than 2*nvecs.

   NOTE(review): the placeholders give distinct, disjointly supported vectors only if
   quad_vecs are zero elsewhere on entry - confirm against the callers.
*/
1466: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1467: {
1469:   PetscInt       i;

        /* first pass: write the placeholder entries on the process owning them */
1472:   for (i=0;i<nvecs;i++) {
1473:     PetscInt first,last;

1475:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
          /* the has_const pattern needs two entries per vector */
1476:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* NOTE(review): ownership is tested on global index i, while the has_const branch
             writes global indices 2*i and 2*i+1; the size check above looks intended to
             keep those inside the local range - confirm */
1477:     if (i>=first && i < last) {
1478:       PetscScalar *data;
1479:       VecGetArray(quad_vecs[i],&data);
            /* data is indexed locally, hence the "-first" offsets */
1480:       if (!has_const) {
1481:         data[i-first] = 1.;
1482:       } else {
              /* the two values sum to zero and have unit 2-norm */
1483:         data[2*i-first] = 1./PetscSqrtReal(2.);
1484:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1485:       }
1486:       VecRestoreArray(quad_vecs[i],&data);
1487:     }
          /* bump the object state on every process, whether or not it wrote an entry */
1488:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1489:   }
1490:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
        /* second pass: undo the placeholder entries written above.
           The write is bracketed by VecLockPop()/VecLockPush(); presumably the vectors
           are read-locked after MatNullSpaceCreate() - TODO confirm */
1491:   for (i=0;i<nvecs;i++) { /* reset vectors */
1492:     PetscInt first,last;
1493:     VecLockPop(quad_vecs[i]);
1494:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1495:     if (i>=first && i < last) {
1496:       PetscScalar *data;
1497:       VecGetArray(quad_vecs[i],&data);
1498:       if (!has_const) {
1499:         data[i-first] = 0.;
1500:       } else {
1501:         data[2*i-first] = 0.;
1502:         data[2*i-first+1] = 0.;
1503:       }
1504:       VecRestoreArray(quad_vecs[i],&data);
1505:     }
1506:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1507:     VecLockPush(quad_vecs[i]);
1508:   }
1509:   return(0);
1510: }

/*
   PCBDDCComputeNoNetFlux - builds a set of global vectors from the local part of
   divudotp and returns in nnsp the object created over them by PCBDDCNullSpaceCreate().

   Input:
     A         - matrix providing the vector layout, the communicator and the
                 local-to-global map attached to the vectors
     divudotp  - matrix whose local part (MatISGetLocalMat) is applied to a vector of ones
     transpose - if false, v = loc_divudotp^T * 1 and the row map of A is used;
                 if true,  v = loc_divudotp   * 1 and the column map of A is used
     vl2l      - optional index set; when given, the entries of v it lists are scattered
                 into a vector shaped like the local matrix of A before being read
     graph     - provides l2gmap (sharing information), count and neighbours_set

   Output:
     nnsp      - set to NULL when the maximum of n_neigh over the communicator is zero

   NOTE(review): MatISGetLocalMat() is called on divudotp (and on A when vl2l is given),
   so both are presumably expected to be of type MATIS - confirm.
   NOTE(review): ierr is declared but unused in this listing; the error-checking wrappers
   around the PETSc calls look stripped by the rendering.
*/
1512: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1513: {
1514:   Mat                    loc_divudotp;
1515:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1516:   ISLocalToGlobalMapping map;
1517:   PetscScalar            *vals;
1518:   const PetscScalar      *array;
1519:   PetscInt               i,maxneighs,maxsize;
1520:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1521:   PetscMPIInt            rank;
1522:   PetscErrorCode         ierr;

        /* sharing information: for each neighbour i, shared[i] holds n_shared[i] local indices */
1525:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
        /* maxneighs is the same on every process, so the early return below is taken by all or none */
1526:   MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1527:   if (!maxneighs) {
1528:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1529:     *nnsp = NULL;
1530:     return(0);
1531:   }
        /* vals is a scratch buffer sized for the largest shared set */
1532:   maxsize = 0;
1533:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1534:   PetscMalloc1(maxsize,&vals);
1535:   /* create vectors to hold quadrature weights */
1536:   MatCreateVecs(A,&quad_vec,NULL);
1537:   if (!transpose) {
1538:     MatGetLocalToGlobalMapping(A,&map,NULL);
1539:   } else {
1540:     MatGetLocalToGlobalMapping(A,NULL,&map);
1541:   }
        /* one vector per neighbour slot, maxneighs of them on every process */
1542:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1543:   VecDestroy(&quad_vec);
        /* the null space object is created before the vectors receive their values */
1544:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* pop the lock so values can be inserted; it is pushed back after assembly */
1545:   for (i=0;i<maxneighs;i++) {
1546:     VecLockPop(quad_vecs[i]);
1547:     VecSetLocalToGlobalMapping(quad_vecs[i],map);
1548:   }

1550:   /* compute local quad vec */
1551:   MatISGetLocalMat(divudotp,&loc_divudotp);
        /* p is the input of the product below and v its output; their roles swap with transpose */
1552:   if (!transpose) {
1553:     MatCreateVecs(loc_divudotp,&v,&p);
1554:   } else {
1555:     MatCreateVecs(loc_divudotp,&p,&v);
1556:   }
1557:   VecSet(p,1.);
1558:   if (!transpose) {
1559:     MatMultTranspose(loc_divudotp,p,v);
1560:   } else {
1561:     MatMult(loc_divudotp,p,v);
1562:   }
1563:   if (vl2l) {
1564:     Mat        lA;
1565:     VecScatter sc;

          /* vins is shaped like the local matrix of A; vl2l selects the entries of v and
             the NULL target index set means all of vins */
1567:     MatISGetLocalMat(A,&lA);
1568:     MatCreateVecs(lA,&vins,NULL);
1569:     VecScatterCreate(v,vl2l,vins,NULL,&sc);
1570:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1571:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1572:     VecScatterDestroy(&sc);
1573:   } else {
          /* no remapping: vins aliases v and is not destroyed separately below */
1574:     vins = v;
1575:   }
1576:   VecGetArrayRead(vins,&array);
1577:   VecDestroy(&p);

1579:   /* insert in global quadrature vecs */
1580:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1581:   for (i=0;i<n_neigh;i++) {
1582:     const PetscInt    *idxs;
1583:     PetscInt          idx,nn,j;

1585:     idxs = shared[i];
1586:     nn   = n_shared[i];
          /* gather the local values at the dofs shared with neighbour i */
1587:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* choose the target vector from the position this rank would take in the
             neighbour list of the first shared dof.
             NOTE(review): idx = -(idx+1) decodes the "not found" result of PetscFindInt(),
             so this presumably relies on rank never being stored in neighbours_set[] and
             on nn > 0 - confirm */
1588:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1589:     idx  = -(idx+1);
1590:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1591:   }
1592:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1593:   VecRestoreArrayRead(vins,&array);
        /* vins is a separate vector only when vl2l was given */
1594:   if (vl2l) {
1595:     VecDestroy(&vins);
1596:   }
1597:   VecDestroy(&v);
1598:   PetscFree(vals);

1600:   /* assemble near null space */
        /* all Begin calls are issued before the first End call */
1601:   for (i=0;i<maxneighs;i++) {
1602:     VecAssemblyBegin(quad_vecs[i]);
1603:   }
1604:   for (i=0;i<maxneighs;i++) {
1605:     VecAssemblyEnd(quad_vecs[i]);
1606:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
          /* matches the VecLockPop() done right after PCBDDCNullSpaceCreate() */
1607:     VecLockPush(quad_vecs[i]);
1608:   }
        /* NOTE(review): presumably *nnsp holds its own references to the vectors, so only
           this function's references are dropped here - confirm */
1609:   VecDestroyVecs(maxneighs,&quad_vecs);
1610:   return(0);
1611: }

/*
   PCBDDCAddPrimalVerticesLocalIS - adds the indices in primalv to the primal vertices
   stored in pcbddc->user_primal_vertices_local.

   Input:
     pc      - the preconditioner; pc->data is read as a PC_BDDC
     primalv - index set to add; nothing is done when it is NULL

   If an index set is already stored, it is replaced by the sorted, duplicate-free
   concatenation of primalv and the stored one. Otherwise primalv is passed to
   PCBDDCSetPrimalVerticesLocalIS().
*/
1613: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1614: {
1615:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1619:   if (primalv) {
1620:     if (pcbddc->user_primal_vertices_local) {
1621:       IS list[2], newp;

1623:       list[0] = primalv;
1624:       list[1] = pcbddc->user_primal_vertices_local;
1625:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1626:       ISSortRemoveDups(newp);
            /* destroys the previously stored IS through the local copy of its handle;
               the struct field is overwritten on the next line */
1627:       ISDestroy(&list[1]);
1628:       pcbddc->user_primal_vertices_local = newp;
1629:     } else {
1630:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1631:     }
1632:   }
1633:   return(0);
1634: }

/*
   func_coords_private - writes one coordinate component of the point X into every
   output slot.

   Input:
     dim - unused here
     t   - unused here
     X   - coordinates of the point
     Nf  - number of entries of out to fill
     ctx - read as a pointer to a PetscInt holding the component of X to copy

   Output:
     out - out[f] = X[*comp] for f = 0,...,Nf-1

   NOTE(review): *comp is not checked against dim - presumably the caller guarantees
   0 <= *comp < dim; confirm.
*/
1636: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1637: {
1638:   PetscInt f, *comp  = (PetscInt *)ctx;

1641:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1642:   return(0);
1643: }

/*
  PCBDDCComputeLocalTopologyInfo - Prepares the local (subdomain) topology
  information stored in the PCBDDC context of pc.

  In order, the routine:
    1. reads the -pc_bddc_monolithic option;
    2. unless monolithic, obtains the local field splitting
       (pcbddc->ISForDofsLocal) from user-provided global index sets, from a DM
       field decomposition, from a "_convert_nest_lfields" container attached to
       pc->pmat, or from the matrix block size;
    3. converts global Dirichlet/Neumann/primal-vertex index sets to local ones,
       or runs a consistency check on already-local Dirichlet/Neumann sets;
    4. optionally detects disconnected local components;
    5. adds DMDA subdomain corners as primal vertices when pc->pmat has a DMDA;
    6. optionally computes dof coordinates through DMProjectFunction() and
       passes them to PCSetCoordinates().

  NOTE(review): in this listing the return codes of the PETSc calls below are
  not checked, so the function returns 0 regardless of failures.
*/
1645: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1646: {
1648:   Vec            local,global;
1649:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1650:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1651:   PetscBool      monolithic = PETSC_FALSE;

1654:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1655:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1656:   PetscOptionsEnd();
1657:   /* need to convert from global to local topology information and remove references to information in global ordering */
        /* work vectors used by the global-to-local conversions; destroyed after the boundary section */
1658:   MatCreateVecs(pc->pmat,&global,NULL);
1659:   MatCreateVecs(matis->A,&local,NULL);
1660:   if (monolithic) { /* just get block size to properly compute vertices */
1661:     if (pcbddc->vertex_size == 1) {
1662:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1663:     }
          /* skip the whole field-splitting section */
1664:     goto boundary;
1665:   }

1667:   if (pcbddc->user_provided_isfordofs) {
1668:     if (pcbddc->n_ISForDofs) {
1669:       PetscInt i;
            /* convert each global field IS to a local one and drop the global copy */
1670:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1671:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1672:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1673:         ISDestroy(&pcbddc->ISForDofs[i]);
1674:       }
1675:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1676:       pcbddc->n_ISForDofs = 0;
1677:       PetscFree(pcbddc->ISForDofs);
1678:     }
1679:   } else {
1680:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1681:       DM dm;

            /* prefer the DM attached to the PC, then the one attached to the matrix */
1683:       PCGetDM(pc, &dm);
1684:       if (!dm) {
1685:         MatGetDM(pc->pmat, &dm);
1686:       }
1687:       if (dm) {
1688:         IS      *fields;
1689:         PetscInt nf,i;
1690:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1691:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1692:         for (i=0;i<nf;i++) {
1693:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1694:           ISDestroy(&fields[i]);
1695:         }
1696:         PetscFree(fields);
1697:         pcbddc->n_ISForDofsLocal = nf;
1698:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1699:         PetscContainer   c;

1701:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1702:         if (c) {
1703:           MatISLocalFields lf;
1704:           PetscContainerGetPointer(c,(void**)&lf);
1705:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1706:         } else { /* fallback, create the default fields if bs > 1 */
1707:           PetscInt i, n = matis->A->rmap->n;
                /* i temporarily holds the block size, then is reused as the loop index */
1708:           MatGetBlockSize(pc->pmat,&i);
1709:           if (i > 1) {
1710:             pcbddc->n_ISForDofsLocal = i;
1711:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
                  /* field i = local indices i, i+bs, i+2*bs, ... (n/bs entries) */
1712:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1713:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1714:             }
1715:           }
1716:         }
1717:       }
1718:     } else {
1719:       PetscInt i;
            /* local fields already present: reconcile each one across subdomains */
1720:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1721:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1722:       }
1723:     }
1724:   }

1726: boundary:
        /* Dirichlet sets are reconciled with MPI_LAND, Neumann sets with MPI_LOR */
1727:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1728:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1729:   } else if (pcbddc->DirichletBoundariesLocal) {
1730:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1731:   }
1732:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1733:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1734:   } else if (pcbddc->NeumannBoundariesLocal) {
1735:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1736:   }
1737:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1738:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1739:   }
1740:   VecDestroy(&global);
1741:   VecDestroy(&local);
1742:   /* detect local disconnected subdomains if requested (use matis->A) */
1743:   if (pcbddc->detect_disconnected) {
1744:     IS       primalv = NULL;
1745:     PetscInt i;

          /* discard any previously computed local subdomains before recomputing */
1747:     for (i=0;i<pcbddc->n_local_subs;i++) {
1748:       ISDestroy(&pcbddc->local_subs[i]);
1749:     }
1750:     PetscFree(pcbddc->local_subs);
1751:     PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1752:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1753:     ISDestroy(&primalv);
1754:   }
1755:   /* early stage corner detection */
1756:   {
1757:     DM dm;

1759:     MatGetDM(pc->pmat,&dm);
1760:     if (dm) {
1761:       PetscBool isda;

1763:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1764:       if (isda) {
1765:         ISLocalToGlobalMapping l2l;
1766:         IS                     corners;
1767:         Mat                    lA;

1769:         DMDAGetSubdomainCornersIS(dm,&corners);
1770:         MatISGetLocalMat(pc->pmat,&lA);
1771:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1772:         MatISRestoreLocalMat(pc->pmat,&lA);
1773:         if (l2l) {
1774:           const PetscInt *idx;
1775:           PetscInt       bs,*idxout,n;

                /* map the corner block indices through l2l and rebuild corners as a block IS owning idxout */
1777:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1778:           ISGetLocalSize(corners,&n);
1779:           ISGetIndices(corners,&idx);
1780:           PetscMalloc1(n,&idxout);
1781:           ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1782:           ISRestoreIndices(corners,&idx);
1783:           DMDARestoreSubdomainCornersIS(dm,&corners);
1784:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1785:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1786:           ISDestroy(&corners);
1787:           pcbddc->corner_selected = PETSC_TRUE;
1788:         } else { /* not from DMDA */
1789:           DMDARestoreSubdomainCornersIS(dm,&corners);
1790:         }
1791:       }
1792:     }
1793:   }
        /* compute coordinates only when corner selection is requested and the graph has no coordinate dimension set */
1794:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1795:     DM dm;

1797:     PCGetDM(pc,&dm);
1798:     if (!dm) {
1799:       MatGetDM(pc->pmat,&dm);
1800:     }
1801:     if (dm) {
1802:       Vec            vcoords;
1803:       PetscSection   section;
1804:       PetscReal      *coords;
1805:       PetscInt       d,cdim,nl,nf,**ctxs;
1806:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1808:       DMGetCoordinateDim(dm,&cdim);
1809:       DMGetDefaultSection(dm,&section);
1810:       PetscSectionGetNumFields(section,&nf);
1811:       DMCreateGlobalVector(dm,&vcoords);
1812:       VecGetLocalSize(vcoords,&nl);
1813:       PetscMalloc1(nl*cdim,&coords);
1814:       PetscMalloc2(nf,&funcs,nf,&ctxs);
            /* one contiguous array of nf PetscInt; ctxs[d] points at its d-th entry */
1815:       PetscMalloc1(nf,&ctxs[0]);
1816:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1817:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
            /* one projection per coordinate direction d; results are interleaved into coords */
1818:       for (d=0;d<cdim;d++) {
1819:         PetscInt          i;
1820:         const PetscScalar *v;

1822:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1823:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1824:         VecGetArrayRead(vcoords,&v);
1825:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1826:         VecRestoreArrayRead(vcoords,&v);
1827:       }
1828:       VecDestroy(&vcoords);
1829:       PCSetCoordinates(pc,cdim,nl,coords);
1830:       PetscFree(coords);
1831:       PetscFree(ctxs[0]);
1832:       PetscFree2(funcs,ctxs);
1833:     }
1834:   }
1835:   return(0);
1836: }

1838: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1839: {
1840:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1841:   PetscErrorCode  ierr;
1842:   IS              nis;
1843:   const PetscInt  *idxs;
1844:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1845:   PetscBool       *ld;

1848:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1849:   MatISSetUpSF(pc->pmat);
1850:   if (mop == MPI_LAND) {
1851:     /* init rootdata with true */
1852:     ld   = (PetscBool*) matis->sf_rootdata;
1853:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1854:   } else {
1855:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1856:   }
1857:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1858:   ISGetLocalSize(*is,&nd);
1859:   ISGetIndices(*is,&idxs);
1860:   ld   = (PetscBool*) matis->sf_leafdata;
1861:   for (i=0;i<nd;i++)
1862:     if (-1 < idxs[i] && idxs[i] < n)
1863:       ld[idxs[i]] = PETSC_TRUE;
1864:   ISRestoreIndices(*is,&idxs);
1865:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1866:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1867:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1868:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1869:   if (mop == MPI_LAND) {
1870:     PetscMalloc1(nd,&nidxs);
1871:   } else {
1872:     PetscMalloc1(n,&nidxs);
1873:   }
1874:   for (i=0,nnd=0;i<n;i++)
1875:     if (ld[i])
1876:       nidxs[nnd++] = i;
1877:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1878:   ISDestroy(is);
1879:   *is  = nis;
1880:   return(0);
1881: }

1883: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1884: {
1885:   PC_IS             *pcis = (PC_IS*)(pc->data);
1886:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1887:   PetscErrorCode    ierr;

1890:   if (!pcbddc->benign_have_null) {
1891:     return(0);
1892:   }
1893:   if (pcbddc->ChangeOfBasisMatrix) {
1894:     Vec swap;

1896:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1897:     swap = pcbddc->work_change;
1898:     pcbddc->work_change = r;
1899:     r = swap;
1900:   }
1901:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1902:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1903:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1904:   VecSet(z,0.);
1905:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1906:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1907:   if (pcbddc->ChangeOfBasisMatrix) {
1908:     pcbddc->work_change = r;
1909:     VecCopy(z,pcbddc->work_change);
1910:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1911:   }
1912:   return(0);
1913: }

/*
  PCBDDCBenignMatMult_Private_Private - Shared implementation of the shell
  matrix multiply and transpose multiply.

  Computes y = op(ctx->A) * x, where op is the transpose when transpose is
  true, with an optional modification of x before the product ("apply_right")
  and of y after it ("apply_left"). Both modifications act on the index subsets
  ctx->benign_zerodiag_subs[i], treating the last index of each subset
  separately from the others. For the transposed product the roles of
  ctx->apply_left and ctx->apply_right are exchanged.

  x itself is not overwritten: the modified input is built in ctx->work and
  temporarily placed into x with VecPlaceArray()/VecResetArray().

  NOTE(review): ierr is declared but never used in this listing; the return
  codes of the PETSc calls below are not checked.
  NOTE(review): idxs[nz-1] is read without checking nz > 0 - presumably the
  subsets are never empty; confirm against the code that builds them.
*/
1915: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1916: {
1917:   PCBDDCBenignMatMult_ctx ctx;
1918:   PetscErrorCode          ierr;
1919:   PetscBool               apply_right,apply_left,reset_x;

1922:   MatShellGetContext(A,&ctx);
        /* for the transposed operator, left and right modifications swap roles */
1923:   if (transpose) {
1924:     apply_right = ctx->apply_left;
1925:     apply_left = ctx->apply_right;
1926:   } else {
1927:     apply_right = ctx->apply_right;
1928:     apply_left = ctx->apply_left;
1929:   }
1930:   reset_x = PETSC_FALSE;
1931:   if (apply_right) {
1932:     const PetscScalar *ax;
1933:     PetscInt          nl,i;

          /* work on a copy of x held in ctx->work */
1935:     VecGetLocalSize(x,&nl);
1936:     VecGetArrayRead(x,&ax);
1937:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1938:     VecRestoreArrayRead(x,&ax);
1939:     for (i=0;i<ctx->benign_n;i++) {
1940:       PetscScalar    sum,val;
1941:       const PetscInt *idxs;
1942:       PetscInt       nz,j;
1943:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1944:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1945:       sum = 0.;
            /* sum accumulates the original values of all but the last entry;
               with apply_p0 the last entry's value is also added to each of them */
1946:       if (ctx->apply_p0) {
1947:         val = ctx->work[idxs[nz-1]];
1948:         for (j=0;j<nz-1;j++) {
1949:           sum += ctx->work[idxs[j]];
1950:           ctx->work[idxs[j]] += val;
1951:         }
1952:       } else {
1953:         for (j=0;j<nz-1;j++) {
1954:           sum += ctx->work[idxs[j]];
1955:         }
1956:       }
            /* the last entry is reduced by the accumulated sum */
1957:       ctx->work[idxs[nz-1]] -= sum;
1958:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1959:     }
          /* make x expose the modified copy until VecResetArray() below */
1960:     VecPlaceArray(x,ctx->work);
1961:     reset_x = PETSC_TRUE;
1962:   }
1963:   if (transpose) {
1964:     MatMultTranspose(ctx->A,x,y);
1965:   } else {
1966:     MatMult(ctx->A,x,y);
1967:   }
1968:   if (reset_x) {
1969:     VecResetArray(x);
1970:   }
1971:   if (apply_left) {
1972:     PetscScalar *ay;
1973:     PetscInt    i;

1975:     VecGetArray(y,&ay);
1976:     for (i=0;i<ctx->benign_n;i++) {
1977:       PetscScalar    sum,val;
1978:       const PetscInt *idxs;
1979:       PetscInt       nz,j;
1980:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1981:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* val is minus the last entry; it is added to every other entry of the subset */
1982:       val = -ay[idxs[nz-1]];
1983:       if (ctx->apply_p0) {
1984:         sum = 0.;
1985:         for (j=0;j<nz-1;j++) {
1986:           sum += ay[idxs[j]];
1987:           ay[idxs[j]] += val;
1988:         }
              /* with apply_p0 the last entry gains the sum of the original other entries */
1989:         ay[idxs[nz-1]] += sum;
1990:       } else {
1991:         for (j=0;j<nz-1;j++) {
1992:           ay[idxs[j]] += val;
1993:         }
              /* without apply_p0 the last entry is zeroed */
1994:         ay[idxs[nz-1]] = 0.;
1995:       }
1996:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1997:     }
1998:     VecRestoreArray(y,&ay);
1999:   }
2000:   return(0);
2001: }

2003: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2004: {

2008:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2009:   return(0);
2010: }

2012: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2013: {

2017:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2018:   return(0);
2019: }

/*
  PCBDDCBenignShellMat - Installs (restore == PETSC_FALSE) or removes
  (restore == PETSC_TRUE) shell replacements for pcis->A_IB and pcis->A_BI.

  Install: a MATSHELL is created whose multiply routines are
  PCBDDCBenignMatMult_Private / PCBDDCBenignMatMultTranspose_Private and whose
  context wraps the current pcis->A_IB. pcis->A_IB is replaced by the shell and
  pcis->A_BI by a MatCreateTranspose() of it; the previous A_BI is saved in
  pcbddc->benign_original_mat. Nothing is done when there is no benign change,
  benign_n is zero, or the change is explicit.

  Restore: the shell matrices are destroyed, the original A_IB (from the
  context) and A_BI (from pcbddc->benign_original_mat) are put back, and the
  context with its work array is freed. Nothing is done when no original matrix
  was saved.

  NOTE(review): ierr is declared but never used in this listing; the return
  codes of the PETSc calls below are not checked.
*/
2021: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2022: {
2023:   PC_IS                   *pcis = (PC_IS*)pc->data;
2024:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2025:   PCBDDCBenignMatMult_ctx ctx;
2026:   PetscErrorCode          ierr;

2029:   if (!restore) {
2030:     Mat                A_IB,A_BI;
2031:     PetscScalar        *work;
2032:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2034:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2035:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
          /* scratch array handed to the context as ctx->work; freed on restore */
2036:     PetscMalloc1(pcis->n,&work);
2037:     MatCreate(PETSC_COMM_SELF,&A_IB);
2038:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2039:     MatSetType(A_IB,MATSHELL);
2040:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2041:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2042:     PetscNew(&ctx);
2043:     MatShellSetContext(A_IB,ctx);
2044:     ctx->apply_left = PETSC_TRUE;
2045:     ctx->apply_right = PETSC_FALSE;
2046:     ctx->apply_p0 = PETSC_FALSE;
2047:     ctx->benign_n = pcbddc->benign_n;
2048:     if (reuse) {
            /* borrow the subsets from the reuse object; ctx->free = PETSC_FALSE so restore will not destroy them */
2049:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2050:       ctx->free = PETSC_FALSE;
2051:     } else { /* TODO: could be optimized for successive solves */
2052:       ISLocalToGlobalMapping N_to_D;
2053:       PetscInt               i;

            /* renumber each subset through the mapping built from is_I_local, dropping unmapped entries;
               the context owns these index sets (ctx->free = PETSC_TRUE) */
2055:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2056:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2057:       for (i=0;i<pcbddc->benign_n;i++) {
2058:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2059:       }
2060:       ISLocalToGlobalMappingDestroy(&N_to_D);
2061:       ctx->free = PETSC_TRUE;
2062:     }
          /* the context keeps the original A_IB so that restore can put it back */
2063:     ctx->A = pcis->A_IB;
2064:     ctx->work = work;
2065:     MatSetUp(A_IB);
2066:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2067:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2068:     pcis->A_IB = A_IB;

2070:     /* A_BI as A_IB^T */
2071:     MatCreateTranspose(A_IB,&A_BI);
2072:     pcbddc->benign_original_mat = pcis->A_BI;
2073:     pcis->A_BI = A_BI;
2074:   } else {
2075:     if (!pcbddc->benign_original_mat) {
2076:       return(0);
2077:     }
          /* fetch the context before destroying the shell that refers to it */
2078:     MatShellGetContext(pcis->A_IB,&ctx);
2079:     MatDestroy(&pcis->A_IB);
2080:     pcis->A_IB = ctx->A;
2081:     ctx->A = NULL;
2082:     MatDestroy(&pcis->A_BI);
2083:     pcis->A_BI = pcbddc->benign_original_mat;
2084:     pcbddc->benign_original_mat = NULL;
2085:     if (ctx->free) {
2086:       PetscInt i;
2087:       for (i=0;i<ctx->benign_n;i++) {
2088:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2089:       }
2090:       PetscFree(ctx->benign_zerodiag_subs);
2091:     }
2092:     PetscFree(ctx->work);
2093:     PetscFree(ctx);
2094:   }
2095:   return(0);
2096: }

2098: /* used just in bddc debug mode */
2099: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2100: {
2101:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2102:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2103:   Mat            An;

2107:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2108:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2109:   if (is1) {
2110:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2111:     MatDestroy(&An);
2112:   } else {
2113:     *B = An;
2114:   }
2115:   return(0);
2116: }

2118: /* TODO: add reuse flag */
2119: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2120: {
2121:   Mat            Bt;
2122:   PetscScalar    *a,*bdata;
2123:   const PetscInt *ii,*ij;
2124:   PetscInt       m,n,i,nnz,*bii,*bij;
2125:   PetscBool      flg_row;

2129:   MatGetSize(A,&n,&m);
2130:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2131:   MatSeqAIJGetArray(A,&a);
2132:   nnz = n;
2133:   for (i=0;i<ii[n];i++) {
2134:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2135:   }
2136:   PetscMalloc1(n+1,&bii);
2137:   PetscMalloc1(nnz,&bij);
2138:   PetscMalloc1(nnz,&bdata);
2139:   nnz = 0;
2140:   bii[0] = 0;
2141:   for (i=0;i<n;i++) {
2142:     PetscInt j;
2143:     for (j=ii[i];j<ii[i+1];j++) {
2144:       PetscScalar entry = a[j];
2145:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2146:         bij[nnz] = ij[j];
2147:         bdata[nnz] = entry;
2148:         nnz++;
2149:       }
2150:     }
2151:     bii[i+1] = nnz;
2152:   }
2153:   MatSeqAIJRestoreArray(A,&a);
2154:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2155:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2156:   {
2157:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2158:     b->free_a = PETSC_TRUE;
2159:     b->free_ij = PETSC_TRUE;
2160:   }
2161:   if (*B == A) {
2162:     MatDestroy(&A);
2163:   }
2164:   *B = Bt;
2165:   return(0);
2166: }

/*
  PCBDDCDetectDisconnectedComponents - Computes the connected components of the
  local problem using a PCBDDCGraph.

  Output Parameters (each may be NULL; all are reset to 0/NULL on entry):
+ ncc     - number of connected components found
. cc      - newly allocated array of ncc sequential index sets, one per
            component
- primalv - filled only in the DMPLEX path: index set of the dofs found in the
            closure of more than one component (labelled "cross-vertex" below)

  Two adjacency sources are supported:
    - if a DMPLEX is attached to the PC (or to pc->pmat), the cell-to-cell
      adjacency of the mesh is used and components of cells are afterwards
      translated to dofs through the subdomain section;
    - otherwise the row/column structure of the local MATIS matrix is used.

  NOTE(review): ierr is declared but never used in this listing; the return
  codes of the PETSc calls below are not checked.
*/
2168: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2169: {
2170:   Mat                    B = NULL;
2171:   DM                     dm;
2172:   IS                     is_dummy,*cc_n;
2173:   ISLocalToGlobalMapping l2gmap_dummy;
2174:   PCBDDCGraph            graph;
2175:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2176:   PetscInt               i,n;
2177:   PetscInt               *xadj,*adjncy;
2178:   PetscBool              isplex = PETSC_FALSE;
2179:   PetscErrorCode         ierr;

2182:   if (ncc) *ncc = 0;
2183:   if (cc) *cc = NULL;
2184:   if (primalv) *primalv = NULL;
2185:   PCBDDCGraphCreate(&graph);
2186:   PCGetDM(pc,&dm);
2187:   if (!dm) {
2188:     MatGetDM(pc->pmat,&dm);
2189:   }
2190:   if (dm) {
2191:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2192:   }
2193:   if (isplex) { /* this code has been modified from plexpartition.c */
2194:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2195:     PetscInt      *adj = NULL;
2196:     IS             cellNumbering;
2197:     const PetscInt *cellNum;
2198:     PetscBool      useCone, useClosure;
2199:     PetscSection   section;
2200:     PetscSegBuffer adjBuffer;
2201:     PetscSF        sfPoint;

2205:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2206:     DMGetPointSF(dm, &sfPoint);
2207:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2208:     /* Build adjacency graph via a section/segbuffer */
2209:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2210:     PetscSectionSetChart(section, pStart, pEnd);
2211:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2212:     /* Always use FVM adjacency to create partitioner graph */
          /* the DM's current adjacency flags are saved here and restored after the loop */
2213:     DMPlexGetAdjacencyUseCone(dm, &useCone);
2214:     DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2215:     DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2216:     DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2217:     DMPlexGetCellNumbering(dm, &cellNumbering);
2218:     ISGetIndices(cellNumbering, &cellNum);
          /* n counts the cells that are not skipped; it becomes the number of graph vertices */
2219:     for (n = 0, p = pStart; p < pEnd; p++) {
2220:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2221:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2222:       adjSize = PETSC_DETERMINE;
2223:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2224:       for (a = 0; a < adjSize; ++a) {
2225:         const PetscInt point = adj[a];
              /* keep only adjacent points that are themselves cells */
2226:         if (pStart <= point && point < pEnd) {
2227:           PetscInt *PETSC_RESTRICT pBuf;
2228:           PetscSectionAddDof(section, p, 1);
2229:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2230:           *pBuf = point;
2231:         }
2232:       }
2233:       n++;
2234:     }
2235:     DMPlexSetAdjacencyUseCone(dm, useCone);
2236:     DMPlexSetAdjacencyUseClosure(dm, useClosure);
2237:     /* Derive CSR graph from section/segbuffer */
2238:     PetscSectionSetUp(section);
2239:     PetscSectionGetStorageSize(section, &size);
2240:     PetscMalloc1(n+1, &xadj);
2241:     for (idx = 0, p = pStart; p < pEnd; p++) {
2242:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2243:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2244:     }
2245:     xadj[n] = size;
2246:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2247:     /* Clean up */
2248:     PetscSegBufferDestroy(&adjBuffer);
2249:     PetscSectionDestroy(&section);
2250:     PetscFree(adj);
2251:     graph->xadj = xadj;
2252:     graph->adjncy = adjncy;
2253:   } else {
2254:     Mat       A;
          /* filter is never changed after this initialization, so the filtering branches below are inactive */
2255:     PetscBool filter = PETSC_FALSE, isseqaij, flg_row;

2257:     MatISGetLocalMat(pc->pmat,&A);
          /* empty local matrix: nothing to detect, outputs keep their reset values */
2258:     if (!A->rmap->N || !A->cmap->N) {
2259:       PCBDDCGraphDestroy(&graph);
2260:       return(0);
2261:     }
2262:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2263:     if (!isseqaij && filter) {
2264:       PetscBool isseqdense;

2266:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2267:       if (!isseqdense) {
2268:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2269:       } else { /* TODO: rectangular case and LDA */
2270:         PetscScalar *array;
2271:         PetscReal   chop=1.e-6;

              /* zero the off-diagonal entries that are small relative to the two corresponding diagonal entries */
2273:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2274:         MatDenseGetArray(B,&array);
2275:         MatGetSize(B,&n,NULL);
2276:         for (i=0;i<n;i++) {
2277:           PetscInt j;
2278:           for (j=i+1;j<n;j++) {
2279:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2280:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2281:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2282:           }
2283:         }
2284:         MatDenseRestoreArray(B,&array);
2285:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2286:       }
2287:     } else {
            /* use A directly; the extra reference is released by MatDestroy(&B) in the clean up */
2288:       PetscObjectReference((PetscObject)A);
2289:       B = A;
2290:     }
          /* the CSR arrays stay borrowed from B until MatRestoreRowIJ() in the partial clean up */
2291:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2293:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2294:     if (filter) {
2295:       PetscScalar *data;
2296:       PetscInt    j,cum;

2298:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2299:       MatSeqAIJGetArray(B,&data);
2300:       cum = 0;
2301:       for (i=0;i<n;i++) {
2302:         PetscInt t;

              /* xadj_filtered[i] first counts the kept entries of row i, then is replaced by the row offset */
2304:         for (j=xadj[i];j<xadj[i+1];j++) {
2305:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2306:             continue;
2307:           }
2308:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2309:         }
2310:         t = xadj_filtered[i];
2311:         xadj_filtered[i] = cum;
2312:         cum += t;
2313:       }
2314:       MatSeqAIJRestoreArray(B,&data);
2315:       graph->xadj = xadj_filtered;
2316:       graph->adjncy = adjncy_filtered;
2317:     } else {
2318:       graph->xadj = xadj;
2319:       graph->adjncy = adjncy;
2320:     }
2321:   }
2322:   /* compute local connected components using PCBDDCGraph */
        /* the graph is initialized with an identity mapping over the n local vertices */
2323:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2324:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2325:   ISDestroy(&is_dummy);
2326:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2327:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2328:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2329:   PCBDDCGraphComputeConnectedComponents(graph);

2331:   /* partial clean up */
2332:   PetscFree2(xadj_filtered,adjncy_filtered);
2333:   if (B) {
2334:     PetscBool flg_row;
2335:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2336:     MatDestroy(&B);
2337:   }
2338:   if (isplex) {
2339:     PetscFree(xadj);
2340:     PetscFree(adjncy);
2341:   }

2343:   /* get back data */
2344:   if (isplex) {
2345:     if (ncc) *ncc = graph->ncc;
2346:     if (cc || primalv) {
2347:       Mat          A;
2348:       PetscBT      btv,btvt;
2349:       PetscSection subSection;
2350:       PetscInt     *ids,cum,cump,*cids,*pids;

            /* ids  : dofs grouped by component, component i occupying ids[cids[i]..cids[i+1])
               pids : dofs already assigned to an earlier component that are met again
               btv  : dofs assigned to some component so far
               btvt : dofs already visited while processing the current component */
2352:       DMPlexGetSubdomainSection(dm,&subSection);
2353:       MatISGetLocalMat(pc->pmat,&A);
2354:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2355:       PetscBTCreate(A->rmap->n,&btv);
2356:       PetscBTCreate(A->rmap->n,&btvt);

2358:       cids[0] = 0;
2359:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2360:         PetscInt j;

2362:         PetscBTMemzero(A->rmap->n,btvt);
2363:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2364:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2366:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
                /* closure entries are read in pairs; only the first of each pair (the point) is used */
2367:           for (k = 0; k < 2*size; k += 2) {
2368:             PetscInt s, p = closure[k], off, dof, cdof;

2370:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2371:             PetscSectionGetOffset(subSection,p,&off);
2372:             PetscSectionGetDof(subSection,p,&dof);
                  /* loop over the dof-cdof unconstrained dofs of the point */
2373:             for (s = 0; s < dof-cdof; s++) {
2374:               if (PetscBTLookupSet(btvt,off+s)) continue;
2375:               if (!PetscBTLookup(btv,off+s)) {
2376:                 ids[cum++] = off+s;
2377:               } else { /* cross-vertex */
2378:                 pids[cump++] = off+s;
2379:               }
2380:             }
2381:           }
2382:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2383:         }
2384:         cids[i+1] = cum;
2385:         /* mark dofs as already assigned */
2386:         for (j = cids[i]; j < cids[i+1]; j++) {
2387:           PetscBTSet(btv,ids[j]);
2388:         }
2389:       }
2390:       if (cc) {
2391:         PetscMalloc1(graph->ncc,&cc_n);
2392:         for (i = 0; i < graph->ncc; i++) {
2393:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2394:         }
2395:         *cc = cc_n;
2396:       }
2397:       if (primalv) {
2398:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2399:       }
2400:       PetscFree3(ids,cids,pids);
2401:       PetscBTDestroy(&btv);
2402:       PetscBTDestroy(&btvt);
2403:     }
2404:   } else {
2405:     if (ncc) *ncc = graph->ncc;
2406:     if (cc) {
            /* component i is the slice queue[cptr[i]..cptr[i+1]) of the graph */
2407:       PetscMalloc1(graph->ncc,&cc_n);
2408:       for (i=0;i<graph->ncc;i++) {
2409:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2410:       }
2411:       *cc = cc_n;
2412:     }
2413:   }
2414:   /* clean up graph */
        /* the adjacency arrays were already released above; the pointers are cleared before
           destroying the graph - presumably so that PCBDDCGraphDestroy() does not touch them (TODO confirm) */
2415:   graph->xadj = 0;
2416:   graph->adjncy = 0;
2417:   PCBDDCGraphDestroy(&graph);
2418:   return(0);
2419: }

2421: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2422: {
2423:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2424:   PC_IS*         pcis = (PC_IS*)(pc->data);
2425:   IS             dirIS = NULL;
2426:   PetscInt       i;

2430:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2431:   if (zerodiag) {
2432:     Mat            A;
2433:     Vec            vec3_N;
2434:     PetscScalar    *vals;
2435:     const PetscInt *idxs;
2436:     PetscInt       nz,*count;

2438:     /* p0 */
2439:     VecSet(pcis->vec1_N,0.);
2440:     PetscMalloc1(pcis->n,&vals);
2441:     ISGetLocalSize(zerodiag,&nz);
2442:     ISGetIndices(zerodiag,&idxs);
2443:     for (i=0;i<nz;i++) vals[i] = 1.;
2444:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2445:     VecAssemblyBegin(pcis->vec1_N);
2446:     VecAssemblyEnd(pcis->vec1_N);
2447:     /* v_I */
2448:     VecSetRandom(pcis->vec2_N,NULL);
2449:     for (i=0;i<nz;i++) vals[i] = 0.;
2450:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2451:     ISRestoreIndices(zerodiag,&idxs);
2452:     ISGetIndices(pcis->is_B_local,&idxs);
2453:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2454:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2455:     ISRestoreIndices(pcis->is_B_local,&idxs);
2456:     if (dirIS) {
2457:       PetscInt n;

2459:       ISGetLocalSize(dirIS,&n);
2460:       ISGetIndices(dirIS,&idxs);
2461:       for (i=0;i<n;i++) vals[i] = 0.;
2462:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2463:       ISRestoreIndices(dirIS,&idxs);
2464:     }
2465:     VecAssemblyBegin(pcis->vec2_N);
2466:     VecAssemblyEnd(pcis->vec2_N);
2467:     VecDuplicate(pcis->vec1_N,&vec3_N);
2468:     VecSet(vec3_N,0.);
2469:     MatISGetLocalMat(pc->pmat,&A);
2470:     MatMult(A,pcis->vec1_N,vec3_N);
2471:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2472:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2473:     PetscFree(vals);
2474:     VecDestroy(&vec3_N);

2476:     /* there should not be any pressure dofs lying on the interface */
2477:     PetscCalloc1(pcis->n,&count);
2478:     ISGetIndices(pcis->is_B_local,&idxs);
2479:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2480:     ISRestoreIndices(pcis->is_B_local,&idxs);
2481:     ISGetIndices(zerodiag,&idxs);
2482:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]);
2483:     ISRestoreIndices(zerodiag,&idxs);
2484:     PetscFree(count);
2485:   }
2486:   ISDestroy(&dirIS);

2488:   /* check PCBDDCBenignGetOrSetP0 */
2489:   VecSetRandom(pcis->vec1_global,NULL);
2490:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2491:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2492:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2493:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2494:   for (i=0;i<pcbddc->benign_n;i++) {
2495:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2496:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2497:   }
2498:   return(0);
2499: }

/*
   PCBDDCBenignDetectSaddlePoint - Selects the local zero-diagonal ("pressure") dofs
   that can be handled by the benign trick and builds the related data in PC_BDDC.

   Input Parameter:
.  pc - the preconditioner; pc->data is accessed as PC_BDDC and pc->pmat->data as Mat_IS

   Output Parameter:
.  zerodiaglocal - the IS of selected local zero-diagonal dofs; the local variable is
                   passed through ISDestroy() on some paths (no zero diagonal found, or
                   no valid subdomain), so callers should presumably be ready for a
                   NULL IS - TODO confirm

   Steps performed (in this order):
   - destroys benign_sf, benign_B0 and benign_zerodiag_subs left by a previous call
   - builds the candidate pressures from ISForDofsLocal (if present) and the
     zero-diagonal dofs of pcbddc->local_mat
   - validates the zero-diagonal dofs, per local subdomain when pcbddc->n_local_subs > 1
   - if requested (compute_nonetflux and divudotp not yet present), creates pcbddc->divudotp
   - creates pcbddc->benign_change and the p0 local/global indices; when
     benign_change_explicit is set, pcbddc->local_mat is replaced by its PtAP projection
   - sets pcbddc->benign_null and pcbddc->benign_have_null with two MPI_Allreduce calls

   Notes:
   MPI_Scan (only in the no-net-flux branch) and MPI_Allreduce are called on the
   communicator of pc, so every process of that communicator has to call this routine.
*/
2501: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2502: {
2503:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2504:   IS             pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2505:   PetscInt       nz,n;
2506:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2507:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

        /* get rid of the data computed by a previous call */
2511:   PetscSFDestroy(&pcbddc->benign_sf);
2512:   MatDestroy(&pcbddc->benign_B0);
2513:   for (n=0;n<pcbddc->benign_n;n++) {
2514:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2515:   }
2516:   PetscFree(pcbddc->benign_zerodiag_subs);
2517:   pcbddc->benign_n = 0;

2519:   /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2520:      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2521:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2522:      If not, a change of basis on pressures is not needed
2523:      since the local Schur complements are already SPD
2524:   */
2525:   has_null_pressures = PETSC_TRUE;
2526:   have_null = PETSC_TRUE;
2527:   if (pcbddc->n_ISForDofsLocal) {
2528:     IS       iP = NULL;
2529:     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;

          /* the default field id (the last one) can be overridden with -pc_bddc_pressure_field */
2531:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2532:     PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2533:     PetscOptionsEnd();
2534:     if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2535:     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2536:     ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2537:     ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2538:     ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2539:     ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2540:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2541:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2542:     if (iP) {
2543:       IS newpressures;

2545:       ISDifference(pressures,iP,&newpressures);
2546:       ISDestroy(&pressures);
2547:       pressures = newpressures;
2548:     }
          /* pressures is kept sorted, as zerodiag below; the two are compared later with ISEqual() */
2549:     ISSorted(pressures,&sorted);
2550:     if (!sorted) {
2551:       ISSort(pressures);
2552:     }
2553:   } else {
2554:     pressures = NULL;
2555:   }
2556:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2557:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
        /* with an empty local matrix the explicit change of basis is flagged */
2558:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2559:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2560:   ISSorted(zerodiag,&sorted);
2561:   if (!sorted) {
2562:     ISSort(zerodiag);
2563:   }
        /* zerodiag_save holds an extra reference to the full set of zero-diagonal dofs:
           it is the fallback used to extract divudotp when no pressure field is available */
2564:   PetscObjectReference((PetscObject)zerodiag);
2565:   zerodiag_save = zerodiag;
2566:   ISGetLocalSize(zerodiag,&nz);
2567:   if (!nz) {
          /* no zero-diagonal dofs: nothing to change here; have_null is left untouched for empty local problems */
2568:     if (n) have_null = PETSC_FALSE;
2569:     has_null_pressures = PETSC_FALSE;
2570:     ISDestroy(&zerodiag);
2571:   }
2572:   recompute_zerodiag = PETSC_FALSE;
2573:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2574:   zerodiag_subs    = NULL;
2575:   pcbddc->benign_n = 0;
2576:   n_interior_dofs  = 0;
2577:   interior_dofs    = NULL;
2578:   nneu             = 0;
2579:   if (pcbddc->NeumannBoundariesLocal) {
2580:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2581:   }
        /* the check on B(v_I,p0) is done when no local Neumann boundary IS is set or when current_level is nonzero */
2582:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2583:   if (checkb) { /* need to compute interior nodes */
2584:     PetscInt n,i,j;
2585:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2586:     PetscInt *iwork;

          /* note: this n shadows the outer one and is the size of the row local-to-global map of pc->pmat */
2588:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2589:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2590:     PetscCalloc1(n,&iwork);
2591:     PetscMalloc1(n,&interior_dofs);
          /* count how many times each local dof is shared; entry 0 of the neighbour list is skipped
             (presumably it is the process itself - TODO confirm against ISLocalToGlobalMappingGetInfo) */
2592:     for (i=1;i<n_neigh;i++)
2593:       for (j=0;j<n_shared[i];j++)
2594:           iwork[shared[i][j]] += 1;
          /* dofs that are never shared are the interior ones */
2595:     for (i=0;i<n;i++)
2596:       if (!iwork[i])
2597:         interior_dofs[n_interior_dofs++] = i;
2598:     PetscFree(iwork);
2599:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2600:   }
2601:   if (has_null_pressures) {
2602:     IS             *subs;
2603:     PetscInt       nsubs,i,j,nl;
2604:     const PetscInt *idxs;
2605:     PetscScalar    *array;
2606:     Vec            *work;
2607:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2609:     subs  = pcbddc->local_subs;
2610:     nsubs = pcbddc->n_local_subs;
2611:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2612:     if (checkb) {
2613:       VecDuplicateVecs(matis->y,2,&work);
2614:       ISGetLocalSize(zerodiag,&nl);
2615:       ISGetIndices(zerodiag,&idxs);
2616:       /* work[0] = 1_p */
2617:       VecSet(work[0],0.);
2618:       VecGetArray(work[0],&array);
2619:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2620:       VecRestoreArray(work[0],&array);
2621:       /* work[1] = 1_v */
2622:       VecSet(work[1],1.);
2623:       VecGetArray(work[1],&array);
2624:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2625:       VecRestoreArray(work[1],&array);
2626:       ISRestoreIndices(zerodiag,&idxs);
2627:     }
2628:     if (nsubs > 1) {
            /* zero-initialized array with room for one IS per local subdomain; only the first benign_n entries get filled */
2629:       PetscCalloc1(nsubs,&zerodiag_subs);
2630:       for (i=0;i<nsubs;i++) {
2631:         ISLocalToGlobalMapping l2g;
2632:         IS                     t_zerodiag_subs;
2633:         PetscInt               nl;

              /* t_zerodiag_subs: zero-diagonal dofs falling in subdomain i, in the subdomain numbering induced by subs[i] */
2635:         ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2636:         ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2637:         ISGetLocalSize(t_zerodiag_subs,&nl);
2638:         if (nl) {
2639:           PetscBool valid = PETSC_TRUE;

                /* first test: A applied to the indicator of the pressures of this subdomain,
                   masked on velocities, must be below PETSC_SMALL on every interior dof */
2641:           if (checkb) {
2642:             VecSet(matis->x,0);
                  /* nl is reused here for the size of the subdomain */
2643:             ISGetLocalSize(subs[i],&nl);
2644:             ISGetIndices(subs[i],&idxs);
2645:             VecGetArray(matis->x,&array);
2646:             for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2647:             VecRestoreArray(matis->x,&array);
2648:             ISRestoreIndices(subs[i],&idxs);
2649:             VecPointwiseMult(matis->x,work[0],matis->x);
2650:             MatMult(matis->A,matis->x,matis->y);
2651:             VecPointwiseMult(matis->y,work[1],matis->y);
2652:             VecGetArray(matis->y,&array);
2653:             for (j=0;j<n_interior_dofs;j++) {
2654:               if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2655:                 valid = PETSC_FALSE;
2656:                 break;
2657:               }
2658:             }
2659:             VecRestoreArray(matis->y,&array);
2660:           }
                /* second test: the subdomain is rejected if any local Neumann dof maps into it */
2661:           if (valid && nneu) {
2662:             const PetscInt *idxs;
2663:             PetscInt       nzb;

2665:             ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2666:             ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2667:             ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2668:             if (nzb) valid = PETSC_FALSE;
2669:           }
                /* third test: the pressures of the subdomain must coincide with its zero-diagonal dofs */
2670:           if (valid && pressures) {
2671:             IS t_pressure_subs;
2672:             ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2673:             ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2674:             ISDestroy(&t_pressure_subs);
2675:           }
2676:           if (valid) {
                  /* store the accepted dofs mapped back through l2g */
2677:             ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2678:             pcbddc->benign_n++;
2679:           } else {
                  /* some zero-diagonal dofs have been discarded: zerodiag has to be rebuilt from the accepted subdomains */
2680:             recompute_zerodiag = PETSC_TRUE;
2681:           }
2682:         }
2683:         ISDestroy(&t_zerodiag_subs);
2684:         ISLocalToGlobalMappingDestroy(&l2g);
2685:       }
2686:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2687:       PetscBool valid = PETSC_TRUE;

            /* same tests as above, applied to the whole local problem: any Neumann dof invalidates it */
2689:       if (nneu) valid = PETSC_FALSE;
2690:       if (valid && pressures) {
2691:         ISEqual(pressures,zerodiag,&valid);
2692:       }
2693:       if (valid && checkb) {
2694:         MatMult(matis->A,work[0],matis->x);
2695:         VecPointwiseMult(matis->x,work[1],matis->x);
2696:         VecGetArray(matis->x,&array);
2697:         for (j=0;j<n_interior_dofs;j++) {
2698:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2699:             valid = PETSC_FALSE;
2700:             break;
2701:           }
2702:         }
2703:         VecRestoreArray(matis->x,&array);
2704:       }
2705:       if (valid) {
              /* zerodiag_subs[0] shares the zerodiag object, hence the extra reference */
2706:         pcbddc->benign_n = 1;
2707:         PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2708:         PetscObjectReference((PetscObject)zerodiag);
2709:         zerodiag_subs[0] = zerodiag;
2710:       }
2711:     }
2712:     if (checkb) {
2713:       VecDestroyVecs(2,&work);
2714:     }
2715:   }
2716:   PetscFree(interior_dofs);

        /* no subdomain passed the tests: drop zerodiag and, for nonempty local problems, give up on the benign trick */
2718:   if (!pcbddc->benign_n) {
2719:     PetscInt n;

2721:     ISDestroy(&zerodiag);
2722:     recompute_zerodiag = PETSC_FALSE;
2723:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2724:     if (n) {
2725:       has_null_pressures = PETSC_FALSE;
2726:       have_null = PETSC_FALSE;
2727:     }
2728:   }

2730:   /* final check for null pressures */
2731:   if (zerodiag && pressures) {
2732:     PetscInt nz,np;
2733:     ISGetLocalSize(zerodiag,&nz);
2734:     ISGetLocalSize(pressures,&np);
2735:     if (nz != np) have_null = PETSC_FALSE;
2736:   }

        /* rebuild zerodiag as the union of the accepted subdomain sets */
2738:   if (recompute_zerodiag) {
2739:     ISDestroy(&zerodiag);
2740:     if (pcbddc->benign_n == 1) {
2741:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2742:       zerodiag = zerodiag_subs[0];
2743:     } else {
2744:       PetscInt i,nzn,*new_idxs;

              /* first pass: total number of indices */
2746:       nzn = 0;
2747:       for (i=0;i<pcbddc->benign_n;i++) {
2748:         PetscInt ns;
2749:         ISGetLocalSize(zerodiag_subs[i],&ns);
2750:         nzn += ns;
2751:       }
2752:       PetscMalloc1(nzn,&new_idxs);
              /* second pass: concatenate the indices, then sort them */
2753:       nzn = 0;
2754:       for (i=0;i<pcbddc->benign_n;i++) {
2755:         PetscInt ns,*idxs;
2756:         ISGetLocalSize(zerodiag_subs[i],&ns);
2757:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2758:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2759:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2760:         nzn += ns;
2761:       }
2762:       PetscSortInt(nzn,new_idxs);
              /* new_idxs is handed over to the IS (PETSC_OWN_POINTER), so it is not freed here */
2763:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2764:     }
2765:     have_null = PETSC_FALSE;
2766:   }

2768:   /* Prepare matrix to compute no-net-flux */
2769:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2770:     Mat                    A,loc_divudotp;
2771:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2772:     IS                     row,col,isused = NULL;
2773:     PetscInt               M,N,n,st,n_isused;

          /* rows to extract: the pressure field if available, otherwise all the zero-diagonal dofs found initially */
2775:     if (pressures) {
2776:       isused = pressures;
2777:     } else {
2778:       isused = zerodiag_save;
2779:     }
2780:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2781:     MatISGetLocalMat(pc->pmat,&A);
2782:     MatGetLocalSize(A,&n,NULL);
2783:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2784:     n_isused = 0;
2785:     if (isused) {
2786:       ISGetLocalSize(isused,&n_isused);
2787:     }
          /* st: number of rows used by the preceding processes (inclusive scan minus the local contribution) */
2788:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2789:     st = st-n_isused;
2790:     if (n) {
2791:       const PetscInt *gidxs;

2793:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2794:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2795:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2796:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2797:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2798:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2799:     } else {
            /* empty local problem: empty local matrix and no columns */
2800:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2801:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2802:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2803:     }
          /* divudotp is a MATIS of global size M x N whose local matrix is loc_divudotp */
2804:     MatGetSize(pc->pmat,NULL,&N);
2805:     ISGetSize(row,&M);
2806:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2807:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2808:     ISDestroy(&row);
2809:     ISDestroy(&col);
2810:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2811:     MatSetType(pcbddc->divudotp,MATIS);
2812:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2813:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2814:     ISLocalToGlobalMappingDestroy(&rl2g);
2815:     ISLocalToGlobalMappingDestroy(&cl2g);
2816:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2817:     MatDestroy(&loc_divudotp);
2818:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2819:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2820:   }
2821:   ISDestroy(&zerodiag_save);

2823:   /* change of basis and p0 dofs */
2824:   if (has_null_pressures) {
2825:     IS             zerodiagc;
2826:     const PetscInt *idxs,*idxsc;
2827:     PetscInt       i,s,*nnz;

          /* zerodiagc: the dofs in [0,n) not listed in zerodiag */
2829:     ISGetLocalSize(zerodiag,&nz);
2830:     ISComplement(zerodiag,0,n,&zerodiagc);
2831:     ISGetIndices(zerodiagc,&idxsc);
2832:     /* local change of basis for pressures */
2833:     MatDestroy(&pcbddc->benign_change);
2834:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2835:     MatSetType(pcbddc->benign_change,MATAIJ);
2836:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2837:     PetscMalloc1(n,&nnz);
2838:     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2839:     for (i=0;i<pcbddc->benign_n;i++) {
2840:       PetscInt nzs,j;

2842:       ISGetLocalSize(zerodiag_subs[i],&nzs);
2843:       ISGetIndices(zerodiag_subs[i],&idxs);
2844:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2845:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2846:       ISRestoreIndices(zerodiag_subs[i],&idxs);
2847:     }
2848:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2849:     PetscFree(nnz);
2850:     /* set identity on velocities */
2851:     for (i=0;i<n-nz;i++) {
2852:       MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2853:     }
2854:     ISRestoreIndices(zerodiagc,&idxsc);
2855:     ISDestroy(&zerodiagc);
          /* one p0 dof (local index, global index, value) per accepted subdomain */
2856:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2857:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2858:     /* set change on pressures */
2859:     for (s=0;s<pcbddc->benign_n;s++) {
2860:       PetscScalar *array;
2861:       PetscInt    nzs;

2863:       ISGetLocalSize(zerodiag_subs[s],&nzs);
2864:       ISGetIndices(zerodiag_subs[s],&idxs);
            /* rows of the first nzs-1 pressures: 1 on the diagonal and 1 in the column of the last pressure */
2865:       for (i=0;i<nzs-1;i++) {
2866:         PetscScalar vals[2];
2867:         PetscInt    cols[2];

2869:         cols[0] = idxs[i];
2870:         cols[1] = idxs[nzs-1];
2871:         vals[0] = 1.;
2872:         vals[1] = 1.;
                /* a single row (cols[0]) is inserted: cols doubles as the row index array */
2873:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2874:       }
            /* row of the last pressure: -1 in the columns of the other pressures, 1 on the diagonal */
2875:       PetscMalloc1(nzs,&array);
2876:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2877:       array[nzs-1] = 1.;
2878:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2879:       /* store local idxs for p0 */
2880:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2881:       ISRestoreIndices(zerodiag_subs[s],&idxs);
2882:       PetscFree(array);
2883:     }
2884:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2885:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2886:     /* project if needed */
2887:     if (pcbddc->benign_change_explicit) {
2888:       Mat M;

            /* local_mat is replaced by the compressed version of the PtAP product with benign_change */
2890:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2891:       MatDestroy(&pcbddc->local_mat);
2892:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2893:       MatDestroy(&M);
2894:     }
2895:     /* store global idxs for p0 */
2896:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2897:   }
        /* the array of subdomain sets is now stored in pcbddc (it is destroyed at the beginning of the next call) */
2898:   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2899:   ISDestroy(&pressures);

2901:   /* determines if the coarse solver will be singular or not */
2902:   MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2903:   /* determines if the problem has subdomains with 0 pressure block */
2904:   have_null = (PetscBool)(!!pcbddc->benign_n);
2905:   MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2906:   *zerodiaglocal = zerodiag;
2907:   return(0);
2908: }

/*
   PCBDDCBenignGetOrSetP0 - Transfers the p0 values between a vector and pcbddc->benign_p0.

   Input Parameters:
+  pc  - the preconditioner; pc->data is accessed as PC_BDDC
.  v   - the vector; only read when get is true, written otherwise
-  get - if true, the entries of v are broadcast into pcbddc->benign_p0;
         if false, pcbddc->benign_p0 is reduced into v using MPIU_REPLACE

   Notes:
   The star forest pcbddc->benign_sf is created at the first call from the layout
   pc->pmat->rmap, with pcbddc->benign_n entries addressed by pcbddc->benign_p0_gidx.
   It is then reused by the following calls as long as it is not destroyed.
*/
2910: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2911: {
2912:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2913:   PetscScalar    *array;

        /* lazy creation of the star forest */
2917:   if (!pcbddc->benign_sf) {
2918:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2919:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2920:   }
2921:   if (get) {
          /* v -> benign_p0: v is accessed read-only, the cast only drops the const qualifier */
2922:     VecGetArrayRead(v,(const PetscScalar**)&array);
2923:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2924:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2925:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2926:   } else {
          /* benign_p0 -> v: the addressed entries of v are overwritten (MPIU_REPLACE) */
2927:     VecGetArray(v,&array);
2928:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2929:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2930:     VecRestoreArray(v,&array);
2931:   }
2932:   return(0);
2933: }

/*
   PCBDDCBenignPopOrPushB0 - Extracts (pop) the rows associated with the p0 dofs into
   pcbddc->benign_B0, or reinserts them (push) into pcbddc->local_mat.

   Input Parameters:
+  pc  - the preconditioner; pc->data is accessed as PC_BDDC
-  pop - if true, pcbddc->benign_B0 is (re)computed; if false, its rows are put back
         into pcbddc->local_mat

   Notes:
   Does nothing when pcbddc->benign_n is zero.
   pop with benign_change_explicit: benign_B0 is the submatrix of local_mat made of the
   rows benign_p0_lidx; those rows and columns of local_mat are then zeroed and 1.0 is
   placed on their diagonal.
   pop without benign_change_explicit: row i of benign_B0 contains the entries, larger
   than PETSC_SMALL in absolute value, of the local matrix of pc->pmat applied to the
   indicator vector of benign_zerodiag_subs[i]; local_mat is not modified.
   push is supported only with benign_change_explicit; otherwise an error is raised.
*/
2935: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2936: {
2937:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

2941:   /* TODO: add error checking
2942:     - avoid nested pop (or push) calls.
2943:     - cannot push before pop.
2944:     - cannot call this if pcbddc->local_mat is NULL
2945:   */
2946:   if (!pcbddc->benign_n) {
2947:     return(0);
2948:   }
2949:   if (pop) {
2950:     if (pcbddc->benign_change_explicit) {
2951:       IS       is_p0;
2952:       MatReuse reuse;

2954:       /* extract B_0 */
2955:       reuse = MAT_INITIAL_MATRIX;
2956:       if (pcbddc->benign_B0) {
2957:         reuse = MAT_REUSE_MATRIX;
2958:       }
2959:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2960:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2961:       /* remove rows and cols from local problem */
            /* the nonzero pattern is kept and insertions at new locations are allowed, since the push below reinserts the values */
2962:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2963:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2964:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2965:       ISDestroy(&is_p0);
2966:     } else {
2967:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
2968:       PetscScalar *vals;
2969:       PetscInt    i,n,*idxs_ins;

            /* work arrays of the size of the local vectors: column indices and values of one row of B0 */
2971:       VecGetLocalSize(matis->y,&n);
2972:       PetscMalloc2(n,&idxs_ins,n,&vals);
2973:       if (!pcbddc->benign_B0) {
2974:         PetscInt *nnz;
2975:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2976:         MatSetType(pcbddc->benign_B0,MATAIJ);
2977:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2978:         PetscMalloc1(pcbddc->benign_n,&nnz);
              /* preallocation of row i: local size minus the number of dofs in benign_zerodiag_subs[i] */
2979:         for (i=0;i<pcbddc->benign_n;i++) {
2980:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2981:           nnz[i] = n - nnz[i];
2982:         }
2983:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2984:         PetscFree(nnz);
2985:       }

2987:       for (i=0;i<pcbddc->benign_n;i++) {
2988:         PetscScalar *array;
2989:         PetscInt    *idxs,j,nz,cum;

              /* matis->x = indicator vector of the i-th set of zero-diagonal dofs */
2991:         VecSet(matis->x,0.);
2992:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2993:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2994:         for (j=0;j<nz;j++) vals[j] = 1.;
2995:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2996:         VecAssemblyBegin(matis->x);
2997:         VecAssemblyEnd(matis->x);
2998:         VecSet(matis->y,0.);
2999:         MatMult(matis->A,matis->x,matis->y);
3000:         VecGetArray(matis->y,&array);
              /* keep only the entries of the product above PETSC_SMALL; vals is reused as storage for them */
3001:         cum = 0;
3002:         for (j=0;j<n;j++) {
3003:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3004:             vals[cum] = array[j];
3005:             idxs_ins[cum] = j;
3006:             cum++;
3007:           }
3008:         }
3009:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3010:         VecRestoreArray(matis->y,&array);
3011:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3012:       }
3013:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3014:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3015:       PetscFree2(idxs_ins,vals);
3016:     }
3017:   } else { /* push */
3018:     if (pcbddc->benign_change_explicit) {
3019:       PetscInt i;

3021:       for (i=0;i<pcbddc->benign_n;i++) {
3022:         PetscScalar *B0_vals;
3023:         PetscInt    *B0_cols,B0_ncol;

              /* row i of B0 is inserted both as row and as column benign_p0_lidx[i] of local_mat;
                 the diagonal entry is then set to zero */
3025:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3026:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3027:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3028:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3029:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3030:       }
3031:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3032:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3033:     } else {
            /* without the explicit change of basis local_mat has not been modified by the pop */
3034:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
3035:     }
3036:   }
3037:   return(0);
3038: }

3040: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3041: {
3042:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3043:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3044:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3045:   PetscBLASInt    *B_iwork,*B_ifail;
3046:   PetscScalar     *work,lwork;
3047:   PetscScalar     *St,*S,*eigv;
3048:   PetscScalar     *Sarray,*Starray;
3049:   PetscReal       *eigs,thresh,lthresh,uthresh;
3050:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3051:   PetscBool       allocated_S_St;
3052: #if defined(PETSC_USE_COMPLEX)
3053:   PetscReal       *rwork;
3054: #endif
3055:   PetscErrorCode  ierr;

3058:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3059:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3060:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);

3062:   if (pcbddc->dbg_flag) {
3063:     PetscViewerFlush(pcbddc->dbg_viewer);
3064:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3065:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3066:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3067:   }

3069:   if (pcbddc->dbg_flag) {
3070:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3071:   }

3073:   /* max size of subsets */
3074:   mss = 0;
3075:   for (i=0;i<sub_schurs->n_subs;i++) {
3076:     PetscInt subset_size;

3078:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3079:     mss = PetscMax(mss,subset_size);
3080:   }

3082:   /* min/max and threshold */
3083:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3084:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3085:   nmax = PetscMax(nmin,nmax);
3086:   allocated_S_St = PETSC_FALSE;
3087:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3088:     allocated_S_St = PETSC_TRUE;
3089:   }

3091:   /* allocate lapack workspace */
3092:   cum = cum2 = 0;
3093:   maxneigs = 0;
3094:   for (i=0;i<sub_schurs->n_subs;i++) {
3095:     PetscInt n,subset_size;

3097:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3098:     n = PetscMin(subset_size,nmax);
3099:     cum += subset_size;
3100:     cum2 += subset_size*n;
3101:     maxneigs = PetscMax(maxneigs,n);
3102:   }
3103:   if (mss) {
3104:     if (sub_schurs->is_symmetric) {
3105:       PetscBLASInt B_itype = 1;
3106:       PetscBLASInt B_N = mss;
3107:       PetscReal    zero = 0.0;
3108:       PetscReal    eps = 0.0; /* dlamch? */

3110:       B_lwork = -1;
3111:       S = NULL;
3112:       St = NULL;
3113:       eigs = NULL;
3114:       eigv = NULL;
3115:       B_iwork = NULL;
3116:       B_ifail = NULL;
3117: #if defined(PETSC_USE_COMPLEX)
3118:       rwork = NULL;
3119: #endif
3120:       thresh = 1.0;
3121:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3122: #if defined(PETSC_USE_COMPLEX)
3123:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3124: #else
3125:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3126: #endif
3127:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3128:       PetscFPTrapPop();
3129:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3130:   } else {
3131:     lwork = 0;
3132:   }

3134:   nv = 0;
3135:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3136:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3137:   }
3138:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3139:   if (allocated_S_St) {
3140:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3141:   }
3142:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3143: #if defined(PETSC_USE_COMPLEX)
3144:   PetscMalloc1(7*mss,&rwork);
3145: #endif
3146:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3147:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3148:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3149:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3150:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3151:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3153:   maxneigs = 0;
3154:   cum = cumarray = 0;
3155:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3156:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3157:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3158:     const PetscInt *idxs;

3160:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3161:     for (cum=0;cum<nv;cum++) {
3162:       pcbddc->adaptive_constraints_n[cum] = 1;
3163:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3164:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3165:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3166:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3167:     }
3168:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3169:   }

3171:   if (mss) { /* multilevel */
3172:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3173:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3174:   }

3176:   lthresh = pcbddc->adaptive_threshold[0];
3177:   uthresh = pcbddc->adaptive_threshold[1];
3178:   for (i=0;i<sub_schurs->n_subs;i++) {
3179:     const PetscInt *idxs;
3180:     PetscReal      upper,lower;
3181:     PetscInt       j,subset_size,eigs_start = 0;
3182:     PetscBLASInt   B_N;
3183:     PetscBool      same_data = PETSC_FALSE;
3184:     PetscBool      scal = PETSC_FALSE;

3186:     if (pcbddc->use_deluxe_scaling) {
3187:       upper = PETSC_MAX_REAL;
3188:       lower = uthresh;
3189:     } else {
3190:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3191:       upper = 1./uthresh;
3192:       lower = 0.;
3193:     }
3194:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3195:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3196:     PetscBLASIntCast(subset_size,&B_N);
3197:     /* this is experimental: we assume the dofs have been properly grouped to have
3198:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3199:     if (!sub_schurs->is_posdef) {
3200:       Mat T;

3202:       for (j=0;j<subset_size;j++) {
3203:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3204:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3205:           MatScale(T,-1.0);
3206:           MatDestroy(&T);
3207:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3208:           MatScale(T,-1.0);
3209:           MatDestroy(&T);
3210:           if (sub_schurs->change_primal_sub) {
3211:             PetscInt       nz,k;
3212:             const PetscInt *idxs;

3214:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3215:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3216:             for (k=0;k<nz;k++) {
3217:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3218:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3219:             }
3220:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3221:           }
3222:           scal = PETSC_TRUE;
3223:           break;
3224:         }
3225:       }
3226:     }

3228:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3229:       if (sub_schurs->is_symmetric) {
3230:         PetscInt j,k;
3231:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3232:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3233:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3234:         }
3235:         for (j=0;j<subset_size;j++) {
3236:           for (k=j;k<subset_size;k++) {
3237:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3238:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3239:           }
3240:         }
3241:       } else {
3242:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3243:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3244:       }
3245:     } else {
3246:       S = Sarray + cumarray;
3247:       St = Starray + cumarray;
3248:     }
3249:     /* see if we can save some work */
3250:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3251:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3252:     }

3254:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3255:       B_neigs = 0;
3256:     } else {
3257:       if (sub_schurs->is_symmetric) {
3258:         PetscBLASInt B_itype = 1;
3259:         PetscBLASInt B_IL, B_IU;
3260:         PetscReal    eps = -1.0; /* dlamch? */
3261:         PetscInt     nmin_s;
3262:         PetscBool    compute_range;

3264:         B_neigs = 0;
3265:         compute_range = (PetscBool)!same_data;
3266:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3268:         if (pcbddc->dbg_flag) {
3269:           PetscInt nc = 0;

3271:           if (sub_schurs->change_primal_sub) {
3272:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3273:           }
3274:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3275:         }

3277:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3278:         if (compute_range) {

3280:           /* ask for eigenvalues larger than thresh */
3281:           if (sub_schurs->is_posdef) {
3282: #if defined(PETSC_USE_COMPLEX)
3283:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3284: #else
3285:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3286: #endif
3287:           } else { /* no theory so far, but it works nicely */
3288:             PetscInt  recipe = 0,recipe_m = 1;
3289:             PetscReal bb[2];

3291:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3292:             switch (recipe) {
3293:             case 0:
3294:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3295:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3296: #if defined(PETSC_USE_COMPLEX)
3297:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3298: #else
3299:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3300: #endif
3301:               break;
3302:             case 1:
3303:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3304: #if defined(PETSC_USE_COMPLEX)
3305:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3306: #else
3307:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3308: #endif
3309:               if (!scal) {
3310:                 PetscBLASInt B_neigs2 = 0;

3312:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3313:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3314:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3315: #if defined(PETSC_USE_COMPLEX)
3316:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3317: #else
3318:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3319: #endif
3320:                 B_neigs += B_neigs2;
3321:               }
3322:               break;
3323:             case 2:
3324:               if (scal) {
3325:                 bb[0] = PETSC_MIN_REAL;
3326:                 bb[1] = 0;
3327: #if defined(PETSC_USE_COMPLEX)
3328:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3329: #else
3330:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3331: #endif
3332:               } else {
3333:                 PetscBLASInt B_neigs2 = 0;
3334:                 PetscBool    import = PETSC_FALSE;

3336:                 lthresh = PetscMax(lthresh,0.0);
3337:                 if (lthresh > 0.0) {
3338:                   bb[0] = PETSC_MIN_REAL;
3339:                   bb[1] = lthresh*lthresh;

3341:                   import = PETSC_TRUE;
3342: #if defined(PETSC_USE_COMPLEX)
3343:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3344: #else
3345:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3346: #endif
3347:                 }
3348:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3349:                 bb[1] = PETSC_MAX_REAL;
3350:                 if (import) {
3351:                   PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3352:                   PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3353:                 }
3354: #if defined(PETSC_USE_COMPLEX)
3355:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3356: #else
3357:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3358: #endif
3359:                 B_neigs += B_neigs2;
3360:               }
3361:               break;
3362:             case 3:
3363:               if (scal) {
3364:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3365:               } else {
3366:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3367:               }
3368:               if (!scal) {
3369:                 bb[0] = uthresh;
3370:                 bb[1] = PETSC_MAX_REAL;
3371: #if defined(PETSC_USE_COMPLEX)
3372:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3373: #else
3374:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3375: #endif
3376:               }
3377:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3378:                 PetscBLASInt B_neigs2 = 0;

3380:                 B_IL = 1;
3381:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3382:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3383:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3384: #if defined(PETSC_USE_COMPLEX)
3385:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3386: #else
3387:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3388: #endif
3389:                 B_neigs += B_neigs2;
3390:               }
3391:               break;
3392:             case 4:
3393:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3394: #if defined(PETSC_USE_COMPLEX)
3395:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3396: #else
3397:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3398: #endif
3399:               {
3400:                 PetscBLASInt B_neigs2 = 0;

3402:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3403:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3404:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3405: #if defined(PETSC_USE_COMPLEX)
3406:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3407: #else
3408:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3409: #endif
3410:                 B_neigs += B_neigs2;
3411:               }
3412:               break;
3413:             case 5: /* same as before: first compute all eigenvalues, then filter */
3414: #if defined(PETSC_USE_COMPLEX)
3415:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3416: #else
3417:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3418: #endif
3419:               {
3420:                 PetscInt e,k,ne;
3421:                 for (e=0,ne=0;e<B_neigs;e++) {
3422:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3423:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3424:                     eigs[ne] = eigs[e];
3425:                     ne++;
3426:                   }
3427:                 }
3428:                 PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3429:                 B_neigs = ne;
3430:               }
3431:               break;
3432:             default:
3433:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3434:               break;
3435:             }
3436:           }
3437:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3438:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3439:           B_IL = 1;
3440: #if defined(PETSC_USE_COMPLEX)
3441:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3442: #else
3443:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3444: #endif
3445:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3446:           PetscInt k;
3447:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3448:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3449:           PetscBLASIntCast(nmax,&B_neigs);
3450:           nmin = nmax;
3451:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3452:           for (k=0;k<nmax;k++) {
3453:             eigs[k] = 1./PETSC_SMALL;
3454:             eigv[k*(subset_size+1)] = 1.0;
3455:           }
3456:         }
3457:         PetscFPTrapPop();
3458:         if (B_ierr) {
3459:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3460:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3461:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3462:         }

3464:         if (B_neigs > nmax) {
3465:           if (pcbddc->dbg_flag) {
3466:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3467:           }
3468:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3469:           B_neigs = nmax;
3470:         }

3472:         nmin_s = PetscMin(nmin,B_N);
3473:         if (B_neigs < nmin_s) {
3474:           PetscBLASInt B_neigs2 = 0;

3476:           if (pcbddc->use_deluxe_scaling) {
3477:             if (scal) {
3478:               B_IU = nmin_s;
3479:               B_IL = B_neigs + 1;
3480:             } else {
3481:               B_IL = B_N - nmin_s + 1;
3482:               B_IU = B_N - B_neigs;
3483:             }
3484:           } else {
3485:             B_IL = B_neigs + 1;
3486:             B_IU = nmin_s;
3487:           }
3488:           if (pcbddc->dbg_flag) {
3489:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3490:           }
3491:           if (sub_schurs->is_symmetric) {
3492:             PetscInt j,k;
3493:             for (j=0;j<subset_size;j++) {
3494:               for (k=j;k<subset_size;k++) {
3495:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3496:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3497:               }
3498:             }
3499:           } else {
3500:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3501:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3502:           }
3503:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3504: #if defined(PETSC_USE_COMPLEX)
3505:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3506: #else
3507:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3508: #endif
3509:           PetscFPTrapPop();
3510:           B_neigs += B_neigs2;
3511:         }
3512:         if (B_ierr) {
3513:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3514:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3515:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3516:         }
3517:         if (pcbddc->dbg_flag) {
3518:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3519:           for (j=0;j<B_neigs;j++) {
3520:             if (eigs[j] == 0.0) {
3521:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3522:             } else {
3523:               if (pcbddc->use_deluxe_scaling) {
3524:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3525:               } else {
3526:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3527:               }
3528:             }
3529:           }
3530:         }
3531:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3532:     }
3533:     /* change the basis back to the original one */
3534:     if (sub_schurs->change) {
3535:       Mat change,phi,phit;

3537:       if (pcbddc->dbg_flag > 2) {
3538:         PetscInt ii;
3539:         for (ii=0;ii<B_neigs;ii++) {
3540:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3541:           for (j=0;j<B_N;j++) {
3542: #if defined(PETSC_USE_COMPLEX)
3543:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3544:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3545:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3546: #else
3547:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3548: #endif
3549:           }
3550:         }
3551:       }
3552:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3553:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3554:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3555:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3556:       MatDestroy(&phit);
3557:       MatDestroy(&phi);
3558:     }
3559:     maxneigs = PetscMax(B_neigs,maxneigs);
3560:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3561:     if (B_neigs) {
3562:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3564:       if (pcbddc->dbg_flag > 1) {
3565:         PetscInt ii;
3566:         for (ii=0;ii<B_neigs;ii++) {
3567:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3568:           for (j=0;j<B_N;j++) {
3569: #if defined(PETSC_USE_COMPLEX)
3570:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3571:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3572:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3573: #else
3574:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3575: #endif
3576:           }
3577:         }
3578:       }
3579:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3580:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3581:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3582:       cum++;
3583:     }
3584:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3585:     /* shift for next computation */
3586:     cumarray += subset_size*subset_size;
3587:   }
3588:   if (pcbddc->dbg_flag) {
3589:     PetscViewerFlush(pcbddc->dbg_viewer);
3590:   }

3592:   if (mss) {
3593:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3594:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3595:     /* destroy matrices (junk) */
3596:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3597:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3598:   }
3599:   if (allocated_S_St) {
3600:     PetscFree2(S,St);
3601:   }
3602:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3603: #if defined(PETSC_USE_COMPLEX)
3604:   PetscFree(rwork);
3605: #endif
3606:   if (pcbddc->dbg_flag) {
3607:     PetscInt maxneigs_r;
3608:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3609:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3610:   }
3611:   return(0);
3612: }

3614: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3615: {
3616:   PetscScalar    *coarse_submat_vals;

3620:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3621:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3622:   PCBDDCSetUpLocalScatters(pc);

3624:   /* Setup local neumann solver ksp_R */
3625:   /* PCBDDCSetUpLocalScatters should be called first! */
3626:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3628:   /*
3629:      Setup local correction and local part of coarse basis.
3630:      Gives back the dense local part of the coarse matrix in column major ordering
3631:   */
3632:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3634:   /* Compute total number of coarse nodes and setup coarse solver */
3635:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3637:   /* free */
3638:   PetscFree(coarse_submat_vals);
3639:   return(0);
3640: }

3642: PetscErrorCode PCBDDCResetCustomization(PC pc)
3643: {
3644:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3648:   ISDestroy(&pcbddc->user_primal_vertices);
3649:   ISDestroy(&pcbddc->user_primal_vertices_local);
3650:   ISDestroy(&pcbddc->NeumannBoundaries);
3651:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3652:   ISDestroy(&pcbddc->DirichletBoundaries);
3653:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3654:   PetscFree(pcbddc->onearnullvecs_state);
3655:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3656:   PCBDDCSetDofsSplitting(pc,0,NULL);
3657:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3658:   return(0);
3659: }

3661: PetscErrorCode PCBDDCResetTopography(PC pc)
3662: {
3663:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3664:   PetscInt       i;

3668:   MatDestroy(&pcbddc->nedcG);
3669:   ISDestroy(&pcbddc->nedclocal);
3670:   MatDestroy(&pcbddc->discretegradient);
3671:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3672:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3673:   MatDestroy(&pcbddc->switch_static_change);
3674:   VecDestroy(&pcbddc->work_change);
3675:   MatDestroy(&pcbddc->ConstraintMatrix);
3676:   MatDestroy(&pcbddc->divudotp);
3677:   ISDestroy(&pcbddc->divudotp_vl2l);
3678:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3679:   for (i=0;i<pcbddc->n_local_subs;i++) {
3680:     ISDestroy(&pcbddc->local_subs[i]);
3681:   }
3682:   pcbddc->n_local_subs = 0;
3683:   PetscFree(pcbddc->local_subs);
3684:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3685:   pcbddc->graphanalyzed        = PETSC_FALSE;
3686:   pcbddc->recompute_topography = PETSC_TRUE;
3687:   pcbddc->corner_selected      = PETSC_FALSE;
3688:   return(0);
3689: }

3691: PetscErrorCode PCBDDCResetSolvers(PC pc)
3692: {
3693:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3697:   VecDestroy(&pcbddc->coarse_vec);
3698:   if (pcbddc->coarse_phi_B) {
3699:     PetscScalar *array;
3700:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3701:     PetscFree(array);
3702:   }
3703:   MatDestroy(&pcbddc->coarse_phi_B);
3704:   MatDestroy(&pcbddc->coarse_phi_D);
3705:   MatDestroy(&pcbddc->coarse_psi_B);
3706:   MatDestroy(&pcbddc->coarse_psi_D);
3707:   VecDestroy(&pcbddc->vec1_P);
3708:   VecDestroy(&pcbddc->vec1_C);
3709:   MatDestroy(&pcbddc->local_auxmat2);
3710:   MatDestroy(&pcbddc->local_auxmat1);
3711:   VecDestroy(&pcbddc->vec1_R);
3712:   VecDestroy(&pcbddc->vec2_R);
3713:   ISDestroy(&pcbddc->is_R_local);
3714:   VecScatterDestroy(&pcbddc->R_to_B);
3715:   VecScatterDestroy(&pcbddc->R_to_D);
3716:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3717:   KSPReset(pcbddc->ksp_D);
3718:   KSPReset(pcbddc->ksp_R);
3719:   KSPReset(pcbddc->coarse_ksp);
3720:   MatDestroy(&pcbddc->local_mat);
3721:   PetscFree(pcbddc->primal_indices_local_idxs);
3722:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3723:   PetscFree(pcbddc->global_primal_indices);
3724:   ISDestroy(&pcbddc->coarse_subassembling);
3725:   MatDestroy(&pcbddc->benign_change);
3726:   VecDestroy(&pcbddc->benign_vec);
3727:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3728:   MatDestroy(&pcbddc->benign_B0);
3729:   PetscSFDestroy(&pcbddc->benign_sf);
3730:   if (pcbddc->benign_zerodiag_subs) {
3731:     PetscInt i;
3732:     for (i=0;i<pcbddc->benign_n;i++) {
3733:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3734:     }
3735:     PetscFree(pcbddc->benign_zerodiag_subs);
3736:   }
3737:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3738:   return(0);
3739: }

3741: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3742: {
3743:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3744:   PC_IS          *pcis = (PC_IS*)pc->data;
3745:   VecType        impVecType;
3746:   PetscInt       n_constraints,n_R,old_size;

3750:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3751:   n_R = pcis->n - pcbddc->n_vertices;
3752:   VecGetType(pcis->vec1_N,&impVecType);
3753:   /* local work vectors (try to avoid unneeded work)*/
3754:   /* R nodes */
3755:   old_size = -1;
3756:   if (pcbddc->vec1_R) {
3757:     VecGetSize(pcbddc->vec1_R,&old_size);
3758:   }
3759:   if (n_R != old_size) {
3760:     VecDestroy(&pcbddc->vec1_R);
3761:     VecDestroy(&pcbddc->vec2_R);
3762:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3763:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3764:     VecSetType(pcbddc->vec1_R,impVecType);
3765:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3766:   }
3767:   /* local primal dofs */
3768:   old_size = -1;
3769:   if (pcbddc->vec1_P) {
3770:     VecGetSize(pcbddc->vec1_P,&old_size);
3771:   }
3772:   if (pcbddc->local_primal_size != old_size) {
3773:     VecDestroy(&pcbddc->vec1_P);
3774:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3775:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3776:     VecSetType(pcbddc->vec1_P,impVecType);
3777:   }
3778:   /* local explicit constraints */
3779:   old_size = -1;
3780:   if (pcbddc->vec1_C) {
3781:     VecGetSize(pcbddc->vec1_C,&old_size);
3782:   }
3783:   if (n_constraints && n_constraints != old_size) {
3784:     VecDestroy(&pcbddc->vec1_C);
3785:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3786:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3787:     VecSetType(pcbddc->vec1_C,impVecType);
3788:   }
3789:   return(0);
3790: }

3792: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3793: {
3794:   PetscErrorCode  ierr;
3795:   /* pointers to pcis and pcbddc */
3796:   PC_IS*          pcis = (PC_IS*)pc->data;
3797:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3798:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3799:   /* submatrices of local problem */
3800:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3801:   /* submatrices of local coarse problem */
3802:   Mat             S_VV,S_CV,S_VC,S_CC;
3803:   /* working matrices */
3804:   Mat             C_CR;
3805:   /* additional working stuff */
3806:   PC              pc_R;
3807:   Mat             F,Brhs = NULL;
3808:   Vec             dummy_vec;
3809:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3810:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3811:   PetscScalar     *work;
3812:   PetscInt        *idx_V_B;
3813:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3814:   PetscInt        i,n_R,n_D,n_B;

3816:   /* some shortcuts to scalars */
3817:   PetscScalar     one=1.0,m_one=-1.0;

3820:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");

3822:   /* Set Non-overlapping dimensions */
3823:   n_vertices = pcbddc->n_vertices;
3824:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3825:   n_B = pcis->n_B;
3826:   n_D = pcis->n - n_B;
3827:   n_R = pcis->n - n_vertices;

3829:   /* vertices in boundary numbering */
3830:   PetscMalloc1(n_vertices,&idx_V_B);
3831:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3832:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);

3834:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3835:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3836:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3837:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3838:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3839:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3840:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3841:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3842:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3843:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3845:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3846:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3847:   PCSetUp(pc_R);
3848:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3849:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3850:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3851:   lda_rhs = n_R;
3852:   need_benign_correction = PETSC_FALSE;
3853:   if (isLU || isILU || isCHOL) {
3854:     PCFactorGetMatrix(pc_R,&F);
3855:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3856:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3857:     MatFactorType      type;

3859:     F = reuse_solver->F;
3860:     MatGetFactorType(F,&type);
3861:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3862:     MatGetSize(F,&lda_rhs,NULL);
3863:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3864:   } else {
3865:     F = NULL;
3866:   }

3868:   /* determine if we can use a sparse right-hand side */
3869:   sparserhs = PETSC_FALSE;
3870:   if (F) {
3871:     MatSolverType solver;

3873:     MatFactorGetSolverType(F,&solver);
3874:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3875:   }

3877:   /* allocate workspace */
3878:   n = 0;
3879:   if (n_constraints) {
3880:     n += lda_rhs*n_constraints;
3881:   }
3882:   if (n_vertices) {
3883:     n = PetscMax(2*lda_rhs*n_vertices,n);
3884:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3885:   }
3886:   if (!pcbddc->symmetric_primal) {
3887:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3888:   }
3889:   PetscMalloc1(n,&work);

3891:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3892:   dummy_vec = NULL;
3893:   if (need_benign_correction && lda_rhs != n_R && F) {
3894:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3895:   }

3897:   /* Precompute quantities needed for preprocessing and application of BDDC */
3898:   if (n_constraints) {
3899:     Mat         M3,C_B;
3900:     IS          is_aux;
3901:     PetscScalar *array,*array2;

3903:     MatDestroy(&pcbddc->local_auxmat1);
3904:     MatDestroy(&pcbddc->local_auxmat2);

3906:     /* Extract constraints on R nodes: C_{CR}  */
3907:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3908:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3909:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3911:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3912:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3913:     if (!sparserhs) {
3914:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3915:       for (i=0;i<n_constraints;i++) {
3916:         const PetscScalar *row_cmat_values;
3917:         const PetscInt    *row_cmat_indices;
3918:         PetscInt          size_of_constraint,j;

3920:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3921:         for (j=0;j<size_of_constraint;j++) {
3922:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3923:         }
3924:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3925:       }
3926:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3927:     } else {
3928:       Mat tC_CR;

3930:       MatScale(C_CR,-1.0);
3931:       if (lda_rhs != n_R) {
3932:         PetscScalar *aa;
3933:         PetscInt    r,*ii,*jj;
3934:         PetscBool   done;

3936:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3937:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3938:         MatSeqAIJGetArray(C_CR,&aa);
3939:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3940:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3941:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3942:       } else {
3943:         PetscObjectReference((PetscObject)C_CR);
3944:         tC_CR = C_CR;
3945:       }
3946:       MatCreateTranspose(tC_CR,&Brhs);
3947:       MatDestroy(&tC_CR);
3948:     }
3949:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3950:     if (F) {
3951:       if (need_benign_correction) {
3952:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3954:         /* rhs is already zero on interior dofs, no need to change the rhs */
3955:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3956:       }
3957:       MatMatSolve(F,Brhs,local_auxmat2_R);
3958:       if (need_benign_correction) {
3959:         PetscScalar        *marr;
3960:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3962:         MatDenseGetArray(local_auxmat2_R,&marr);
3963:         if (lda_rhs != n_R) {
3964:           for (i=0;i<n_constraints;i++) {
3965:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3966:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3967:             VecResetArray(dummy_vec);
3968:           }
3969:         } else {
3970:           for (i=0;i<n_constraints;i++) {
3971:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3972:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3973:             VecResetArray(pcbddc->vec1_R);
3974:           }
3975:         }
3976:         MatDenseRestoreArray(local_auxmat2_R,&marr);
3977:       }
3978:     } else {
3979:       PetscScalar *marr;

3981:       MatDenseGetArray(local_auxmat2_R,&marr);
3982:       for (i=0;i<n_constraints;i++) {
3983:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3984:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3985:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3986:         VecResetArray(pcbddc->vec1_R);
3987:         VecResetArray(pcbddc->vec2_R);
3988:       }
3989:       MatDenseRestoreArray(local_auxmat2_R,&marr);
3990:     }
3991:     if (sparserhs) {
3992:       MatScale(C_CR,-1.0);
3993:     }
3994:     MatDestroy(&Brhs);
3995:     if (!pcbddc->switch_static) {
3996:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3997:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
3998:       MatDenseGetArray(local_auxmat2_R,&array2);
3999:       for (i=0;i<n_constraints;i++) {
4000:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4001:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
4002:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4003:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4004:         VecResetArray(pcis->vec1_B);
4005:         VecResetArray(pcbddc->vec1_R);
4006:       }
4007:       MatDenseRestoreArray(local_auxmat2_R,&array2);
4008:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4009:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4010:     } else {
4011:       if (lda_rhs != n_R) {
4012:         IS dummy;

4014:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4015:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4016:         ISDestroy(&dummy);
4017:       } else {
4018:         PetscObjectReference((PetscObject)local_auxmat2_R);
4019:         pcbddc->local_auxmat2 = local_auxmat2_R;
4020:       }
4021:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4022:     }
4023:     ISDestroy(&is_aux);
4024:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
4025:     MatScale(M3,m_one);
4026:     if (isCHOL) {
4027:       MatCholeskyFactor(M3,NULL,NULL);
4028:     } else {
4029:       MatLUFactor(M3,NULL,NULL,NULL);
4030:     }
4031:     MatSeqDenseInvertFactors_Private(M3);
4032:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4033:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4034:     MatDestroy(&C_B);
4035:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4036:     MatDestroy(&M3);
4037:   }

4039:   /* Get submatrices from subdomain matrix */
4040:   if (n_vertices) {
4041:     IS        is_aux;
4042:     PetscBool isseqaij;

4044:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4045:       IS tis;

4047:       ISDuplicate(pcbddc->is_R_local,&tis);
4048:       ISSort(tis);
4049:       ISComplement(tis,0,pcis->n,&is_aux);
4050:       ISDestroy(&tis);
4051:     } else {
4052:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4053:     }
4054:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4055:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4056:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4057:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4058:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4059:     }
4060:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4061:     ISDestroy(&is_aux);
4062:   }

4064:   /* Matrix of coarse basis functions (local) */
4065:   if (pcbddc->coarse_phi_B) {
4066:     PetscInt on_B,on_primal,on_D=n_D;
4067:     if (pcbddc->coarse_phi_D) {
4068:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4069:     }
4070:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4071:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4072:       PetscScalar *marray;

4074:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4075:       PetscFree(marray);
4076:       MatDestroy(&pcbddc->coarse_phi_B);
4077:       MatDestroy(&pcbddc->coarse_psi_B);
4078:       MatDestroy(&pcbddc->coarse_phi_D);
4079:       MatDestroy(&pcbddc->coarse_psi_D);
4080:     }
4081:   }

4083:   if (!pcbddc->coarse_phi_B) {
4084:     PetscScalar *marr;

4086:     /* memory size */
4087:     n = n_B*pcbddc->local_primal_size;
4088:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4089:     if (!pcbddc->symmetric_primal) n *= 2;
4090:     PetscCalloc1(n,&marr);
4091:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4092:     marr += n_B*pcbddc->local_primal_size;
4093:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4094:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4095:       marr += n_D*pcbddc->local_primal_size;
4096:     }
4097:     if (!pcbddc->symmetric_primal) {
4098:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4099:       marr += n_B*pcbddc->local_primal_size;
4100:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4101:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4102:       }
4103:     } else {
4104:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4105:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4106:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4107:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4108:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4109:       }
4110:     }
4111:   }

4113:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4114:   p0_lidx_I = NULL;
4115:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4116:     const PetscInt *idxs;

4118:     ISGetIndices(pcis->is_I_local,&idxs);
4119:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4120:     for (i=0;i<pcbddc->benign_n;i++) {
4121:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4122:     }
4123:     ISRestoreIndices(pcis->is_I_local,&idxs);
4124:   }

4126:   /* vertices */
4127:   if (n_vertices) {
4128:     PetscBool restoreavr = PETSC_FALSE;

4130:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4132:     if (n_R) {
4133:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4134:       PetscBLASInt B_N,B_one = 1;
4135:       PetscScalar  *x,*y;

4137:       MatScale(A_RV,m_one);
4138:       if (need_benign_correction) {
4139:         ISLocalToGlobalMapping RtoN;
4140:         IS                     is_p0;
4141:         PetscInt               *idxs_p0,n;

4143:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4144:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4145:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4146:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
4147:         ISLocalToGlobalMappingDestroy(&RtoN);
4148:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4149:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4150:         ISDestroy(&is_p0);
4151:       }

4153:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4154:       if (!sparserhs || need_benign_correction) {
4155:         if (lda_rhs == n_R) {
4156:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4157:         } else {
4158:           PetscScalar    *av,*array;
4159:           const PetscInt *xadj,*adjncy;
4160:           PetscInt       n;
4161:           PetscBool      flg_row;

4163:           array = work+lda_rhs*n_vertices;
4164:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4165:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4166:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4167:           MatSeqAIJGetArray(A_RV,&av);
4168:           for (i=0;i<n;i++) {
4169:             PetscInt j;
4170:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4171:           }
4172:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4173:           MatDestroy(&A_RV);
4174:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4175:         }
4176:         if (need_benign_correction) {
4177:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4178:           PetscScalar        *marr;

4180:           MatDenseGetArray(A_RV,&marr);
4181:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4183:                  | 0 0  0 | (V)
4184:              L = | 0 0 -1 | (P-p0)
4185:                  | 0 0 -1 | (p0)

4187:           */
4188:           for (i=0;i<reuse_solver->benign_n;i++) {
4189:             const PetscScalar *vals;
4190:             const PetscInt    *idxs,*idxs_zero;
4191:             PetscInt          n,j,nz;

4193:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4194:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4195:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4196:             for (j=0;j<n;j++) {
4197:               PetscScalar val = vals[j];
4198:               PetscInt    k,col = idxs[j];
4199:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4200:             }
4201:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4202:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4203:           }
4204:           MatDenseRestoreArray(A_RV,&marr);
4205:         }
4206:         PetscObjectReference((PetscObject)A_RV);
4207:         Brhs = A_RV;
4208:       } else {
4209:         Mat tA_RVT,A_RVT;

4211:         if (!pcbddc->symmetric_primal) {
4212:           /* A_RV already scaled by -1 */
4213:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4214:         } else {
4215:           restoreavr = PETSC_TRUE;
4216:           MatScale(A_VR,-1.0);
4217:           PetscObjectReference((PetscObject)A_VR);
4218:           A_RVT = A_VR;
4219:         }
4220:         if (lda_rhs != n_R) {
4221:           PetscScalar *aa;
4222:           PetscInt    r,*ii,*jj;
4223:           PetscBool   done;

4225:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4226:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4227:           MatSeqAIJGetArray(A_RVT,&aa);
4228:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4229:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4230:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4231:         } else {
4232:           PetscObjectReference((PetscObject)A_RVT);
4233:           tA_RVT = A_RVT;
4234:         }
4235:         MatCreateTranspose(tA_RVT,&Brhs);
4236:         MatDestroy(&tA_RVT);
4237:         MatDestroy(&A_RVT);
4238:       }
4239:       if (F) {
4240:         /* need to correct the rhs */
4241:         if (need_benign_correction) {
4242:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4243:           PetscScalar        *marr;

4245:           MatDenseGetArray(Brhs,&marr);
4246:           if (lda_rhs != n_R) {
4247:             for (i=0;i<n_vertices;i++) {
4248:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4249:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4250:               VecResetArray(dummy_vec);
4251:             }
4252:           } else {
4253:             for (i=0;i<n_vertices;i++) {
4254:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4255:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4256:               VecResetArray(pcbddc->vec1_R);
4257:             }
4258:           }
4259:           MatDenseRestoreArray(Brhs,&marr);
4260:         }
4261:         MatMatSolve(F,Brhs,A_RRmA_RV);
4262:         if (restoreavr) {
4263:           MatScale(A_VR,-1.0);
4264:         }
4265:         /* need to correct the solution */
4266:         if (need_benign_correction) {
4267:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4268:           PetscScalar        *marr;

4270:           MatDenseGetArray(A_RRmA_RV,&marr);
4271:           if (lda_rhs != n_R) {
4272:             for (i=0;i<n_vertices;i++) {
4273:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4274:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4275:               VecResetArray(dummy_vec);
4276:             }
4277:           } else {
4278:             for (i=0;i<n_vertices;i++) {
4279:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4280:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4281:               VecResetArray(pcbddc->vec1_R);
4282:             }
4283:           }
4284:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4285:         }
4286:       } else {
4287:         MatDenseGetArray(Brhs,&y);
4288:         for (i=0;i<n_vertices;i++) {
4289:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4290:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4291:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4292:           VecResetArray(pcbddc->vec1_R);
4293:           VecResetArray(pcbddc->vec2_R);
4294:         }
4295:         MatDenseRestoreArray(Brhs,&y);
4296:       }
4297:       MatDestroy(&A_RV);
4298:       MatDestroy(&Brhs);
4299:       /* S_VV and S_CV */
4300:       if (n_constraints) {
4301:         Mat B;

4303:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4304:         for (i=0;i<n_vertices;i++) {
4305:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4306:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4307:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4308:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4309:           VecResetArray(pcis->vec1_B);
4310:           VecResetArray(pcbddc->vec1_R);
4311:         }
4312:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4313:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4314:         MatDestroy(&B);
4315:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4316:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4317:         MatScale(S_CV,m_one);
4318:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4319:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4320:         MatDestroy(&B);
4321:       }
4322:       if (lda_rhs != n_R) {
4323:         MatDestroy(&A_RRmA_RV);
4324:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4325:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4326:       }
4327:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4328:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4329:       if (need_benign_correction) {
4330:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4331:         PetscScalar      *marr,*sums;

4333:         PetscMalloc1(n_vertices,&sums);
4334:         MatDenseGetArray(S_VVt,&marr);
4335:         for (i=0;i<reuse_solver->benign_n;i++) {
4336:           const PetscScalar *vals;
4337:           const PetscInt    *idxs,*idxs_zero;
4338:           PetscInt          n,j,nz;

4340:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4341:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4342:           for (j=0;j<n_vertices;j++) {
4343:             PetscInt k;
4344:             sums[j] = 0.;
4345:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4346:           }
4347:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4348:           for (j=0;j<n;j++) {
4349:             PetscScalar val = vals[j];
4350:             PetscInt k;
4351:             for (k=0;k<n_vertices;k++) {
4352:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4353:             }
4354:           }
4355:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4356:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4357:         }
4358:         PetscFree(sums);
4359:         MatDenseRestoreArray(S_VVt,&marr);
4360:         MatDestroy(&A_RV_bcorr);
4361:       }
4362:       MatDestroy(&A_RRmA_RV);
4363:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4364:       MatDenseGetArray(A_VV,&x);
4365:       MatDenseGetArray(S_VVt,&y);
4366:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4367:       MatDenseRestoreArray(A_VV,&x);
4368:       MatDenseRestoreArray(S_VVt,&y);
4369:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4370:       MatDestroy(&S_VVt);
4371:     } else {
4372:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4373:     }
4374:     MatDestroy(&A_VV);

4376:     /* coarse basis functions */
4377:     for (i=0;i<n_vertices;i++) {
4378:       PetscScalar *y;

4380:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4381:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4382:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4383:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4384:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4385:       y[n_B*i+idx_V_B[i]] = 1.0;
4386:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4387:       VecResetArray(pcis->vec1_B);

4389:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4390:         PetscInt j;

4392:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4393:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4394:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4395:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4396:         VecResetArray(pcis->vec1_D);
4397:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4398:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4399:       }
4400:       VecResetArray(pcbddc->vec1_R);
4401:     }
4402:     /* if n_R == 0 the object is not destroyed */
4403:     MatDestroy(&A_RV);
4404:   }
4405:   VecDestroy(&dummy_vec);

4407:   if (n_constraints) {
4408:     Mat B;

4410:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4411:     MatScale(S_CC,m_one);
4412:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4413:     MatScale(S_CC,m_one);
4414:     if (n_vertices) {
4415:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4416:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4417:       } else {
4418:         Mat S_VCt;

4420:         if (lda_rhs != n_R) {
4421:           MatDestroy(&B);
4422:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4423:           MatSeqDenseSetLDA(B,lda_rhs);
4424:         }
4425:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4426:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4427:         MatDestroy(&S_VCt);
4428:       }
4429:     }
4430:     MatDestroy(&B);
4431:     /* coarse basis functions */
4432:     for (i=0;i<n_constraints;i++) {
4433:       PetscScalar *y;

4435:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4436:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4437:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4438:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4439:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4440:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4441:       VecResetArray(pcis->vec1_B);
4442:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4443:         PetscInt j;

4445:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4446:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4447:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4448:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4449:         VecResetArray(pcis->vec1_D);
4450:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4451:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4452:       }
4453:       VecResetArray(pcbddc->vec1_R);
4454:     }
4455:   }
4456:   if (n_constraints) {
4457:     MatDestroy(&local_auxmat2_R);
4458:   }
4459:   PetscFree(p0_lidx_I);

4461:   /* coarse matrix entries relative to B_0 */
4462:   if (pcbddc->benign_n) {
4463:     Mat         B0_B,B0_BPHI;
4464:     IS          is_dummy;
4465:     PetscScalar *data;
4466:     PetscInt    j;

4468:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4469:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4470:     ISDestroy(&is_dummy);
4471:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4472:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4473:     MatDenseGetArray(B0_BPHI,&data);
4474:     for (j=0;j<pcbddc->benign_n;j++) {
4475:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4476:       for (i=0;i<pcbddc->local_primal_size;i++) {
4477:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4478:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4479:       }
4480:     }
4481:     MatDenseRestoreArray(B0_BPHI,&data);
4482:     MatDestroy(&B0_B);
4483:     MatDestroy(&B0_BPHI);
4484:   }

4486:   /* compute other basis functions for non-symmetric problems */
4487:   if (!pcbddc->symmetric_primal) {
4488:     Mat         B_V=NULL,B_C=NULL;
4489:     PetscScalar *marray;

4491:     if (n_constraints) {
4492:       Mat S_CCT,C_CRT;

4494:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4495:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4496:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4497:       MatDestroy(&S_CCT);
4498:       if (n_vertices) {
4499:         Mat S_VCT;

4501:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4502:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4503:         MatDestroy(&S_VCT);
4504:       }
4505:       MatDestroy(&C_CRT);
4506:     } else {
4507:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4508:     }
4509:     if (n_vertices && n_R) {
4510:       PetscScalar    *av,*marray;
4511:       const PetscInt *xadj,*adjncy;
4512:       PetscInt       n;
4513:       PetscBool      flg_row;

4515:       /* B_V = B_V - A_VR^T */
4516:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4517:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4518:       MatSeqAIJGetArray(A_VR,&av);
4519:       MatDenseGetArray(B_V,&marray);
4520:       for (i=0;i<n;i++) {
4521:         PetscInt j;
4522:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4523:       }
4524:       MatDenseRestoreArray(B_V,&marray);
4525:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4526:       MatDestroy(&A_VR);
4527:     }

4529:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4530:     if (n_vertices) {
4531:       MatDenseGetArray(B_V,&marray);
4532:       for (i=0;i<n_vertices;i++) {
4533:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4534:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4535:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4536:         VecResetArray(pcbddc->vec1_R);
4537:         VecResetArray(pcbddc->vec2_R);
4538:       }
4539:       MatDenseRestoreArray(B_V,&marray);
4540:     }
4541:     if (B_C) {
4542:       MatDenseGetArray(B_C,&marray);
4543:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4544:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4545:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4546:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4547:         VecResetArray(pcbddc->vec1_R);
4548:         VecResetArray(pcbddc->vec2_R);
4549:       }
4550:       MatDenseRestoreArray(B_C,&marray);
4551:     }
4552:     /* coarse basis functions */
4553:     for (i=0;i<pcbddc->local_primal_size;i++) {
4554:       PetscScalar *y;

4556:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4557:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4558:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4559:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4560:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4561:       if (i<n_vertices) {
4562:         y[n_B*i+idx_V_B[i]] = 1.0;
4563:       }
4564:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4565:       VecResetArray(pcis->vec1_B);

4567:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4568:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4569:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4570:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4571:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4572:         VecResetArray(pcis->vec1_D);
4573:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4574:       }
4575:       VecResetArray(pcbddc->vec1_R);
4576:     }
4577:     MatDestroy(&B_V);
4578:     MatDestroy(&B_C);
4579:   }

4581:   /* free memory */
4582:   PetscFree(idx_V_B);
4583:   MatDestroy(&S_VV);
4584:   MatDestroy(&S_CV);
4585:   MatDestroy(&S_VC);
4586:   MatDestroy(&S_CC);
4587:   PetscFree(work);
4588:   if (n_vertices) {
4589:     MatDestroy(&A_VR);
4590:   }
4591:   if (n_constraints) {
4592:     MatDestroy(&C_CR);
4593:   }
4594:   /* Checking coarse_sub_mat and coarse basis functions */
4595:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4596:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4597:   if (pcbddc->dbg_flag) {
4598:     Mat         coarse_sub_mat;
4599:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4600:     Mat         coarse_phi_D,coarse_phi_B;
4601:     Mat         coarse_psi_D,coarse_psi_B;
4602:     Mat         A_II,A_BB,A_IB,A_BI;
4603:     Mat         C_B,CPHI;
4604:     IS          is_dummy;
4605:     Vec         mones;
4606:     MatType     checkmattype=MATSEQAIJ;
4607:     PetscReal   real_value;

4609:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4610:       Mat A;
4611:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4612:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4613:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4614:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4615:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4616:       MatDestroy(&A);
4617:     } else {
4618:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4619:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4620:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4621:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4622:     }
4623:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4624:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4625:     if (!pcbddc->symmetric_primal) {
4626:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4627:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4628:     }
4629:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4631:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4632:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4633:     PetscViewerFlush(pcbddc->dbg_viewer);
4634:     if (!pcbddc->symmetric_primal) {
4635:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4636:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4637:       MatDestroy(&AUXMAT);
4638:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4639:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4640:       MatDestroy(&AUXMAT);
4641:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4642:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4643:       MatDestroy(&AUXMAT);
4644:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4645:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4646:       MatDestroy(&AUXMAT);
4647:     } else {
4648:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4649:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4650:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4651:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4652:       MatDestroy(&AUXMAT);
4653:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4654:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4655:       MatDestroy(&AUXMAT);
4656:     }
4657:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4658:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4659:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4660:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4661:     if (pcbddc->benign_n) {
4662:       Mat         B0_B,B0_BPHI;
4663:       PetscScalar *data,*data2;
4664:       PetscInt    j;

4666:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4667:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4668:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4669:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4670:       MatDenseGetArray(TM1,&data);
4671:       MatDenseGetArray(B0_BPHI,&data2);
4672:       for (j=0;j<pcbddc->benign_n;j++) {
4673:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4674:         for (i=0;i<pcbddc->local_primal_size;i++) {
4675:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4676:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4677:         }
4678:       }
4679:       MatDenseRestoreArray(TM1,&data);
4680:       MatDenseRestoreArray(B0_BPHI,&data2);
4681:       MatDestroy(&B0_B);
4682:       ISDestroy(&is_dummy);
4683:       MatDestroy(&B0_BPHI);
4684:     }
4685: #if 0
4686:   {
4687:     PetscViewer viewer;
4688:     char filename[256];
4689:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4690:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4691:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4692:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4693:     MatView(coarse_sub_mat,viewer);
4694:     PetscObjectSetName((PetscObject)TM1,"projected");
4695:     MatView(TM1,viewer);
4696:     if (pcbddc->coarse_phi_B) {
4697:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4698:       MatView(pcbddc->coarse_phi_B,viewer);
4699:     }
4700:     if (pcbddc->coarse_phi_D) {
4701:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4702:       MatView(pcbddc->coarse_phi_D,viewer);
4703:     }
4704:     if (pcbddc->coarse_psi_B) {
4705:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4706:       MatView(pcbddc->coarse_psi_B,viewer);
4707:     }
4708:     if (pcbddc->coarse_psi_D) {
4709:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4710:       MatView(pcbddc->coarse_psi_D,viewer);
4711:     }
4712:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4713:     MatView(pcbddc->local_mat,viewer);
4714:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4715:     MatView(pcbddc->ConstraintMatrix,viewer);
4716:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4717:     ISView(pcis->is_I_local,viewer);
4718:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4719:     ISView(pcis->is_B_local,viewer);
4720:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4721:     ISView(pcbddc->is_R_local,viewer);
4722:     PetscViewerDestroy(&viewer);
4723:   }
4724: #endif
4725:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4726:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4727:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4728:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4730:     /* check constraints */
4731:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4732:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4733:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4734:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4735:     } else {
4736:       PetscScalar *data;
4737:       Mat         tmat;
4738:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4739:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4740:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4741:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4742:       MatDestroy(&tmat);
4743:     }
4744:     MatCreateVecs(CPHI,&mones,NULL);
4745:     VecSet(mones,-1.0);
4746:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4747:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4748:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4749:     if (!pcbddc->symmetric_primal) {
4750:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4751:       VecSet(mones,-1.0);
4752:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4753:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4754:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4755:     }
4756:     MatDestroy(&C_B);
4757:     MatDestroy(&CPHI);
4758:     ISDestroy(&is_dummy);
4759:     VecDestroy(&mones);
4760:     PetscViewerFlush(pcbddc->dbg_viewer);
4761:     MatDestroy(&A_II);
4762:     MatDestroy(&A_BB);
4763:     MatDestroy(&A_IB);
4764:     MatDestroy(&A_BI);
4765:     MatDestroy(&TM1);
4766:     MatDestroy(&TM2);
4767:     MatDestroy(&TM3);
4768:     MatDestroy(&TM4);
4769:     MatDestroy(&coarse_phi_D);
4770:     MatDestroy(&coarse_phi_B);
4771:     if (!pcbddc->symmetric_primal) {
4772:       MatDestroy(&coarse_psi_D);
4773:       MatDestroy(&coarse_psi_B);
4774:     }
4775:     MatDestroy(&coarse_sub_mat);
4776:   }
4777:   /* get back data */
4778:   *coarse_submat_vals_n = coarse_submat_vals;
4779:   return(0);
4780: }

/*
   MatCreateSubMatrixUnsorted - Extracts the submatrix of A selected by isrow and iscol
   when the index sets are not necessarily sorted.

   Input:
     A     - matrix to extract from
     isrow - row indices (may be unsorted)
     iscol - column indices (may be unsorted; may be the same object as isrow)
   Output:
     B     - the extracted matrix; the caller receives one reference to it

   Strategy, as visible below: each unsorted index set is replaced by a sorted copy, the
   submatrix is extracted with MatCreateSubMatrices() on the sorted sets, and the result is
   then passed through MatPermute() with the inverse of the sorting permutations
   (presumably so that rows/columns end up in the order the caller requested).

   NOTE(review): this listing shows the PETSc calls without error-code checking and the
   embedded line numbers skip 4788-4790; confirm against the real source file.
*/
4782: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4783: {
4784:   Mat            *work_mat;
4785:   IS             isrow_s,iscol_s;
4786:   PetscBool      rsorted,csorted;
4787:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4791:   ISSorted(isrow,&rsorted);
4792:   ISSorted(iscol,&csorted);
4793:   ISGetLocalSize(isrow,&rsize);
4794:   ISGetLocalSize(iscol,&csize);

        /* Rows: build isrow_s, a sorted version of isrow. idxs_perm_r keeps the sorting
           permutation (idxs_sorted[i] = idxs[idxs_perm_r[i]]) for the un-permute step below. */
4796:   if (!rsorted) {
4797:     const PetscInt *idxs;
4798:     PetscInt *idxs_sorted,i;

4800:     PetscMalloc1(rsize,&idxs_perm_r);
4801:     PetscMalloc1(rsize,&idxs_sorted);
4802:     for (i=0;i<rsize;i++) {
4803:       idxs_perm_r[i] = i;
4804:     }
4805:     ISGetIndices(isrow,&idxs);
4806:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4807:     for (i=0;i<rsize;i++) {
4808:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4809:     }
4810:     ISRestoreIndices(isrow,&idxs);
          /* PETSC_OWN_POINTER: idxs_sorted is handed over to the IS and is not freed here */
4811:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4812:   } else {
          /* already sorted: reuse the caller's IS, taking a reference that is dropped at the end */
4813:     PetscObjectReference((PetscObject)isrow);
4814:     isrow_s = isrow;
4815:   }

        /* Columns: same treatment; when isrow and iscol are the same object the sorted row IS
           is shared (with an extra reference) and no column permutation array is built. */
4817:   if (!csorted) {
4818:     if (isrow == iscol) {
4819:       PetscObjectReference((PetscObject)isrow_s);
4820:       iscol_s = isrow_s;
4821:     } else {
4822:       const PetscInt *idxs;
4823:       PetscInt       *idxs_sorted,i;

4825:       PetscMalloc1(csize,&idxs_perm_c);
4826:       PetscMalloc1(csize,&idxs_sorted);
4827:       for (i=0;i<csize;i++) {
4828:         idxs_perm_c[i] = i;
4829:       }
4830:       ISGetIndices(iscol,&idxs);
4831:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4832:       for (i=0;i<csize;i++) {
4833:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4834:       }
4835:       ISRestoreIndices(iscol,&idxs);
4836:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4837:     }
4838:   } else {
4839:     PetscObjectReference((PetscObject)iscol);
4840:     iscol_s = iscol;
4841:   }

        /* extract using the sorted index sets; work_mat is an array holding one matrix */
4843:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

        /* If any sorting took place, permute the extracted matrix with the inverse permutations */
4845:   if (!rsorted || !csorted) {
4846:     Mat      new_mat;
4847:     IS       is_perm_r,is_perm_c;

4849:     if (!rsorted) {
4850:       PetscInt *idxs_r,i;
4851:       PetscMalloc1(rsize,&idxs_r);
            /* idxs_r is the inverse of the sorting permutation */
4852:       for (i=0;i<rsize;i++) {
4853:         idxs_r[idxs_perm_r[i]] = i;
4854:       }
4855:       PetscFree(idxs_perm_r);
4856:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4857:     } else {
            /* rows were already sorted: identity permutation */
4858:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4859:     }
4860:     ISSetPermutation(is_perm_r);

4862:     if (!csorted) {
4863:       if (isrow_s == iscol_s) {
              /* shared sorted IS (isrow == iscol case above): reuse the row permutation */
4864:         PetscObjectReference((PetscObject)is_perm_r);
4865:         is_perm_c = is_perm_r;
4866:       } else {
4867:         PetscInt *idxs_c,i;
              /* sanity check: the column sorting permutation must have been built above */
4868:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4869:         PetscMalloc1(csize,&idxs_c);
4870:         for (i=0;i<csize;i++) {
4871:           idxs_c[idxs_perm_c[i]] = i;
4872:         }
4873:         PetscFree(idxs_perm_c);
4874:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4875:       }
4876:     } else {
4877:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4878:     }
4879:     ISSetPermutation(is_perm_c);

          /* replace the extracted matrix with its permuted version */
4881:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4882:     MatDestroy(&work_mat[0]);
4883:     work_mat[0] = new_mat;
4884:     ISDestroy(&is_perm_r);
4885:     ISDestroy(&is_perm_c);
4886:   }

        /* take our own reference on the result before the work array is destroyed */
4888:   PetscObjectReference((PetscObject)work_mat[0]);
4889:   *B = work_mat[0];
4890:   MatDestroyMatrices(1,&work_mat);
4891:   ISDestroy(&isrow_s);
4892:   ISDestroy(&iscol_s);
4893:   return(0);
4894: }

/*
   PCBDDCComputeLocalMatrix - Rebuilds pcbddc->local_mat by applying a change of basis to
   the local matrix matis->A.

   Input:
     pc                  - the preconditioner; pc->pmat->data is read as a Mat_IS
     ChangeOfBasisMatrix - global change of basis matrix

   What the code does:
     1. Extracts from ChangeOfBasisMatrix the block indexed (rows and columns) by the global
        indices of all local dofs (new_mat), via MatCreateSubMatrixUnsorted().
     2. In debug mode, checks on a random vector that applying new_mat locally agrees with
        applying ChangeOfBasisMatrix globally, and raises an error otherwise.
     3. Computes pcbddc->local_mat with MatPtAP(A,new_mat), converting A to SEQAIJ first
        when it is not already of that type.
     4. If an object named "__KSPFETIDP_lA" is composed with pc, transforms it in the same
        way and composes the result back under the same name.
     5. Copies the symmetry flag of matis->A (when set) onto the new local matrix.

   NOTE(review): this listing shows the PETSc calls without error-code checking.
*/
4896: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4897: {
4898:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4899:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4900:   Mat            new_mat,lA;
4901:   IS             is_local,is_global;
4902:   PetscInt       local_size;
4903:   PetscBool      isseqaij;

        /* local change of basis: rows/columns of the global matrix at the global indices
           of local dofs 0..local_size-1 (these need not be sorted, hence the Unsorted variant) */
4907:   MatDestroy(&pcbddc->local_mat);
4908:   MatGetSize(matis->A,&local_size,NULL);
4909:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4910:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4911:   ISDestroy(&is_local);
4912:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4913:   ISDestroy(&is_global);

4915:   /* check */
4916:   if (pcbddc->dbg_flag) {
4917:     Vec       x,x_change;
4918:     PetscReal error;

          /* x_change = global change applied to a random x; matis->y = local change applied
             to the local copy of x */
4920:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4921:     VecSetRandom(x,NULL);
4922:     MatMult(ChangeOfBasisMatrix,x,x_change);
4923:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4924:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4925:     MatMult(new_mat,matis->x,matis->y);
4926:     if (!pcbddc->change_interior) {
            /* these arrays shadow the Vec x of the enclosing scope */
4927:       const PetscScalar *x,*y,*v;
4928:       PetscReal         lerror = 0.;
4929:       PetscInt          i;

            /* max |x-y| over the entries whose counter is below 1.5 (presumably dofs that
               belong to a single subdomain - TODO confirm): there the change must be the identity */
4931:       VecGetArrayRead(matis->x,&x);
4932:       VecGetArrayRead(matis->y,&y);
4933:       VecGetArrayRead(matis->counter,&v);
4934:       for (i=0;i<local_size;i++)
4935:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4936:           lerror = PetscAbsScalar(x[i]-y[i]);
4937:       VecRestoreArrayRead(matis->x,&x);
4938:       VecRestoreArrayRead(matis->y,&y);
4939:       VecRestoreArrayRead(matis->counter,&v);
4940:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4941:       if (error > PETSC_SMALL) {
              /* library error unless the change of basis came from the user at level 0 */
4942:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4943:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4944:         } else {
4945:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4946:         }
4947:       }
4948:     }
          /* bring the local result back to the global vector and compare with x_change */
4949:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4950:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4951:     VecAXPY(x,-1.0,x_change);
4952:     VecNorm(x,NORM_INFINITY,&error);
4953:     if (error > PETSC_SMALL) {
4954:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4955:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4956:       } else {
4957:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4958:       }
4959:     }
4960:     VecDestroy(&x);
4961:     VecDestroy(&x_change);
4962:   }

4964:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4965:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

4967:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4968:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4969:   if (isseqaij) {
4970:     MatDestroy(&pcbddc->local_mat);
4971:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4972:     if (lA) {
4973:       Mat work;
4974:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
            /* the composed object keeps its own reference, so the local one is dropped */
4975:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4976:       MatDestroy(&work);
4977:     }
4978:   } else {
4979:     Mat work_mat;

          /* MatPtAP is applied to a temporary SEQAIJ copy of the local matrix */
4981:     MatDestroy(&pcbddc->local_mat);
4982:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4983:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4984:     MatDestroy(&work_mat);
4985:     if (lA) {
4986:       Mat work;
4987:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4988:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4989:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4990:       MatDestroy(&work);
            /* release the temporary SEQAIJ copy of lA created by MatConvert above;
               without this call the converted matrix is never freed */
            MatDestroy(&work_mat);
4991:     }
4992:   }
        /* propagate the symmetry information of the original local matrix */
4993:   if (matis->A->symmetric_set) {
4994:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4995: #if !defined(PETSC_USE_COMPLEX)
4996:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4997: #endif
4998:   }
4999:   MatDestroy(&new_mat);
5000:   return(0);
5001: }

/*
   PCBDDCSetUpLocalScatters - (Re)creates the index set pcbddc->is_R_local and the scatters
   pcbddc->R_to_B and, when switch_static or dbg_flag is set, pcbddc->R_to_D.

   Input:
     pc - the preconditioner; pc->data is read both as a PC_IS and as a PC_BDDC

   R denotes the local dofs that are not primal vertices. Two paths exist:
     - no reusable Schur solver: R is the complement of pcbddc->local_primal_ref_node in
       0..pcis->n-1, and the scatters are built by matching R against is_I_local/is_B_local;
     - reusable Schur solver (sub_schurs->reuse_solver): the ordering stored in
       reuse_solver->is_R and reuse_solver->is_B is used instead.

   Debug output prints PetscInt values with the PETSc "%D" conversion, which is correct for
   both 32-bit and 64-bit index builds.

   NOTE(review): this listing shows the PETSc calls without error-code checking; ierr is
   declared but not used in the lines visible here.
*/
5003: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5004: {
5005:   PC_IS*          pcis = (PC_IS*)(pc->data);
5006:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5007:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5008:   PetscInt        *idx_R_local=NULL;
5009:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5010:   PetscInt        vbs,bs;
5011:   PetscBT         bitmask=NULL;
5012:   PetscErrorCode  ierr;

5015:   /*
5016:     No need to setup local scatters if
5017:       - primal space is unchanged
5018:         AND
5019:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5020:         AND
5021:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
5022:   */
5023:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5024:     return(0);
5025:   }
5026:   /* destroy old objects */
5027:   ISDestroy(&pcbddc->is_R_local);
5028:   VecScatterDestroy(&pcbddc->R_to_B);
5029:   VecScatterDestroy(&pcbddc->R_to_D);
5030:   /* Set Non-overlapping dimensions */
5031:   n_B = pcis->n_B;
5032:   n_D = pcis->n - n_B;
5033:   n_vertices = pcbddc->n_vertices;

5035:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

5037:   /* create auxiliary bitmask and allocate workspace */
5038:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5039:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5040:     PetscBTCreate(pcis->n,&bitmask);
          /* mark the vertices; R is every local index left unmarked, in increasing order */
5041:     for (i=0;i<n_vertices;i++) {
5042:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5043:     }

5045:     for (i=0, n_R=0; i<pcis->n; i++) {
5046:       if (!PetscBTLookup(bitmask,i)) {
5047:         idx_R_local[n_R++] = i;
5048:       }
5049:     }
5050:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5051:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* NOTE(review): constness is cast away here, and the block-compression loop below
             may write through this pointer before it is restored - confirm this is intended */
5053:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5054:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5055:   }

5057:   /* Block code */
5058:   vbs = 1;
5059:   MatGetBlockSize(pcbddc->local_mat,&bs);
5060:   if (bs>1 && !(n_vertices%bs)) {
5061:     PetscBool is_blocked = PETSC_TRUE;
5062:     PetscInt  *vary;
5063:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5064:       PetscMalloc1(pcis->n/bs,&vary);
5065:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5066:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5067:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5068:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5069:       for (i=0; i<pcis->n/bs; i++) {
5070:         if (vary[i]!=0 && vary[i]!=bs) {
5071:           is_blocked = PETSC_FALSE;
5072:           break;
5073:         }
5074:       }
5075:       PetscFree(vary);
5076:     } else {
5077:       /* Verify directly the R set */
            /* each group of bs entries must hold consecutive indices; stop scanning at the first failure */
5078:       for (i=0; i<n_R/bs && is_blocked; i++) {
5079:         PetscInt j,node=idx_R_local[bs*i];
5080:         for (j=1; j<bs; j++) {
5081:           if (node != idx_R_local[bs*i+j]-j) {
5082:             is_blocked = PETSC_FALSE;
5083:             break;
5084:           }
5085:         }
5086:       }
5087:     }
5088:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5089:       vbs = bs;
            /* in-place compression: keep the first index of every block, expressed in block units */
5090:       for (i=0;i<n_R/vbs;i++) {
5091:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5092:       }
5093:     }
5094:   }
        /* PETSC_COPY_VALUES: the IS keeps its own copy, so idx_R_local can be released below */
5095:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5096:   if (sub_schurs && sub_schurs->reuse_solver) {
5097:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* from now on reuse_solver->is_R refers to the same object as pcbddc->is_R_local */
5099:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5100:     ISDestroy(&reuse_solver->is_R);
5101:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5102:     reuse_solver->is_R = pcbddc->is_R_local;
5103:   } else {
5104:     PetscFree(idx_R_local);
5105:   }

5107:   /* print some info if requested */
5108:   if (pcbddc->dbg_flag) {
5109:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5110:     PetscViewerFlush(pcbddc->dbg_viewer);
5111:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5112:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
          /* all sizes below are PetscInt, hence %D (the rank above is a plain int and keeps %d) */
5113:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5114:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5115:     PetscViewerFlush(pcbddc->dbg_viewer);
5116:   }

5118:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5119:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5120:     IS       is_aux1,is_aux2;
          /* this idx_R_local shadows the function-level variable of the same name */
5121:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5123:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
          /* sized n_B - n_vertices: assumes every vertex is a boundary dof - TODO confirm */
5124:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5125:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
          /* add the interior dofs to the bitmask (vertices are already set): the bits that
             remain clear are the boundary dofs that are not vertices */
5126:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5127:     for (i=0; i<n_D; i++) {
5128:       PetscBTSet(bitmask,is_indices[i]);
5129:     }
5130:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
          /* positions inside R of the unmarked dofs */
5131:     for (i=0, j=0; i<n_R; i++) {
5132:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5133:         aux_array1[j++] = i;
5134:       }
5135:     }
5136:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
          /* positions inside is_B_local of the same unmarked dofs */
5137:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5138:     for (i=0, j=0; i<n_B; i++) {
5139:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5140:         aux_array2[j++] = i;
5141:       }
5142:     }
5143:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5144:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5145:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5146:     ISDestroy(&is_aux1);
5147:     ISDestroy(&is_aux2);

5149:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* positions inside R of the marked dofs; R holds no vertices, so these are the interior dofs */
5150:       PetscMalloc1(n_D,&aux_array1);
5151:       for (i=0, j=0; i<n_R; i++) {
5152:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5153:           aux_array1[j++] = i;
5154:         }
5155:       }
5156:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5157:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5158:       ISDestroy(&is_aux1);
5159:     }
5160:     PetscBTDestroy(&bitmask);
5161:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5162:   } else {
5163:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5164:     IS                 tis;
5165:     PetscInt           schur_size;

          /* entries n_D .. n_D+schur_size-1 of vec1_R go to the positions reuse_solver->is_B of
             vec1_B, and entries 0 .. n_D-1 go to vec1_D (presumably the reused ordering stores
             interior dofs first - TODO confirm) */
5167:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5168:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5169:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5170:     ISDestroy(&tis);
5171:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5172:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5173:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5174:       ISDestroy(&tis);
5175:     }
5176:   }
5177:   return(0);
5178: }


5181: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5182: {
5183:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5184:   PC_IS          *pcis = (PC_IS*)pc->data;
5185:   PC             pc_temp;
5186:   Mat            A_RR;
5187:   MatReuse       reuse;
5188:   PetscScalar    m_one = -1.0;
5189:   PetscReal      value;
5190:   PetscInt       n_D,n_R;
5191:   PetscBool      check_corr,issbaij;
5193:   /* prefixes stuff */
5194:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5195:   size_t         len;


5199:   /* compute prefixes */
5200:   PetscStrcpy(dir_prefix,"");
5201:   PetscStrcpy(neu_prefix,"");
5202:   if (!pcbddc->current_level) {
5203:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5204:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5205:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5206:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5207:   } else {
5208:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5209:     PetscStrlen(((PetscObject)pc)->prefix,&len);
5210:     len -= 15; /* remove "pc_bddc_coarse_" */
5211:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5212:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5213:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5214:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5215:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5216:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5217:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5218:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5219:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5220:   }

5222:   /* DIRICHLET PROBLEM */
5223:   if (dirichlet) {
5224:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5225:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5226:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
5227:       if (pcbddc->dbg_flag) {
5228:         Mat    A_IIn;

5230:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5231:         MatDestroy(&pcis->A_II);
5232:         pcis->A_II = A_IIn;
5233:       }
5234:     }
5235:     if (pcbddc->local_mat->symmetric_set) {
5236:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5237:     }
5238:     /* Matrix for Dirichlet problem is pcis->A_II */
5239:     n_D = pcis->n - pcis->n_B;
5240:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5241:       void (*f)(void) = 0;

5243:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5244:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5245:       /* default */
5246:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5247:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5248:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
5249:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5250:       if (issbaij) {
5251:         PCSetType(pc_temp,PCCHOLESKY);
5252:       } else {
5253:         PCSetType(pc_temp,PCLU);
5254:       }
5255:       /* Allow user's customization */
5256:       KSPSetFromOptions(pcbddc->ksp_D);
5257:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5258:       if (f && pcbddc->mat_graph->cloc) {
5259:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5260:         const PetscInt *idxs;
5261:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5263:         ISGetLocalSize(pcis->is_I_local,&nl);
5264:         ISGetIndices(pcis->is_I_local,&idxs);
5265:         PetscMalloc1(nl*cdim,&scoords);
5266:         for (i=0;i<nl;i++) {
5267:           for (d=0;d<cdim;d++) {
5268:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5269:           }
5270:         }
5271:         ISRestoreIndices(pcis->is_I_local,&idxs);
5272:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5273:         PetscFree(scoords);
5274:       }
5275:     }
5276:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
5277:     if (sub_schurs && sub_schurs->reuse_solver) {
5278:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5280:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5281:     }
5282:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5283:     if (!n_D) {
5284:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5285:       PCSetType(pc_temp,PCNONE);
5286:     }
5287:     /* set ksp_D into pcis data */
5288:     KSPDestroy(&pcis->ksp_D);
5289:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5290:     pcis->ksp_D = pcbddc->ksp_D;
5291:   }

5293:   /* NEUMANN PROBLEM */
5294:   A_RR = 0;
5295:   if (neumann) {
5296:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5297:     PetscInt        ibs,mbs;
5298:     PetscBool       issbaij, reuse_neumann_solver;
5299:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

5301:     reuse_neumann_solver = PETSC_FALSE;
5302:     if (sub_schurs && sub_schurs->reuse_solver) {
5303:       IS iP;

5305:       reuse_neumann_solver = PETSC_TRUE;
5306:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5307:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5308:     }
5309:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5310:     ISGetSize(pcbddc->is_R_local,&n_R);
5311:     if (pcbddc->ksp_R) { /* already created ksp */
5312:       PetscInt nn_R;
5313:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5314:       PetscObjectReference((PetscObject)A_RR);
5315:       MatGetSize(A_RR,&nn_R,NULL);
5316:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5317:         KSPReset(pcbddc->ksp_R);
5318:         MatDestroy(&A_RR);
5319:         reuse = MAT_INITIAL_MATRIX;
5320:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5321:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5322:           MatDestroy(&A_RR);
5323:           reuse = MAT_INITIAL_MATRIX;
5324:         } else { /* safe to reuse the matrix */
5325:           reuse = MAT_REUSE_MATRIX;
5326:         }
5327:       }
5328:       /* last check */
5329:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5330:         MatDestroy(&A_RR);
5331:         reuse = MAT_INITIAL_MATRIX;
5332:       }
5333:     } else { /* first time, so we need to create the matrix */
5334:       reuse = MAT_INITIAL_MATRIX;
5335:     }
5336:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5337:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5338:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5339:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5340:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5341:       if (matis->A == pcbddc->local_mat) {
5342:         MatDestroy(&pcbddc->local_mat);
5343:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5344:       } else {
5345:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5346:       }
5347:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5348:       if (matis->A == pcbddc->local_mat) {
5349:         MatDestroy(&pcbddc->local_mat);
5350:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5351:       } else {
5352:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5353:       }
5354:     }
5355:     /* extract A_RR */
5356:     if (reuse_neumann_solver) {
5357:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5359:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5360:         MatDestroy(&A_RR);
5361:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5362:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5363:         } else {
5364:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5365:         }
5366:       } else {
5367:         MatDestroy(&A_RR);
5368:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5369:         PetscObjectReference((PetscObject)A_RR);
5370:       }
5371:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5372:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5373:     }
5374:     if (pcbddc->local_mat->symmetric_set) {
5375:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5376:     }
5377:     if (!pcbddc->ksp_R) { /* create object if not present */
5378:       void (*f)(void) = 0;

5380:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5381:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5382:       /* default */
5383:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5384:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5385:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5386:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5387:       if (issbaij) {
5388:         PCSetType(pc_temp,PCCHOLESKY);
5389:       } else {
5390:         PCSetType(pc_temp,PCLU);
5391:       }
5392:       /* Allow user's customization */
5393:       KSPSetFromOptions(pcbddc->ksp_R);
5394:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5395:       if (f && pcbddc->mat_graph->cloc) {
5396:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5397:         const PetscInt *idxs;
5398:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5400:         ISGetLocalSize(pcbddc->is_R_local,&nl);
5401:         ISGetIndices(pcbddc->is_R_local,&idxs);
5402:         PetscMalloc1(nl*cdim,&scoords);
5403:         for (i=0;i<nl;i++) {
5404:           for (d=0;d<cdim;d++) {
5405:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5406:           }
5407:         }
5408:         ISRestoreIndices(pcbddc->is_R_local,&idxs);
5409:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5410:         PetscFree(scoords);
5411:       }
5412:     }
5413:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5414:     if (!n_R) {
5415:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5416:       PCSetType(pc_temp,PCNONE);
5417:     }
5418:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5419:     /* Reuse solver if it is present */
5420:     if (reuse_neumann_solver) {
5421:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5423:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5424:     }
5425:   }

5427:   if (pcbddc->dbg_flag) {
5428:     PetscViewerFlush(pcbddc->dbg_viewer);
5429:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5430:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5431:   }

5433:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5434:   check_corr = PETSC_FALSE;
5435:   if (pcbddc->NullSpace_corr[0]) {
5436:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5437:   }
5438:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5439:     check_corr = PETSC_TRUE;
5440:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5441:   }
5442:   if (neumann && pcbddc->NullSpace_corr[2]) {
5443:     check_corr = PETSC_TRUE;
5444:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5445:   }
5446:   /* check Dirichlet and Neumann solvers */
5447:   if (pcbddc->dbg_flag) {
5448:     if (dirichlet) { /* Dirichlet */
5449:       VecSetRandom(pcis->vec1_D,NULL);
5450:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5451:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5452:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5453:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5454:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5455:       if (check_corr) {
5456:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5457:       }
5458:       PetscViewerFlush(pcbddc->dbg_viewer);
5459:     }
5460:     if (neumann) { /* Neumann */
5461:       VecSetRandom(pcbddc->vec1_R,NULL);
5462:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5463:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5464:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5465:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5466:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5467:       if (check_corr) {
5468:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5469:       }
5470:       PetscViewerFlush(pcbddc->dbg_viewer);
5471:     }
5472:   }
5473:   /* free Neumann problem's matrix */
5474:   MatDestroy(&A_RR);
5475:   return(0);
5476: }

/*
   PCBDDCSolveSubstructureCorrection - Loads a right-hand side from inout_B
   (and inout_D), performs one local solve, and writes the result back into
   the same vectors.

   Parameters:
+  pc             - the preconditioner; pc->data is read as PC_BDDC
.  inout_B        - on entry supplies the data scattered into the solver's
                    right-hand side; it is zeroed after the solve and then
                    receives the scattered solution
.  inout_D        - read and written only when pcbddc->switch_static is set,
                    through pcbddc->R_to_D
-  applytranspose - PETSC_TRUE selects the transpose solves and applies the
                    local_auxmat1/local_auxmat2 update before the solve instead
                    of after it

   Two solver paths exist:
   - pcbddc->ksp_R acting in place on pcbddc->vec1_R (used when no sub_schurs
     reuse_solver is available, or whenever switch_static is set);
   - the reused factorization reuse_solver->F acting on rhs_B/sol_B.

   pcbddc->vec1_R and pcbddc->vec1_C are used as work vectors and overwritten.
   Returns 0.
*/
5478: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5479: {
5480:   PetscErrorCode  ierr;
        /* NOTE(review): no use of ierr is visible in this listing; presumably the
           error-checking wrappers were stripped by the renderer - confirm upstream */
5481:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5482:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* PETSC_TRUE only when sub_schurs exists and carries a reuse_solver object */
5483:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* ---- stage 1: load the right-hand side ---- */
        /* the work vector is zeroed unless the sub_schurs solver is reused */
5486:   if (!reuse_solver) {
5487:     VecSet(pcbddc->vec1_R,0.);
5488:   }
5489:   if (!pcbddc->switch_static) {
5490:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B), via vec1_C */
5491:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5492:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5493:     }
5494:     if (!reuse_solver) {
            /* R_to_B is used in reverse to copy inout_B into vec1_R */
5495:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5496:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5497:     } else {
            /* this local shadows the PetscBool reuse_solver declared at function scope */
5498:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* copy inout_B into the reused solver's rhs_B */
5500:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5501:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5502:     }
5503:   } else {
          /* switch_static: vec1_R is filled from both inout_B and inout_D */
5504:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5505:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5506:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5507:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5508:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T * (local_auxmat2^T * vec1_R), then the
               updated inout_B is scattered into vec1_R again */
5509:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5510:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5511:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5512:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5513:     }
5514:   }
        /* ---- stage 2: local solve ---- */
        /* ksp_R solves in place on vec1_R; the reused factorization is used only
           when reuse_solver is set and switch_static is not */
5515:   if (!reuse_solver || pcbddc->switch_static) {
5516:     if (applytranspose) {
5517:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5518:     } else {
5519:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5520:     }
5521:   } else {
          /* this local shadows the PetscBool reuse_solver declared at function scope */
5522:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5524:     if (applytranspose) {
5525:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5526:     } else {
5527:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5528:     }
5529:   }
        /* ---- stage 3: unload the solution ---- */
        /* inout_B is cleared before the solution is scattered into it */
5530:   VecSet(inout_B,0.);
5531:   if (!pcbddc->switch_static) {
5532:     if (!reuse_solver) {
5533:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5534:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5535:     } else {
            /* this local shadows the PetscBool reuse_solver declared at function scope */
5536:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* correction_scatter_B is used in reverse to copy sol_B into inout_B */
5538:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5539:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5540:     }
5541:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat2 * (local_auxmat1 * inout_B), via vec1_C */
5542:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5543:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5544:     }
5545:   } else {
          /* switch_static: the solution in vec1_R is copied to both inout_B and inout_D */
5546:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5547:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5548:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5549:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5550:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* vec1_R += local_auxmat2 * (local_auxmat1 * inout_B), via vec1_C */
5551:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5552:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5553:     }
          /* both scatters are repeated unconditionally so that inout_B and inout_D
             reflect vec1_R after the optional update above */
5554:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5555:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5556:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5557:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5558:   }
5559:   return(0);
5560: }

5562: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
/*
   PCBDDCApplyInterfacePreconditioner - Combines a coarse solve with the local
   substructure correction, operating in place on pcis->vec1_B (and on
   pcis->vec1_D when pcbddc->switch_static is set).

   Parameters:
+  pc             - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  applytranspose - PETSC_TRUE swaps the roles of the coarse_phi_* and coarse_psi_*
                    matrices and selects the transpose solves

   Steps, in order:
   1. vec1_P = (coarse matrix)^T * vec1_B (+ the _D contribution when
      switch_static), or zero when benign_apply_coarse_only is set
   2. benign_p0[] is added into the trailing benign_n entries of vec1_P
   3. vec1_P is scattered into the coarse vector with ADD_VALUES
   4. when pcbddc->coarse_ksp is non-NULL, the coarse problem is solved
   5. PCBDDCSolveSubstructureCorrection is run on vec1_B/vec1_D (skipped when
      pcis->n is zero or benign_apply_coarse_only is set)
   6. the coarse solution is scattered back into vec1_P and its image under the
      coarse matrix is added to (or, in the coarse-only case, assigned to) vec1_B

   Returns 0. Raises PETSC_ERR_SUP when applytranspose and
   benign_apply_coarse_only are both set and coarse_ksp is non-NULL.
*/
5563: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5564: {
5566:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5567:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5568:   const PetscScalar zero = 0.0;

5571:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
        /* applytranspose uses coarse_phi_*, otherwise coarse_psi_* is used */
5572:   if (!pcbddc->benign_apply_coarse_only) {
5573:     if (applytranspose) {
5574:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5575:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5576:     } else {
5577:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5578:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5579:     }
5580:   } else {
5581:     VecSet(pcbddc->vec1_P,zero);
5582:   }

5584:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
        /* entries local_primal_size-benign_n .. local_primal_size-1 are updated */
5585:   if (pcbddc->benign_n) {
5586:     PetscScalar *array;
5587:     PetscInt    j;

5589:     VecGetArray(pcbddc->vec1_P,&array);
5590:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5591:     VecRestoreArray(pcbddc->vec1_P,&array);
5592:   }

5594:   /* start communications from local primal nodes to rhs of coarse solver */
        /* coarse_vec is zeroed first because the scatter accumulates with ADD_VALUES */
5595:   VecSet(pcbddc->coarse_vec,zero);
5596:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5597:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5599:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5600:   if (pcbddc->coarse_ksp) {
5601:     Mat          coarse_mat;
5602:     Vec          rhs,sol;
5603:     MatNullSpace nullsp;
          /* stays PETSC_FALSE unless benign_have_null is set and the coarse PC type is PCBDDC */
5604:     PetscBool    isbddc = PETSC_FALSE;

5606:     if (pcbddc->benign_have_null) {
5607:       PC        coarse_pc;

5609:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5610:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5611:       /* we need to propagate to coarser levels the need for a possible benign correction */
5612:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5613:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5614:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5615:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5616:       }
5617:     }
5618:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5619:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5620:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5621:     MatGetNullSpace(coarse_mat,&nullsp);
          /* when the coarse matrix has a null space attached, it is removed from the
             right-hand side here and from the solution after the solve */
5622:     if (nullsp) {
5623:       MatNullSpaceRemove(nullsp,rhs);
5624:     }
5625:     if (applytranspose) {
5626:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5627:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5628:     } else {
5629:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5630:         PC        coarse_pc;

              /* the KSP solve is bypassed: PCBDDCBenignRemoveInterior is called
                 directly on the coarse PC, bracketed by its pre/post-solve hooks */
5632:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5633:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5634:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5635:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5636:       } else {
5637:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5638:       }
5639:     }
5640:     /* we don't need the benign correction at coarser levels anymore */
5641:     if (pcbddc->benign_have_null && isbddc) {
5642:       PC        coarse_pc;
5643:       PC_BDDC*  coarsepcbddc;

5645:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5646:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5647:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5648:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5649:     }
5650:     if (nullsp) {
5651:       MatNullSpaceRemove(nullsp,sol);
5652:     }
5653:   }

5655:   /* Local solution on R nodes */
5656:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5657:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5658:   }
5659:   /* communications from coarse sol to local primal nodes */
5660:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5661:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5663:   /* Sum contributions from the two levels */
        /* the matrices are swapped with respect to the first step: applytranspose
           uses coarse_psi_* here, otherwise coarse_phi_* is used */
5664:   if (!pcbddc->benign_apply_coarse_only) {
5665:     if (applytranspose) {
5666:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5667:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5668:     } else {
5669:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5670:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5671:     }
5672:     /* store p0 */
          /* benign_p0[] is overwritten with the trailing benign_n entries of vec1_P */
5673:     if (pcbddc->benign_n) {
5674:       PetscScalar *array;
5675:       PetscInt    j;

5677:       VecGetArray(pcbddc->vec1_P,&array);
5678:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5679:       VecRestoreArray(pcbddc->vec1_P,&array);
5680:     }
5681:   } else { /* expand the coarse solution */
          /* vec1_B is assigned (not accumulated); vec1_D is not touched on this path */
5682:     if (applytranspose) {
5683:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5684:     } else {
5685:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5686:     }
5687:   }
5688:   return(0);
5689: }

/*
   PCBDDCScatterCoarseDataBegin - Starts the scatter between pcbddc->vec1_P and
   pcbddc->coarse_vec through pcbddc->coarse_loc_to_glob.

   Parameters:
+  pc    - the preconditioner; pc->data is read as PC_BDDC
.  imode - insert mode, forwarded unchanged to VecScatterBegin
-  smode - SCATTER_REVERSE moves coarse_vec -> vec1_P; any other value moves
           vec1_P -> coarse_vec

   In the reverse direction, when pcbddc->coarse_ksp is non-NULL, the array
   placed in the KSP right-hand side is reset and coarse_vec is made to use the
   array of the KSP solution before the scatter starts.

   Returns 0.
*/
5691: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5692: {
5694:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5695:   PetscScalar    *array;
5696:   Vec            from,to;

5699:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5700:     from = pcbddc->coarse_vec;
5701:     to = pcbddc->vec1_P;
5702:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5703:       Vec tvec;

            /* undo the array placement on the KSP right-hand side ... */
5705:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5706:       VecResetArray(tvec);
            /* ... and let coarse_vec use the storage of the KSP solution, so the
               scatter reads the solution values without a copy */
5707:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5708:       VecGetArray(tvec,&array);
5709:       VecPlaceArray(from,array);
5710:       VecRestoreArray(tvec,&array);
5711:     }
5712:   } else { /* from local to global -> put data in coarse right hand side */
5713:     from = pcbddc->vec1_P;
5714:     to = pcbddc->coarse_vec;
5715:   }
5716:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5717:   return(0);
5718: }

/*
   PCBDDCScatterCoarseDataEnd - Completes the scatter between pcbddc->vec1_P and
   pcbddc->coarse_vec through pcbddc->coarse_loc_to_glob.

   Parameters:
+  pc    - the preconditioner; pc->data is read as PC_BDDC
.  imode - insert mode, forwarded unchanged to VecScatterEnd
-  smode - SCATTER_REVERSE moves coarse_vec -> vec1_P; any other value moves
           vec1_P -> coarse_vec

   After the scatter, when pcbddc->coarse_ksp is non-NULL:
   - smode == SCATTER_FORWARD: the KSP right-hand side is made to use the array
     of coarse_vec;
   - otherwise: the array placed on coarse_vec is reset.

   Returns 0.
*/
5720: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5721: {
5723:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5724:   PetscScalar    *array;
5725:   Vec            from,to;

5728:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5729:     from = pcbddc->coarse_vec;
5730:     to = pcbddc->vec1_P;
5731:   } else { /* from local to global -> put data in coarse right hand side */
5732:     from = pcbddc->vec1_P;
5733:     to = pcbddc->coarse_vec;
5734:   }
5735:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
        /* note: this test is on SCATTER_FORWARD, whereas the direction choice above
           is on SCATTER_REVERSE */
5736:   if (smode == SCATTER_FORWARD) {
5737:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5738:       Vec tvec;

            /* the KSP right-hand side now shares the storage of coarse_vec ("to") */
5740:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5741:       VecGetArray(to,&array);
5742:       VecPlaceArray(tvec,array);
5743:       VecRestoreArray(to,&array);
5744:     }
5745:   } else {
5746:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5747:      VecResetArray(from);
5748:     }
5749:   }
5750:   return(0);
5751: }

5753: /* uncomment for testing purposes */
5754: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5755: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5756: {
5757:   PetscErrorCode    ierr;
5758:   PC_IS*            pcis = (PC_IS*)(pc->data);
5759:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5760:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5761:   /* one and zero */
5762:   PetscScalar       one=1.0,zero=0.0;
5763:   /* space to store constraints and their local indices */
5764:   PetscScalar       *constraints_data;
5765:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5766:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5767:   PetscInt          *constraints_n;
5768:   /* iterators */
5769:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5770:   /* BLAS integers */
5771:   PetscBLASInt      lwork,lierr;
5772:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5773:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5774:   /* reuse */
5775:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5776:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5777:   /* change of basis */
5778:   PetscBool         qr_needed;
5779:   PetscBT           change_basis,qr_needed_idx;
5780:   /* auxiliary stuff */
5781:   PetscInt          *nnz,*is_indices;
5782:   PetscInt          ncc;
5783:   /* some quantities */
5784:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5785:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5786:   PetscReal         tol; /* tolerance for retaining eigenmodes */

5789:   tol  = PetscSqrtReal(PETSC_SMALL);
5790:   /* Destroy Mat objects computed previously */
5791:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5792:   MatDestroy(&pcbddc->ConstraintMatrix);
5793:   MatDestroy(&pcbddc->switch_static_change);
5794:   /* save info on constraints from previous setup (if any) */
5795:   olocal_primal_size = pcbddc->local_primal_size;
5796:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5797:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5798:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5799:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5800:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5801:   PetscFree(pcbddc->primal_indices_local_idxs);

5803:   if (!pcbddc->adaptive_selection) {
5804:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5805:     MatNullSpace nearnullsp;
5806:     const Vec    *nearnullvecs;
5807:     Vec          *localnearnullsp;
5808:     PetscScalar  *array;
5809:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5810:     PetscBool    nnsp_has_cnst;
5811:     /* LAPACK working arrays for SVD or POD */
5812:     PetscBool    skip_lapack,boolforchange;
5813:     PetscScalar  *work;
5814:     PetscReal    *singular_vals;
5815: #if defined(PETSC_USE_COMPLEX)
5816:     PetscReal    *rwork;
5817: #endif
5818: #if defined(PETSC_MISSING_LAPACK_GESVD)
5819:     PetscScalar  *temp_basis,*correlation_mat;
5820: #else
5821:     PetscBLASInt dummy_int=1;
5822:     PetscScalar  dummy_scalar=1.;
5823: #endif

5825:     /* Get index sets for faces, edges and vertices from graph */
5826:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5827:     /* print some info */
5828:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5829:       PetscInt nv;

5831:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5832:       ISGetSize(ISForVertices,&nv);
5833:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5834:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5835:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5836:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5837:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5838:       PetscViewerFlush(pcbddc->dbg_viewer);
5839:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5840:     }

5842:     /* free unneeded index sets */
5843:     if (!pcbddc->use_vertices) {
5844:       ISDestroy(&ISForVertices);
5845:     }
5846:     if (!pcbddc->use_edges) {
5847:       for (i=0;i<n_ISForEdges;i++) {
5848:         ISDestroy(&ISForEdges[i]);
5849:       }
5850:       PetscFree(ISForEdges);
5851:       n_ISForEdges = 0;
5852:     }
5853:     if (!pcbddc->use_faces) {
5854:       for (i=0;i<n_ISForFaces;i++) {
5855:         ISDestroy(&ISForFaces[i]);
5856:       }
5857:       PetscFree(ISForFaces);
5858:       n_ISForFaces = 0;
5859:     }

5861:     /* check if near null space is attached to global mat */
5862:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
5863:     if (nearnullsp) {
5864:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5865:       /* remove any stored info */
5866:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5867:       PetscFree(pcbddc->onearnullvecs_state);
5868:       /* store information for BDDC solver reuse */
5869:       PetscObjectReference((PetscObject)nearnullsp);
5870:       pcbddc->onearnullspace = nearnullsp;
5871:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5872:       for (i=0;i<nnsp_size;i++) {
5873:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5874:       }
5875:     } else { /* if near null space is not provided BDDC uses constants by default */
5876:       nnsp_size = 0;
5877:       nnsp_has_cnst = PETSC_TRUE;
5878:     }
5879:     /* get max number of constraints on a single cc */
5880:     max_constraints = nnsp_size;
5881:     if (nnsp_has_cnst) max_constraints++;

5883:     /*
5884:          Evaluate maximum storage size needed by the procedure
5885:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5886:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5887:          There can be multiple constraints per connected component
5888:                                                                                                                                                            */
5889:     n_vertices = 0;
5890:     if (ISForVertices) {
5891:       ISGetSize(ISForVertices,&n_vertices);
5892:     }
5893:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5894:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

5896:     total_counts = n_ISForFaces+n_ISForEdges;
5897:     total_counts *= max_constraints;
5898:     total_counts += n_vertices;
5899:     PetscBTCreate(total_counts,&change_basis);

5901:     total_counts = 0;
5902:     max_size_of_constraint = 0;
5903:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5904:       IS used_is;
5905:       if (i<n_ISForEdges) {
5906:         used_is = ISForEdges[i];
5907:       } else {
5908:         used_is = ISForFaces[i-n_ISForEdges];
5909:       }
5910:       ISGetSize(used_is,&j);
5911:       total_counts += j;
5912:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5913:     }
5914:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

5916:     /* get local part of global near null space vectors */
5917:     PetscMalloc1(nnsp_size,&localnearnullsp);
5918:     for (k=0;k<nnsp_size;k++) {
5919:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5920:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5921:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5922:     }

5924:     /* whether or not to skip lapack calls */
5925:     skip_lapack = PETSC_TRUE;
5926:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5928:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5929:     if (!skip_lapack) {
5930:       PetscScalar temp_work;

5932: #if defined(PETSC_MISSING_LAPACK_GESVD)
5933:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5934:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5935:       PetscMalloc1(max_constraints,&singular_vals);
5936:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5937: #if defined(PETSC_USE_COMPLEX)
5938:       PetscMalloc1(3*max_constraints,&rwork);
5939: #endif
5940:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5941:       PetscBLASIntCast(max_constraints,&Blas_N);
5942:       PetscBLASIntCast(max_constraints,&Blas_LDA);
5943:       lwork = -1;
5944:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5945: #if !defined(PETSC_USE_COMPLEX)
5946:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5947: #else
5948:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5949: #endif
5950:       PetscFPTrapPop();
5951:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5952: #else /* on missing GESVD */
5953:       /* SVD */
5954:       PetscInt max_n,min_n;
5955:       max_n = max_size_of_constraint;
5956:       min_n = max_constraints;
5957:       if (max_size_of_constraint < max_constraints) {
5958:         min_n = max_size_of_constraint;
5959:         max_n = max_constraints;
5960:       }
5961:       PetscMalloc1(min_n,&singular_vals);
5962: #if defined(PETSC_USE_COMPLEX)
5963:       PetscMalloc1(5*min_n,&rwork);
5964: #endif
5965:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5966:       lwork = -1;
5967:       PetscBLASIntCast(max_n,&Blas_M);
5968:       PetscBLASIntCast(min_n,&Blas_N);
5969:       PetscBLASIntCast(max_n,&Blas_LDA);
5970:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5971: #if !defined(PETSC_USE_COMPLEX)
5972:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5973: #else
5974:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5975: #endif
5976:       PetscFPTrapPop();
5977:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5978: #endif /* on missing GESVD */
5979:       /* Allocate optimal workspace */
5980:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5981:       PetscMalloc1(lwork,&work);
5982:     }
5983:     /* Now we can loop on constraining sets */
5984:     total_counts = 0;
5985:     constraints_idxs_ptr[0] = 0;
5986:     constraints_data_ptr[0] = 0;
5987:     /* vertices */
5988:     if (n_vertices) {
5989:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5990:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5991:       for (i=0;i<n_vertices;i++) {
5992:         constraints_n[total_counts] = 1;
5993:         constraints_data[total_counts] = 1.0;
5994:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5995:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5996:         total_counts++;
5997:       }
5998:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5999:       n_vertices = total_counts;
6000:     }

6002:     /* edges and faces */
6003:     total_counts_cc = total_counts;
6004:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6005:       IS        used_is;
6006:       PetscBool idxs_copied = PETSC_FALSE;

6008:       if (ncc<n_ISForEdges) {
6009:         used_is = ISForEdges[ncc];
6010:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6011:       } else {
6012:         used_is = ISForFaces[ncc-n_ISForEdges];
6013:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6014:       }
6015:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6017:       ISGetSize(used_is,&size_of_constraint);
6018:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6019:       /* change of basis should not be performed on local periodic nodes */
6020:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6021:       if (nnsp_has_cnst) {
6022:         PetscScalar quad_value;

6024:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6025:         idxs_copied = PETSC_TRUE;

6027:         if (!pcbddc->use_nnsp_true) {
6028:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6029:         } else {
6030:           quad_value = 1.0;
6031:         }
6032:         for (j=0;j<size_of_constraint;j++) {
6033:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6034:         }
6035:         temp_constraints++;
6036:         total_counts++;
6037:       }
6038:       for (k=0;k<nnsp_size;k++) {
6039:         PetscReal real_value;
6040:         PetscScalar *ptr_to_data;

6042:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6043:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6044:         for (j=0;j<size_of_constraint;j++) {
6045:           ptr_to_data[j] = array[is_indices[j]];
6046:         }
6047:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6048:         /* check if array is null on the connected component */
6049:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6050:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6051:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6052:           temp_constraints++;
6053:           total_counts++;
6054:           if (!idxs_copied) {
6055:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6056:             idxs_copied = PETSC_TRUE;
6057:           }
6058:         }
6059:       }
6060:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6061:       valid_constraints = temp_constraints;
6062:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6063:         if (temp_constraints == 1) { /* just normalize the constraint */
6064:           PetscScalar norm,*ptr_to_data;

6066:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6067:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6068:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6069:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6070:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6071:         } else { /* perform SVD */
6072:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6074: #if defined(PETSC_MISSING_LAPACK_GESVD)
6075:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6076:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6077:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6078:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6079:                 from that computed using LAPACKgesvd
6080:              -> This is due to a different computation of eigenvectors in LAPACKheev
6081:              -> The quality of the POD-computed basis will be the same */
6082:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6083:           /* Store upper triangular part of correlation matrix */
6084:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6085:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6086:           for (j=0;j<temp_constraints;j++) {
6087:             for (k=0;k<j+1;k++) {
6088:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6089:             }
6090:           }
6091:           /* compute eigenvalues and eigenvectors of correlation matrix */
6092:           PetscBLASIntCast(temp_constraints,&Blas_N);
6093:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
6094: #if !defined(PETSC_USE_COMPLEX)
6095:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6096: #else
6097:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6098: #endif
6099:           PetscFPTrapPop();
6100:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6101:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6102:           j = 0;
6103:           while (j < temp_constraints && singular_vals[j] < tol) j++;
6104:           total_counts = total_counts-j;
6105:           valid_constraints = temp_constraints-j;
6106:           /* scale and copy POD basis into used quadrature memory */
6107:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6108:           PetscBLASIntCast(temp_constraints,&Blas_N);
6109:           PetscBLASIntCast(temp_constraints,&Blas_K);
6110:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6111:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
6112:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6113:           if (j<temp_constraints) {
6114:             PetscInt ii;
6115:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6116:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6117:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6118:             PetscFPTrapPop();
6119:             for (k=0;k<temp_constraints-j;k++) {
6120:               for (ii=0;ii<size_of_constraint;ii++) {
6121:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6122:               }
6123:             }
6124:           }
6125: #else  /* on missing GESVD */
6126:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6127:           PetscBLASIntCast(temp_constraints,&Blas_N);
6128:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6129:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6130: #if !defined(PETSC_USE_COMPLEX)
6131:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6132: #else
6133:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6134: #endif
6135:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6136:           PetscFPTrapPop();
6137:           /* retain singular values greater than tol: note that LAPACKgesvd gives singular values in descending order */
6138:           k = temp_constraints;
6139:           if (k > size_of_constraint) k = size_of_constraint;
6140:           j = 0;
6141:           while (j < k && singular_vals[k-j-1] < tol) j++;
6142:           valid_constraints = k-j;
6143:           total_counts = total_counts-temp_constraints+valid_constraints;
6144: #endif /* on missing GESVD */
6145:         }
6146:       }
6147:       /* update pointers information */
6148:       if (valid_constraints) {
6149:         constraints_n[total_counts_cc] = valid_constraints;
6150:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6151:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6152:         /* set change_of_basis flag */
6153:         if (boolforchange) {
6154:           PetscBTSet(change_basis,total_counts_cc);
6155:         }
6156:         total_counts_cc++;
6157:       }
6158:     }
6159:     /* free workspace */
6160:     if (!skip_lapack) {
6161:       PetscFree(work);
6162: #if defined(PETSC_USE_COMPLEX)
6163:       PetscFree(rwork);
6164: #endif
6165:       PetscFree(singular_vals);
6166: #if defined(PETSC_MISSING_LAPACK_GESVD)
6167:       PetscFree(correlation_mat);
6168:       PetscFree(temp_basis);
6169: #endif
6170:     }
6171:     for (k=0;k<nnsp_size;k++) {
6172:       VecDestroy(&localnearnullsp[k]);
6173:     }
6174:     PetscFree(localnearnullsp);
6175:     /* free index sets of faces, edges and vertices */
6176:     for (i=0;i<n_ISForFaces;i++) {
6177:       ISDestroy(&ISForFaces[i]);
6178:     }
6179:     if (n_ISForFaces) {
6180:       PetscFree(ISForFaces);
6181:     }
6182:     for (i=0;i<n_ISForEdges;i++) {
6183:       ISDestroy(&ISForEdges[i]);
6184:     }
6185:     if (n_ISForEdges) {
6186:       PetscFree(ISForEdges);
6187:     }
6188:     ISDestroy(&ISForVertices);
6189:   } else {
6190:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6192:     total_counts = 0;
6193:     n_vertices = 0;
6194:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6195:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6196:     }
6197:     max_constraints = 0;
6198:     total_counts_cc = 0;
6199:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6200:       total_counts += pcbddc->adaptive_constraints_n[i];
6201:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6202:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6203:     }
6204:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6205:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6206:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6207:     constraints_data = pcbddc->adaptive_constraints_data;
6208:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6209:     PetscMalloc1(total_counts_cc,&constraints_n);
6210:     total_counts_cc = 0;
6211:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6212:       if (pcbddc->adaptive_constraints_n[i]) {
6213:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6214:       }
6215:     }
6216: #if 0
6217:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
6218:     for (i=0;i<total_counts_cc;i++) {
6219:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
6220:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
6221:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
6222:         printf(" %d",constraints_idxs[j]);
6223:       }
6224:       printf("\n");
6225:       printf("number of cc: %d\n",constraints_n[i]);
6226:     }
6227:     for (i=0;i<n_vertices;i++) {
6228:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
6229:     }
6230:     for (i=0;i<sub_schurs->n_subs;i++) {
6231:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
6232:     }
6233: #endif

6235:     max_size_of_constraint = 0;
6236:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6237:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6238:     /* Change of basis */
6239:     PetscBTCreate(total_counts_cc,&change_basis);
6240:     if (pcbddc->use_change_of_basis) {
6241:       for (i=0;i<sub_schurs->n_subs;i++) {
6242:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6243:           PetscBTSet(change_basis,i+n_vertices);
6244:         }
6245:       }
6246:     }
6247:   }
6248:   pcbddc->local_primal_size = total_counts;
6249:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6251:   /* map constraints_idxs in boundary numbering */
6252:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6253:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);

6255:   /* Create constraint matrix */
6256:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6257:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6258:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6260:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6261:   /* determine if a QR strategy is needed for change of basis */
6262:   qr_needed = PETSC_FALSE;
6263:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6264:   total_primal_vertices=0;
6265:   pcbddc->local_primal_size_cc = 0;
6266:   for (i=0;i<total_counts_cc;i++) {
6267:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6268:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6269:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6270:       pcbddc->local_primal_size_cc += 1;
6271:     } else if (PetscBTLookup(change_basis,i)) {
6272:       for (k=0;k<constraints_n[i];k++) {
6273:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6274:       }
6275:       pcbddc->local_primal_size_cc += constraints_n[i];
6276:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6277:         PetscBTSet(qr_needed_idx,i);
6278:         qr_needed = PETSC_TRUE;
6279:       }
6280:     } else {
6281:       pcbddc->local_primal_size_cc += 1;
6282:     }
6283:   }
6284:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6285:   pcbddc->n_vertices = total_primal_vertices;
6286:   /* permute indices in order to have a sorted set of vertices */
6287:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6288:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6289:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6290:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6292:   /* nonzero structure of constraint matrix */
6293:   /* and get reference dof for local constraints */
6294:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6295:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6297:   j = total_primal_vertices;
6298:   total_counts = total_primal_vertices;
6299:   cum = total_primal_vertices;
6300:   for (i=n_vertices;i<total_counts_cc;i++) {
6301:     if (!PetscBTLookup(change_basis,i)) {
6302:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6303:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6304:       cum++;
6305:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6306:       for (k=0;k<constraints_n[i];k++) {
6307:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6308:         nnz[j+k] = size_of_constraint;
6309:       }
6310:       j += constraints_n[i];
6311:     }
6312:   }
6313:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6314:   PetscFree(nnz);

6316:   /* set values in constraint matrix */
6317:   for (i=0;i<total_primal_vertices;i++) {
6318:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6319:   }
6320:   total_counts = total_primal_vertices;
6321:   for (i=n_vertices;i<total_counts_cc;i++) {
6322:     if (!PetscBTLookup(change_basis,i)) {
6323:       PetscInt *cols;

6325:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6326:       cols = constraints_idxs+constraints_idxs_ptr[i];
6327:       for (k=0;k<constraints_n[i];k++) {
6328:         PetscInt    row = total_counts+k;
6329:         PetscScalar *vals;

6331:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6332:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6333:       }
6334:       total_counts += constraints_n[i];
6335:     }
6336:   }
6337:   /* assembling */
6338:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6339:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6340:   MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);
6341:   MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);
6342:   MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");

6344:   /*
6345:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6346:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6347:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6348:   */
6349:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6350:   if (pcbddc->use_change_of_basis) {
6351:     /* dual and primal dofs on a single cc */
6352:     PetscInt     dual_dofs,primal_dofs;
6353:     /* working stuff for GEQRF */
6354:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6355:     PetscBLASInt lqr_work;
6356:     /* working stuff for UNGQR */
6357:     PetscScalar  *gqr_work,lgqr_work_t;
6358:     PetscBLASInt lgqr_work;
6359:     /* working stuff for TRTRS */
6360:     PetscScalar  *trs_rhs;
6361:     PetscBLASInt Blas_NRHS;
6362:     /* pointers for values insertion into change of basis matrix */
6363:     PetscInt     *start_rows,*start_cols;
6364:     PetscScalar  *start_vals;
6365:     /* working stuff for values insertion */
6366:     PetscBT      is_primal;
6367:     PetscInt     *aux_primal_numbering_B;
6368:     /* matrix sizes */
6369:     PetscInt     global_size,local_size;
6370:     /* temporary change of basis */
6371:     Mat          localChangeOfBasisMatrix;
6372:     /* extra space for debugging */
6373:     PetscScalar  *dbg_work;

6375:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6376:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6377:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6378:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6379:     /* nonzeros for local mat */
6380:     PetscMalloc1(pcis->n,&nnz);
6381:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6382:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6383:     } else {
6384:       const PetscInt *ii;
6385:       PetscInt       n;
6386:       PetscBool      flg_row;
6387:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6388:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6389:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6390:     }
6391:     for (i=n_vertices;i<total_counts_cc;i++) {
6392:       if (PetscBTLookup(change_basis,i)) {
6393:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6394:         if (PetscBTLookup(qr_needed_idx,i)) {
6395:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6396:         } else {
6397:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6398:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6399:         }
6400:       }
6401:     }
6402:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6403:     PetscFree(nnz);
6404:     /* Set interior change in the matrix */
6405:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6406:       for (i=0;i<pcis->n;i++) {
6407:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6408:       }
6409:     } else {
6410:       const PetscInt *ii,*jj;
6411:       PetscScalar    *aa;
6412:       PetscInt       n;
6413:       PetscBool      flg_row;
6414:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6415:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6416:       for (i=0;i<n;i++) {
6417:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6418:       }
6419:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6420:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6421:     }

6423:     if (pcbddc->dbg_flag) {
6424:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6425:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6426:     }


6429:     /* Now we loop on the constraints which need a change of basis */
6430:     /*
6431:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6432:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6434:        Basic blocks of change of basis matrix T computed by

6436:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6438:             | 1        0   ...        0         s_1/S |
6439:             | 0        1   ...        0         s_2/S |
6440:             |              ...                        |
6441:             | 0        ...            1     s_{n-1}/S |
6442:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6444:             with S = \sum_{i=1}^n s_i^2
6445:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6446:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6448:           - QR decomposition of constraints otherwise
6449:     */
6450:     if (qr_needed) {
6451:       /* space to store Q */
6452:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6453:       /* array to store scaling factors for reflectors */
6454:       PetscMalloc1(max_constraints,&qr_tau);
6455:       /* first we issue queries for optimal work */
6456:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6457:       PetscBLASIntCast(max_constraints,&Blas_N);
6458:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6459:       lqr_work = -1;
6460:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6461:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6462:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6463:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6464:       lgqr_work = -1;
6465:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6466:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6467:       PetscBLASIntCast(max_constraints,&Blas_K);
6468:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6469:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6470:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6471:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6472:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6473:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6474:       /* array to store rhs and solution of triangular solver */
6475:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6476:       /* allocating workspace for check */
6477:       if (pcbddc->dbg_flag) {
6478:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6479:       }
6480:     }
6481:     /* array to store whether a node is primal or not */
6482:     PetscBTCreate(pcis->n_B,&is_primal);
6483:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6484:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6485:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6486:     for (i=0;i<total_primal_vertices;i++) {
6487:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6488:     }
6489:     PetscFree(aux_primal_numbering_B);

6491:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6492:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6493:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6494:       if (PetscBTLookup(change_basis,total_counts)) {
6495:         /* get constraint info */
6496:         primal_dofs = constraints_n[total_counts];
6497:         dual_dofs = size_of_constraint-primal_dofs;

6499:         if (pcbddc->dbg_flag) {
6500:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6501:         }

6503:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6505:           /* copy quadrature constraints for change of basis check */
6506:           if (pcbddc->dbg_flag) {
6507:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6508:           }
6509:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6510:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6512:           /* compute QR decomposition of constraints */
6513:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6514:           PetscBLASIntCast(primal_dofs,&Blas_N);
6515:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6516:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6517:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6518:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6519:           PetscFPTrapPop();

6521:           /* explicitly compute R^-T */
6522:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6523:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6524:           PetscBLASIntCast(primal_dofs,&Blas_N);
6525:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6526:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6527:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6528:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6529:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6530:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6531:           PetscFPTrapPop();

6533:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6534:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6535:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6536:           PetscBLASIntCast(primal_dofs,&Blas_K);
6537:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6538:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6539:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6540:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6541:           PetscFPTrapPop();

6543:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6544:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6545:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6546:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6547:           PetscBLASIntCast(primal_dofs,&Blas_N);
6548:           PetscBLASIntCast(primal_dofs,&Blas_K);
6549:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6550:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6551:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6552:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6553:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6554:           PetscFPTrapPop();
6555:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6557:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6558:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6559:           /* insert cols for primal dofs */
6560:           for (j=0;j<primal_dofs;j++) {
6561:             start_vals = &qr_basis[j*size_of_constraint];
6562:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6563:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6564:           }
6565:           /* insert cols for dual dofs */
6566:           for (j=0,k=0;j<dual_dofs;k++) {
6567:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6568:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6569:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6570:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6571:               j++;
6572:             }
6573:           }

6575:           /* check change of basis */
6576:           if (pcbddc->dbg_flag) {
6577:             PetscInt   ii,jj;
6578:             PetscBool valid_qr=PETSC_TRUE;
6579:             PetscBLASIntCast(primal_dofs,&Blas_M);
6580:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6581:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6582:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6583:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6584:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6585:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6586:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6587:             PetscFPTrapPop();
6588:             for (jj=0;jj<size_of_constraint;jj++) {
6589:               for (ii=0;ii<primal_dofs;ii++) {
6590:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6591:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6592:               }
6593:             }
6594:             if (!valid_qr) {
6595:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6596:               for (jj=0;jj<size_of_constraint;jj++) {
6597:                 for (ii=0;ii<primal_dofs;ii++) {
6598:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6599:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6600:                   }
6601:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6602:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6603:                   }
6604:                 }
6605:               }
6606:             } else {
6607:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6608:             }
6609:           }
6610:         } else { /* simple transformation block */
6611:           PetscInt    row,col;
6612:           PetscScalar val,norm;

6614:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6615:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6616:           for (j=0;j<size_of_constraint;j++) {
6617:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6618:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6619:             if (!PetscBTLookup(is_primal,row_B)) {
6620:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6621:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6622:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6623:             } else {
6624:               for (k=0;k<size_of_constraint;k++) {
6625:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6626:                 if (row != col) {
6627:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6628:                 } else {
6629:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6630:                 }
6631:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6632:               }
6633:             }
6634:           }
6635:           if (pcbddc->dbg_flag) {
6636:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6637:           }
6638:         }
6639:       } else {
6640:         if (pcbddc->dbg_flag) {
6641:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6642:         }
6643:       }
6644:     }

6646:     /* free workspace */
6647:     if (qr_needed) {
6648:       if (pcbddc->dbg_flag) {
6649:         PetscFree(dbg_work);
6650:       }
6651:       PetscFree(trs_rhs);
6652:       PetscFree(qr_tau);
6653:       PetscFree(qr_work);
6654:       PetscFree(gqr_work);
6655:       PetscFree(qr_basis);
6656:     }
6657:     PetscBTDestroy(&is_primal);
6658:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6659:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6661:     /* assembling of global change of variable */
6662:     if (!pcbddc->fake_change) {
6663:       Mat      tmat;
6664:       PetscInt bs;

6666:       VecGetSize(pcis->vec1_global,&global_size);
6667:       VecGetLocalSize(pcis->vec1_global,&local_size);
6668:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6669:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6670:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6671:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6672:       MatGetBlockSize(pc->pmat,&bs);
6673:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6674:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6675:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6676:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6677:       MatDestroy(&tmat);
6678:       VecSet(pcis->vec1_global,0.0);
6679:       VecSet(pcis->vec1_N,1.0);
6680:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6681:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6682:       VecReciprocal(pcis->vec1_global);
6683:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6685:       /* check */
6686:       if (pcbddc->dbg_flag) {
6687:         PetscReal error;
6688:         Vec       x,x_change;

6690:         VecDuplicate(pcis->vec1_global,&x);
6691:         VecDuplicate(pcis->vec1_global,&x_change);
6692:         VecSetRandom(x,NULL);
6693:         VecCopy(x,pcis->vec1_global);
6694:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6695:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6696:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6697:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6698:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6699:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6700:         VecAXPY(x,-1.0,x_change);
6701:         VecNorm(x,NORM_INFINITY,&error);
6702:         if (error > PETSC_SMALL) {
6703:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6704:         }
6705:         VecDestroy(&x);
6706:         VecDestroy(&x_change);
6707:       }
6708:       /* adapt sub_schurs computed (if any) */
6709:       if (pcbddc->use_deluxe_scaling) {
6710:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6712:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6713:         if (sub_schurs && sub_schurs->S_Ej_all) {
6714:           Mat                    S_new,tmat;
6715:           IS                     is_all_N,is_V_Sall = NULL;

6717:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6718:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6719:           if (pcbddc->deluxe_zerorows) {
6720:             ISLocalToGlobalMapping NtoSall;
6721:             IS                     is_V;
6722:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6723:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6724:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6725:             ISLocalToGlobalMappingDestroy(&NtoSall);
6726:             ISDestroy(&is_V);
6727:           }
6728:           ISDestroy(&is_all_N);
6729:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6730:           MatDestroy(&sub_schurs->S_Ej_all);
6731:           PetscObjectReference((PetscObject)S_new);
6732:           if (pcbddc->deluxe_zerorows) {
6733:             const PetscScalar *array;
6734:             const PetscInt    *idxs_V,*idxs_all;
6735:             PetscInt          i,n_V;

6737:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6738:             ISGetLocalSize(is_V_Sall,&n_V);
6739:             ISGetIndices(is_V_Sall,&idxs_V);
6740:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6741:             VecGetArrayRead(pcis->D,&array);
6742:             for (i=0;i<n_V;i++) {
6743:               PetscScalar val;
6744:               PetscInt    idx;

6746:               idx = idxs_V[i];
6747:               val = array[idxs_all[idxs_V[i]]];
6748:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6749:             }
6750:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6751:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6752:             VecRestoreArrayRead(pcis->D,&array);
6753:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6754:             ISRestoreIndices(is_V_Sall,&idxs_V);
6755:           }
6756:           sub_schurs->S_Ej_all = S_new;
6757:           MatDestroy(&S_new);
6758:           if (sub_schurs->sum_S_Ej_all) {
6759:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6760:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6761:             PetscObjectReference((PetscObject)S_new);
6762:             if (pcbddc->deluxe_zerorows) {
6763:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6764:             }
6765:             sub_schurs->sum_S_Ej_all = S_new;
6766:             MatDestroy(&S_new);
6767:           }
6768:           ISDestroy(&is_V_Sall);
6769:           MatDestroy(&tmat);
6770:         }
6771:         /* destroy any change of basis context in sub_schurs */
6772:         if (sub_schurs && sub_schurs->change) {
6773:           PetscInt i;

6775:           for (i=0;i<sub_schurs->n_subs;i++) {
6776:             KSPDestroy(&sub_schurs->change[i]);
6777:           }
6778:           PetscFree(sub_schurs->change);
6779:         }
6780:       }
6781:       if (pcbddc->switch_static) { /* need to save the local change */
6782:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6783:       } else {
6784:         MatDestroy(&localChangeOfBasisMatrix);
6785:       }
6786:       /* determine if any process has changed the pressures locally */
6787:       pcbddc->change_interior = pcbddc->benign_have_null;
6788:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6789:       MatDestroy(&pcbddc->ConstraintMatrix);
6790:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6791:       pcbddc->use_qr_single = qr_needed;
6792:     }
6793:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6794:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6795:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6796:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6797:     } else {
6798:       Mat benign_global = NULL;
6799:       if (pcbddc->benign_have_null) {
6800:         Mat tmat;

6802:         pcbddc->change_interior = PETSC_TRUE;
6803:         VecSet(pcis->vec1_global,0.0);
6804:         VecSet(pcis->vec1_N,1.0);
6805:         VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6806:         VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6807:         VecReciprocal(pcis->vec1_global);
6808:         VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6809:         VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6810:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6811:         if (pcbddc->benign_change) {
6812:           Mat M;

6814:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6815:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6816:           MatISSetLocalMat(tmat,M);
6817:           MatDestroy(&M);
6818:         } else {
6819:           Mat         eye;
6820:           PetscScalar *array;

6822:           VecGetArray(pcis->vec1_N,&array);
6823:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6824:           for (i=0;i<pcis->n;i++) {
6825:             MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6826:           }
6827:           VecRestoreArray(pcis->vec1_N,&array);
6828:           MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6829:           MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6830:           MatISSetLocalMat(tmat,eye);
6831:           MatDestroy(&eye);
6832:         }
6833:         MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6834:         MatDestroy(&tmat);
6835:       }
6836:       if (pcbddc->user_ChangeOfBasisMatrix) {
6837:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6838:         MatDestroy(&benign_global);
6839:       } else if (pcbddc->benign_have_null) {
6840:         pcbddc->ChangeOfBasisMatrix = benign_global;
6841:       }
6842:     }
6843:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6844:       IS             is_global;
6845:       const PetscInt *gidxs;

6847:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6848:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6849:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6850:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6851:       ISDestroy(&is_global);
6852:     }
6853:   }
6854:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6855:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6856:   }

6858:   if (!pcbddc->fake_change) {
6859:     /* add pressure dofs to set of primal nodes for numbering purposes */
6860:     for (i=0;i<pcbddc->benign_n;i++) {
6861:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6862:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6863:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6864:       pcbddc->local_primal_size_cc++;
6865:       pcbddc->local_primal_size++;
6866:     }

6868:     /* check if a new primal space has been introduced (also take into account benign trick) */
6869:     pcbddc->new_primal_space_local = PETSC_TRUE;
6870:     if (olocal_primal_size == pcbddc->local_primal_size) {
6871:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6872:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6873:       if (!pcbddc->new_primal_space_local) {
6874:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6875:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6876:       }
6877:     }
6878:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6879:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6880:   }
6881:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

6883:   /* flush dbg viewer */
6884:   if (pcbddc->dbg_flag) {
6885:     PetscViewerFlush(pcbddc->dbg_viewer);
6886:   }

6888:   /* free workspace */
6889:   PetscBTDestroy(&qr_needed_idx);
6890:   PetscBTDestroy(&change_basis);
6891:   if (!pcbddc->adaptive_selection) {
6892:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6893:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6894:   } else {
6895:     PetscFree5(pcbddc->adaptive_constraints_n,
6896:                       pcbddc->adaptive_constraints_idxs_ptr,
6897:                       pcbddc->adaptive_constraints_data_ptr,
6898:                       pcbddc->adaptive_constraints_idxs,
6899:                       pcbddc->adaptive_constraints_data);
6900:     PetscFree(constraints_n);
6901:     PetscFree(constraints_idxs_B);
6902:   }
6903:   return(0);
6904: }
6905: /* #undef PETSC_MISSING_LAPACK_GESVD */

/*
   PCBDDCAnalyzeInterface - When pcbddc->recompute_topography is set, resets and rebuilds
   the graph held in pcbddc->mat_graph (local-to-global map, optional CSR adjacency,
   optional coordinates, optional local sub-partition); afterwards, if the graph has not
   been analyzed yet, computes its connected components.

   Input Parameter:
.  pc - the preconditioner; pc->data is accessed as PC_BDDC and pc->pmat->data as Mat_IS

   Notes:
   Errors are raised (SETERRQ2) when the user CSR graph or the coordinates have a size
   that does not match the graph; otherwise the function returns 0.
*/
6907: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6908: {
6909:   ISLocalToGlobalMapping map;
6910:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
6911:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
6912:   PetscInt               i,N;
        /* rcsr: set below when the row adjacency of matis->A is queried in this call;
           it triggers the nvtxs_csr reset right before returning */
6913:   PetscBool              rcsr = PETSC_FALSE;
        /* NOTE(review): ierr is not referenced anywhere in this listing — presumably the
           error-checking wrappers were dropped when the listing was rendered; confirm */
6914:   PetscErrorCode         ierr;

6917:   if (pcbddc->recompute_topography) {
          /* force the connected-components pass at the end of this function */
6918:     pcbddc->graphanalyzed = PETSC_FALSE;
6919:     /* Reset previously computed graph */
6920:     PCBDDCGraphReset(pcbddc->mat_graph);
6921:     /* Init local Graph struct */
          /* N is the first (row) size returned by MatGetSize on pc->pmat */
6922:     MatGetSize(pc->pmat,&N,NULL);
6923:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6924:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* only the locally-specified primal vertices (no global IS given) go through the
             consistency check, which is invoked with MPI_LOR */
6926:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6927:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6928:     }
6929:     /* Check validity of the csr graph passed in by the user */
6930:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

6932:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6933:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6934:       PetscInt  *xadj,*adjncy;
6935:       PetscInt  nvtxs;
6936:       PetscBool flg_row=PETSC_FALSE;

            /* query the row structure of the local matrix; it is copied into the graph
               (PETSC_COPY_VALUES) only when flg_row comes back true */
6938:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6939:       if (flg_row) {
6940:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6941:         pcbddc->computed_rowadj = PETSC_TRUE;
6942:       }
6943:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* set regardless of flg_row: nvtxs_csr is zeroed at the end of the function */
6944:       rcsr = PETSC_TRUE;
6945:     }
6946:     if (pcbddc->dbg_flag) {
6947:       PetscViewerFlush(pcbddc->dbg_viewer);
6948:     }

          /* coordinates are present (cdim != 0) but not yet marked as local (cloc false):
             move them to the local numbering of matis->A through matis->sf */
6950:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6951:       PetscReal    *lcoords;
6952:       PetscInt     n;
6953:       MPI_Datatype dimrealtype;

6955:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
6956:       MatGetLocalSize(matis->A,&n,NULL);
6957:       MatISSetUpSF(pc->pmat);
            /* destination buffer: cdim reals for each of the n local rows of matis->A */
6958:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
            /* one broadcast unit = cdim contiguous MPIU_REAL values */
6959:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
6960:       MPI_Type_commit(&dimrealtype);
6961:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6962:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6963:       MPI_Type_free(&dimrealtype);
            /* the previous coordinate array is released and replaced by lcoords below */
6964:       PetscFree(pcbddc->mat_graph->coords);

6966:       pcbddc->mat_graph->coords = lcoords;
6967:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6968:       pcbddc->mat_graph->cnloc  = n;
6969:     }
          /* when coordinates are attached, their count must match the number of graph vertices */
6970:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
          /* coordinates are flagged active only while corner selection is requested and
             has not been performed yet */
6971:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);

6973:     /* Setup of Graph */
6974:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6975:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

6977:     /* attach info on disconnected subdomains if present */
6978:     if (pcbddc->n_local_subs) {
6979:       PetscInt *local_subs;

            /* NOTE(review): the array is sized with N (from MatGetSize on pc->pmat) but is
               indexed by the entries of the pcbddc->local_subs index sets — confirm the
               intended size. Entries not listed in any of those index sets are never
               assigned in this function — confirm that every dof is covered or that
               consumers of mat_graph->local_subs tolerate unset entries. */
6981:       PetscMalloc1(N,&local_subs);
6982:       for (i=0;i<pcbddc->n_local_subs;i++) {
6983:         const PetscInt *idxs;
6984:         PetscInt       nl,j;

6986:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
6987:         ISGetIndices(pcbddc->local_subs[i],&idxs);
              /* tag every index of the i-th set with the sub-partition id i */
6988:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6989:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6990:       }
            /* the array is handed to the graph; it is not freed in this function */
6991:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6992:       pcbddc->mat_graph->local_subs = local_subs;
6993:     }
6994:   }

6996:   if (!pcbddc->graphanalyzed) {
6997:     /* Graph's connected components analysis */
6998:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6999:     pcbddc->graphanalyzed = PETSC_TRUE;
7000:   }
        /* the row adjacency was queried above: reset the CSR vertex count stored in the graph */
7001:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7002:   return(0);
7003: }

/*
   PCBDDCOrthonormalizeVecs - Processes the n vectors in vecs[] in place with a
   Gram-Schmidt-style sweep: vecs[0] is normalized; then, for each i >= 1, the dot
   products of vecs[i] with vecs[0..i-1] are computed in one call, negated and conjugated,
   used as coefficients of a multi-vector update of vecs[i], and vecs[i] is normalized.

   Input Parameters:
+  n    - number of vectors; the function returns 0 immediately when n is 0
-  vecs - the vectors, overwritten in place

   Notes:
   NOTE(review): the norms computed by VecNormalize are discarded (NULL is passed), so a
   vector that becomes (nearly) zero after the update is not detected here.
*/
7005: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7006: {
7007:   PetscInt       i,j;
        /* scratch array of n coefficients, allocated and freed inside this function */
7008:   PetscScalar    *alphas;

7012:   if (!n) return(0);
7013:   PetscMalloc1(n,&alphas);
7014:   VecNormalize(vecs[0],NULL);
7015:   for (i=1;i<n;i++) {
          /* alphas[0..i-1] = dot products of vecs[i] against the i vectors already processed */
7016:     VecMDot(vecs[i],i,vecs,alphas);
          /* NOTE(review): the coefficient is conj(-alpha); for real scalars this is plain
             -alpha — confirm against the conjugation convention of VecMDot for complex builds */
7017:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
          /* vecs[i] is updated with the linear combination of vecs[0..i-1] weighted by alphas */
7018:     VecMAXPY(vecs[i],i,alphas,vecs);
7019:     VecNormalize(vecs[i],NULL);
7020:   }
7021:   PetscFree(alphas);
7022:   return(0);
7023: }

7025: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7026: {
7027:   Mat            A;
7028:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
7029:   PetscMPIInt    size,rank,color;
7030:   PetscInt       *xadj,*adjncy;
7031:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7032:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
7033:   PetscInt       void_procs,*procs_candidates = NULL;
7034:   PetscInt       xadj_count,*count;
7035:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
7036:   PetscSubcomm   psubcomm;
7037:   MPI_Comm       subcomm;

7042:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7043:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7046:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains);

7048:   if (have_void) *have_void = PETSC_FALSE;
7049:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7050:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7051:   MatISGetLocalMat(mat,&A);
7052:   MatGetLocalSize(A,&n,NULL);
7053:   im_active = !!n;
7054:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7055:   void_procs = size - active_procs;
7056:   /* get ranks of of non-active processes in mat communicator */
7057:   if (void_procs) {
7058:     PetscInt ncand;

7060:     if (have_void) *have_void = PETSC_TRUE;
7061:     PetscMalloc1(size,&procs_candidates);
7062:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7063:     for (i=0,ncand=0;i<size;i++) {
7064:       if (!procs_candidates[i]) {
7065:         procs_candidates[ncand++] = i;
7066:       }
7067:     }
7068:     /* force n_subdomains to be not greater that the number of non-active processes */
7069:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7070:   }

7072:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7073:      number of subdomains requested 1 -> send to master or first candidate in voids  */
7074:   MatGetSize(mat,&N,NULL);
7075:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7076:     PetscInt issize,isidx,dest;
7077:     if (*n_subdomains == 1) dest = 0;
7078:     else dest = rank;
7079:     if (im_active) {
7080:       issize = 1;
7081:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7082:         isidx = procs_candidates[dest];
7083:       } else {
7084:         isidx = dest;
7085:       }
7086:     } else {
7087:       issize = 0;
7088:       isidx = -1;
7089:     }
7090:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7091:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7092:     PetscFree(procs_candidates);
7093:     return(0);
7094:   }
7095:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7096:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7097:   threshold = PetscMax(threshold,2);

7099:   /* Get info on mapping */
7100:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7102:   /* build local CSR graph of subdomains' connectivity */
7103:   PetscMalloc1(2,&xadj);
7104:   xadj[0] = 0;
7105:   xadj[1] = PetscMax(n_neighs-1,0);
7106:   PetscMalloc1(xadj[1],&adjncy);
7107:   PetscMalloc1(xadj[1],&adjncy_wgt);
7108:   PetscCalloc1(n,&count);
7109:   for (i=1;i<n_neighs;i++)
7110:     for (j=0;j<n_shared[i];j++)
7111:       count[shared[i][j]] += 1;

7113:   xadj_count = 0;
7114:   for (i=1;i<n_neighs;i++) {
7115:     for (j=0;j<n_shared[i];j++) {
7116:       if (count[shared[i][j]] < threshold) {
7117:         adjncy[xadj_count] = neighs[i];
7118:         adjncy_wgt[xadj_count] = n_shared[i];
7119:         xadj_count++;
7120:         break;
7121:       }
7122:     }
7123:   }
7124:   xadj[1] = xadj_count;
7125:   PetscFree(count);
7126:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7127:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

7129:   PetscMalloc1(1,&ranks_send_to_idx);

7131:   /* Restrict work on active processes only */
7132:   PetscMPIIntCast(im_active,&color);
7133:   if (void_procs) {
7134:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7135:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7136:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7137:     subcomm = PetscSubcommChild(psubcomm);
7138:   } else {
7139:     psubcomm = NULL;
7140:     subcomm = PetscObjectComm((PetscObject)mat);
7141:   }

7143:   v_wgt = NULL;
7144:   if (!color) {
7145:     PetscFree(xadj);
7146:     PetscFree(adjncy);
7147:     PetscFree(adjncy_wgt);
7148:   } else {
7149:     Mat             subdomain_adj;
7150:     IS              new_ranks,new_ranks_contig;
7151:     MatPartitioning partitioner;
7152:     PetscInt        rstart=0,rend=0;
7153:     PetscInt        *is_indices,*oldranks;
7154:     PetscMPIInt     size;
7155:     PetscBool       aggregate;

7157:     MPI_Comm_size(subcomm,&size);
7158:     if (void_procs) {
7159:       PetscInt prank = rank;
7160:       PetscMalloc1(size,&oldranks);
7161:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7162:       for (i=0;i<xadj[1];i++) {
7163:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7164:       }
7165:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7166:     } else {
7167:       oldranks = NULL;
7168:     }
7169:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7170:     if (aggregate) { /* TODO: all this part could be made more efficient */
7171:       PetscInt    lrows,row,ncols,*cols;
7172:       PetscMPIInt nrank;
7173:       PetscScalar *vals;

7175:       MPI_Comm_rank(subcomm,&nrank);
7176:       lrows = 0;
7177:       if (nrank<redprocs) {
7178:         lrows = size/redprocs;
7179:         if (nrank<size%redprocs) lrows++;
7180:       }
7181:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7182:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7183:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7184:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7185:       row = nrank;
7186:       ncols = xadj[1]-xadj[0];
7187:       cols = adjncy;
7188:       PetscMalloc1(ncols,&vals);
7189:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7190:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7191:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7192:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7193:       PetscFree(xadj);
7194:       PetscFree(adjncy);
7195:       PetscFree(adjncy_wgt);
7196:       PetscFree(vals);
7197:       if (use_vwgt) {
7198:         Vec               v;
7199:         const PetscScalar *array;
7200:         PetscInt          nl;

7202:         MatCreateVecs(subdomain_adj,&v,NULL);
7203:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7204:         VecAssemblyBegin(v);
7205:         VecAssemblyEnd(v);
7206:         VecGetLocalSize(v,&nl);
7207:         VecGetArrayRead(v,&array);
7208:         PetscMalloc1(nl,&v_wgt);
7209:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7210:         VecRestoreArrayRead(v,&array);
7211:         VecDestroy(&v);
7212:       }
7213:     } else {
7214:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7215:       if (use_vwgt) {
7216:         PetscMalloc1(1,&v_wgt);
7217:         v_wgt[0] = n;
7218:       }
7219:     }
7220:     /* MatView(subdomain_adj,0); */

7222:     /* Partition */
7223:     MatPartitioningCreate(subcomm,&partitioner);
7224:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7225:     if (v_wgt) {
7226:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7227:     }
7228:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7229:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7230:     MatPartitioningSetFromOptions(partitioner);
7231:     MatPartitioningApply(partitioner,&new_ranks);
7232:     /* MatPartitioningView(partitioner,0); */

7234:     /* renumber new_ranks to avoid "holes" in new set of processors */
7235:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7236:     ISDestroy(&new_ranks);
7237:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7238:     if (!aggregate) {
7239:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7240: #if defined(PETSC_USE_DEBUG)
7241:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7242: #endif
7243:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7244:       } else if (oldranks) {
7245:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7246:       } else {
7247:         ranks_send_to_idx[0] = is_indices[0];
7248:       }
7249:     } else {
7250:       PetscInt    idx = 0;
7251:       PetscMPIInt tag;
7252:       MPI_Request *reqs;

7254:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7255:       PetscMalloc1(rend-rstart,&reqs);
7256:       for (i=rstart;i<rend;i++) {
7257:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7258:       }
7259:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7260:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7261:       PetscFree(reqs);
7262:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7263: #if defined(PETSC_USE_DEBUG)
7264:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7265: #endif
7266:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7267:       } else if (oldranks) {
7268:         ranks_send_to_idx[0] = oldranks[idx];
7269:       } else {
7270:         ranks_send_to_idx[0] = idx;
7271:       }
7272:     }
7273:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7274:     /* clean up */
7275:     PetscFree(oldranks);
7276:     ISDestroy(&new_ranks_contig);
7277:     MatDestroy(&subdomain_adj);
7278:     MatPartitioningDestroy(&partitioner);
7279:   }
7280:   PetscSubcommDestroy(&psubcomm);
7281:   PetscFree(procs_candidates);

7283:   /* assemble parallel IS for sends */
7284:   i = 1;
7285:   if (!color) i=0;
7286:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7287:   return(0);
7288: }

/* Private tag for the type of a local matrix. PCBDDCMatISSubassemble stores it as the first entry of each
   index message it sends and reads it back on the receiving side to choose the type of the new local matrix. */
7290: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

/*
   PCBDDCMatISSubassemble - Subassembles a MATIS matrix.

   Every process sends the raw values of its local matrix, together with the global indices of its
   local-to-global map, to the rank(s) listed in the send pattern. Each receiving process merges the
   received global indices into a new local-to-global map and adds the received values into the local
   matrix of a new MATIS object.

   Input Parameters:
     mat           - MATIS matrix; an error is raised unless its local matrix is MATSEQDENSE and square
     is_sends      - IS whose local entries are destination ranks in the communicator of mat; if NULL,
                     the pattern is computed by PCBDDCMatISGetSubassemblingPattern() from n_subdomains
     n_subdomains  - target number of subdomains; must be nonzero when is_sends is NULL
     restrict_comm - if true, some processes are flagged as excluded (see restrict_full): on exit their
                     *mat_n, isarray[] entries and nnsp_vec[0] are destroyed and *mat_n is set to NULL.
                     When reuse is false the new matrix is created on the child of a PetscSubcomm
                     split by that flag
     restrict_full - used only with restrict_comm: if true, every process that receives nothing is
                     excluded; if false, only processes that send but do not receive are excluded
     reuse         - if true and *mat_n is non-NULL, *mat_n must be a MATIS with the same global sizes
                     as mat and it is given the new local-to-global map; if true and *mat_n is NULL a
                     placeholder MATIS is created
     nis           - number of entries of isarray
     nvecs         - number of entries of nnsp_vec; values greater than 1 are rejected

   Input/Output Parameters:
     mat_n         - the subassembled MATIS (NULL on exit on excluded processes)
     isarray       - each IS is sent along the same pattern, destroyed, and replaced by a new IS holding
                     the sorted, duplicate-free union of the received indices
     nnsp_vec      - nnsp_vec[0] is sent along the same pattern, destroyed, and replaced by a new vector
                     with new_local_rows local entries into which the received values are accumulated
*/
7292: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7293: {
7294:   Mat                    local_mat;
7295:   IS                     is_sends_internal;
7296:   PetscInt               rows,cols,new_local_rows;
7297:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7298:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7299:   ISLocalToGlobalMapping l2gmap;
7300:   PetscInt*              l2gmap_indices;
7301:   const PetscInt*        is_indices;
7302:   MatType                new_local_type;
7303:   /* buffers */
7304:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7305:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7306:   PetscInt               *recv_buffer_idxs_local;
7307:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7308:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7309:   /* MPI */
7310:   MPI_Comm               comm,comm_n;
7311:   PetscSubcomm           subcomm;
7312:   PetscMPIInt            n_sends,n_recvs,commsize;
7313:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7314:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7315:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7316:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7317:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7318:   PetscErrorCode         ierr;

        /* input validation: mat must be MATIS and at most one vector can be handled */
7322:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7323:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7330:   if (nvecs) {
7331:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7333:   }
7334:   /* further checks */
7335:   MatISGetLocalMat(mat,&local_mat);
7336:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7337:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7338:   MatGetSize(local_mat,&rows,&cols);
7339:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
        /* when reusing an existing matrix, it must be a MATIS with the same global sizes as mat */
7340:   if (reuse && *mat_n) {
7341:     PetscInt mrows,mcols,mnrows,mncols;
7343:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7344:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7345:     MatGetSize(mat,&mrows,&mcols);
7346:     MatGetSize(*mat_n,&mnrows,&mncols);
7347:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7348:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7349:   }
7350:   MatGetBlockSize(local_mat,&bs);

7353:   /* prepare IS for sending if not provided */
7354:   if (!is_sends) {
7355:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7356:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7357:   } else {
          /* take a reference so that is_sends_internal can be destroyed unconditionally later */
7358:     PetscObjectReference((PetscObject)is_sends);
7359:     is_sends_internal = is_sends;
7360:   }

7362:   /* get comm */
7363:   PetscObjectGetComm((PetscObject)mat,&comm);

7365:   /* compute number of sends */
7366:   ISGetLocalSize(is_sends_internal,&i);
7367:   PetscMPIIntCast(i,&n_sends);

7369:   /* compute number of receives */
7370:   MPI_Comm_size(comm,&commsize);
7371:   PetscMalloc1(commsize,&iflags);
7372:   PetscMemzero(iflags,commsize*sizeof(*iflags));
7373:   ISGetIndices(is_sends_internal,&is_indices);
7374:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; /* flag every destination rank */
7375:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7376:   PetscFree(iflags);

7378:   /* restrict comm if requested */
7379:   subcomm = 0;
7380:   destroy_mat = PETSC_FALSE;
7381:   if (restrict_comm) {
7382:     PetscMPIInt color,subcommsize;

          /* color == 1 marks a process that is excluded from the new communicator */
7384:     color = 0;
7385:     if (restrict_full) {
7386:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7387:     } else {
7388:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7389:     }
7390:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm); /* sum of colors = number of excluded processes */
7391:     subcommsize = commsize - subcommsize;
7392:     /* check if reuse has been requested */
7393:     if (reuse) {
7394:       if (*mat_n) {
7395:         PetscMPIInt subcommsize2;
7396:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7397:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7398:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7399:       } else {
              /* no matrix to reuse on this process: the placeholder object created below lives on PETSC_COMM_SELF */
7400:         comm_n = PETSC_COMM_SELF;
7401:       }
7402:     } else { /* MAT_INITIAL_MATRIX */
7403:       PetscMPIInt rank;

7405:       MPI_Comm_rank(comm,&rank);
7406:       PetscSubcommCreate(comm,&subcomm);
7407:       PetscSubcommSetNumber(subcomm,2);
7408:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7409:       comm_n = PetscSubcommChild(subcomm);
7410:     }
7411:     /* flag to destroy *mat_n if not significant (this process is excluded from the new comm) */
7412:     if (color) destroy_mat = PETSC_TRUE;
7413:   } else {
7414:     comm_n = comm;
7415:   }

7417:   /* prepare send/receive buffers */
7418:   PetscMalloc1(commsize,&ilengths_idxs);
7419:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7420:   PetscMalloc1(commsize,&ilengths_vals);
7421:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7422:   if (nis) {
7423:     PetscCalloc1(commsize,&ilengths_idxs_is);
7424:   }

7426:   /* Get data from local matrices */
7427:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7428:     /* TODO: See below some guidelines on how to prepare the local buffers */
7429:     /*
7430:        send_buffer_vals should contain the raw values of the local matrix
7431:        send_buffer_idxs should contain:
7432:        - MatType_PRIVATE type
7433:        - PetscInt        size_of_l2gmap
7434:        - PetscInt        global_row_indices[size_of_l2gmap]
7435:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7436:     */
7437:   else {
          /* dense case: the index message is [type tag, map size, global indices of the row map] */
7438:     MatDenseGetArray(local_mat,&send_buffer_vals);
7439:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7440:     PetscMalloc1(i+2,&send_buffer_idxs);
7441:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7442:     send_buffer_idxs[1] = i;
7443:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7444:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7445:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7446:     PetscMPIIntCast(i,&len);
          /* the same buffers go to every destination; assumes the dense array holds len*len entries,
             i.e. that the map size equals the size of the (square) local matrix - TODO confirm */
7447:     for (i=0;i<n_sends;i++) {
7448:       ilengths_vals[is_indices[i]] = len*len;
7449:       ilengths_idxs[is_indices[i]] = len+2;
7450:     }
7451:   }
7452:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7453:   /* additional is (if any) */
7454:   if (nis) {
7455:     PetscMPIInt psum;
7456:     PetscInt j;
          /* first pass: total length of the packed buffer */
7457:     for (j=0,psum=0;j<nis;j++) {
7458:       PetscInt plen;
7459:       ISGetLocalSize(isarray[j],&plen);
7460:       PetscMPIIntCast(plen,&len);
7461:       psum += len+1; /* indices + length */
7462:     }
7463:     PetscMalloc1(psum,&send_buffer_idxs_is);
          /* second pass: pack each IS as [local size, indices...] one after the other */
7464:     for (j=0,psum=0;j<nis;j++) {
7465:       PetscInt plen;
7466:       const PetscInt *is_array_idxs;
7467:       ISGetLocalSize(isarray[j],&plen);
7468:       send_buffer_idxs_is[psum] = plen;
7469:       ISGetIndices(isarray[j],&is_array_idxs);
7470:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7471:       ISRestoreIndices(isarray[j],&is_array_idxs);
7472:       psum += plen+1; /* indices + length */
7473:     }
7474:     for (i=0;i<n_sends;i++) {
7475:       ilengths_idxs_is[is_indices[i]] = psum;
7476:     }
7477:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7478:   }
7479:   MatISRestoreLocalMat(mat,&local_mat);

        /* size the receive buffers from the incoming message lengths */
7481:   buf_size_idxs = 0;
7482:   buf_size_vals = 0;
7483:   buf_size_idxs_is = 0;
7484:   buf_size_vecs = 0;
7485:   for (i=0;i<n_recvs;i++) {
7486:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7487:     buf_size_vals += (PetscInt)olengths_vals[i];
7488:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7489:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; /* 2 more than needed per message: only olengths_idxs[i]-2 scalars are received below */
7490:   }
7491:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7492:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7493:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7494:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7496:   /* get new tags for clean communications */
7497:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7498:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7499:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7500:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7502:   /* allocate for requests */
7503:   PetscMalloc1(n_sends,&send_req_idxs);
7504:   PetscMalloc1(n_sends,&send_req_vals);
7505:   PetscMalloc1(n_sends,&send_req_idxs_is);
7506:   PetscMalloc1(n_sends,&send_req_vecs);
7507:   PetscMalloc1(n_recvs,&recv_req_idxs);
7508:   PetscMalloc1(n_recvs,&recv_req_vals);
7509:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7510:   PetscMalloc1(n_recvs,&recv_req_vecs);

7512:   /* communications */
        /* post all nonblocking receives first; messages are stored back to back in each receive buffer */
7513:   ptr_idxs = recv_buffer_idxs;
7514:   ptr_vals = recv_buffer_vals;
7515:   ptr_idxs_is = recv_buffer_idxs_is;
7516:   ptr_vecs = recv_buffer_vecs;
7517:   for (i=0;i<n_recvs;i++) {
7518:     source_dest = onodes[i];
7519:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7520:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7521:     ptr_idxs += olengths_idxs[i];
7522:     ptr_vals += olengths_vals[i];
7523:     if (nis) {
7524:       source_dest = onodes_is[i];
7525:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7526:       ptr_idxs_is += olengths_idxs_is[i];
7527:     }
7528:     if (nvecs) {
7529:       source_dest = onodes[i];
7530:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]); /* -2: the index message carries two header entries */
7531:       ptr_vecs += olengths_idxs[i]-2;
7532:     }
7533:   }
7534:   for (i=0;i<n_sends;i++) {
7535:     PetscMPIIntCast(is_indices[i],&source_dest);
7536:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7537:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7538:     if (nis) {
7539:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7540:     }
7541:     if (nvecs) {
            /* NOTE(review): VecGetArray is called once per send, while the matching VecRestoreArray below
               is called exactly once (also when n_sends is 0); balanced only if n_sends == 1 - confirm */
7542:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7543:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7544:     }
7545:   }
7546:   ISRestoreIndices(is_sends_internal,&is_indices);
7547:   ISDestroy(&is_sends_internal);

7549:   /* assemble new l2g map */
7550:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
        /* first pass: count the received global indices (duplicates included) */
7551:   ptr_idxs = recv_buffer_idxs;
7552:   new_local_rows = 0;
7553:   for (i=0;i<n_recvs;i++) {
7554:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7555:     ptr_idxs += olengths_idxs[i];
7556:   }
7557:   PetscMalloc1(new_local_rows,&l2gmap_indices);
        /* second pass: concatenate the received global indices */
7558:   ptr_idxs = recv_buffer_idxs;
7559:   new_local_rows = 0;
7560:   for (i=0;i<n_recvs;i++) {
7561:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7562:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7563:     ptr_idxs += olengths_idxs[i];
7564:   }
7565:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices); /* new_local_rows becomes the number of distinct global indices */
7566:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7567:   PetscFree(l2gmap_indices);

7569:   /* infer new local matrix type from received local matrices type */
7570:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7571:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7572:   if (n_recvs) {
          /* start from the type this process sent; any received message with a different tag falls back to AIJ */
7573:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7574:     ptr_idxs = recv_buffer_idxs;
7575:     for (i=0;i<n_recvs;i++) {
7576:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7577:         new_local_type_private = MATAIJ_PRIVATE;
7578:         break;
7579:       }
7580:       ptr_idxs += olengths_idxs[i];
7581:     }
7582:     switch (new_local_type_private) {
7583:       case MATDENSE_PRIVATE:
7584:         new_local_type = MATSEQAIJ;
7585:         bs = 1;
7586:         break;
7587:       case MATAIJ_PRIVATE:
7588:         new_local_type = MATSEQAIJ;
7589:         bs = 1;
7590:         break;
7591:       case MATBAIJ_PRIVATE:
7592:         new_local_type = MATSEQBAIJ;
7593:         break;
7594:       case MATSBAIJ_PRIVATE:
7595:         new_local_type = MATSEQSBAIJ;
7596:         break;
7597:       default:
7598:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7599:         break;
7600:     }
7601:   } else { /* by default, new_local_type is seqaij */
7602:     new_local_type = MATSEQAIJ;
7603:     bs = 1;
7604:   }

7606:   /* create MATIS object if needed */
7607:   if (!reuse) {
7608:     MatGetSize(mat,&rows,&cols);
7609:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7610:   } else {
7611:     /* it also destroys the local matrices */
7612:     if (*mat_n) {
7613:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7614:     } else { /* this is a fake object */
            /* NOTE(review): rows/cols still hold the sizes of the local matrix of mat obtained at the top of
               the function (they are overwritten with the global sizes only in the !reuse branch) - confirm intended */
7615:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7616:     }
7617:   }
7618:   MatISGetLocalMat(*mat_n,&local_mat);
7619:   MatSetType(local_mat,new_local_type);

7621:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7623:   /* Global to local map of received indices */
        /* the whole index buffer goes through the mapping, including the two header entries of each message;
           the headers are copied back from the unmapped buffer right below */
7624:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7625:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7626:   ISLocalToGlobalMappingDestroy(&l2gmap);

7628:   /* restore attributes -> type of incoming data and its size */
7629:   buf_size_idxs = 0;
7630:   for (i=0;i<n_recvs;i++) {
7631:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7632:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7633:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7634:   }
7635:   PetscFree(recv_buffer_idxs);

7637:   /* set preallocation */
7638:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7639:   if (!newisdense) {
7640:     PetscInt *new_local_nnz=0;

7642:     ptr_idxs = recv_buffer_idxs_local;
7643:     if (n_recvs) {
7644:       PetscCalloc1(new_local_rows,&new_local_nnz);
7645:     }
          /* each received dense block contributes its full row length to every local row it touches */
7646:     for (i=0;i<n_recvs;i++) {
7647:       PetscInt j;
7648:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7649:         for (j=0;j<*(ptr_idxs+1);j++) {
7650:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7651:         }
7652:       } else {
7653:         /* TODO */
7654:       }
7655:       ptr_idxs += olengths_idxs[i];
7656:     }
7657:     if (new_local_nnz) {
            /* the same array is clipped and then rescaled in place before each of the three calls, so their order matters;
               presumably only the call matching the actual type of local_mat has an effect - verify */
7658:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7659:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7660:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7661:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7662:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7663:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7664:     } else {
7665:       MatSetUp(local_mat);
7666:     }
7667:     PetscFree(new_local_nnz);
7668:   } else {
7669:     MatSetUp(local_mat);
7670:   }

7672:   /* set values */
7673:   ptr_vals = recv_buffer_vals;
7674:   ptr_idxs = recv_buffer_idxs_local;
7675:   for (i=0;i<n_recvs;i++) {
7676:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
            /* row orientation is switched off while adding the received dense values, then switched back on */
7677:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7678:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7679:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7680:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7681:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7682:     } else {
7683:       /* TODO */
7684:     }
7685:     ptr_idxs += olengths_idxs[i];
7686:     ptr_vals += olengths_vals[i];
7687:   }
7688:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7689:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7690:   MatISRestoreLocalMat(*mat_n,&local_mat);
7691:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7692:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7693:   PetscFree(recv_buffer_vals);

7695: #if 0
7696:   if (!restrict_comm) { /* check */
7697:     Vec       lvec,rvec;
7698:     PetscReal infty_error;

7700:     MatCreateVecs(mat,&rvec,&lvec);
7701:     VecSetRandom(rvec,NULL);
7702:     MatMult(mat,rvec,lvec);
7703:     VecScale(lvec,-1.0);
7704:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7705:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7706:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7707:     VecDestroy(&rvec);
7708:     VecDestroy(&lvec);
7709:   }
7710: #endif

7712:   /* assemble new additional is (if any) */
7713:   if (nis) {
7714:     PetscInt **temp_idxs,*count_is,j,psum;

7716:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7717:     PetscCalloc1(nis,&count_is);
          /* first pass: count how many indices were received for each IS and in total */
7718:     ptr_idxs = recv_buffer_idxs_is;
7719:     psum = 0;
7720:     for (i=0;i<n_recvs;i++) {
7721:       for (j=0;j<nis;j++) {
7722:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7723:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7724:         psum += plen;
7725:         ptr_idxs += plen+1; /* shift pointer to received data */
7726:       }
7727:     }
          /* one contiguous allocation; temp_idxs[i] points to the slice reserved for the i-th IS */
7728:     PetscMalloc1(nis,&temp_idxs);
7729:     PetscMalloc1(psum,&temp_idxs[0]);
7730:     for (i=1;i<nis;i++) {
7731:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7732:     }
          /* second pass: count_is is reset and reused as the write offset into each slice */
7733:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7734:     ptr_idxs = recv_buffer_idxs_is;
7735:     for (i=0;i<n_recvs;i++) {
7736:       for (j=0;j<nis;j++) {
7737:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7738:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7739:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7740:         ptr_idxs += plen+1; /* shift pointer to received data */
7741:       }
7742:     }
          /* replace each input IS with the sorted, duplicate-free union of what was received */
7743:     for (i=0;i<nis;i++) {
7744:       ISDestroy(&isarray[i]);
7745:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7746:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7747:     }
7748:     PetscFree(count_is);
7749:     PetscFree(temp_idxs[0]);
7750:     PetscFree(temp_idxs);
7751:   }
7752:   /* free workspace */
        /* every send buffer is released only after the corresponding sends have completed */
7753:   PetscFree(recv_buffer_idxs_is);
7754:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7755:   PetscFree(send_buffer_idxs);
7756:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7757:   if (isdense) {
7758:     MatISGetLocalMat(mat,&local_mat);
7759:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7760:     MatISRestoreLocalMat(mat,&local_mat);
7761:   } else {
7762:     /* PetscFree(send_buffer_vals); */
7763:   }
7764:   if (nis) {
7765:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7766:     PetscFree(send_buffer_idxs_is);
7767:   }

7769:   if (nvecs) {
7770:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7771:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7772:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
          /* the input vector is replaced by a new one on comm_n with new_local_rows local entries */
7773:     VecDestroy(&nnsp_vec[0]);
7774:     VecCreate(comm_n,&nnsp_vec[0]);
7775:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7776:     VecSetType(nnsp_vec[0],VECSTANDARD);
7777:     /* set values */
7778:     ptr_vals = recv_buffer_vecs;
7779:     ptr_idxs = recv_buffer_idxs_local;
          /* send_buffer_vecs is reused here as the write pointer into the new vector */
7780:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7781:     for (i=0;i<n_recvs;i++) {
7782:       PetscInt j;
            /* accumulate the j-th received value at the local index of the j-th received row */
7783:       for (j=0;j<*(ptr_idxs+1);j++) {
7784:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7785:       }
7786:       ptr_idxs += olengths_idxs[i];
7787:       ptr_vals += olengths_idxs[i]-2;
7788:     }
7789:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7790:     VecAssemblyBegin(nnsp_vec[0]);
7791:     VecAssemblyEnd(nnsp_vec[0]);
7792:   }

7794:   PetscFree(recv_buffer_vecs);
7795:   PetscFree(recv_buffer_idxs_local);
7796:   PetscFree(recv_req_idxs);
7797:   PetscFree(recv_req_vals);
7798:   PetscFree(recv_req_vecs);
7799:   PetscFree(recv_req_idxs_is);
7800:   PetscFree(send_req_idxs);
7801:   PetscFree(send_req_vals);
7802:   PetscFree(send_req_vecs);
7803:   PetscFree(send_req_idxs_is);
7804:   PetscFree(ilengths_vals);
7805:   PetscFree(ilengths_idxs);
7806:   PetscFree(olengths_vals);
7807:   PetscFree(olengths_idxs);
7808:   PetscFree(onodes);
7809:   if (nis) {
7810:     PetscFree(ilengths_idxs_is);
7811:     PetscFree(olengths_idxs_is);
7812:     PetscFree(onodes_is);
7813:   }
7814:   PetscSubcommDestroy(&subcomm);
7815:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7816:     MatDestroy(mat_n);
7817:     for (i=0;i<nis;i++) {
7818:       ISDestroy(&isarray[i]);
7819:     }
7820:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7821:       VecDestroy(&nnsp_vec[0]);
7822:     }
7823:     *mat_n = NULL;
7824:   }
7825:   return(0);
7826: }

7828: /* temporary hack into ksp private data structure */
7829:  #include <petsc/private/kspimpl.h>

7831: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7832: {
7833:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7834:   PC_IS                  *pcis = (PC_IS*)pc->data;
7835:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7836:   Mat                    coarsedivudotp = NULL;
7837:   Mat                    coarseG,t_coarse_mat_is;
7838:   MatNullSpace           CoarseNullSpace = NULL;
7839:   ISLocalToGlobalMapping coarse_islg;
7840:   IS                     coarse_is,*isarray;
7841:   PetscInt               i,im_active=-1,active_procs=-1;
7842:   PetscInt               nis,nisdofs,nisneu,nisvert;
7843:   PC                     pc_temp;
7844:   PCType                 coarse_pc_type;
7845:   KSPType                coarse_ksp_type;
7846:   PetscBool              multilevel_requested,multilevel_allowed;
7847:   PetscBool              coarse_reuse;
7848:   PetscInt               ncoarse,nedcfield;
7849:   PetscBool              compute_vecs = PETSC_FALSE;
7850:   PetscScalar            *array;
7851:   MatReuse               coarse_mat_reuse;
7852:   PetscBool              restr, full_restr, have_void;
7853:   PetscMPIInt            commsize;
7854:   PetscErrorCode         ierr;

7857:   /* Assign global numbering to coarse dofs */
7858:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7859:     PetscInt ocoarse_size;
7860:     compute_vecs = PETSC_TRUE;

7862:     pcbddc->new_primal_space = PETSC_TRUE;
7863:     ocoarse_size = pcbddc->coarse_size;
7864:     PetscFree(pcbddc->global_primal_indices);
7865:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7866:     /* see if we can avoid some work */
7867:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7868:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7869:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7870:         KSPReset(pcbddc->coarse_ksp);
7871:         coarse_reuse = PETSC_FALSE;
7872:       } else { /* we can safely reuse already computed coarse matrix */
7873:         coarse_reuse = PETSC_TRUE;
7874:       }
7875:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7876:       coarse_reuse = PETSC_FALSE;
7877:     }
7878:     /* reset any subassembling information */
7879:     if (!coarse_reuse || pcbddc->recompute_topography) {
7880:       ISDestroy(&pcbddc->coarse_subassembling);
7881:     }
7882:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7883:     coarse_reuse = PETSC_TRUE;
7884:   }
7885:   /* assemble coarse matrix */
7886:   if (coarse_reuse && pcbddc->coarse_ksp) {
7887:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7888:     PetscObjectReference((PetscObject)coarse_mat);
7889:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7890:   } else {
7891:     coarse_mat = NULL;
7892:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7893:   }

7895:   /* creates temporary l2gmap and IS for coarse indexes */
7896:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7897:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

7899:   /* creates temporary MATIS object for coarse matrix */
7900:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7901:   MatDenseGetArray(coarse_submat_dense,&array);
7902:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7903:   MatDenseRestoreArray(coarse_submat_dense,&array);
7904:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7905:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7906:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7907:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7908:   MatDestroy(&coarse_submat_dense);

7910:   /* count "active" (i.e. with positive local size) and "void" processes */
7911:   im_active = !!(pcis->n);
7912:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

7914:   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7915:   /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */
7916:   /* full_restr : just use the receivers from the subassembling pattern */
7917:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7918:   coarse_mat_is = NULL;
7919:   multilevel_allowed = PETSC_FALSE;
7920:   multilevel_requested = PETSC_FALSE;
7921:   pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7922:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7923:   if (multilevel_requested) {
7924:     ncoarse = active_procs/pcbddc->coarsening_ratio;
7925:     restr = PETSC_FALSE;
7926:     full_restr = PETSC_FALSE;
7927:   } else {
7928:     ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7929:     restr = PETSC_TRUE;
7930:     full_restr = PETSC_TRUE;
7931:   }
7932:   if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7933:   ncoarse = PetscMax(1,ncoarse);
7934:   if (!pcbddc->coarse_subassembling) {
7935:     if (pcbddc->coarsening_ratio > 1) {
7936:       if (multilevel_requested) {
7937:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7938:       } else {
7939:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7940:       }
7941:     } else {
7942:       PetscMPIInt rank;
7943:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7944:       have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7945:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7946:     }
7947:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7948:     PetscInt    psum;
7949:     if (pcbddc->coarse_ksp) psum = 1;
7950:     else psum = 0;
7951:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7952:     if (ncoarse < commsize) have_void = PETSC_TRUE;
7953:   }
7954:   /* determine if we can go multilevel */
7955:   if (multilevel_requested) {
7956:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7957:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7958:   }
7959:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7961:   /* dump subassembling pattern */
7962:   if (pcbddc->dbg_flag && multilevel_allowed) {
7963:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7964:   }

7966:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7967:   nedcfield = -1;
7968:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7969:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
7970:     const PetscInt         *idxs;
7971:     ISLocalToGlobalMapping tmap;

7973:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7974:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7975:     /* allocate space for temporary storage */
7976:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7977:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7978:     /* allocate for IS array */
7979:     nisdofs = pcbddc->n_ISForDofsLocal;
7980:     if (pcbddc->nedclocal) {
7981:       if (pcbddc->nedfield > -1) {
7982:         nedcfield = pcbddc->nedfield;
7983:       } else {
7984:         nedcfield = 0;
7985:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7986:         nisdofs = 1;
7987:       }
7988:     }
7989:     nisneu = !!pcbddc->NeumannBoundariesLocal;
7990:     nisvert = 0; /* nisvert is not used */
7991:     nis = nisdofs + nisneu + nisvert;
7992:     PetscMalloc1(nis,&isarray);
7993:     /* dofs splitting */
7994:     for (i=0;i<nisdofs;i++) {
7995:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7996:       if (nedcfield != i) {
7997:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7998:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7999:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8000:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8001:       } else {
8002:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8003:         ISGetIndices(pcbddc->nedclocal,&idxs);
8004:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8005:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
8006:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8007:       }
8008:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8009:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8010:       /* ISView(isarray[i],0); */
8011:     }
8012:     /* neumann boundaries */
8013:     if (pcbddc->NeumannBoundariesLocal) {
8014:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8015:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8016:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8017:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8018:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8019:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8020:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8021:       /* ISView(isarray[nisdofs],0); */
8022:     }
8023:     /* free memory */
8024:     PetscFree(tidxs);
8025:     PetscFree(tidxs2);
8026:     ISLocalToGlobalMappingDestroy(&tmap);
8027:   } else {
8028:     nis = 0;
8029:     nisdofs = 0;
8030:     nisneu = 0;
8031:     nisvert = 0;
8032:     isarray = NULL;
8033:   }
8034:   /* destroy no longer needed map */
8035:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8037:   /* subassemble */
8038:   if (multilevel_allowed) {
8039:     Vec       vp[1];
8040:     PetscInt  nvecs = 0;
8041:     PetscBool reuse,reuser;

8043:     if (coarse_mat) reuse = PETSC_TRUE;
8044:     else reuse = PETSC_FALSE;
8045:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8046:     vp[0] = NULL;
8047:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8048:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8049:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8050:       VecSetType(vp[0],VECSTANDARD);
8051:       nvecs = 1;

8053:       if (pcbddc->divudotp) {
8054:         Mat      B,loc_divudotp;
8055:         Vec      v,p;
8056:         IS       dummy;
8057:         PetscInt np;

8059:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8060:         MatGetSize(loc_divudotp,&np,NULL);
8061:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8062:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8063:         MatCreateVecs(B,&v,&p);
8064:         VecSet(p,1.);
8065:         MatMultTranspose(B,p,v);
8066:         VecDestroy(&p);
8067:         MatDestroy(&B);
8068:         VecGetArray(vp[0],&array);
8069:         VecPlaceArray(pcbddc->vec1_P,array);
8070:         VecRestoreArray(vp[0],&array);
8071:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8072:         VecResetArray(pcbddc->vec1_P);
8073:         ISDestroy(&dummy);
8074:         VecDestroy(&v);
8075:       }
8076:     }
8077:     if (reuser) {
8078:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8079:     } else {
8080:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8081:     }
8082:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8083:       PetscScalar *arraym,*arrayv;
8084:       PetscInt    nl;
8085:       VecGetLocalSize(vp[0],&nl);
8086:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8087:       MatDenseGetArray(coarsedivudotp,&arraym);
8088:       VecGetArray(vp[0],&arrayv);
8089:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8090:       VecRestoreArray(vp[0],&arrayv);
8091:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8092:       VecDestroy(&vp[0]);
8093:     } else {
8094:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8095:     }
8096:   } else {
8097:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8098:   }
8099:   if (coarse_mat_is || coarse_mat) {
8100:     PetscMPIInt size;
8101:     MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
8102:     if (!multilevel_allowed) {
8103:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
8104:     } else {
8105:       Mat A;

8107:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8108:       if (coarse_mat_is) {
8109:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8110:         PetscObjectReference((PetscObject)coarse_mat_is);
8111:         coarse_mat = coarse_mat_is;
8112:       }
8113:       /* be sure we don't have MatSeqDENSE as local mat */
8114:       MatISGetLocalMat(coarse_mat,&A);
8115:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8116:     }
8117:   }
8118:   MatDestroy(&t_coarse_mat_is);
8119:   MatDestroy(&coarse_mat_is);

8121:   /* create local to global scatters for coarse problem */
8122:   if (compute_vecs) {
8123:     PetscInt lrows;
8124:     VecDestroy(&pcbddc->coarse_vec);
8125:     if (coarse_mat) {
8126:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8127:     } else {
8128:       lrows = 0;
8129:     }
8130:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8131:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8132:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
8133:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8134:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8135:   }
8136:   ISDestroy(&coarse_is);

8138:   /* set defaults for coarse KSP and PC */
8139:   if (multilevel_allowed) {
8140:     coarse_ksp_type = KSPRICHARDSON;
8141:     coarse_pc_type = PCBDDC;
8142:   } else {
8143:     coarse_ksp_type = KSPPREONLY;
8144:     coarse_pc_type = PCREDUNDANT;
8145:   }

8147:   /* print some info if requested */
8148:   if (pcbddc->dbg_flag) {
8149:     if (!multilevel_allowed) {
8150:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8151:       if (multilevel_requested) {
8152:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8153:       } else if (pcbddc->max_levels) {
8154:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
8155:       }
8156:       PetscViewerFlush(pcbddc->dbg_viewer);
8157:     }
8158:   }

8160:   /* communicate coarse discrete gradient */
8161:   coarseG = NULL;
8162:   if (pcbddc->nedcG && multilevel_allowed) {
8163:     MPI_Comm ccomm;
8164:     if (coarse_mat) {
8165:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8166:     } else {
8167:       ccomm = MPI_COMM_NULL;
8168:     }
8169:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8170:   }

8172:   /* create the coarse KSP object only once with defaults */
8173:   if (coarse_mat) {
8174:     PetscBool   isredundant,isnn,isbddc;
8175:     PetscViewer dbg_viewer = NULL;

8177:     if (pcbddc->dbg_flag) {
8178:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8179:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8180:     }
8181:     if (!pcbddc->coarse_ksp) {
8182:       char   prefix[256],str_level[16];
8183:       size_t len;

8185:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8186:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8187:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8188:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8189:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8190:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8191:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8192:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8193:       /* TODO is this logic correct? should check for coarse_mat type */
8194:       PCSetType(pc_temp,coarse_pc_type);
8195:       /* prefix */
8196:       PetscStrcpy(prefix,"");
8197:       PetscStrcpy(str_level,"");
8198:       if (!pcbddc->current_level) {
8199:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8200:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8201:       } else {
8202:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8203:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8204:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8205:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8206:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8207:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8208:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8209:       }
8210:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8211:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8212:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8213:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8214:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8215:       /* allow user customization */
8216:       KSPSetFromOptions(pcbddc->coarse_ksp);
8217:     }
8218:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8219:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8220:     if (nisdofs) {
8221:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8222:       for (i=0;i<nisdofs;i++) {
8223:         ISDestroy(&isarray[i]);
8224:       }
8225:     }
8226:     if (nisneu) {
8227:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8228:       ISDestroy(&isarray[nisdofs]);
8229:     }
8230:     if (nisvert) {
8231:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8232:       ISDestroy(&isarray[nis-1]);
8233:     }
8234:     if (coarseG) {
8235:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8236:     }

8238:     /* get some info after set from options */
8239:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8240:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8241:     if (isbddc && !multilevel_allowed) {
8242:       PCSetType(pc_temp,coarse_pc_type);
8243:       isbddc = PETSC_FALSE;
8244:     }
8245:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8246:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8247:     if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8248:       PCSetType(pc_temp,PCBDDC);
8249:       isbddc = PETSC_TRUE;
8250:     }
8251:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8252:     if (isredundant) {
8253:       KSP inner_ksp;
8254:       PC  inner_pc;

8256:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8257:       KSPGetPC(inner_ksp,&inner_pc);
8258:     }

8260:     /* parameters which miss an API */
8261:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8262:     if (isbddc) {
8263:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8265:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8266:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8267:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8268:       if (pcbddc_coarse->benign_saddle_point) {
8269:         Mat                    coarsedivudotp_is;
8270:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8271:         IS                     row,col;
8272:         const PetscInt         *gidxs;
8273:         PetscInt               n,st,M,N;

8275:         MatGetSize(coarsedivudotp,&n,NULL);
8276:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8277:         st   = st-n;
8278:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8279:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8280:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8281:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8282:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8283:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8284:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8285:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8286:         ISGetSize(row,&M);
8287:         MatGetSize(coarse_mat,&N,NULL);
8288:         ISDestroy(&row);
8289:         ISDestroy(&col);
8290:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8291:         MatSetType(coarsedivudotp_is,MATIS);
8292:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8293:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8294:         ISLocalToGlobalMappingDestroy(&rl2g);
8295:         ISLocalToGlobalMappingDestroy(&cl2g);
8296:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8297:         MatDestroy(&coarsedivudotp);
8298:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8299:         MatDestroy(&coarsedivudotp_is);
8300:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8301:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8302:       }
8303:     }

8305:     /* propagate symmetry info of coarse matrix */
8306:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8307:     if (pc->pmat->symmetric_set) {
8308:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8309:     }
8310:     if (pc->pmat->hermitian_set) {
8311:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8312:     }
8313:     if (pc->pmat->spd_set) {
8314:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8315:     }
8316:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8317:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8318:     }
8319:     /* set operators */
8320:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8321:     if (pcbddc->dbg_flag) {
8322:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8323:     }
8324:   }
8325:   MatDestroy(&coarseG);
8326:   PetscFree(isarray);
8327: #if 0
8328:   {
8329:     PetscViewer viewer;
8330:     char filename[256];
8331:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8332:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8333:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8334:     MatView(coarse_mat,viewer);
8335:     PetscViewerPopFormat(viewer);
8336:     PetscViewerDestroy(&viewer);
8337:   }
8338: #endif

8340:   if (pcbddc->coarse_ksp) {
8341:     Vec crhs,csol;

8343:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8344:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8345:     if (!csol) {
8346:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8347:     }
8348:     if (!crhs) {
8349:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8350:     }
8351:   }
8352:   MatDestroy(&coarsedivudotp);

8354:   /* compute null space for coarse solver if the benign trick has been requested */
8355:   if (pcbddc->benign_null) {

8357:     VecSet(pcbddc->vec1_P,0.);
8358:     for (i=0;i<pcbddc->benign_n;i++) {
8359:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8360:     }
8361:     VecAssemblyBegin(pcbddc->vec1_P);
8362:     VecAssemblyEnd(pcbddc->vec1_P);
8363:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8364:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8365:     if (coarse_mat) {
8366:       Vec         nullv;
8367:       PetscScalar *array,*array2;
8368:       PetscInt    nl;

8370:       MatCreateVecs(coarse_mat,&nullv,NULL);
8371:       VecGetLocalSize(nullv,&nl);
8372:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8373:       VecGetArray(nullv,&array2);
8374:       PetscMemcpy(array2,array,nl*sizeof(*array));
8375:       VecRestoreArray(nullv,&array2);
8376:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8377:       VecNormalize(nullv,NULL);
8378:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8379:       VecDestroy(&nullv);
8380:     }
8381:   }

8383:   if (pcbddc->coarse_ksp) {
8384:     PetscBool ispreonly;

8386:     if (CoarseNullSpace) {
8387:       PetscBool isnull;
8388:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8389:       if (isnull) {
8390:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8391:       }
8392:       /* TODO: add local nullspaces (if any) */
8393:     }
8394:     /* setup coarse ksp */
8395:     KSPSetUp(pcbddc->coarse_ksp);
8396:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8397:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8398:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8399:       KSP       check_ksp;
8400:       KSPType   check_ksp_type;
8401:       PC        check_pc;
8402:       Vec       check_vec,coarse_vec;
8403:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8404:       PetscInt  its;
8405:       PetscBool compute_eigs;
8406:       PetscReal *eigs_r,*eigs_c;
8407:       PetscInt  neigs;
8408:       const char *prefix;

8410:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8411:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8412:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8413:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8414:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8415:       /* prevent from setup unneeded object */
8416:       KSPGetPC(check_ksp,&check_pc);
8417:       PCSetType(check_pc,PCNONE);
8418:       if (ispreonly) {
8419:         check_ksp_type = KSPPREONLY;
8420:         compute_eigs = PETSC_FALSE;
8421:       } else {
8422:         check_ksp_type = KSPGMRES;
8423:         compute_eigs = PETSC_TRUE;
8424:       }
8425:       KSPSetType(check_ksp,check_ksp_type);
8426:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8427:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8428:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8429:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8430:       KSPSetOptionsPrefix(check_ksp,prefix);
8431:       KSPAppendOptionsPrefix(check_ksp,"check_");
8432:       KSPSetFromOptions(check_ksp);
8433:       KSPSetUp(check_ksp);
8434:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8435:       KSPSetPC(check_ksp,check_pc);
8436:       /* create random vec */
8437:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8438:       VecSetRandom(check_vec,NULL);
8439:       MatMult(coarse_mat,check_vec,coarse_vec);
8440:       /* solve coarse problem */
8441:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8442:       /* set eigenvalue estimation if preonly has not been requested */
8443:       if (compute_eigs) {
8444:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8445:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8446:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8447:         if (neigs) {
8448:           lambda_max = eigs_r[neigs-1];
8449:           lambda_min = eigs_r[0];
8450:           if (pcbddc->use_coarse_estimates) {
8451:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8452:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8453:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8454:             }
8455:           }
8456:         }
8457:       }

8459:       /* check coarse problem residual error */
8460:       if (pcbddc->dbg_flag) {
8461:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8462:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8463:         VecAXPY(check_vec,-1.0,coarse_vec);
8464:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8465:         MatMult(coarse_mat,check_vec,coarse_vec);
8466:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8467:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8468:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8469:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8470:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8471:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8472:         if (CoarseNullSpace) {
8473:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8474:         }
8475:         if (compute_eigs) {
8476:           PetscReal          lambda_max_s,lambda_min_s;
8477:           KSPConvergedReason reason;
8478:           KSPGetType(check_ksp,&check_ksp_type);
8479:           KSPGetIterationNumber(check_ksp,&its);
8480:           KSPGetConvergedReason(check_ksp,&reason);
8481:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8482:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8483:           for (i=0;i<neigs;i++) {
8484:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8485:           }
8486:         }
8487:         PetscViewerFlush(dbg_viewer);
8488:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8489:       }
8490:       VecDestroy(&check_vec);
8491:       VecDestroy(&coarse_vec);
8492:       KSPDestroy(&check_ksp);
8493:       if (compute_eigs) {
8494:         PetscFree(eigs_r);
8495:         PetscFree(eigs_c);
8496:       }
8497:     }
8498:   }
8499:   MatNullSpaceDestroy(&CoarseNullSpace);
8500:   /* print additional info */
8501:   if (pcbddc->dbg_flag) {
8502:     /* waits until all processes reaches this point */
8503:     PetscBarrier((PetscObject)pc);
8504:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8505:     PetscViewerFlush(pcbddc->dbg_viewer);
8506:   }

8508:   /* free memory */
8509:   MatDestroy(&coarse_mat);
8510:   return(0);
8511: }

8513: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8514: {
8515:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8516:   PC_IS*         pcis = (PC_IS*)pc->data;
8517:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8518:   IS             subset,subset_mult,subset_n;
8519:   PetscInt       local_size,coarse_size=0;
8520:   PetscInt       *local_primal_indices=NULL;
8521:   const PetscInt *t_local_primal_indices;

8525:   /* Compute global number of coarse dofs */
8526:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8527:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8528:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8529:   ISDestroy(&subset_n);
8530:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8531:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8532:   ISDestroy(&subset);
8533:   ISDestroy(&subset_mult);
8534:   ISGetLocalSize(subset_n,&local_size);
8535:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8536:   PetscMalloc1(local_size,&local_primal_indices);
8537:   ISGetIndices(subset_n,&t_local_primal_indices);
8538:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8539:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8540:   ISDestroy(&subset_n);

8542:   /* check numbering */
8543:   if (pcbddc->dbg_flag) {
8544:     PetscScalar coarsesum,*array,*array2;
8545:     PetscInt    i;
8546:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8548:     PetscViewerFlush(pcbddc->dbg_viewer);
8549:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8550:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8551:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8552:     /* counter */
8553:     VecSet(pcis->vec1_global,0.0);
8554:     VecSet(pcis->vec1_N,1.0);
8555:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8556:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8557:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8558:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8559:     VecSet(pcis->vec1_N,0.0);
8560:     for (i=0;i<pcbddc->local_primal_size;i++) {
8561:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8562:     }
8563:     VecAssemblyBegin(pcis->vec1_N);
8564:     VecAssemblyEnd(pcis->vec1_N);
8565:     VecSet(pcis->vec1_global,0.0);
8566:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8567:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8568:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8569:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8570:     VecGetArray(pcis->vec1_N,&array);
8571:     VecGetArray(pcis->vec2_N,&array2);
8572:     for (i=0;i<pcis->n;i++) {
8573:       if (array[i] != 0.0 && array[i] != array2[i]) {
8574:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8575:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8576:         set_error = PETSC_TRUE;
8577:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8578:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);
8579:       }
8580:     }
8581:     VecRestoreArray(pcis->vec2_N,&array2);
8582:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8583:     PetscViewerFlush(pcbddc->dbg_viewer);
8584:     for (i=0;i<pcis->n;i++) {
8585:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8586:     }
8587:     VecRestoreArray(pcis->vec1_N,&array);
8588:     VecSet(pcis->vec1_global,0.0);
8589:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8590:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8591:     VecSum(pcis->vec1_global,&coarsesum);
8592:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8593:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8594:       PetscInt *gidxs;

8596:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8597:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8598:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8599:       PetscViewerFlush(pcbddc->dbg_viewer);
8600:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8601:       for (i=0;i<pcbddc->local_primal_size;i++) {
8602:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8603:       }
8604:       PetscViewerFlush(pcbddc->dbg_viewer);
8605:       PetscFree(gidxs);
8606:     }
8607:     PetscViewerFlush(pcbddc->dbg_viewer);
8608:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8609:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8610:   }
8611:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8612:   /* get back data */
8613:   *coarse_size_n = coarse_size;
8614:   *local_primal_indices_n = local_primal_indices;
8615:   return(0);
8616: }

/*
   PCBDDCGlobalToLocal - builds an IS with the positions of lwork that end up marked
   after an indicator of globalis has been scattered from gwork to lwork.

   Input parameters:
     g2l_ctx  - scatter applied in SCATTER_FORWARD mode from gwork to lwork
     gwork    - work vector; zeroed, then set to 1.0 at the indices listed in globalis
     lwork    - work vector; zeroed, then overwritten by the scatter
     globalis - indices inserted into gwork (negative indices are ignored, see the
                VecSetOption call below)

   Output parameter:
     localis  - newly created IS, on the communicator of gwork, listing in increasing
                order every position i of lwork whose value has real part > 0.5

   Both work vectors are modified by this routine.
*/
8618: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8619: {
8620:   IS             localis_t;
8621:   PetscInt       i,lsize,*idxs,n;
8622:   PetscScalar    *vals;

8626:   /* get indices in local ordering exploiting local to global map */
8627:   ISGetLocalSize(globalis,&lsize);
        /* vals: lsize ones, the values inserted into gwork at the indices of globalis */
8628:   PetscMalloc1(lsize,&vals);
8629:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8630:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8631:   VecSet(gwork,0.0);
8632:   VecSet(lwork,0.0);
8633:   if (idxs) { /* multilevel guard */
8634:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8635:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8636:   }
        /* the assembly is always started, also when idxs is NULL and nothing was inserted;
           idxs and vals are released between VecAssemblyBegin and VecAssemblyEnd */
8637:   VecAssemblyBegin(gwork);
8638:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8639:   PetscFree(vals);
8640:   VecAssemblyEnd(gwork);
8641:   /* now compute set in local ordering */
8642:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8643:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
        /* vals is reused as a view of the entries of lwork (const is cast away; only read below) */
8644:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
        /* NOTE(review): the loops below are bounded by VecGetSize (not VecGetLocalSize);
           presumably lwork is a sequential vector so the two coincide - confirm */
8645:   VecGetSize(lwork,&n);
        /* first pass: count the marked entries (lsize is reused as the counter) */
8646:   for (i=0,lsize=0;i<n;i++) {
8647:     if (PetscRealPart(vals[i]) > 0.5) {
8648:       lsize++;
8649:     }
8650:   }
        /* second pass: store their positions; idxs is reused for the freshly allocated output array */
8651:   PetscMalloc1(lsize,&idxs);
8652:   for (i=0,lsize=0;i<n;i++) {
8653:     if (PetscRealPart(vals[i]) > 0.5) {
8654:       idxs[lsize++] = i;
8655:     }
8656:   }
8657:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
        /* PETSC_OWN_POINTER: idxs is handed over to the IS and is not freed here */
8658:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8659:   *localis = localis_t;
8660:   return(0);
8661: }

/*
   PCBDDCSetUpSubSchurs - sets up pcbddc->sub_schurs through PCBDDCSubSchursSetUp.

   Steps performed here:
     1. choose the adjacency arrays (used_xadj,used_adjncy) handed to the sub_schurs setup;
     2. create the Schur complement matrix S_j from pcis->A_II, A_IB, A_BI and A_BB;
     3. call PCBDDCSubSchursSetUp: with a reduced argument list when
        sub_schurs->schur_explicit is false, otherwise with the local matrix, scaling,
        benign data and an optional change of basis;
     4. destroy S_j and, if one was made, the private copy of the adjacency.

   Input parameter:
     pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Fields that may be modified: pcbddc->sub_schurs_layers (reset to -1 when MatGetRowIJ
   returns flg_row false), pcbddc->local_mat (converted to MATSEQAIJ in the explicit
   branch) and sub_schurs->change_with_qr (when a change of basis is computed).
*/
8663: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8664: {
8665:   PC_IS               *pcis=(PC_IS*)pc->data;
8666:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8667:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8668:   Mat                 S_j;
8669:   PetscInt            *used_xadj,*used_adjncy;
8670:   PetscBool           free_used_adj;
8671:   PetscErrorCode      ierr;

8674:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
        /* free_used_adj records whether used_xadj/used_adjncy are a private copy to be freed at the end */
8675:   free_used_adj = PETSC_FALSE;
8676:   if (pcbddc->sub_schurs_layers == -1) {
8677:     used_xadj = NULL;
8678:     used_adjncy = NULL;
8679:   } else {
          /* the first two branches borrow the arrays stored in pcbddc->mat_graph (no copy) */
8680:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8681:       used_xadj = pcbddc->mat_graph->xadj;
8682:       used_adjncy = pcbddc->mat_graph->adjncy;
8683:     } else if (pcbddc->computed_rowadj) {
8684:       used_xadj = pcbddc->mat_graph->xadj;
8685:       used_adjncy = pcbddc->mat_graph->adjncy;
8686:     } else {
8687:       PetscBool      flg_row=PETSC_FALSE;
8688:       const PetscInt *xadj,*adjncy;
8689:       PetscInt       nvtxs;

            /* otherwise copy the row structure returned by MatGetRowIJ on the local matrix;
               the copy is needed because the arrays are given back with MatRestoreRowIJ below */
8691:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8692:       if (flg_row) {
8693:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8694:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8695:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8696:         free_used_adj = PETSC_TRUE;
8697:       } else {
              /* row structure not available: behave as in the sub_schurs_layers == -1 case */
8698:         pcbddc->sub_schurs_layers = -1;
8699:         used_xadj = NULL;
8700:         used_adjncy = NULL;
8701:       }
8702:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8703:     }
8704:   }

8706:   /* setup sub_schurs data */
8707:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8708:   if (!sub_schurs->schur_explicit) {
8709:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8710:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8711:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8712:   } else {
8713:     Mat       change = NULL;
8714:     Vec       scaling = NULL;
8715:     IS        change_primal = NULL, iP;
8716:     PetscInt  benign_n;
8717:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8718:     PetscBool isseqaij,need_change = PETSC_FALSE;
8719:     PetscBool discrete_harmonic = PETSC_FALSE;

          /* when vertices are not used, reuse_solvers stays true only if there are no local vertices */
8721:     if (!pcbddc->use_vertices && reuse_solvers) {
8722:       PetscInt n_vertices;

8724:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8725:       reuse_solvers = (PetscBool)!n_vertices;
8726:     }
          /* make sure pcbddc->local_mat is of type MATSEQAIJ */
8727:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8728:     if (!isseqaij) {
8729:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
            /* local_mat is the same object as matis->A: drop this reference and create a new
               converted matrix instead of converting in place, so that matis->A is left untouched */
8730:       if (matis->A == pcbddc->local_mat) {
8731:         MatDestroy(&pcbddc->local_mat);
8732:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8733:       } else {
8734:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8735:       }
8736:     }
          /* benign_n is forwarded to the setup only when the benign change is not explicit */
8737:     if (!pcbddc->benign_change_explicit) {
8738:       benign_n = pcbddc->benign_n;
8739:     } else {
8740:       benign_n = 0;
8741:     }
8742:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8743:        We need a global reduction to avoid possible deadlocks.
8744:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8745:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8746:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8747:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
            /* after the negation, need_change is true only if no process already holds sub_schurs->change */
8748:       need_change = (PetscBool)(!need_change);
8749:     }
8750:     /* If the user defines additional constraints, we import them here.
8751:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8752:     if (need_change) {
8753:       PC_IS   *pcisf;
8754:       PC_BDDC *pcbddcf;
8755:       PC      pcf;

8757:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
            /* pcf is a temporary PCBDDC sharing the operators of pc; it is destroyed before leaving this branch */
8758:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8759:       PCSetOperators(pcf,pc->mat,pc->pmat);
8760:       PCSetType(pcf,PCBDDC);

8762:       /* hacks */
            /* the fields below are copied from pc by plain assignment (handles are shared, no references taken) */
8763:       pcisf                        = (PC_IS*)pcf->data;
8764:       pcisf->is_B_local            = pcis->is_B_local;
8765:       pcisf->vec1_N                = pcis->vec1_N;
8766:       pcisf->BtoNmap               = pcis->BtoNmap;
8767:       pcisf->n                     = pcis->n;
8768:       pcisf->n_B                   = pcis->n_B;
8769:       pcbddcf                      = (PC_BDDC*)pcf->data;
            /* the graph pointer held by pcf is freed and replaced by the graph of pc */
8770:       PetscFree(pcbddcf->mat_graph);
8771:       pcbddcf->mat_graph           = pcbddc->mat_graph;
8772:       pcbddcf->use_faces           = PETSC_TRUE;
8773:       pcbddcf->use_change_of_basis = PETSC_TRUE;
8774:       pcbddcf->use_change_on_faces = PETSC_TRUE;
8775:       pcbddcf->use_qr_single       = PETSC_TRUE;
8776:       pcbddcf->fake_change         = PETSC_TRUE;

8778:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8779:       PCBDDCConstraintsSetUp(pcf);
8780:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
            /* change_primal holds a copy of the first n_vertices entries of local_primal_ref_node */
8781:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
            /* take over the constraint matrix: the field in pcf is cleared and change is destroyed later in this function */
8782:       change = pcbddcf->ConstraintMatrix;
8783:       pcbddcf->ConstraintMatrix = NULL;

8785:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8786:       PetscFree(pcbddcf->sub_schurs);
8787:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8788:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8789:       PetscFree(pcbddcf->primal_indices_local_idxs);
8790:       PetscFree(pcbddcf->onearnullvecs_state);
8791:       PetscFree(pcf->data);
            /* NOTE(review): the destroy/reset callbacks are cleared before PCDestroy, presumably so that
               the type-specific teardown does not touch the data freed above or the objects borrowed from pc - confirm */
8792:       pcf->ops->destroy = NULL;
8793:       pcf->ops->reset   = NULL;
8794:       PCDestroy(&pcf);
8795:     }
          /* pcis->D is passed as scaling unless deluxe scaling is in use (then scaling stays NULL) */
8796:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

          /* the -sub_schurs_discrete_harmonic option is read only if an object is attached to pc under "__KSPFETIDP_iP" */
8798:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8799:     if (iP) {
8800:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8801:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8802:       PetscOptionsEnd();
8803:     }
8804:     if (discrete_harmonic) {
8805:       Mat A;
            /* work on a copy of local_mat whose rows and columns listed in iP are zeroed (1.0 on the diagonal);
               iP is also attached to the copy, which replaces local_mat in the setup call */
8806:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8807:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8808:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8809:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8810:       MatDestroy(&A);
8811:     } else {
8812:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8813:     }
          /* both handles are still NULL here when no change of basis was computed */
8814:     MatDestroy(&change);
8815:     ISDestroy(&change_primal);
8816:   }
8817:   MatDestroy(&S_j);

8819:   /* free adjacency */
8820:   if (free_used_adj) {
8821:     PetscFree2(used_xadj,used_adjncy);
8822:   }
8823:   return(0);
8824: }

/*
   PCBDDCInitSubSchurs - creates pcbddc->sub_schurs if it does not exist yet and
   initializes it with PCBDDCSubSchursInit.

   The graph passed to the initialization is pcbddc->mat_graph, unless
   pcbddc->sub_schurs_rebuild is set: in that case a temporary graph is created,
   set up and destroyed inside this routine.

   Input parameter:
     pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   When pcbddc->dbg_flag is set and no rebuild is requested, the graph and the number of
   local candidate vertices, edges and faces are printed on pcbddc->dbg_viewer.
*/
8826: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8827: {
8828:   PC_IS               *pcis=(PC_IS*)pc->data;
8829:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8830:   PCBDDCGraph         graph;
8831:   PetscErrorCode      ierr;

8834:   /* attach interface graph for determining subsets */
8835:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8836:     IS       verticesIS,verticescomm;
8837:     PetscInt vsize,*idxs;

          /* copy the candidate vertices of mat_graph into an IS living on the communicator of pc */
8839:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8840:     ISGetSize(verticesIS,&vsize);
8841:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8842:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8843:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8844:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
          /* the temporary graph reuses the l2g map, global size and minimal size of mat_graph;
             no adjacency arguments are supplied to the setup */
8845:     PCBDDCGraphCreate(&graph);
8846:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8847:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8848:     ISDestroy(&verticescomm);
8849:     PCBDDCGraphComputeConnectedComponents(graph);
8850:   } else {
8851:     graph = pcbddc->mat_graph;
8852:   }
8853:   /* print some info */
8854:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8855:     IS       vertices;
8856:     PetscInt nv,nedges,nfaces;
8857:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8858:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8859:     ISGetSize(vertices,&nv);
8860:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8861:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
          /* the value in parentheses is the corresponding use_vertices/use_edges/use_faces flag */
8862:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8863:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8864:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8865:     PetscViewerFlush(pcbddc->dbg_viewer);
8866:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8867:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8868:   }

8870:   /* sub_schurs init */
8871:   if (!pcbddc->sub_schurs) {
8872:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8873:   }
8874:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

8876:   /* free graph struct */
        /* only the temporary graph built above is destroyed; pcbddc->mat_graph is never destroyed here */
8877:   if (pcbddc->sub_schurs_rebuild) {
8878:     PCBDDCGraphDestroy(&graph);
8879:   }
8880:   return(0);
8881: }

/*
   PCBDDCCheckOperator - debugging check on the interface preconditioner.

   The check runs only when pcbddc->n_vertices equals pcbddc->local_primal_size.
   A random interface vector, made single-valued on the primal nodes, is saved in
   vec_check_B. The local Schur complement S_j is applied to it, the result is summed
   across subdomains on the primal nodes, and PCBDDCApplyInterfacePreconditioner is
   called. The infinity norm of (pcis->vec1_B - vec_check_B) is then printed with
   PetscPrintf on PETSC_COMM_SELF, followed by one line for each of the
   pcbddc->benign_n entries of benign_p0.

   Input parameter:
     pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Work vectors of pcis and pcbddc (vec1_N, vec1_B, vec2_B, vec1_D, vec1_P, coarse_vec)
   are overwritten.

   NOTE(review): zerodiag is initialized to NULL and never assigned in this routine, so
   every "if (zerodiag)" branch below is skipped and B0_B and dummy_vec stay NULL.
*/
8883: PetscErrorCode PCBDDCCheckOperator(PC pc)
8884: {
8885:   PC_IS               *pcis=(PC_IS*)pc->data;
8886:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8887:   PetscErrorCode      ierr;

8890:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8891:     IS             zerodiag = NULL;
8892:     Mat            S_j,B0_B=NULL;
8893:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
8894:     PetscScalar    *p0_check,*array,*array2;
8895:     PetscReal      norm;
8896:     PetscInt       i;

8898:     /* B0 and B0_B */
8899:     if (zerodiag) {
8900:       IS       dummy;

8902:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8903:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8904:       MatCreateVecs(B0_B,NULL,&dummy_vec);
8905:       ISDestroy(&dummy);
8906:     }
8907:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
          /* vec_scale_P: ones are added into coarse_vec, scattered back and inverted entrywise */
8908:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8909:     VecSet(pcbddc->vec1_P,1.0);
          /* NOTE(review): coarse_vec is not zeroed in this routine before this ADD_VALUES scatter
             (it is zeroed before the later one) - confirm it is zero on entry */
8910:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8911:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8912:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8913:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8914:     VecReciprocal(vec_scale_P);
8915:     /* S_j */
8916:     MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8917:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

8919:     /* mimic vector in \widetilde{W}_\Gamma */
8920:     VecSetRandom(pcis->vec1_N,NULL);
8921:     /* continuous in primal space */
          /* random coarse values are scattered to vec1_P and written into vec1_N at the primal reference nodes */
8922:     VecSetRandom(pcbddc->coarse_vec,NULL);
8923:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8924:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8925:     VecGetArray(pcbddc->vec1_P,&array);
          /* p0_check saves the last benign_n entries of vec1_P */
8926:     PetscCalloc1(pcbddc->benign_n,&p0_check);
8927:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8928:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8929:     VecRestoreArray(pcbddc->vec1_P,&array);
8930:     VecAssemblyBegin(pcis->vec1_N);
8931:     VecAssemblyEnd(pcis->vec1_N);
8932:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8933:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
          /* vec_check_B keeps the interface vector for the comparison at the end */
8934:     VecDuplicate(pcis->vec2_B,&vec_check_B);
8935:     VecCopy(pcis->vec2_B,vec_check_B);

8937:     /* assemble rhs for coarse problem */
8938:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8939:     /* local with Schur */
8940:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8941:     if (zerodiag) {
8942:       VecGetArray(dummy_vec,&array);
8943:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8944:       VecRestoreArray(dummy_vec,&array);
8945:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8946:     }
8947:     /* sum on primal nodes the local contributions */
8948:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8949:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8950:     VecGetArray(pcis->vec1_N,&array);
8951:     VecGetArray(pcbddc->vec1_P,&array2);
8952:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8953:     VecRestoreArray(pcbddc->vec1_P,&array2);
8954:     VecRestoreArray(pcis->vec1_N,&array);
8955:     VecSet(pcbddc->coarse_vec,0.);
8956:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8957:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8958:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8959:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
          /* NOTE(review): array is acquired before the in-place VecPointwiseMult and consumed after it;
             presumably it aliases the storage of vec1_P, so the values inserted below are the scaled ones - confirm */
8960:     VecGetArray(pcbddc->vec1_P,&array);
8961:     /* scale primal nodes (BDDC sums contributions) */
8962:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8963:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8964:     VecRestoreArray(pcbddc->vec1_P,&array);
8965:     VecAssemblyBegin(pcis->vec1_N);
8966:     VecAssemblyEnd(pcis->vec1_N);
8967:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8968:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8969:     /* global: \widetilde{B0}_B w_\Gamma */
8970:     if (zerodiag) {
8971:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
8972:       VecGetArray(dummy_vec,&array);
8973:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8974:       VecRestoreArray(dummy_vec,&array);
8975:     }
8976:     /* BDDC */
          /* NOTE(review): presumably PCBDDCApplyInterfacePreconditioner takes its input from, and leaves
             its result in, pcis->vec1_B (with pcis->vec1_D as the interior part) - confirm */
8977:     VecSet(pcis->vec1_D,0.);
8978:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

          /* vec2_B receives a copy of vec1_B before vec1_B is turned into the difference with vec_check_B */
8980:     VecCopy(pcis->vec1_B,pcis->vec2_B);
8981:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8982:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8983:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8984:     for (i=0;i<pcbddc->benign_n;i++) {
8985:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8986:     }
          /* release the temporaries; dummy_vec and B0_B are still NULL unless the zerodiag branches ran */
8987:     PetscFree(p0_check);
8988:     VecDestroy(&vec_scale_P);
8989:     VecDestroy(&vec_check_B);
8990:     VecDestroy(&dummy_vec);
8991:     MatDestroy(&S_j);
8992:     MatDestroy(&B0_B);
8993:   }
8994:   return(0);
8995: }

8997:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - builds on ccomm a matrix that shares the local blocks of a
   row-redistributed copy of A.

   Steps performed here:
     1. compute the row range [rst,ren) of a layout on ccomm with global size A->rmap->N
        (the range is empty on processes where ccomm is MPI_COMM_NULL);
     2. extract At = A(rows,:) with MatCreateSubMatrix, rows being the stride [rst,ren)
        created on the communicator of A;
     3. where ccomm is not MPI_COMM_NULL, create *B on ccomm and fill its Mat_MPIAIJ data
        by hand: a->A, a->B and a->lvec of At are referenced (not copied), colmap and
        garray are copied, and a new scatter Mvctx is created;
     4. destroy At.

   Input parameters:
     A     - the matrix to restrict
     ccomm - the target communicator, or MPI_COMM_NULL

   Output parameter:
     B     - the new matrix; not written on processes where ccomm is MPI_COMM_NULL

   NOTE(review): At->data and (*B)->data are cast to Mat_MPIAIJ* without a type check.
   This assumes both matrices are of MPIAIJ type - confirm for the case in which MATAIJ
   on ccomm resolves to a sequential type.
*/
8998: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8999: {
9000:   Mat            At;
9001:   IS             rows;
9002:   PetscInt       rst,ren;
9004:   PetscLayout    rmap;

9007:   rst = ren = 0;
9008:   if (ccomm != MPI_COMM_NULL) {
9009:     PetscLayoutCreate(ccomm,&rmap);
9010:     PetscLayoutSetSize(rmap,A->rmap->N);
9011:     PetscLayoutSetBlockSize(rmap,1);
9012:     PetscLayoutSetUp(rmap);
9013:     PetscLayoutGetRange(rmap,&rst,&ren);
9014:   }
        /* executed on every process of A's communicator, with an empty range where ccomm is MPI_COMM_NULL */
9015:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9016:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9017:   ISDestroy(&rows);

9019:   if (ccomm != MPI_COMM_NULL) {
9020:     Mat_MPIAIJ *a,*b;
9021:     IS         from,to;
9022:     Vec        gvec;
9023:     PetscInt   lsize;

9025:     MatCreate(ccomm,B);
9026:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9027:     MatSetType(*B,MATAIJ);
          /* the row layout of *B is destroyed here; rmap, computed above, is installed in its place further down */
9028:     PetscLayoutDestroy(&((*B)->rmap));
9029:     PetscLayoutSetUp((*B)->cmap);
9030:     a    = (Mat_MPIAIJ*)At->data;
9031:     b    = (Mat_MPIAIJ*)(*B)->data;
9032:     MPI_Comm_size(ccomm,&b->size);
9033:     MPI_Comm_rank(ccomm,&b->rank);
          /* the two local blocks of At are shared with *B: a reference is taken on each before assigning */
9034:     PetscObjectReference((PetscObject)a->A);
9035:     PetscObjectReference((PetscObject)a->B);
9036:     b->A = a->A;
9037:     b->B = a->B;

9039:     b->donotstash      = a->donotstash;
9040:     b->roworiented     = a->roworiented;
9041:     b->rowindices      = 0;
9042:     b->rowvalues       = 0;
9043:     b->getrowactive    = PETSC_FALSE;

          /* rmap is handed over to *B (it is not destroyed in this routine); *B is flagged as assembled and preallocated */
9045:     (*B)->rmap         = rmap;
9046:     (*B)->factortype   = A->factortype;
9047:     (*B)->assembled    = PETSC_TRUE;
9048:     (*B)->insertmode   = NOT_SET_VALUES;
9049:     (*B)->preallocated = PETSC_TRUE;

          /* colmap and garray are copied from At, not shared */
9051:     if (a->colmap) {
9052: #if defined(PETSC_USE_CTABLE)
9053:       PetscTableCreateCopy(a->colmap,&b->colmap);
9054: #else
9055:       PetscMalloc1(At->cmap->N,&b->colmap);
9056:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9057:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9058: #endif
9059:     } else b->colmap = 0;
9060:     if (a->garray) {
9061:       PetscInt len;
            /* len+1 entries are allocated; only len are logged and copied */
9062:       len  = a->B->cmap->n;
9063:       PetscMalloc1(len+1,&b->garray);
9064:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9065:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9066:     } else b->garray = 0;

          /* lvec is shared with At through an extra reference */
9068:     PetscObjectReference((PetscObject)a->lvec);
9069:     b->lvec = a->lvec;
9070:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9072:     /* cannot use VecScatterCopy */
          /* Mvctx is created anew: from the entries of a temporary vector of *B listed in garray to positions 0..lsize-1 of lvec */
9073:     VecGetLocalSize(b->lvec,&lsize);
9074:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9075:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9076:     MatCreateVecs(*B,&gvec,NULL);
9077:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9078:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9079:     ISDestroy(&from);
9080:     ISDestroy(&to);
9081:     VecDestroy(&gvec);
9082:   }
        /* At is destroyed on every process; the objects referenced above survive through the references held by *B */
9083:   MatDestroy(&At);
9084:   return(0);
9085: }