Actual source code: bddcscalingbasic.c

petsc-3.9.3 2018-07-02
Report Typos and Errors
  1:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3:  #include <petscblaslapack.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>

  6: /* prototypes for deluxe functions */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
 14: {
 15:   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
 17:   PetscScalar    *b,*x;
 18:   PetscInt       n;
 19:   PetscBLASInt   nrhs,info,m;
 20:   PetscBool      flg;

 23:   PetscBLASIntCast(A->rmap->n,&m);
 24:   PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 25:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
 26:   PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 27:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

 29:   MatGetSize(B,NULL,&n);
 30:   PetscBLASIntCast(n,&nrhs);
 31:   MatDenseGetArray(B,&b);
 32:   MatDenseGetArray(X,&x);

 34:   PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));

 36:   if (A->factortype == MAT_FACTOR_LU) {
 37: #if defined(PETSC_MISSING_LAPACK_GETRS)
 38:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
 39: #else
 40:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
 41:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
 42: #endif
 43:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");

 45:   MatDenseRestoreArray(B,&b);
 46:   MatDenseRestoreArray(X,&x);
 47:   PetscLogFlops(nrhs*(2.0*m*m - m));
 48:   return(0);
 49: }

 51: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 52: {
 53:   PC_IS*         pcis = (PC_IS*)pc->data;
 54:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

 58:   /* Apply partition of unity */
 59:   VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
 60:   VecSet(global_vector,0.0);
 61:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 62:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 63:   return(0);
 64: }

 66: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 67: {
 68:   PC_IS*              pcis=(PC_IS*)pc->data;
 69:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
 70:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
 71:   PetscErrorCode      ierr;

 74:   VecSet(pcbddc->work_scaling,0.0);
 75:   VecSet(y,0.0);
 76:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 77:     PetscInt          i;
 78:     const PetscScalar *array_x,*array_D;
 79:     PetscScalar       *array;
 80:     VecGetArrayRead(x,&array_x);
 81:     VecGetArrayRead(pcis->D,&array_D);
 82:     VecGetArray(pcbddc->work_scaling,&array);
 83:     for (i=0;i<deluxe_ctx->n_simple;i++) {
 84:       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
 85:     }
 86:     VecRestoreArray(pcbddc->work_scaling,&array);
 87:     VecRestoreArrayRead(pcis->D,&array_D);
 88:     VecRestoreArrayRead(x,&array_x);
 89:   }
 90:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
 91:   if (deluxe_ctx->seq_mat) {
 92:     PetscInt i;
 93:     for (i=0;i<deluxe_ctx->seq_n;i++) {
 94:       if (deluxe_ctx->change) {
 95:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 96:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 97:         if (deluxe_ctx->change_with_qr) {
 98:           Mat change;

100:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
101:           MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
102:         } else {
103:           KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
104:         }
105:       } else {
106:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
107:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
108:       }
109:       MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
110:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
111:         PetscScalar *x;

113:         VecGetArray(deluxe_ctx->seq_work2[i],&x);
114:         VecPlaceArray(deluxe_ctx->seq_work1[i],x);
115:         VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
116:         MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
117:         VecResetArray(deluxe_ctx->seq_work1[i]);
118:       }
119:       if (deluxe_ctx->change) {
120:         Mat change;

122:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
123:         MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
124:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
125:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
126:       } else {
127:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
128:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
129:       }
130:     }
131:   }
132:   /* put local boundary part in global vector */
133:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
134:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
135:   return(0);
136: }

138: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
139: {
140:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

147:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
148:   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
149:   return(0);
150: }

152: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
153: {
155:   PC_IS          *pcis = (PC_IS*)pc->data;

158:   VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
159:   VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
160:   /* Apply partition of unity */
161:   VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
162:   return(0);
163: }

165: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
166: {
167:   PC_IS*              pcis=(PC_IS*)pc->data;
168:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
169:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
170:   PetscErrorCode      ierr;

173:   /* get local boundary part of global vector */
174:   VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
175:   VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
176:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
177:     PetscInt          i;
178:     PetscScalar       *array_y;
179:     const PetscScalar *array_D;
180:     VecGetArray(y,&array_y);
181:     VecGetArrayRead(pcis->D,&array_D);
182:     for (i=0;i<deluxe_ctx->n_simple;i++) {
183:       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
184:     }
185:     VecRestoreArrayRead(pcis->D,&array_D);
186:     VecRestoreArray(y,&array_y);
187:   }
188:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
189:   if (deluxe_ctx->seq_mat) {
190:     PetscInt i;
191:     for (i=0;i<deluxe_ctx->seq_n;i++) {
192:       if (deluxe_ctx->change) {
193:         Mat change;

195:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
196:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
197:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
198:         MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
199:       } else {
200:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
201:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
202:       }
203:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
204:         PetscScalar *x;

206:         VecGetArray(deluxe_ctx->seq_work1[i],&x);
207:         VecPlaceArray(deluxe_ctx->seq_work2[i],x);
208:         VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
209:         MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
210:         VecResetArray(deluxe_ctx->seq_work2[i]);
211:       }
212:       MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
213:       if (deluxe_ctx->change) {
214:         if (deluxe_ctx->change_with_qr) {
215:           Mat change;

217:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
218:           MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
219:         } else {
220:           KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
221:         }
222:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
223:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
224:       } else {
225:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
226:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
227:       }
228:     }
229:   }
230:   return(0);
231: }

233: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
234: {
235:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

242:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
243:   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
244:   return(0);
245: }

247: PetscErrorCode PCBDDCScalingSetUp(PC pc)
248: {
249:   PC_IS*         pcis=(PC_IS*)pc->data;
250:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

255:   /* create work vector for the operator */
256:   VecDestroy(&pcbddc->work_scaling);
257:   VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
258:   /* always rebuild pcis->D */
259:   if (pcis->use_stiffness_scaling) {
260:     PetscScalar *a;
261:     PetscInt    i,n;

263:     MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
264:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
265:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
266:     VecGetLocalSize(pcis->D,&n);
267:     VecGetArray(pcis->D,&a);
268:     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
269:     VecRestoreArray(pcis->D,&a);
270:   }
271:   VecCopy(pcis->D,pcis->vec1_B);
272:   VecSet(pcis->vec1_global,0.0);
273:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
274:   VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
275:   VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
276:   VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
277:   VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
278:   /* now setup */
279:   if (pcbddc->use_deluxe_scaling) {
280:     if (!pcbddc->deluxe_ctx) {
281:       PCBDDCScalingCreate_Deluxe(pc);
282:     }
283:     PCBDDCScalingSetUp_Deluxe(pc);
284:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
285:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
286:   } else {
287:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
288:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
289:   }

291:   /* test */
292:   if (pcbddc->dbg_flag) {
293:     Mat         B0_B = NULL;
294:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
295:     Vec         vec2_global;
296:     PetscViewer viewer = pcbddc->dbg_viewer;
297:     PetscReal   error;

299:     /* extension -> from local to parallel */
300:     VecSet(pcis->vec1_global,0.0);
301:     VecSetRandom(pcis->vec1_B,NULL);
302:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
303:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
304:     VecDuplicate(pcis->vec1_global,&vec2_global);
305:     VecCopy(pcis->vec1_global,vec2_global);
306:     VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
307:     VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
308:     if (pcbddc->benign_n) {
309:       IS is_dummy;

311:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
312:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
313:       ISDestroy(&is_dummy);
314:       MatCreateVecs(B0_B,NULL,&B0_Bv);
315:       VecDuplicate(B0_Bv,&B0_Bv2);
316:       MatMult(B0_B,pcis->vec1_B,B0_Bv);
317:     }
318:     PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
319:     if (pcbddc->benign_saddle_point) {
320:       PetscReal errorl = 0.;
321:       VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
322:       VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
323:       if (pcbddc->benign_n) {
324:         MatMult(B0_B,pcis->vec1_B,B0_Bv2);
325:         VecAXPY(B0_Bv,-1.0,B0_Bv2);
326:         VecNorm(B0_Bv,NORM_INFINITY,&errorl);
327:       }
328:       MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
329:       PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
330:     }
331:     VecAXPY(pcis->vec1_global,-1.0,vec2_global);
332:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
333:     PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
334:     VecDestroy(&vec2_global);

336:     /* restriction -> from parallel to local */
337:     VecSet(pcis->vec1_global,0.0);
338:     VecSetRandom(pcis->vec1_B,NULL);
339:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
340:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
341:     PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
342:     VecScale(pcis->vec1_B,-1.0);
343:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
344:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
345:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
346:     PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
347:     MatDestroy(&B0_B);
348:     VecDestroy(&B0_Bv);
349:     VecDestroy(&B0_Bv2);
350:   }
351:   return(0);
352: }

354: PetscErrorCode PCBDDCScalingDestroy(PC pc)
355: {
356:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

360:   if (pcbddc->deluxe_ctx) {
361:     PCBDDCScalingDestroy_Deluxe(pc);
362:   }
363:   VecDestroy(&pcbddc->work_scaling);
364:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
365:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
366:   return(0);
367: }

369: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
370: {
371:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
372:   PCBDDCDeluxeScaling deluxe_ctx;
373:   PetscErrorCode      ierr;

376:   PetscNew(&deluxe_ctx);
377:   pcbddc->deluxe_ctx = deluxe_ctx;
378:   return(0);
379: }

381: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
382: {
383:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
384:   PetscErrorCode      ierr;

387:   PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
388:   PetscFree(pcbddc->deluxe_ctx);
389:   return(0);
390: }

392: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
393: {
394:   PetscInt       i;

398:   PetscFree(deluxe_ctx->idx_simple_B);
399:   deluxe_ctx->n_simple = 0;
400:   for (i=0;i<deluxe_ctx->seq_n;i++) {
401:     VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
402:     VecDestroy(&deluxe_ctx->seq_work1[i]);
403:     VecDestroy(&deluxe_ctx->seq_work2[i]);
404:     MatDestroy(&deluxe_ctx->seq_mat[i]);
405:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
406:   }
407:   PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
408:   PetscFree(deluxe_ctx->workspace);
409:   deluxe_ctx->seq_n = 0;
410:   return(0);
411: }

413: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
414: {
415:   PC_IS               *pcis=(PC_IS*)pc->data;
416:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
417:   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
418:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
419:   PetscErrorCode      ierr;

422:   /* reset data structures if the topology has changed */
423:   if (pcbddc->recompute_topography) {
424:     PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
425:   }

427:   /* Compute data structures to solve sequential problems */
428:   PCBDDCScalingSetUp_Deluxe_Private(pc);

430:   /* diagonal scaling on interface dofs not contained in cc */
431:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
432:     PetscInt n_com,n_dir;
433:     n_com = 0;
434:     if (sub_schurs->is_vertices) {
435:       ISGetLocalSize(sub_schurs->is_vertices,&n_com);
436:     }
437:     n_dir = 0;
438:     if (sub_schurs->is_dir) {
439:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
440:     }
441:     if (!deluxe_ctx->n_simple) {
442:       deluxe_ctx->n_simple = n_dir + n_com;
443:       PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
444:       if (sub_schurs->is_vertices) {
445:         PetscInt       nmap;
446:         const PetscInt *idxs;

448:         ISGetIndices(sub_schurs->is_vertices,&idxs);
449:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
450:         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com);
451:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
452:       }
453:       if (sub_schurs->is_dir) {
454:         PetscInt       nmap;
455:         const PetscInt *idxs;

457:         ISGetIndices(sub_schurs->is_dir,&idxs);
458:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
459:         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir);
460:         ISRestoreIndices(sub_schurs->is_dir,&idxs);
461:       }
462:       PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
463:     } else {
464:       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple);
465:     }
466:   } else {
467:     deluxe_ctx->n_simple = 0;
468:     deluxe_ctx->idx_simple_B = 0;
469:   }
470:   return(0);
471: }

473: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
474: {
475:   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
476:   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
477:   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
478:   PetscScalar            *matdata,*matdata2;
479:   PetscInt               i,max_subset_size,cum,cum2;
480:   const PetscInt         *idxs;
481:   PetscBool              newsetup = PETSC_FALSE;
482:   PetscErrorCode         ierr;

485:   if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
486:   if (!sub_schurs->n_subs) return(0);

488:   /* Allocate arrays for subproblems */
489:   if (!deluxe_ctx->seq_n) {
490:     deluxe_ctx->seq_n = sub_schurs->n_subs;
491:     PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
492:     newsetup = PETSC_TRUE;
493:   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
494:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs);
495:   }
496:   /* the change of basis is just a reference to sub_schurs->change (if any) */
497:   deluxe_ctx->change         = sub_schurs->change;
498:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

500:   /* Create objects for deluxe */
501:   max_subset_size = 0;
502:   for (i=0;i<sub_schurs->n_subs;i++) {
503:     PetscInt subset_size;
504:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
505:     max_subset_size = PetscMax(subset_size,max_subset_size);
506:   }
507:   if (newsetup) {
508:     PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
509:   }
510:   cum = cum2 = 0;
511:   ISGetIndices(sub_schurs->is_Ej_all,&idxs);
512:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
513:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
514:   for (i=0;i<deluxe_ctx->seq_n;i++) {
515:     PetscInt     subset_size;

517:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
518:     if (newsetup) {
519:       IS  sub;
520:       /* work vectors */
521:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
522:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);

524:       /* scatters */
525:       ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
526:       VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
527:       ISDestroy(&sub);
528:     }

530:     /* S_E_j */
531:     MatDestroy(&deluxe_ctx->seq_mat[i]);
532:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);

534:     /* \sum_k S^k_E_j */
535:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
536:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
537:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
538:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
539:     if (sub_schurs->is_hermitian) {
540:       MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
541:     } else {
542:       MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
543:     }
544:     if (pcbddc->deluxe_singlemat) {
545:       Mat X,Y;
546:       if (!sub_schurs->is_hermitian) {
547:         MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
548:       } else {
549:         PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
550:         X    = deluxe_ctx->seq_mat[i];
551:       }
552:       MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
553:       if (!sub_schurs->is_hermitian) {
554:         PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
555:       } else {
556:         MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
557:       }

559:       MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
560:       MatDestroy(&deluxe_ctx->seq_mat[i]);
561:       MatDestroy(&X);
562:       if (deluxe_ctx->change) {
563:         Mat C,CY;

565:         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
566:         KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
567:         MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
568:         MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
569:         MatDestroy(&CY);
570:       }
571:       MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
572:       deluxe_ctx->seq_mat[i] = Y;
573:     }
574:     cum += subset_size;
575:     cum2 += subset_size*subset_size;
576:   }
577:   ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
578:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
579:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
580:   if (pcbddc->deluxe_singlemat) {
581:     deluxe_ctx->change         = NULL;
582:     deluxe_ctx->change_with_qr = PETSC_FALSE;
583:   }

585:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
586:     for (i=0;i<deluxe_ctx->seq_n;i++) {
587:       if (newsetup) {
588:         PC pc;

590:         KSPGetPC(deluxe_ctx->change[i],&pc);
591:         PCSetType(pc,PCLU);
592:         KSPSetFromOptions(deluxe_ctx->change[i]);
593:       }
594:       KSPSetUp(deluxe_ctx->change[i]);
595:     }
596:   }
597:   return(0);
598: }