Actual source code: bddcscalingbasic.c

petsc-3.8.4 2018-03-24
Report Typos and Errors
  1:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3:  #include <petscblaslapack.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>

  6: /* forward declarations of the file-local deluxe scaling helpers, all defined further down in this file */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
 14: {
       /*
          Multiple right-hand-side solve with the transpose of an already factored
          sequential dense matrix.

          Input:
            A - factored dense matrix (factortype MAT_FACTOR_LU or MAT_FACTOR_CHOLESKY)
            B - dense matrix whose columns are the right-hand sides
          Output:
            X - dense matrix that receives the solutions

          The entries of B are copied into X and LAPACK then overwrites X in place.
          An error is raised if B or X is not a dense matrix, if A is not factored,
          if the required LAPACK routine is not available, or if LAPACK returns a
          nonzero info.
       */
 15:   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
 17:   PetscScalar    *b,*x;
 18:   PetscInt       n;
 19:   PetscBLASInt   nrhs,info,m;
 20:   PetscBool      flg;

 23:   PetscBLASIntCast(A->rmap->n,&m);
 24:   PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 25:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
 26:   PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 27:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

       /* the number of right-hand sides is the number of columns of B */
 29:   MatGetSize(B,NULL,&n);
 30:   PetscBLASIntCast(n,&nrhs);
 31:   MatDenseGetArray(B,&b);
 32:   MatDenseGetArray(X,&x);

       /* Copy the right-hand sides into X; this assumes both arrays store their m x nrhs entries
          contiguously, consistently with m being passed to LAPACK as leading dimension of x.
          The byte count is computed in size_t: m and nrhs are PetscBLASInt and their
          product could otherwise overflow before being widened by sizeof */
 34:   PetscMemcpy(x,b,(size_t)m*nrhs*sizeof(PetscScalar));

 36:   if (A->factortype == MAT_FACTOR_LU) {
 37: #if defined(PETSC_MISSING_LAPACK_GETRS)
 38:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
 39: #else
         /* "T" requests the solve with the transpose of the LU-factored matrix */
 40:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
 41:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
 42: #endif
 43:   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
 44: #if defined(PETSC_MISSING_LAPACK_POTRS)
 45:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable.");
 46: #else
         /* potrs takes no transpose flag, the plain solve is used here.
            NOTE(review): presumably this relies on the factored matrix being symmetric - confirm for complex scalars */
 47:     PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info));
 48:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve");
 49: #endif
 50:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve");

 52:   MatDenseRestoreArray(B,&b);
 53:   MatDenseRestoreArray(X,&x);
 54:   PetscLogFlops(nrhs*(2.0*m*m - m));
 55:   return(0);
 56: }


 59: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 60: {
       /*
          Diagonal scaling version of the extension operator (local interface -> global).
          The local interface vector is multiplied entrywise by pcis->D into pcbddc->work_scaling,
          the global vector is zeroed, and the scaled values are added into it with a reverse
          scatter through pcis->global_to_B. The input vector is not modified.
          Both pcis and pcbddc are views of the same pc->data pointer.
       */
 61:   PC_IS*         pcis = (PC_IS*)pc->data;
 62:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

 66:   /* Apply partition of unity */
 67:   VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
 68:   VecSet(global_vector,0.0);
       /* contributions of the different subdomains are summed (ADD_VALUES) in the global vector */
 69:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 70:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 71:   return(0);
 72: }

 74: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 75: {
       /*
          Deluxe scaling version of the extension operator; it is the function composed as
          "PCBDDCScalingExtension_C" by PCBDDCScalingSetUp when deluxe scaling is requested.
          x is the local interface vector (input), y the global vector (output).

          pcbddc->work_scaling is zeroed and then filled in two steps:
            - the dofs listed in deluxe_ctx->idx_simple_B get x scaled entrywise by pcis->D;
            - for each of the deluxe_ctx->seq_n sequential subproblems, the corresponding
              entries of x are gathered, multiplied by the transpose of seq_mat[i] (followed
              by a transposed solve with seq_mat_inv_sum[i] when that matrix exists), and
              written back in the corresponding entries of work_scaling.
          Finally work_scaling is added into the zeroed y through pcis->global_to_B.
       */
 76:   PC_IS*              pcis=(PC_IS*)pc->data;
 77:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
 78:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
 79:   PetscErrorCode      ierr;

 82:   VecSet(pcbddc->work_scaling,0.0);
 83:   VecSet(y,0.0);
 84:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 85:     PetscInt          i;
 86:     const PetscScalar *array_x,*array_D;
 87:     PetscScalar       *array;
 88:     VecGetArrayRead(x,&array_x);
 89:     VecGetArrayRead(pcis->D,&array_D);
 90:     VecGetArray(pcbddc->work_scaling,&array);
         /* idx_simple_B indexes x, pcis->D and work_scaling alike */
 91:     for (i=0;i<deluxe_ctx->n_simple;i++) {
 92:       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
 93:     }
 94:     VecRestoreArray(pcbddc->work_scaling,&array);
 95:     VecRestoreArrayRead(pcis->D,&array_D);
 96:     VecRestoreArrayRead(x,&array_x);
 97:   }
 98:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
 99:   if (deluxe_ctx->seq_mat) {
100:     PetscInt i;
101:     for (i=0;i<deluxe_ctx->seq_n;i++) {
           /* step 1: gather the entries of x for subproblem i into seq_work1[i];
              with a change of basis the gathered values land in seq_work2[i] first and are
              then mapped to seq_work1[i], either by the transpose of the change operator
              (QR case) or by a solve with the change KSP */
102:       if (deluxe_ctx->change) {
103:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
104:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
105:         if (deluxe_ctx->change_with_qr) {
106:           Mat change;

108:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
109:           MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
110:         } else {
111:           KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
112:         }
113:       } else {
114:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
115:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
116:       }
           /* step 2: seq_work2[i] = seq_mat[i]^T * seq_work1[i] */
117:       MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
118:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
             /* note: this local x shadows the Vec argument x inside this scope */
119:         PetscScalar *x;

             /* step 3: transposed solve done in place on seq_work2[i]; seq_work1[i] is temporarily
                pointed at the storage of seq_work2[i], so that the right-hand side and the
                solution share the same array, and is reset to its own storage afterwards */
121:         VecGetArray(deluxe_ctx->seq_work2[i],&x);
122:         VecPlaceArray(deluxe_ctx->seq_work1[i],x);
123:         VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
124:         MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
125:         VecResetArray(deluxe_ctx->seq_work1[i]);
126:       }
           /* step 4: write the result back in the entries of work_scaling belonging to subproblem i,
              after multiplying by the change operator when a change of basis is in use */
127:       if (deluxe_ctx->change) {
128:         Mat change;

130:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
131:         MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
132:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
133:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
134:       } else {
135:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
136:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
137:       }
138:     }
139:   }
140:   /* put local boundary part in global vector */
141:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
142:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
143:   return(0);
144: }

146: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
147: {
       /*
          Entry point of the scaled extension (local interface vector -> global vector).
          It dispatches to the function composed on pc under the name "PCBDDCScalingExtension_C"
          (the _Basic or the _Deluxe variant, selected in PCBDDCScalingSetUp).
          The local vector must not be pcbddc->work_scaling: both variants in this file
          overwrite that vector while computing the result.
       */
148:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

155:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
156:   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
157:   return(0);
158: }

160: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
161: {
       /*
          Diagonal scaling version of the restriction operator (global -> local interface).
          The interface entries of the global vector are copied into the local interface
          vector through pcis->global_to_B and then multiplied entrywise, in place, by pcis->D.
       */
163:   PC_IS          *pcis = (PC_IS*)pc->data;

166:   VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
167:   VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
168:   /* Apply partition of unity */
169:   VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
170:   return(0);
171: }

173: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
174: {
       /*
          Deluxe scaling version of the restriction operator; it is the function composed as
          "PCBDDCScalingRestriction_C" by PCBDDCScalingSetUp when deluxe scaling is requested.
          x is the global vector (input), y the local interface vector (output).

          The interface entries of x are first copied into y; then y is modified in place:
            - the dofs listed in deluxe_ctx->idx_simple_B are multiplied by pcis->D;
            - for each of the deluxe_ctx->seq_n sequential subproblems, the corresponding
              entries of y are gathered, optionally solved with seq_mat_inv_sum[i],
              multiplied by seq_mat[i], and written back in the same entries of y.
          The operations mirror, in reverse order and without transposition, those of
          PCBDDCScalingExtension_Deluxe.
       */
175:   PC_IS*              pcis=(PC_IS*)pc->data;
176:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
177:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
178:   PetscErrorCode      ierr;

181:   /* get local boundary part of global vector */
182:   VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
183:   VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
184:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
185:     PetscInt          i;
186:     PetscScalar       *array_y;
187:     const PetscScalar *array_D;
188:     VecGetArray(y,&array_y);
189:     VecGetArrayRead(pcis->D,&array_D);
190:     for (i=0;i<deluxe_ctx->n_simple;i++) {
191:       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
192:     }
193:     VecRestoreArrayRead(pcis->D,&array_D);
194:     VecRestoreArray(y,&array_y);
195:   }
196:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
197:   if (deluxe_ctx->seq_mat) {
198:     PetscInt i;
199:     for (i=0;i<deluxe_ctx->seq_n;i++) {
           /* step 1: gather the entries of y for subproblem i into seq_work1[i];
              with a change of basis they are gathered in seq_work2[i] and mapped to
              seq_work1[i] by the transpose of the change operator */
200:       if (deluxe_ctx->change) {
201:         Mat change;

203:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
204:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
205:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
206:         MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
207:       } else {
208:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
209:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
210:       }
211:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
             /* note: this local x shadows the Vec argument x inside this scope */
212:         PetscScalar *x;

             /* step 2: solve done in place on seq_work1[i]; seq_work2[i] is temporarily pointed
                at the storage of seq_work1[i], so that the right-hand side and the solution
                share the same array, and is reset to its own storage afterwards */
214:         VecGetArray(deluxe_ctx->seq_work1[i],&x);
215:         VecPlaceArray(deluxe_ctx->seq_work2[i],x);
216:         VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
217:         MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
218:         VecResetArray(deluxe_ctx->seq_work2[i]);
219:       }
           /* step 3: seq_work2[i] = seq_mat[i] * seq_work1[i] */
220:       MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
           /* step 4: write the result back in the entries of y belonging to subproblem i; with a
              change of basis the result is first mapped to seq_work1[i], either multiplying by
              the change operator (QR case) or with a transposed solve with the change KSP */
221:       if (deluxe_ctx->change) {
222:         if (deluxe_ctx->change_with_qr) {
223:           Mat change;

225:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
226:           MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
227:         } else {
228:           KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
229:         }
230:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
231:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
232:       } else {
233:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
234:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
235:       }
236:     }
237:   }
238:   return(0);
239: }

241: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
242: {
       /*
          Entry point of the scaled restriction (global vector -> local interface vector).
          It dispatches to the function composed on pc under the name "PCBDDCScalingRestriction_C"
          (the _Basic or the _Deluxe variant, selected in PCBDDCScalingSetUp).
          Passing pcbddc->work_scaling as the local vector is rejected with an error.
       */
243:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

250:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
251:   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
252:   return(0);
253: }

255: PetscErrorCode PCBDDCScalingSetUp(PC pc)
256: {
       /*
          Sets up the scaling operators of the preconditioner:
            1. (re)creates the work vector pcbddc->work_scaling with the layout of pcis->vec1_B;
            2. rebuilds the diagonal scaling pcis->D and normalizes it by the sum of the
               values contributed to each interface dof;
            3. composes on pc either the deluxe or the basic restriction/extension functions,
               creating and setting up the deluxe context when deluxe scaling is requested;
            4. when pcbddc->dbg_flag is set, runs consistency tests on the composed operators
               and prints the measured errors on pcbddc->dbg_viewer.
          pcis->vec1_B, pcis->vec1_N and pcis->vec1_global are used as scratch vectors.
       */
257:   PC_IS*         pcis=(PC_IS*)pc->data;
258:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

263:   /* create work vector for the operator */
264:   VecDestroy(&pcbddc->work_scaling);
265:   VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
266:   /* always rebuild pcis->D */
267:   if (pcis->use_stiffness_scaling) {
         /* D is taken from the interface entries of the diagonal of the local matrix */
268:     MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
269:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
270:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
271:   }
       /* accumulate D in the global vector (ADD_VALUES), bring the sums back to the local
          interface and divide D entrywise by them */
272:   VecCopy(pcis->D,pcis->vec1_B);
273:   VecSet(pcis->vec1_global,0.0);
274:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
275:   VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
276:   VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
277:   VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
278:   VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
279:   /* now setup */
280:   if (pcbddc->use_deluxe_scaling) {
281:     if (!pcbddc->deluxe_ctx) {
282:       PCBDDCScalingCreate_Deluxe(pc);
283:     }
284:     PCBDDCScalingSetUp_Deluxe(pc);
285:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
286:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
287:   } else {
288:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
289:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
290:   }

292:   /* test */
293:   if (pcbddc->dbg_flag) {
294:     Mat         B0_B = NULL;
295:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
296:     Vec         vec2_global;
297:     PetscViewer viewer = pcbddc->dbg_viewer;
298:     PetscReal   error;

300:     /* extension -> from local to parallel */
         /* a random local vector is summed in the global vector; the sum is saved in vec2_global
            and scattered back to vec1_B; the extension of vec1_B is then compared with vec2_global */
301:     VecSet(pcis->vec1_global,0.0);
302:     VecSetRandom(pcis->vec1_B,NULL);
303:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
304:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
305:     VecDuplicate(pcis->vec1_global,&vec2_global);
306:     VecCopy(pcis->vec1_global,vec2_global);
307:     VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
308:     VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
309:     if (pcbddc->benign_n) {
310:       IS is_dummy;

           /* B0_B: first benign_n rows of benign_B0 restricted to the interface columns;
              B0_Bv stores its product with the vector before the extension is applied */
312:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
313:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
314:       ISDestroy(&is_dummy);
315:       MatCreateVecs(B0_B,NULL,&B0_Bv);
316:       VecDuplicate(B0_Bv,&B0_Bv2);
317:       MatMult(B0_B,pcis->vec1_B,B0_Bv);
318:     }
319:     PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
320:     if (pcbddc->benign_saddle_point) {
321:       PetscReal errorl = 0.;
           /* compare the product with B0_B before and after the extension; the local infinity
              norms of the difference are summed over the communicator of pc */
322:       VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
323:       VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
324:       if (pcbddc->benign_n) {
325:         MatMult(B0_B,pcis->vec1_B,B0_Bv2);
326:         VecAXPY(B0_Bv,-1.0,B0_Bv2);
327:         VecNorm(B0_Bv,NORM_INFINITY,&errorl);
328:       }
329:       MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
330:       PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
331:     }
332:     VecAXPY(pcis->vec1_global,-1.0,vec2_global);
333:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
334:     PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
335:     VecDestroy(&vec2_global);

337:     /* restriction -> from parallel to local */
         /* a random local vector is summed in the global vector; its restriction, negated, is
            summed back in the same global vector, and the norm of what remains is reported */
338:     VecSet(pcis->vec1_global,0.0);
339:     VecSetRandom(pcis->vec1_B,NULL);
340:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
341:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
342:     PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
343:     VecScale(pcis->vec1_B,-1.0);
344:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
345:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
346:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
347:     PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
         /* these are still NULL when benign_n is zero */
348:     MatDestroy(&B0_B);
349:     VecDestroy(&B0_Bv);
350:     VecDestroy(&B0_Bv2);
351:   }
352:   return(0);
353: }

355: PetscErrorCode PCBDDCScalingDestroy(PC pc)
356: {
       /*
          Releases what PCBDDCScalingSetUp created: the deluxe context (if any), the work
          vector pcbddc->work_scaling, and the restriction/extension functions composed on pc
          (composing NULL removes them).
       */
357:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

361:   if (pcbddc->deluxe_ctx) {
362:     PCBDDCScalingDestroy_Deluxe(pc);
363:   }
364:   VecDestroy(&pcbddc->work_scaling);
365:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
366:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
367:   return(0);
368: }

370: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
371: {
       /* Allocates a new deluxe scaling context with PetscNew and stores it in pcbddc->deluxe_ctx;
          the caller in this file only invokes it when pcbddc->deluxe_ctx is not yet set */
372:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
373:   PCBDDCDeluxeScaling deluxe_ctx;
374:   PetscErrorCode      ierr;

377:   PetscNew(&deluxe_ctx);
378:   pcbddc->deluxe_ctx = deluxe_ctx;
379:   return(0);
380: }

382: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
383: {
       /* Releases the objects owned by the deluxe context through
          PCBDDCScalingReset_Deluxe_Solvers and then frees the context itself;
          pcbddc->deluxe_ctx is dereferenced, so it must be set when this is called */
384:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
385:   PetscErrorCode      ierr;

388:   PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
389:   PetscFree(pcbddc->deluxe_ctx);
390:   return(0);
391: }

393: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
394: {
       /*
          Brings the deluxe context back to an empty state without freeing the context itself:
          the list of diagonally scaled dofs, the per-subproblem scatters, work vectors and
          matrices, the five arrays holding them and the shared workspace are released, and
          n_simple and seq_n are zeroed.
          deluxe_ctx->change is not touched here (it is assigned from sub_schurs->change in
          PCBDDCScalingSetUp_Deluxe_Private).
       */
395:   PetscInt       i;

399:   PetscFree(deluxe_ctx->idx_simple_B);
400:   deluxe_ctx->n_simple = 0;
401:   for (i=0;i<deluxe_ctx->seq_n;i++) {
402:     VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
403:     VecDestroy(&deluxe_ctx->seq_work1[i]);
404:     VecDestroy(&deluxe_ctx->seq_work2[i]);
405:     MatDestroy(&deluxe_ctx->seq_mat[i]);
406:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
407:   }
       /* the five arrays are released with the PetscFree5 counterpart of the PetscCalloc5
          used in PCBDDCScalingSetUp_Deluxe_Private */
408:   PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
409:   PetscFree(deluxe_ctx->workspace);
410:   deluxe_ctx->seq_n = 0;
411:   return(0);
412: }

414: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
415: {
       /*
          Sets up the deluxe scaling context stored in pcbddc->deluxe_ctx:
            - the context is reset when pcbddc->recompute_topography is set;
            - the sequential subproblems are (re)built by PCBDDCScalingSetUp_Deluxe_Private;
            - the list idx_simple_B of the interface dofs that are scaled with the plain
              diagonal scaling is computed from sub_schurs->is_vertices and sub_schurs->is_dir,
              mapped through pcis->BtoNmap and sorted.
          The list is computed only when n_simple is zero; otherwise the function only checks
          that the number of such dofs has not changed and raises an error if it has.
          NOTE(review): the %D formats below assume n_simple is a PetscInt - confirm against
          the declaration of the deluxe context.
       */
416:   PC_IS               *pcis=(PC_IS*)pc->data;
417:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
418:   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
419:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
420:   PetscErrorCode      ierr;

423:   /* reset data structures if the topology has changed */
424:   if (pcbddc->recompute_topography) {
425:     PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
426:   }

428:   /* Compute data structures to solve sequential problems */
429:   PCBDDCScalingSetUp_Deluxe_Private(pc);

431:   /* diagonal scaling on interface dofs not contained in cc */
432:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
433:     PetscInt n_com,n_dir;
434:     n_com = 0;
435:     if (sub_schurs->is_vertices) {
436:       ISGetLocalSize(sub_schurs->is_vertices,&n_com);
437:     }
438:     n_dir = 0;
439:     if (sub_schurs->is_dir) {
440:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
441:     }
442:     if (!deluxe_ctx->n_simple) {
443:       deluxe_ctx->n_simple = n_dir + n_com;
444:       PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
           /* the first n_com entries come from is_vertices, the following n_dir from is_dir;
              with IS_GTOLM_DROP unmapped indices are dropped, so a mismatch in the number
              of mapped entries means that some index could not be mapped */
445:       if (sub_schurs->is_vertices) {
446:         PetscInt       nmap;
447:         const PetscInt *idxs;

449:         ISGetIndices(sub_schurs->is_vertices,&idxs);
450:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
             /* PetscInt values are printed with the PETSc %D format, which stays correct for 64-bit indices */
451:         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
452:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
453:       }
454:       if (sub_schurs->is_dir) {
455:         PetscInt       nmap;
456:         const PetscInt *idxs;

458:         ISGetIndices(sub_schurs->is_dir,&idxs);
459:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
460:         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
461:         ISRestoreIndices(sub_schurs->is_dir,&idxs);
462:       }
463:       PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
464:     } else {
465:       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
466:     }
467:   } else {
468:     deluxe_ctx->n_simple = 0;
         /* release a list possibly left from a previous setup instead of dropping the pointer;
            PetscFree accepts a NULL pointer and zeroes the pointer it is given */
469:     PetscFree(deluxe_ctx->idx_simple_B);
470:   }
471:   return(0);
472: }

474: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
475: {
       /*
          Builds, for each of the sub_schurs->n_subs subsets, the objects used by the deluxe
          restriction/extension functions of this file:
            seq_scctx[i]       - scatter between pcbddc->work_scaling and the subset work vector
            seq_work1/2[i]     - work vectors of the size of the subset
            seq_mat[i]         - dense matrix wrapping the block of sub_schurs->S_Ej_all
            seq_mat_inv_sum[i] - factored dense matrix wrapping the block of sub_schurs->sum_S_Ej_all
          Scatters, work vectors and the workspace are created only on the first setup
          (deluxe_ctx->seq_n == 0); the matrices are recreated at every call.
          When pcbddc->deluxe_singlemat is set, the solve and the change of basis are folded
          into seq_mat[i], and seq_mat_inv_sum[i] is left NULL.
          Nothing is done when there are no subsets.
          NOTE(review): the %D format below assumes seq_n and n_subs are PetscInt - confirm
          against their declarations.
       */
476:   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
477:   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
478:   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
479:   PetscScalar            *matdata,*matdata2;
480:   PetscInt               i,max_subset_size,cum,cum2;
481:   const PetscInt         *idxs;
482:   PetscBool              newsetup = PETSC_FALSE;
483:   PetscErrorCode         ierr;

486:   if (!sub_schurs->n_subs) {
487:     return(0);
488:   }

490:   /* Allocate arrays for subproblems */
491:   if (!deluxe_ctx->seq_n) {
492:     deluxe_ctx->seq_n = sub_schurs->n_subs;
493:     PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
494:     newsetup = PETSC_TRUE;
495:   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
         /* PetscInt values are printed with the PETSc %D format, which stays correct for 64-bit indices */
496:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);
497:   }
498:   /* the change of basis is just a reference to sub_schurs->change (if any) */
499:   deluxe_ctx->change         = sub_schurs->change;
500:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

502:   /* Create objects for deluxe */
503:   max_subset_size = 0;
504:   for (i=0;i<sub_schurs->n_subs;i++) {
505:     PetscInt subset_size;
506:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
507:     max_subset_size = PetscMax(subset_size,max_subset_size);
508:   }
       /* a single buffer, sized for the largest subset, backs the work vectors of all the subsets */
509:   if (newsetup) {
510:     PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
511:   }
       /* cum is the offset of the current subset in the index list of is_Ej_all,
          cum2 the offset of its dense block in the arrays of S_Ej_all and sum_S_Ej_all */
512:   cum = cum2 = 0;
513:   ISGetIndices(sub_schurs->is_Ej_all,&idxs);
514:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
515:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
516:   for (i=0;i<deluxe_ctx->seq_n;i++) {
517:     PetscInt     subset_size;

519:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
520:     if (newsetup) {
521:       IS  sub;
522:       /* work vectors */
           /* seq_work1[i] uses the first subset_size entries of the workspace, seq_work2[i] the following ones */
523:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
524:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);

526:       /* scatters */
527:       ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
528:       VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
529:       ISDestroy(&sub);
530:     }

532:     /* S_E_j */
533:     MatDestroy(&deluxe_ctx->seq_mat[i]);
534:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);

536:     /* \sum_k S^k_E_j */
537:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
538:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);

         /* NOTE(review): the dense matrix wraps the array of sum_S_Ej_all, so the factorization
            presumably overwrites those values - confirm */
540:     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
541:       MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
542:     } else {
543:       MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
544:     }
545:     if (pcbddc->deluxe_singlemat) {
           /* collapse solve, matrix and change of basis in a single explicit matrix:
              Y is the solution of the (transposed, in the LU case) solve with right-hand side X,
              X being seq_mat[i] (Cholesky case) or its transpose (LU case) */
546:       Mat X,Y;
547:       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
548:         MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
549:       } else {
550:         PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
551:         X    = deluxe_ctx->seq_mat[i];
552:       }
553:       MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
554:       if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
555:         PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
556:       } else {
557:         MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
558:       }

           /* the factored matrix is no longer needed; the pointer left NULL by MatDestroy makes
              the restriction/extension functions skip the solve for this subset */
560:       MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
561:       MatDestroy(&deluxe_ctx->seq_mat[i]);
562:       MatDestroy(&X);
563:       if (deluxe_ctx->change) {
564:         Mat C,CY;

             /* Y <- C * Y * C^T, with C the operator of the change of basis KSP */
566:         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
567:         KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
568:         MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
569:         MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
570:         MatDestroy(&CY);
571:       }
572:       MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
573:       deluxe_ctx->seq_mat[i] = Y;
574:     }
575:     cum += subset_size;
576:     cum2 += subset_size*subset_size;
577:   }
578:   ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
579:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
580:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
       /* with a single matrix the change of basis has already been folded in seq_mat[i] */
581:   if (pcbddc->deluxe_singlemat) {
582:     deluxe_ctx->change         = NULL;
583:     deluxe_ctx->change_with_qr = PETSC_FALSE;
584:   }

       /* a change of basis that is not QR based is applied through solves: set up its KSPs,
          selecting an LU preconditioner (and reading the options) only on the first setup */
586:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
587:     for (i=0;i<deluxe_ctx->seq_n;i++) {
588:       if (newsetup) {
             /* named change_pc so that it does not shadow the pc argument of this function */
589:         PC change_pc;

591:         KSPGetPC(deluxe_ctx->change[i],&change_pc);
592:         PCSetType(change_pc,PCLU);
593:         KSPSetFromOptions(deluxe_ctx->change[i]);
594:       }
595:       KSPSetUp(deluxe_ctx->change[i]);
596:     }
597:   }
598:   return(0);
599: }