Actual source code: bddcscalingbasic.c
petsc-3.8.4 2018-03-24
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
6: /* prototypes for deluxe functions */
7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14: {
15: Mat_SeqDense *mat = (Mat_SeqDense*)A->data;
17: PetscScalar *b,*x;
18: PetscInt n;
19: PetscBLASInt nrhs,info,m;
20: PetscBool flg;
23: PetscBLASIntCast(A->rmap->n,&m);
24: PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
25: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
26: PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
27: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
29: MatGetSize(B,NULL,&n);
30: PetscBLASIntCast(n,&nrhs);
31: MatDenseGetArray(B,&b);
32: MatDenseGetArray(X,&x);
34: PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));
36: if (A->factortype == MAT_FACTOR_LU) {
37: #if defined(PETSC_MISSING_LAPACK_GETRS)
38: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
39: #else
40: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
41: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
42: #endif
43: } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
44: #if defined(PETSC_MISSING_LAPACK_POTRS)
45: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRS - Lapack routine is unavailable.");
46: #else
47: PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&m,&nrhs,mat->v,&mat->lda,x,&m,&info));
48: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"POTRS Bad solve");
49: #endif
50: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be factored to solve");
52: MatDenseRestoreArray(B,&b);
53: MatDenseRestoreArray(X,&x);
54: PetscLogFlops(nrhs*(2.0*m*m - m));
55: return(0);
56: }
59: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
60: {
61: PC_IS* pcis = (PC_IS*)pc->data;
62: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
66: /* Apply partition of unity */
67: VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
68: VecSet(global_vector,0.0);
69: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
70: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
71: return(0);
72: }
74: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
75: {
76: PC_IS* pcis=(PC_IS*)pc->data;
77: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
78: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
79: PetscErrorCode ierr;
82: VecSet(pcbddc->work_scaling,0.0);
83: VecSet(y,0.0);
84: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
85: PetscInt i;
86: const PetscScalar *array_x,*array_D;
87: PetscScalar *array;
88: VecGetArrayRead(x,&array_x);
89: VecGetArrayRead(pcis->D,&array_D);
90: VecGetArray(pcbddc->work_scaling,&array);
91: for (i=0;i<deluxe_ctx->n_simple;i++) {
92: array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
93: }
94: VecRestoreArray(pcbddc->work_scaling,&array);
95: VecRestoreArrayRead(pcis->D,&array_D);
96: VecRestoreArrayRead(x,&array_x);
97: }
98: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
99: if (deluxe_ctx->seq_mat) {
100: PetscInt i;
101: for (i=0;i<deluxe_ctx->seq_n;i++) {
102: if (deluxe_ctx->change) {
103: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
104: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
105: if (deluxe_ctx->change_with_qr) {
106: Mat change;
108: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
109: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
110: } else {
111: KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
112: }
113: } else {
114: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
115: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
116: }
117: MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
118: if (deluxe_ctx->seq_mat_inv_sum[i]) {
119: PetscScalar *x;
121: VecGetArray(deluxe_ctx->seq_work2[i],&x);
122: VecPlaceArray(deluxe_ctx->seq_work1[i],x);
123: VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
124: MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
125: VecResetArray(deluxe_ctx->seq_work1[i]);
126: }
127: if (deluxe_ctx->change) {
128: Mat change;
130: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
131: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
132: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
133: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
134: } else {
135: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
136: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
137: }
138: }
139: }
140: /* put local boundary part in global vector */
141: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
142: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
143: return(0);
144: }
146: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
147: {
148: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
155: if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
156: PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
157: return(0);
158: }
160: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
161: {
163: PC_IS *pcis = (PC_IS*)pc->data;
166: VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
167: VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
168: /* Apply partition of unity */
169: VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
170: return(0);
171: }
173: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
174: {
175: PC_IS* pcis=(PC_IS*)pc->data;
176: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
177: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
178: PetscErrorCode ierr;
181: /* get local boundary part of global vector */
182: VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
183: VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
184: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
185: PetscInt i;
186: PetscScalar *array_y;
187: const PetscScalar *array_D;
188: VecGetArray(y,&array_y);
189: VecGetArrayRead(pcis->D,&array_D);
190: for (i=0;i<deluxe_ctx->n_simple;i++) {
191: array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
192: }
193: VecRestoreArrayRead(pcis->D,&array_D);
194: VecRestoreArray(y,&array_y);
195: }
196: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
197: if (deluxe_ctx->seq_mat) {
198: PetscInt i;
199: for (i=0;i<deluxe_ctx->seq_n;i++) {
200: if (deluxe_ctx->change) {
201: Mat change;
203: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
204: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
205: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
206: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
207: } else {
208: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
209: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
210: }
211: if (deluxe_ctx->seq_mat_inv_sum[i]) {
212: PetscScalar *x;
214: VecGetArray(deluxe_ctx->seq_work1[i],&x);
215: VecPlaceArray(deluxe_ctx->seq_work2[i],x);
216: VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
217: MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
218: VecResetArray(deluxe_ctx->seq_work2[i]);
219: }
220: MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
221: if (deluxe_ctx->change) {
222: if (deluxe_ctx->change_with_qr) {
223: Mat change;
225: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
226: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
227: } else {
228: KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
229: }
230: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
231: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
232: } else {
233: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
234: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
235: }
236: }
237: }
238: return(0);
239: }
241: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
242: {
243: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
250: if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!\n");
251: PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
252: return(0);
253: }
255: PetscErrorCode PCBDDCScalingSetUp(PC pc)
256: {
257: PC_IS* pcis=(PC_IS*)pc->data;
258: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
263: /* create work vector for the operator */
264: VecDestroy(&pcbddc->work_scaling);
265: VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
266: /* always rebuild pcis->D */
267: if (pcis->use_stiffness_scaling) {
268: MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
269: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
270: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
271: }
272: VecCopy(pcis->D,pcis->vec1_B);
273: VecSet(pcis->vec1_global,0.0);
274: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
275: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
276: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
277: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
278: VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
279: /* now setup */
280: if (pcbddc->use_deluxe_scaling) {
281: if (!pcbddc->deluxe_ctx) {
282: PCBDDCScalingCreate_Deluxe(pc);
283: }
284: PCBDDCScalingSetUp_Deluxe(pc);
285: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
286: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
287: } else {
288: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
289: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
290: }
292: /* test */
293: if (pcbddc->dbg_flag) {
294: Mat B0_B = NULL;
295: Vec B0_Bv = NULL, B0_Bv2 = NULL;
296: Vec vec2_global;
297: PetscViewer viewer = pcbddc->dbg_viewer;
298: PetscReal error;
300: /* extension -> from local to parallel */
301: VecSet(pcis->vec1_global,0.0);
302: VecSetRandom(pcis->vec1_B,NULL);
303: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
304: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
305: VecDuplicate(pcis->vec1_global,&vec2_global);
306: VecCopy(pcis->vec1_global,vec2_global);
307: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
308: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
309: if (pcbddc->benign_n) {
310: IS is_dummy;
312: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
313: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
314: ISDestroy(&is_dummy);
315: MatCreateVecs(B0_B,NULL,&B0_Bv);
316: VecDuplicate(B0_Bv,&B0_Bv2);
317: MatMult(B0_B,pcis->vec1_B,B0_Bv);
318: }
319: PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
320: if (pcbddc->benign_saddle_point) {
321: PetscReal errorl = 0.;
322: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
323: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
324: if (pcbddc->benign_n) {
325: MatMult(B0_B,pcis->vec1_B,B0_Bv2);
326: VecAXPY(B0_Bv,-1.0,B0_Bv2);
327: VecNorm(B0_Bv,NORM_INFINITY,&errorl);
328: }
329: MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
330: PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
331: }
332: VecAXPY(pcis->vec1_global,-1.0,vec2_global);
333: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
334: PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
335: VecDestroy(&vec2_global);
337: /* restriction -> from parallel to local */
338: VecSet(pcis->vec1_global,0.0);
339: VecSetRandom(pcis->vec1_B,NULL);
340: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
341: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
342: PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
343: VecScale(pcis->vec1_B,-1.0);
344: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
345: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
346: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
347: PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
348: MatDestroy(&B0_B);
349: VecDestroy(&B0_Bv);
350: VecDestroy(&B0_Bv2);
351: }
352: return(0);
353: }
355: PetscErrorCode PCBDDCScalingDestroy(PC pc)
356: {
357: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
361: if (pcbddc->deluxe_ctx) {
362: PCBDDCScalingDestroy_Deluxe(pc);
363: }
364: VecDestroy(&pcbddc->work_scaling);
365: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
366: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
367: return(0);
368: }
370: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
371: {
372: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
373: PCBDDCDeluxeScaling deluxe_ctx;
374: PetscErrorCode ierr;
377: PetscNew(&deluxe_ctx);
378: pcbddc->deluxe_ctx = deluxe_ctx;
379: return(0);
380: }
382: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
383: {
384: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
385: PetscErrorCode ierr;
388: PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
389: PetscFree(pcbddc->deluxe_ctx);
390: return(0);
391: }
393: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
394: {
395: PetscInt i;
399: PetscFree(deluxe_ctx->idx_simple_B);
400: deluxe_ctx->n_simple = 0;
401: for (i=0;i<deluxe_ctx->seq_n;i++) {
402: VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
403: VecDestroy(&deluxe_ctx->seq_work1[i]);
404: VecDestroy(&deluxe_ctx->seq_work2[i]);
405: MatDestroy(&deluxe_ctx->seq_mat[i]);
406: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
407: }
408: PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
409: PetscFree(deluxe_ctx->workspace);
410: deluxe_ctx->seq_n = 0;
411: return(0);
412: }
414: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
415: {
416: PC_IS *pcis=(PC_IS*)pc->data;
417: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
418: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
419: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
420: PetscErrorCode ierr;
423: /* reset data structures if the topology has changed */
424: if (pcbddc->recompute_topography) {
425: PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
426: }
428: /* Compute data structures to solve sequential problems */
429: PCBDDCScalingSetUp_Deluxe_Private(pc);
431: /* diagonal scaling on interface dofs not contained in cc */
432: if (sub_schurs->is_vertices || sub_schurs->is_dir) {
433: PetscInt n_com,n_dir;
434: n_com = 0;
435: if (sub_schurs->is_vertices) {
436: ISGetLocalSize(sub_schurs->is_vertices,&n_com);
437: }
438: n_dir = 0;
439: if (sub_schurs->is_dir) {
440: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
441: }
442: if (!deluxe_ctx->n_simple) {
443: deluxe_ctx->n_simple = n_dir + n_com;
444: PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
445: if (sub_schurs->is_vertices) {
446: PetscInt nmap;
447: const PetscInt *idxs;
449: ISGetIndices(sub_schurs->is_vertices,&idxs);
450: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
451: if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %d != %d",nmap,n_com);
452: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
453: }
454: if (sub_schurs->is_dir) {
455: PetscInt nmap;
456: const PetscInt *idxs;
458: ISGetIndices(sub_schurs->is_dir,&idxs);
459: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
460: if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %d != %d",nmap,n_dir);
461: ISRestoreIndices(sub_schurs->is_dir,&idxs);
462: }
463: PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
464: } else {
465: if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %d is different from the previous one computed %d",n_dir + n_com,deluxe_ctx->n_simple);
466: }
467: } else {
468: deluxe_ctx->n_simple = 0;
469: deluxe_ctx->idx_simple_B = 0;
470: }
471: return(0);
472: }
474: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
475: {
476: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
477: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
478: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
479: PetscScalar *matdata,*matdata2;
480: PetscInt i,max_subset_size,cum,cum2;
481: const PetscInt *idxs;
482: PetscBool newsetup = PETSC_FALSE;
483: PetscErrorCode ierr;
486: if (!sub_schurs->n_subs) {
487: return(0);
488: }
490: /* Allocate arrays for subproblems */
491: if (!deluxe_ctx->seq_n) {
492: deluxe_ctx->seq_n = sub_schurs->n_subs;
493: PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
494: newsetup = PETSC_TRUE;
495: } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) {
496: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %d is different from the sub_schurs %d",deluxe_ctx->seq_n,sub_schurs->n_subs);
497: }
498: /* the change of basis is just a reference to sub_schurs->change (if any) */
499: deluxe_ctx->change = sub_schurs->change;
500: deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
502: /* Create objects for deluxe */
503: max_subset_size = 0;
504: for (i=0;i<sub_schurs->n_subs;i++) {
505: PetscInt subset_size;
506: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
507: max_subset_size = PetscMax(subset_size,max_subset_size);
508: }
509: if (newsetup) {
510: PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
511: }
512: cum = cum2 = 0;
513: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
514: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
515: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
516: for (i=0;i<deluxe_ctx->seq_n;i++) {
517: PetscInt subset_size;
519: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
520: if (newsetup) {
521: IS sub;
522: /* work vectors */
523: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
524: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);
526: /* scatters */
527: ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
528: VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
529: ISDestroy(&sub);
530: }
532: /* S_E_j */
533: MatDestroy(&deluxe_ctx->seq_mat[i]);
534: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);
536: /* \sum_k S^k_E_j */
537: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
538: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
540: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
541: MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
542: } else {
543: MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
544: }
545: if (pcbddc->deluxe_singlemat) {
546: Mat X,Y;
547: if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
548: MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
549: } else {
550: PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
551: X = deluxe_ctx->seq_mat[i];
552: }
553: MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
554: if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
555: PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
556: } else {
557: MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
558: }
560: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
561: MatDestroy(&deluxe_ctx->seq_mat[i]);
562: MatDestroy(&X);
563: if (deluxe_ctx->change) {
564: Mat C,CY;
566: if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
567: KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
568: MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
569: MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
570: MatDestroy(&CY);
571: }
572: MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
573: deluxe_ctx->seq_mat[i] = Y;
574: }
575: cum += subset_size;
576: cum2 += subset_size*subset_size;
577: }
578: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
579: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
580: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
581: if (pcbddc->deluxe_singlemat) {
582: deluxe_ctx->change = NULL;
583: deluxe_ctx->change_with_qr = PETSC_FALSE;
584: }
586: if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
587: for (i=0;i<deluxe_ctx->seq_n;i++) {
588: if (newsetup) {
589: PC pc;
591: KSPGetPC(deluxe_ctx->change[i],&pc);
592: PCSetType(pc,PCLU);
593: KSPSetFromOptions(deluxe_ctx->change[i]);
594: }
595: KSPSetUp(deluxe_ctx->change[i]);
596: }
597: }
598: return(0);
599: }