Actual source code: pbjacobi.c
2: /*
3: Include files needed for the PBJacobi preconditioner:
4: pcimpl.h - private include file intended for use by all preconditioners
5: */
7: #include <petsc/private/pcimpl.h>
9: /*
10: Private context (data structure) for the PBJacobi preconditioner.
11: */
12: typedef struct {
13: const MatScalar *diag;
14: PetscInt bs,mbs;
15: } PC_PBJacobi;
17: static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y)
18: {
19: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
20: PetscInt i,m = jac->mbs;
21: const MatScalar *diag = jac->diag;
22: const PetscScalar *xx;
23: PetscScalar *yy;
25: VecGetArrayRead(x,&xx);
26: VecGetArray(y,&yy);
27: for (i=0; i<m; i++) yy[i] = diag[i]*xx[i];
28: VecRestoreArrayRead(x,&xx);
29: VecRestoreArray(y,&yy);
30: PetscLogFlops(m);
31: return 0;
32: }
34: static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
35: {
36: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
37: PetscInt i,m = jac->mbs;
38: const MatScalar *diag = jac->diag;
39: PetscScalar x0,x1,*yy;
40: const PetscScalar *xx;
42: VecGetArrayRead(x,&xx);
43: VecGetArray(y,&yy);
44: for (i=0; i<m; i++) {
45: x0 = xx[2*i]; x1 = xx[2*i+1];
46: yy[2*i] = diag[0]*x0 + diag[2]*x1;
47: yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
48: diag += 4;
49: }
50: VecRestoreArrayRead(x,&xx);
51: VecRestoreArray(y,&yy);
52: PetscLogFlops(6.0*m);
53: return 0;
54: }
55: static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
56: {
57: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
58: PetscInt i,m = jac->mbs;
59: const MatScalar *diag = jac->diag;
60: PetscScalar x0,x1,x2,*yy;
61: const PetscScalar *xx;
63: VecGetArrayRead(x,&xx);
64: VecGetArray(y,&yy);
65: for (i=0; i<m; i++) {
66: x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
68: yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
69: yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
70: yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
71: diag += 9;
72: }
73: VecRestoreArrayRead(x,&xx);
74: VecRestoreArray(y,&yy);
75: PetscLogFlops(15.0*m);
76: return 0;
77: }
78: static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
79: {
80: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
81: PetscInt i,m = jac->mbs;
82: const MatScalar *diag = jac->diag;
83: PetscScalar x0,x1,x2,x3,*yy;
84: const PetscScalar *xx;
86: VecGetArrayRead(x,&xx);
87: VecGetArray(y,&yy);
88: for (i=0; i<m; i++) {
89: x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
91: yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
92: yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
93: yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
94: yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
95: diag += 16;
96: }
97: VecRestoreArrayRead(x,&xx);
98: VecRestoreArray(y,&yy);
99: PetscLogFlops(28.0*m);
100: return 0;
101: }
102: static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
103: {
104: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
105: PetscInt i,m = jac->mbs;
106: const MatScalar *diag = jac->diag;
107: PetscScalar x0,x1,x2,x3,x4,*yy;
108: const PetscScalar *xx;
110: VecGetArrayRead(x,&xx);
111: VecGetArray(y,&yy);
112: for (i=0; i<m; i++) {
113: x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
115: yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
116: yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
117: yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
118: yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
119: yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
120: diag += 25;
121: }
122: VecRestoreArrayRead(x,&xx);
123: VecRestoreArray(y,&yy);
124: PetscLogFlops(45.0*m);
125: return 0;
126: }
127: static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y)
128: {
129: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
130: PetscInt i,m = jac->mbs;
131: const MatScalar *diag = jac->diag;
132: PetscScalar x0,x1,x2,x3,x4,x5,*yy;
133: const PetscScalar *xx;
135: VecGetArrayRead(x,&xx);
136: VecGetArray(y,&yy);
137: for (i=0; i<m; i++) {
138: x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5];
140: yy[6*i] = diag[0]*x0 + diag[6]*x1 + diag[12]*x2 + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
141: yy[6*i+1] = diag[1]*x0 + diag[7]*x1 + diag[13]*x2 + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
142: yy[6*i+2] = diag[2]*x0 + diag[8]*x1 + diag[14]*x2 + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
143: yy[6*i+3] = diag[3]*x0 + diag[9]*x1 + diag[15]*x2 + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
144: yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2 + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
145: yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2 + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
146: diag += 36;
147: }
148: VecRestoreArrayRead(x,&xx);
149: VecRestoreArray(y,&yy);
150: PetscLogFlops(66.0*m);
151: return 0;
152: }
153: static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
154: {
155: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
156: PetscInt i,m = jac->mbs;
157: const MatScalar *diag = jac->diag;
158: PetscScalar x0,x1,x2,x3,x4,x5,x6,*yy;
159: const PetscScalar *xx;
161: VecGetArrayRead(x,&xx);
162: VecGetArray(y,&yy);
163: for (i=0; i<m; i++) {
164: x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];
166: yy[7*i] = diag[0]*x0 + diag[7]*x1 + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
167: yy[7*i+1] = diag[1]*x0 + diag[8]*x1 + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
168: yy[7*i+2] = diag[2]*x0 + diag[9]*x1 + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
169: yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
170: yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
171: yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
172: yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
173: diag += 49;
174: }
175: VecRestoreArrayRead(x,&xx);
176: VecRestoreArray(y,&yy);
177: PetscLogFlops(91.0*m); /* 2*bs2 - bs */
178: return 0;
179: }
180: static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y)
181: {
182: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
183: PetscInt i,ib,jb;
184: const PetscInt m = jac->mbs;
185: const PetscInt bs = jac->bs;
186: const MatScalar *diag = jac->diag;
187: PetscScalar *yy;
188: const PetscScalar *xx;
190: VecGetArrayRead(x,&xx);
191: VecGetArray(y,&yy);
192: for (i=0; i<m; i++) {
193: for (ib=0; ib<bs; ib++) {
194: PetscScalar rowsum = 0;
195: for (jb=0; jb<bs; jb++) {
196: rowsum += diag[ib+jb*bs] * xx[bs*i+jb];
197: }
198: yy[bs*i+ib] = rowsum;
199: }
200: diag += bs*bs;
201: }
202: VecRestoreArrayRead(x,&xx);
203: VecRestoreArray(y,&yy);
204: PetscLogFlops((2.0*bs*bs-bs)*m); /* 2*bs2 - bs */
205: return 0;
206: }
208: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y)
209: {
210: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
211: PetscInt i,j,k,m = jac->mbs,bs=jac->bs;
212: const MatScalar *diag = jac->diag;
213: const PetscScalar *xx;
214: PetscScalar *yy;
216: VecGetArrayRead(x,&xx);
217: VecGetArray(y,&yy);
218: for (i=0; i<m; i++) {
219: for (j=0; j<bs; j++) yy[i*bs+j] = 0.;
220: for (j=0; j<bs; j++) {
221: for (k=0; k<bs; k++) {
222: yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j];
223: }
224: }
225: diag += bs*bs;
226: }
227: VecRestoreArrayRead(x,&xx);
228: VecRestoreArray(y,&yy);
229: PetscLogFlops(m*bs*(2*bs-1));
230: return 0;
231: }
233: /* -------------------------------------------------------------------------- */
234: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
235: {
236: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
237: Mat A = pc->pmat;
238: MatFactorError err;
239: PetscInt nlocal;
241: MatInvertBlockDiagonal(A,&jac->diag);
242: MatFactorGetError(A,&err);
243: if (err) pc->failedreason = (PCFailedReason)err;
245: MatGetBlockSize(A,&jac->bs);
246: MatGetLocalSize(A,&nlocal,NULL);
247: jac->mbs = nlocal/jac->bs;
248: switch (jac->bs) {
249: case 1:
250: pc->ops->apply = PCApply_PBJacobi_1;
251: break;
252: case 2:
253: pc->ops->apply = PCApply_PBJacobi_2;
254: break;
255: case 3:
256: pc->ops->apply = PCApply_PBJacobi_3;
257: break;
258: case 4:
259: pc->ops->apply = PCApply_PBJacobi_4;
260: break;
261: case 5:
262: pc->ops->apply = PCApply_PBJacobi_5;
263: break;
264: case 6:
265: pc->ops->apply = PCApply_PBJacobi_6;
266: break;
267: case 7:
268: pc->ops->apply = PCApply_PBJacobi_7;
269: break;
270: default:
271: pc->ops->apply = PCApply_PBJacobi_N;
272: break;
273: }
274: pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
275: return 0;
276: }
277: /* -------------------------------------------------------------------------- */
278: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
279: {
280: /*
281: Free the private data structure that was hanging off the PC
282: */
283: PetscFree(pc->data);
284: return 0;
285: }
287: static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer)
288: {
289: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
290: PetscBool iascii;
292: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
293: if (iascii) {
294: PetscViewerASCIIPrintf(viewer," point-block size %D\n",jac->bs);
295: }
296: return 0;
297: }
299: /* -------------------------------------------------------------------------- */
300: /*MC
301: PCPBJACOBI - Point block Jacobi preconditioner
303: Notes:
304: See PCJACOBI for point Jacobi preconditioning, PCVPBJACOBI for variable size point block Jacobi and PCBJACOBI for large blocks
306: This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix
308: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
309: is detected a PETSc error is generated.
311: Developer Notes:
312: This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
313: the factorization to continue even after a zero pivot is found resulting in a Nan and hence
314: terminating KSP with a KSP_DIVERGED_NANORIF allowing
315: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
317: Perhaps should provide an option that allows generation of a valid preconditioner
318: even if a block is singular as the PCJACOBI does.
320: Level: beginner
322: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCVPBJACOBI, PCBJACOBI
324: M*/
326: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
327: {
328: PC_PBJacobi *jac;
330: /*
331: Creates the private data structure for this preconditioner and
332: attach it to the PC object.
333: */
334: PetscNewLog(pc,&jac);
335: pc->data = (void*)jac;
337: /*
338: Initialize the pointers to vectors to ZERO; these will be used to store
339: diagonal entries of the matrix for fast preconditioner application.
340: */
341: jac->diag = NULL;
343: /*
344: Set the pointers for the functions that are provided above.
345: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
346: are called, they will automatically call these functions. Note we
347: choose not to provide a couple of these functions since they are
348: not needed.
349: */
350: pc->ops->apply = NULL; /*set depending on the block size */
351: pc->ops->applytranspose = NULL;
352: pc->ops->setup = PCSetUp_PBJacobi;
353: pc->ops->destroy = PCDestroy_PBJacobi;
354: pc->ops->setfromoptions = NULL;
355: pc->ops->view = PCView_PBJacobi;
356: pc->ops->applyrichardson = NULL;
357: pc->ops->applysymmetricleft = NULL;
358: pc->ops->applysymmetricright = NULL;
359: return 0;
360: }