Actual source code: ex192.c

  1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
  2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";

  4: #include <petscmat.h>

  6: int main(int argc,char **args)
  7: {
  8:   Mat            A,RHS,C,F,X,S;
  9:   Vec            u,x,b;
 10:   Vec            xschur,bschur,uschur;
 11:   IS             is_schur;
 12:   PetscMPIInt    size;
 13:   PetscInt       isolver=0,size_schur,m,n,nfact,nsolve,nrhs;
 14:   PetscReal      norm,tol=PETSC_SQRT_MACHINE_EPSILON;
 15:   PetscRandom    rand;
 16:   PetscBool      data_provided,herm,symm,use_lu,cuda = PETSC_FALSE;
 17:   PetscReal      sratio = 5.1/12.;
 18:   PetscViewer    fd;              /* viewer */
 19:   char           solver[256];
 20:   char           file[PETSC_MAX_PATH_LEN]; /* input file name */

 22:   PetscInitialize(&argc,&args,(char*)0,help);
 23:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 25:   /* Determine which type of solver we want to test for */
 26:   herm = PETSC_FALSE;
 27:   symm = PETSC_FALSE;
 28:   PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);
 29:   PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);
 30:   if (herm) symm = PETSC_TRUE;
 31:   PetscOptionsGetBool(NULL,NULL,"-cuda_solve",&cuda,NULL);
 32:   PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);

 34:   /* Determine file from which we read the matrix A */
 35:   PetscOptionsGetString(NULL,NULL,"-f",file,sizeof(file),&data_provided);
 36:   if (!data_provided) { /* get matrices from PETSc distribution */
 37:     PetscStrncpy(file,"${PETSC_DIR}/share/petsc/datafiles/matrices/",sizeof(file));
 38:     if (symm) {
 39: #if defined (PETSC_USE_COMPLEX)
 40:       PetscStrlcat(file,"hpd-complex-",sizeof(file));
 41: #else
 42:       PetscStrlcat(file,"spd-real-",sizeof(file));
 43: #endif
 44:     } else {
 45: #if defined (PETSC_USE_COMPLEX)
 46:       PetscStrlcat(file,"nh-complex-",sizeof(file));
 47: #else
 48:       PetscStrlcat(file,"ns-real-",sizeof(file));
 49: #endif
 50:     }
 51: #if defined(PETSC_USE_64BIT_INDICES)
 52:     PetscStrlcat(file,"int64-",sizeof(file));
 53: #else
 54:     PetscStrlcat(file,"int32-",sizeof(file));
 55: #endif
 56: #if defined (PETSC_USE_REAL_SINGLE)
 57:     PetscStrlcat(file,"float32",sizeof(file));
 58: #else
 59:     PetscStrlcat(file,"float64",sizeof(file));
 60: #endif
 61:   }
 62:   /* Load matrix A */
 63:   PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);
 64:   MatCreate(PETSC_COMM_WORLD,&A);
 65:   MatLoad(A,fd);
 66:   PetscViewerDestroy(&fd);
 67:   MatGetSize(A,&m,&n);

 70:   /* Create dense matrix C and X; C holds true solution with identical columns */
 71:   nrhs = 2;
 72:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
 73:   MatCreate(PETSC_COMM_WORLD,&C);
 74:   MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);
 75:   MatSetType(C,MATDENSE);
 76:   MatSetFromOptions(C);
 77:   MatSetUp(C);

 79:   PetscRandomCreate(PETSC_COMM_WORLD,&rand);
 80:   PetscRandomSetFromOptions(rand);
 81:   MatSetRandom(C,rand);
 82:   MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);

 84:   /* Create vectors */
 85:   VecCreate(PETSC_COMM_WORLD,&x);
 86:   VecSetSizes(x,n,PETSC_DECIDE);
 87:   VecSetFromOptions(x);
 88:   VecDuplicate(x,&b);
 89:   VecDuplicate(x,&u); /* save the true solution */

 91:   PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);
 92:   switch (isolver) {
 93: #if defined(PETSC_HAVE_MUMPS)
 94:     case 0:
 95:       PetscStrcpy(solver,MATSOLVERMUMPS);
 96:       break;
 97: #endif
 98: #if defined(PETSC_HAVE_MKL_PARDISO)
 99:     case 1:
100:       PetscStrcpy(solver,MATSOLVERMKL_PARDISO);
101:       break;
102: #endif
103:     default:
104:       PetscStrcpy(solver,MATSOLVERPETSC);
105:       break;
106:   }

108: #if defined (PETSC_USE_COMPLEX)
109:   if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
110:     PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
111:     PetscScalar val = -1.0;
112:     val = val + im;
113:     MatSetValue(A,1,0,val,INSERT_VALUES);
114:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
115:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
116:   }
117: #endif

119:   PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);
121:   size_schur = (PetscInt)(sratio*m);

123:   PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n",solver,nrhs,symm,herm,size_schur,m);

125:   /* Test LU/Cholesky Factorization */
126:   use_lu = PETSC_FALSE;
127:   if (!symm) use_lu = PETSC_TRUE;
128: #if defined (PETSC_USE_COMPLEX)
129:   if (isolver == 1) use_lu = PETSC_TRUE;
130: #endif
131:   if (cuda && symm && !herm) use_lu = PETSC_TRUE;

133:   if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
134:     MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
135:     MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);
136:   }

138:   if (use_lu) {
139:     MatGetFactor(A,solver,MAT_FACTOR_LU,&F);
140:   } else {
141:     if (herm) {
142:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
143:       MatSetOption(A,MAT_SPD,PETSC_TRUE);
144:     } else {
145:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
146:       MatSetOption(A,MAT_SPD,PETSC_FALSE);
147:     }
148:     MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);
149:   }
150:   ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);
151:   MatFactorSetSchurIS(F,is_schur);

153:   ISDestroy(&is_schur);
154:   if (use_lu) {
155:     MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
156:   } else {
157:     MatCholeskyFactorSymbolic(F,A,NULL,NULL);
158:   }

160:   for (nfact = 0; nfact < 3; nfact++) {
161:     Mat AD;

163:     if (!nfact) {
164:       VecSetRandom(x,rand);
165:       if (symm && herm) {
166:         VecAbs(x);
167:       }
168:       MatDiagonalSet(A,x,ADD_VALUES);
169:     }
170:     if (use_lu) {
171:       MatLUFactorNumeric(F,A,NULL);
172:     } else {
173:       MatCholeskyFactorNumeric(F,A,NULL);
174:     }
175:     if (cuda) {
176:       MatFactorGetSchurComplement(F,&S,NULL);
177:       MatSetType(S,MATSEQDENSECUDA);
178:       MatCreateVecs(S,&xschur,&bschur);
179:       MatFactorRestoreSchurComplement(F,&S,MAT_FACTOR_SCHUR_UNFACTORED);
180:     }
181:     MatFactorCreateSchurComplement(F,&S,NULL);
182:     if (!cuda) {
183:       MatCreateVecs(S,&xschur,&bschur);
184:     }
185:     VecDuplicate(xschur,&uschur);
186:     if (nfact == 1 && (!cuda || (herm && symm))) {
187:       MatFactorInvertSchurComplement(F);
188:     }
189:     for (nsolve = 0; nsolve < 2; nsolve++) {
190:       VecSetRandom(x,rand);
191:       VecCopy(x,u);

193:       if (nsolve) {
194:         MatMult(A,x,b);
195:         MatSolve(F,b,x);
196:       } else {
197:         MatMultTranspose(A,x,b);
198:         MatSolveTranspose(F,b,x);
199:       }
200:       /* Check the error */
201:       VecAXPY(u,-1.0,x);  /* u <- (-1.0)x + u */
202:       VecNorm(u,NORM_2,&norm);
203:       if (norm > tol) {
204:         PetscReal resi;
205:         if (nsolve) {
206:           MatMult(A,x,u); /* u = A*x */
207:         } else {
208:           MatMultTranspose(A,x,u); /* u = A*x */
209:         }
210:         VecAXPY(u,-1.0,b);  /* u <- (-1.0)b + u */
211:         VecNorm(u,NORM_2,&resi);
212:         if (nsolve) {
213:           PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n",nfact,nsolve,(double)norm,(double)resi);
214:         } else {
215:           PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,(double)norm,(double)resi);
216:         }
217:       }
218:       VecSetRandom(xschur,rand);
219:       VecCopy(xschur,uschur);
220:       if (nsolve) {
221:         MatMult(S,xschur,bschur);
222:         MatFactorSolveSchurComplement(F,bschur,xschur);
223:       } else {
224:         MatMultTranspose(S,xschur,bschur);
225:         MatFactorSolveSchurComplementTranspose(F,bschur,xschur);
226:       }
227:       /* Check the error */
228:       VecAXPY(uschur,-1.0,xschur);  /* u <- (-1.0)x + u */
229:       VecNorm(uschur,NORM_2,&norm);
230:       if (norm > tol) {
231:         PetscReal resi;
232:         if (nsolve) {
233:           MatMult(S,xschur,uschur); /* u = A*x */
234:         } else {
235:           MatMultTranspose(S,xschur,uschur); /* u = A*x */
236:         }
237:         VecAXPY(uschur,-1.0,bschur);  /* u <- (-1.0)b + u */
238:         VecNorm(uschur,NORM_2,&resi);
239:         if (nsolve) {
240:           PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n",nfact,nsolve,(double)norm,(double)resi);
241:         } else {
242:           PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,(double)norm,(double)resi);
243:         }
244:       }
245:     }
246:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD);
247:     if (!nfact) {
248:       MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);
249:     } else {
250:       MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);
251:     }
252:     MatDestroy(&AD);
253:     for (nsolve = 0; nsolve < 2; nsolve++) {
254:       MatMatSolve(F,RHS,X);

256:       /* Check the error */
257:       MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
258:       MatNorm(X,NORM_FROBENIUS,&norm);
259:       if (norm > tol) {
260:         PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n",nfact,nsolve,(double)norm);
261:       }
262:     }
263:     if (isolver == 0) {
264:       Mat spRHS,spRHST,RHST;

266:       MatTranspose(RHS,MAT_INITIAL_MATRIX,&RHST);
267:       MatConvert(RHST,MATSEQAIJ,MAT_INITIAL_MATRIX,&spRHST);
268:       MatCreateTranspose(spRHST,&spRHS);
269:       for (nsolve = 0; nsolve < 2; nsolve++) {
270:         MatMatSolve(F,spRHS,X);

272:         /* Check the error */
273:         MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
274:         MatNorm(X,NORM_FROBENIUS,&norm);
275:         if (norm > tol) {
276:           PetscPrintf(PETSC_COMM_SELF,"(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n",nfact,nsolve,(double)norm);
277:         }
278:       }
279:       MatDestroy(&spRHST);
280:       MatDestroy(&spRHS);
281:       MatDestroy(&RHST);
282:     }
283:     MatDestroy(&S);
284:     VecDestroy(&xschur);
285:     VecDestroy(&bschur);
286:     VecDestroy(&uschur);
287:   }
288:   /* Free data structures */
289:   MatDestroy(&A);
290:   MatDestroy(&C);
291:   MatDestroy(&F);
292:   MatDestroy(&X);
293:   MatDestroy(&RHS);
294:   PetscRandomDestroy(&rand);
295:   VecDestroy(&x);
296:   VecDestroy(&b);
297:   VecDestroy(&u);
298:   PetscFinalize();
299:   return 0;
300: }

302: /*TEST

304:    testset:
305:      requires: mkl_pardiso double !complex
306:      args: -solver 1

308:      test:
309:        suffix: mkl_pardiso
310:      test:
311:        requires: cuda
312:        suffix: mkl_pardiso_cuda
313:        args: -cuda_solve
314:        output_file: output/ex192_mkl_pardiso.out
315:      test:
316:        suffix: mkl_pardiso_1
317:        args: -symmetric_solve
318:        output_file: output/ex192_mkl_pardiso_1.out
319:      test:
320:        requires: cuda
321:        suffix: mkl_pardiso_cuda_1
322:        args: -symmetric_solve -cuda_solve
323:        output_file: output/ex192_mkl_pardiso_1.out
324:      test:
325:        suffix: mkl_pardiso_3
326:        args: -symmetric_solve -hermitian_solve
327:        output_file: output/ex192_mkl_pardiso_3.out
328:      test:
329:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
330:        suffix: mkl_pardiso_cuda_3
331:        args: -symmetric_solve -hermitian_solve -cuda_solve
332:        output_file: output/ex192_mkl_pardiso_3.out

334:    testset:
335:      requires: mumps double !complex
336:      args: -solver 0

338:      test:
339:        suffix: mumps
340:      test:
341:        requires: cuda
342:        suffix: mumps_cuda
343:        args: -cuda_solve
344:        output_file: output/ex192_mumps.out
345:      test:
346:        suffix: mumps_2
347:        args: -symmetric_solve
348:        output_file: output/ex192_mumps_2.out
349:      test:
350:        requires: cuda
351:        suffix: mumps_cuda_2
352:        args: -symmetric_solve -cuda_solve
353:        output_file: output/ex192_mumps_2.out
354:      test:
355:        suffix: mumps_3
356:        args: -symmetric_solve -hermitian_solve
357:        output_file: output/ex192_mumps_3.out
358:      test:
359:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
360:        suffix: mumps_cuda_3
361:        args: -symmetric_solve -hermitian_solve -cuda_solve
362:        output_file: output/ex192_mumps_3.out

364: TEST*/