Actual source code: ex1.c


  2: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  3:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  4:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  6: #include <petscmat.h>

  8: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  9: {
 10:   PetscRandom    rand;
 11:   Mat            mat,RHS,SOLU;
 12:   PetscInt       rstart, rend;
 13:   PetscInt       cstart, cend;
 14:   PetscScalar    value = 1.0;
 15:   Vec            x, y, b;

 17:   /* create multiple vectors RHS and SOLU */
 18:   MatCreate(PETSC_COMM_WORLD,&RHS);
 19:   MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs);
 20:   MatSetType(RHS,MATDENSE);
 21:   MatSetOptionsPrefix(RHS,"rhs_");
 22:   MatSetFromOptions(RHS);
 23:   MatSeqDenseSetPreallocation(RHS,NULL);

 25:   PetscRandomCreate(PETSC_COMM_WORLD,&rand);
 26:   PetscRandomSetFromOptions(rand);
 27:   MatSetRandom(RHS,rand);

 29:   if (m == n) {
 30:     MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU);
 31:   } else {
 32:     MatCreate(PETSC_COMM_WORLD,&SOLU);
 33:     MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs);
 34:     MatSetType(SOLU,MATDENSE);
 35:     MatSeqDenseSetPreallocation(SOLU,NULL);
 36:   }
 37:   MatSetRandom(SOLU,rand);

 39:   /* create matrix */
 40:   MatCreate(PETSC_COMM_WORLD,&mat);
 41:   MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n);
 42:   MatSetType(mat,MATDENSE);
 43:   MatSetFromOptions(mat);
 44:   MatSetUp(mat);
 45:   MatGetOwnershipRange(mat,&rstart,&rend);
 46:   MatGetOwnershipRangeColumn(mat,&cstart,&cend);
 47:   if (!full) {
 48:     for (PetscInt i=rstart; i<rend; i++) {
 49:       if (m == n) {
 50:         value = (PetscReal)i+1;
 51:         MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);
 52:       } else {
 53:         for (PetscInt j = cstart; j < cend; j++) {
 54:           value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.);
 55:           MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES);
 56:         }
 57:       }
 58:     }
 59:     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
 60:     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
 61:   } else {
 62:     MatSetRandom(mat,rand);
 63:     if (m == n) {
 64:       Mat T;

 66:       MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
 67:       MatDestroy(&mat);
 68:       mat  = T;
 69:     }
 70:   }

 72:   /* create single vectors */
 73:   MatCreateVecs(mat,&x,&b);
 74:   VecDuplicate(x,&y);
 75:   VecSet(x,value);
 76:   PetscRandomDestroy(&rand);
 77:   *_mat  = mat;
 78:   *_RHS  = RHS;
 79:   *_SOLU = SOLU;
 80:   *_x    = x;
 81:   *_y    = y;
 82:   *_b    = b;
 83:   return 0;
 84: }

 86: int main(int argc,char **argv)
 87: {
 88:   Mat            mat,F,RHS,SOLU;
 89:   MatInfo        info;
 91:   PetscInt       m = 15, n = 10,i,j,nrhs=2;
 92:   Vec            x,y,b,ytmp;
 93:   IS             perm;
 94:   PetscReal      norm,tol=PETSC_SMALL;
 95:   PetscMPIInt    size;
 96:   char           solver[64];
 97:   PetscBool      inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

 99:   PetscInitialize(&argc,&argv,(char*) 0,help);
100:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
102:   PetscStrcpy(solver,"petsc");
103:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
104:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
105:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
106:   PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL);
107:   PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL);
108:   PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL);
109:   PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL);

111:   createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
112:   VecDuplicate(y,&ytmp);

114:   /* Only SeqDense* support in-place factorizations and NULL permutations */
115:   PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace);
116:   MatGetLocalSize(mat,&i,NULL);
117:   MatGetOwnershipRange(mat,&j,NULL);
118:   ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm);

120:   MatGetInfo(mat,MAT_LOCAL,&info);
121:   PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n",
122:                      (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);
123:   MatMult(mat,x,b);

125:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
126:   /* in-place Cholesky */
127:   if (inplace) {
128:     Mat RHS2;

130:     MatDuplicate(mat,MAT_COPY_VALUES,&F);
131:     if (!ldl) MatSetOption(F,MAT_SPD,PETSC_TRUE);
132:     MatCholeskyFactor(F,perm,0);
133:     MatSolve(F,b,y);
134:     VecAXPY(y,-1.0,x);
135:     VecNorm(y,NORM_2,&norm);
136:     if (norm > tol) {
137:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm);
138:     }

140:     MatMatSolve(F,RHS,SOLU);
141:     MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);
142:     MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);
143:     MatNorm(RHS,NORM_FROBENIUS,&norm);
144:     if (norm > tol) {
145:       PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm);
146:     }
147:     MatDestroy(&F);
148:     MatDestroy(&RHS2);
149:   }

151:   /* out-of-place Cholesky */
152:   MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F);
153:   if (!ldl) MatSetOption(F,MAT_SPD,PETSC_TRUE);
154:   MatCholeskyFactorSymbolic(F,mat,perm,0);
155:   MatCholeskyFactorNumeric(F,mat,0);
156:   MatSolve(F,b,y);
157:   VecAXPY(y,-1.0,x);
158:   VecNorm(y,NORM_2,&norm);
159:   if (norm > tol) {
160:     PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm);
161:   }
162:   MatDestroy(&F);

164:   /* LU factorization - perms and factinfo are ignored by LAPACK */
165:   i    = n-1;
166:   MatZeroRows(mat,1,&i,-1.0,NULL,NULL);
167:   MatMult(mat,x,b);

169:   /* in-place LU */
170:   if (inplace) {
171:     Mat RHS2;

173:     MatDuplicate(mat,MAT_COPY_VALUES,&F);
174:     MatLUFactor(F,perm,perm,0);
175:     MatSolve(F,b,y);
176:     VecAXPY(y,-1.0,x);
177:     VecNorm(y,NORM_2,&norm);
178:     if (norm > tol) {
179:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm);
180:     }
181:     MatMatSolve(F,RHS,SOLU);
182:     MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);
183:     MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);
184:     MatNorm(RHS,NORM_FROBENIUS,&norm);
185:     if (norm > tol) {
186:       PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm);
187:     }
188:     MatDestroy(&F);
189:     MatDestroy(&RHS2);
190:   }

192:   /* out-of-place LU */
193:   MatGetFactor(mat,solver,MAT_FACTOR_LU,&F);
194:   MatLUFactorSymbolic(F,mat,perm,perm,0);
195:   MatLUFactorNumeric(F,mat,0);
196:   MatSolve(F,b,y);
197:   VecAXPY(y,-1.0,x);
198:   VecNorm(y,NORM_2,&norm);
199:   if (norm > tol) {
200:     PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm);
201:   }

203:   /* free space */
204:   ISDestroy(&perm);
205:   MatDestroy(&F);
206:   MatDestroy(&mat);
207:   MatDestroy(&RHS);
208:   MatDestroy(&SOLU);
209:   VecDestroy(&x);
210:   VecDestroy(&b);
211:   VecDestroy(&y);
212:   VecDestroy(&ytmp);

214:   if (qr) {
215:     /* setup rectanglar */
216:     createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
217:     VecDuplicate(y,&ytmp);

219:     /* QR factorization - perms and factinfo are ignored by LAPACK */
220:     MatMult(mat,x,b);

222:     /* in-place QR */
223:     if (inplace) {
224:       Mat SOLU2;

226:       MatDuplicate(mat,MAT_COPY_VALUES,&F);
227:       MatQRFactor(F,NULL,0);
228:       MatSolve(F,b,y);
229:       VecAXPY(y,-1.0,x);
230:       VecNorm(y,NORM_2,&norm);
231:       if (norm > tol) {
232:         PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm);
233:       }
234:       MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS);
235:       MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2);
236:       MatMatSolve(F,RHS,SOLU2);
237:       MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN);
238:       MatNorm(SOLU2,NORM_FROBENIUS,&norm);
239:       if (norm > tol) {
240:         PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm);
241:       }
242:       MatDestroy(&F);
243:       MatDestroy(&SOLU2);
244:     }

246:     /* out-of-place QR */
247:     MatGetFactor(mat,solver,MAT_FACTOR_QR,&F);
248:     MatQRFactorSymbolic(F,mat,NULL,NULL);
249:     MatQRFactorNumeric(F,mat,NULL);
250:     MatSolve(F,b,y);
251:     VecAXPY(y,-1.0,x);
252:     VecNorm(y,NORM_2,&norm);
253:     if (norm > tol) {
254:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);
255:     }

257:     if (m == n) {
258:       /* out-of-place MatSolveTranspose */
259:       MatMultTranspose(mat,x,b);
260:       MatSolveTranspose(F,b,y);
261:       VecAXPY(y,-1.0,x);
262:       VecNorm(y,NORM_2,&norm);
263:       if (norm > tol) {
264:         PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);
265:       }
266:     }

268:     /* free space */
269:     MatDestroy(&F);
270:     MatDestroy(&mat);
271:     MatDestroy(&RHS);
272:     MatDestroy(&SOLU);
273:     VecDestroy(&x);
274:     VecDestroy(&b);
275:     VecDestroy(&y);
276:     VecDestroy(&ytmp);
277:   }
278:   PetscFinalize();
279:   return 0;
280: }

282: /*TEST

284:    test:

286:    test:
287:      requires: cuda
288:      suffix: seqdensecuda
289:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}}
290:      output_file: output/ex1_1.out

292:    test:
293:      requires: cuda
294:      suffix: seqdensecuda_2
295:      args: -ldl 0 -solver_type cuda
296:      output_file: output/ex1_1.out

298:    test:
299:      requires: cuda
300:      suffix: seqdensecuda_seqaijcusparse
301:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
302:      output_file: output/ex1_2.out

304:    test:
305:      requires: cuda viennacl
306:      suffix: seqdensecuda_seqaijviennacl
307:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
308:      output_file: output/ex1_2.out

310:    test:
311:      suffix: 4
312:      args: -m 10 -n 10
313:      output_file: output/ex1_1.out

315: TEST*/