Actual source code: aijmkl.c

petsc-3.9.3 2018-07-02
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
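
/* For reference, a small illustration (not part of the original source) of the
 * compressed row storage used here. The 3x3 matrix
 *
 *   [ 1 0 2 ]
 *   [ 0 3 0 ]
 *   [ 4 0 5 ]
 *
 * is stored with 0-based indexing as
 *
 *   aa = {1, 2, 3, 4, 5}   nonzero values, row by row
 *   aj = {0, 2, 1, 0, 2}   column index of each entry in aa
 *   ai = {0, 2, 3, 5}      position in aa/aj where each row starts
 *
 * These correspond to the a->a, a->j, and a->i arrays referenced throughout this file. */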

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */

typedef struct {
  PetscBool no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t csrA;       /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
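
/* A minimal usage sketch (illustrative, not part of the original source): a user
 * never touches Mat_SeqAIJMKL directly, but selects this class either
 * programmatically,
 *
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,m,n,m,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *
 * or at run time with the standard PETSc option -mat_type seqaijmkl; the fields
 * above are then managed internally by the routines in this file. */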

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat            B       = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here; as with the SuperLU matrix class,
   * MatSetType() appears to be intended only for use when building a matrix, not
   * when tearing one down. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

  if (aijmkl->no_SpMV2) return(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz != 0) && !(A->structure_only)) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: We must pass in &dummy below because MKL requires a valid pointer for this output array, even though we don't use it. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: We must pass in &dummy below because MKL requires a valid pointer for this output array, even though we don't use it. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
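
/* A hedged example (not part of the original source; the program name is
 * hypothetical) of choosing the inspection mode from the command line:
 *
 *   ./app -mat_type seqaijmkl -mat_aijmkl_eager_inspection
 *
 * With -mat_aijmkl_eager_inspection, MatAssemblyEnd_SeqAIJMKL() above builds the
 * optimized MKL handle as part of assembly; without it, handle creation is
 * deferred until the first operation (e.g., MatMult()) that needs it. */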

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A*x, then add yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A'*x, then add yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n=A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# ifdef PETSC_HAVE_MKL_SPARSE_SP2M
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   ifndef PETSC_USE_COMPLEX
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
  /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were
   * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new
   * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the
   * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old
   * interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
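
/* Example of the second use mentioned in the comment above (illustrative, not
 * part of the original source): converting an existing, assembled SeqAIJ matrix
 * in place so that subsequent operations use the MKL kernels:
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 */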

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
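
/* A small usage sketch for the routine above (illustrative, not part of the
 * original source): create a 100x100 AIJMKL matrix expecting about 5 nonzeros
 * per row, then fill it with MatSetValues() and assemble as usual:
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,100,100,5,NULL,&A);
 *   // ... calls to MatSetValues() ...
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 */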

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}