/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

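/* Illustrative usage sketch (not part of this implementation): an application
 * typically selects this class at runtime with "-mat_type aijmkl", or
 * programmatically via, e.g.,
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *
 * followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
 * workflow; m and n here are placeholders for the matrix dimensions. */
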
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>   /* Sparse BLAS */

typedef struct {
  PetscBool        no_SpMV2;          /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection;  /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized;  /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;             /* State of the matrix when the MKL handle was last built; used to detect stale handles. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA;           /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here: that routine appears to be
   * intended only for *building* a matrix, not for tearing one down, and the
   * SuperLU matrix class follows the same convention. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

  if (aijmkl->no_SpMV2) return(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    /* Build the MKL handle for the duplicate (not for A, whose handle is still valid). */
    MatSeqAIJMKL_create_mkl_handle(*M);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}

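/* Illustrative sketch (not part of this implementation): with the default "lazy"
 * inspection, the cost of the MKL inspection step lands inside the first
 * MatMult()-type call after each assembly. Requesting eager inspection, e.g.,
 *
 *   ./app -mat_type aijmkl -mat_aijmkl_eager_inspection
 *
 * moves that cost into MatAssemblyEnd(), which can make per-iteration timings
 * easier to interpret when profiling. (The options are defined in
 * MatConvert_SeqAIJ_SeqAIJMKL() below; "app" is a placeholder executable.) */
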
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A'*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A'*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A'*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

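/* Illustrative sketch (not part of this implementation): the kernel above is
 * reached through the generic PETSc interface, e.g.,
 *
 *   Mat C;
 *   MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
 *
 * with A and B both of type MATSEQAIJMKL. A subsequent call with
 * MAT_REUSE_MATRIX dispatches to MatMatMultNumeric() instead, as described in
 * the comment above. */
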
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* mkl_sparse_sypr() requires a symmetric A, so fall back to the SeqAIJ
   * implementation unless A is known to be symmetric. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* mkl_sparse_sypr() requires a symmetric A, so fall back to the SeqAIJ
   * implementation unless A is known to be symmetric. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif

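/* Illustrative sketch (not part of this implementation): the PtAP kernels above
 * are reached through the generic interface, e.g.,
 *
 *   Mat C;
 *   MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
 *
 * and, per the checks above, take the mkl_sparse_sypr() path only when A has
 * been marked symmetric, e.g. with MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE). */
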
/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# ifdef PETSC_HAVE_MKL_SPARSE_SP2M
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  ifndef PETSC_USE_COMPLEX
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
  /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were
   * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new
   * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the
   * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old
   * interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}

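/* Illustrative sketch (not part of this implementation): an already-assembled
 * SeqAIJ matrix can be switched to this class in place with
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 *
 * after which MatMult() and related operations dispatch to the MKL-backed
 * routines installed above. */
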
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}

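/* Illustrative usage sketch (not part of this implementation); the sizes and
 * per-row nonzero estimate are placeholders:
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,100,100,5,NULL,&A);
 *   ...insert entries with MatSetValues(), then...
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 */
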
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}