Actual source code: matutil.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/slepcimpl.h> /*I "slepcsys.h" I*/
13: static PetscErrorCode MatCreateTile_SeqAIJ(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
14: {
15: PetscErrorCode ierr;
16: PetscInt i,j,M1,M2,N1,N2,*nnz,ncols,*scols,bs;
17: PetscScalar *svals,*buf;
18: const PetscInt *cols;
19: const PetscScalar *vals;
22: MatGetSize(A,&M1,&N1);
23: MatGetSize(D,&M2,&N2);
24: MatGetBlockSize(A,&bs);
26: PetscCalloc1((M1+M2)/bs,&nnz);
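/* nnz[] accumulates, for each block row of G, an upper bound on the number of nonzero
   blocks contributed by the four input matrices; it is passed to MatXAIJSetPreallocation() below */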
27: /* Preallocate for A */
28: if (a!=0.0) {
29: for (i=0;i<(M1+bs-1)/bs;i++) {
30: MatGetRow(A,i*bs,&ncols,NULL,NULL);
31: nnz[i] += ncols/bs;
32: MatRestoreRow(A,i*bs,&ncols,NULL,NULL);
33: }
34: }
35: /* Preallocate for B */
36: if (b!=0.0) {
37: for (i=0;i<(M1+bs-1)/bs;i++) {
38: MatGetRow(B,i*bs,&ncols,NULL,NULL);
39: nnz[i] += ncols/bs;
40: MatRestoreRow(B,i*bs,&ncols,NULL,NULL);
41: }
42: }
43: /* Preallocate for C */
44: if (c!=0.0) {
45: for (i=0;i<(M2+bs-1)/bs;i++) {
46: MatGetRow(C,i*bs,&ncols,NULL,NULL);
47: nnz[i+M1/bs] += ncols/bs;
48: MatRestoreRow(C,i*bs,&ncols,NULL,NULL);
49: }
50: }
51: /* Preallocate for D */
52: if (d!=0.0) {
53: for (i=0;i<(M2+bs-1)/bs;i++) {
54: MatGetRow(D,i*bs,&ncols,NULL,NULL);
55: nnz[i+M1/bs] += ncols/bs;
56: MatRestoreRow(D,i*bs,&ncols,NULL,NULL);
57: }
58: }
59: MatXAIJSetPreallocation(G,bs,nnz,NULL,NULL,NULL);
60: PetscFree(nnz);
62: PetscMalloc2(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols);
63: /* Transfer A */
64: if (a!=0.0) {
65: for (i=0;i<M1;i++) {
66: MatGetRow(A,i,&ncols,&cols,&vals);
67: if (a!=1.0) {
68: svals=buf;
69: for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
70: } else svals=(PetscScalar*)vals;
71: MatSetValues(G,1,&i,ncols,cols,svals,INSERT_VALUES);
72: MatRestoreRow(A,i,&ncols,&cols,&vals);
73: }
74: }
75: /* Transfer B */
76: if (b!=0.0) {
77: for (i=0;i<M1;i++) {
78: MatGetRow(B,i,&ncols,&cols,&vals);
79: if (b!=1.0) {
80: svals=buf;
81: for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
82: } else svals=(PetscScalar*)vals;
83: for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
84: MatSetValues(G,1,&i,ncols,scols,svals,INSERT_VALUES);
85: MatRestoreRow(B,i,&ncols,&cols,&vals);
86: }
87: }
88: /* Transfer C */
89: if (c!=0.0) {
90: for (i=0;i<M2;i++) {
91: MatGetRow(C,i,&ncols,&cols,&vals);
92: if (c!=1.0) {
93: svals=buf;
94: for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
95: } else svals=(PetscScalar*)vals;
96: j = i+M1;
97: MatSetValues(G,1,&j,ncols,cols,svals,INSERT_VALUES);
98: MatRestoreRow(C,i,&ncols,&cols,&vals);
99: }
100: }
101: /* Transfer D */
102: if (d!=0.0) {
103: for (i=0;i<M2;i++) {
104: MatGetRow(D,i,&ncols,&cols,&vals);
105: if (d!=1.0) {
106: svals=buf;
107: for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
108: } else svals=(PetscScalar*)vals;
109: for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
110: j = i+M1;
111: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
112: MatRestoreRow(D,i,&ncols,&cols,&vals);
113: }
114: }
115: PetscFree2(buf,scols);
116: return(0);
117: }
119: static PetscErrorCode MatCreateTile_MPIAIJ(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
120: {
122: PetscMPIInt np;
123: PetscInt p,i,j,N1,N2,m1,m2,n1,n2,*map1,*map2;
124: PetscInt *dnz,*onz,ncols,*scols,start,gstart;
125: PetscScalar *svals,*buf;
126: const PetscInt *cols,*mapptr1,*mapptr2;
127: const PetscScalar *vals;
130: MatGetSize(A,NULL,&N1);
131: MatGetLocalSize(A,&m1,&n1);
132: MatGetSize(D,NULL,&N2);
133: MatGetLocalSize(D,&m2,&n2);
135: /* Create mappings */
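/* map1/map2 translate global column indices of the first ([A;C]) and second ([B;D]) block
   column into the shuffled numbering of G: on every process the locally owned columns of
   [A;C] come first, followed by the locally owned columns of [B;D]. For instance, with two
   processes each owning one column of A and one of B, the new column order is
   A-col0, B-col0, A-col1, B-col1. */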
136: MPI_Comm_size(PetscObjectComm((PetscObject)G),&np);
137: MatGetOwnershipRangesColumn(A,&mapptr1);
138: MatGetOwnershipRangesColumn(B,&mapptr2);
139: PetscMalloc4(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols,N1,&map1,N2,&map2);
140: for (p=0;p<np;p++) {
141: for (i=mapptr1[p];i<mapptr1[p+1];i++) map1[i] = i+mapptr2[p];
142: }
143: for (p=0;p<np;p++) {
144: for (i=mapptr2[p];i<mapptr2[p+1];i++) map2[i] = i+mapptr1[p+1];
145: }
147: MatPreallocateInitialize(PetscObjectComm((PetscObject)G),m1+m2,n1+n2,dnz,onz);
148: MatGetOwnershipRange(G,&gstart,NULL);
149: /* Preallocate for A */
150: if (a!=0.0) {
151: MatGetOwnershipRange(A,&start,NULL);
152: for (i=0;i<m1;i++) {
153: MatGetRow(A,i+start,&ncols,&cols,NULL);
154: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
155: MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
156: MatRestoreRow(A,i+start,&ncols,&cols,NULL);
157: }
158: }
159: /* Preallocate for B */
160: if (b!=0.0) {
161: MatGetOwnershipRange(B,&start,NULL);
162: for (i=0;i<m1;i++) {
163: MatGetRow(B,i+start,&ncols,&cols,NULL);
164: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
165: MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
166: MatRestoreRow(B,i+start,&ncols,&cols,NULL);
167: }
168: }
169: /* Preallocate for C */
170: if (c!=0.0) {
171: MatGetOwnershipRange(C,&start,NULL);
172: for (i=0;i<m2;i++) {
173: MatGetRow(C,i+start,&ncols,&cols,NULL);
174: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
175: MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
176: MatRestoreRow(C,i+start,&ncols,&cols,NULL);
177: }
178: }
179: /* Preallocate for D */
180: if (d!=0.0) {
181: MatGetOwnershipRange(D,&start,NULL);
182: for (i=0;i<m2;i++) {
183: MatGetRow(D,i+start,&ncols,&cols,NULL);
184: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
185: MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
186: MatRestoreRow(D,i+start,&ncols,&cols,NULL);
187: }
188: }
189: MatMPIAIJSetPreallocation(G,0,dnz,0,onz);
190: MatPreallocateFinalize(dnz,onz);
192: /* Transfer A */
193: if (a!=0.0) {
194: MatGetOwnershipRange(A,&start,NULL);
195: for (i=0;i<m1;i++) {
196: MatGetRow(A,i+start,&ncols,&cols,&vals);
197: if (a!=1.0) {
198: svals=buf;
199: for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
200: } else svals=(PetscScalar*)vals;
201: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
202: j = gstart+i;
203: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
204: MatRestoreRow(A,i+start,&ncols,&cols,&vals);
205: }
206: }
207: /* Transfer B */
208: if (b!=0.0) {
209: MatGetOwnershipRange(B,&start,NULL);
210: for (i=0;i<m1;i++) {
211: MatGetRow(B,i+start,&ncols,&cols,&vals);
212: if (b!=1.0) {
213: svals=buf;
214: for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
215: } else svals=(PetscScalar*)vals;
216: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
217: j = gstart+i;
218: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
219: MatRestoreRow(B,i+start,&ncols,&cols,&vals);
220: }
221: }
222: /* Transfer C */
223: if (c!=0.0) {
224: MatGetOwnershipRange(C,&start,NULL);
225: for (i=0;i<m2;i++) {
226: MatGetRow(C,i+start,&ncols,&cols,&vals);
227: if (c!=1.0) {
228: svals=buf;
229: for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
230: } else svals=(PetscScalar*)vals;
231: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
232: j = gstart+m1+i;
233: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
234: MatRestoreRow(C,i+start,&ncols,&cols,&vals);
235: }
236: }
237: /* Transfer D */
238: if (d!=0.0) {
239: MatGetOwnershipRange(D,&start,NULL);
240: for (i=0;i<m2;i++) {
241: MatGetRow(D,i+start,&ncols,&cols,&vals);
242: if (d!=1.0) {
243: svals=buf;
244: for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
245: } else svals=(PetscScalar*)vals;
246: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
247: j = gstart+m1+i;
248: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
249: MatRestoreRow(D,i+start,&ncols,&cols,&vals);
250: }
251: }
252: PetscFree4(buf,scols,map1,map2);
253: return(0);
254: }
256: /*@
257: MatCreateTile - Explicitly build a matrix from four blocks, G = [ a*A b*B; c*C d*D ].
259: Collective on Mat
261: Input Parameters:
262: + A - matrix for top-left block
263: . a - scaling factor for block A
264: . B - matrix for top-right block
265: . b - scaling factor for block B
266: . C - matrix for bottom-left block
267: . c - scaling factor for block C
268: . D - matrix for bottom-right block
269: - d - scaling factor for block D
271: Output Parameter:
272: . G - the resulting matrix
274: Notes:
275: In the case of a parallel matrix, a permuted version of G is returned. The permutation
276: is a perfect shuffle such that the local parts of A, B, C, D remain in the local part of
277: G for the same process.
279: Matrix G must be destroyed by the user.
281: Level: developer
282: @*/
283: PetscErrorCode MatCreateTile(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat *G)
284: {
286: PetscInt M1,M2,N1,N2,M,N,m1,m2,n1,n2,m,n,bs;
287: PetscBool flg1;
288: MatType Atype;
300: /* check row 1 */
301: MatGetSize(A,&M1,NULL);
302: MatGetLocalSize(A,&m1,NULL);
303: MatGetSize(B,&M,NULL);
304: MatGetLocalSize(B,&m,NULL);
305: if (M!=M1 || m!=m1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Incompatible dimensions");
306: /* check row 2 */
307: MatGetSize(C,&M2,NULL);
308: MatGetLocalSize(C,&m2,NULL);
309: MatGetSize(D,&M,NULL);
310: MatGetLocalSize(D,&m,NULL);
311: if (M!=M2 || m!=m2) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Incompatible dimensions");
312: /* check column 1 */
313: MatGetSize(A,NULL,&N1);
314: MatGetLocalSize(A,NULL,&n1);
315: MatGetSize(C,NULL,&N);
316: MatGetLocalSize(C,NULL,&n);
317: if (N!=N1 || n!=n1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Incompatible dimensions");
318: /* check column 2 */
319: MatGetSize(B,NULL,&N2);
320: MatGetLocalSize(B,NULL,&n2);
321: MatGetSize(D,NULL,&N);
322: MatGetLocalSize(D,NULL,&n);
323: if (N!=N2 || n!=n2) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Incompatible dimensions");
325: MatGetType(A,&Atype);
326: MatGetBlockSize(A,&bs);
327: MatCreate(PetscObjectComm((PetscObject)A),G);
328: MatSetSizes(*G,m1+m2,n1+n2,M1+M2,N1+N2);
329: MatSetType(*G,Atype);
330: MatSetBlockSize(*G,bs);
331: MatSetUp(*G);
333: PetscObjectTypeCompareAny((PetscObject)A,&flg1,MATMPIAIJ,MATMPIAIJCUSPARSE,"");
334: if (flg1) {
335: MatCreateTile_MPIAIJ(a,A,b,B,c,C,d,D,*G);
336: } else {
337: PetscObjectTypeCompareAny((PetscObject)A,&flg1,MATSEQAIJ,MATSEQAIJCUSPARSE,MATSEQBAIJ,"");
338: if (flg1) {
339: MatCreateTile_SeqAIJ(a,A,b,B,c,C,d,D,*G);
340: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not implemented for this matrix type");
341: }
342: MatAssemblyBegin(*G,MAT_FINAL_ASSEMBLY);
343: MatAssemblyEnd(*G,MAT_FINAL_ASSEMBLY);
344: return(0);
345: }
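
/*
   A minimal usage sketch (not part of the original file; the helper name BuildBlockOperator
   is hypothetical): assemble G = [ A 2*B ; 0 -D ]. A dimensionally compatible matrix must
   still be passed for the zero block even though its scaling factor is 0.0, and the caller
   owns the resulting G.
*/
static PetscErrorCode BuildBlockOperator(Mat A,Mat B,Mat C,Mat D,Mat *G)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* G = [ 1*A 2*B ; 0*C -1*D ] */
  ierr = MatCreateTile(1.0,A,2.0,B,0.0,C,-1.0,D,G);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}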
347: /*@C
348: MatCreateVecsEmpty - Get vector(s) compatible with the matrix, i.e. with the same
349: parallel layout, but without an internal array.
351: Collective on Mat
353: Input Parameter:
354: . mat - the matrix
356: Output Parameters:
357: + right - (optional) vector that the matrix can be multiplied against
358: - left - (optional) vector that the matrix vector product can be stored in
360: Note:
361: This is similar to MatCreateVecs(), but the new vectors do not have an internal
362: array, so the intended usage is with VecPlaceArray().
364: Level: developer
365: @*/
366: PetscErrorCode MatCreateVecsEmpty(Mat mat,Vec *right,Vec *left)
367: {
369: PetscBool notsup,cuda=PETSC_FALSE;
370: PetscInt M,N,mloc,nloc,rbs,cbs;
371: PetscMPIInt size;
377: PetscObjectTypeCompareAny((PetscObject)mat,&notsup,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
378: if (notsup) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Matrix type %s not supported",((PetscObject)mat)->type_name);
379: #if defined(PETSC_HAVE_CUDA)
380: PetscObjectTypeCompareAny((PetscObject)mat,&cuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
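/* a MATSHELL may still operate on CUDA vectors; detect this case by creating a work
   vector with MatCreateVecs() and checking its type */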
381: if (!cuda) {
382: PetscBool flg;
383: Vec v;
384: PetscObjectTypeCompare((PetscObject)mat,MATSHELL,&flg);
385: if (flg) {
386: MatCreateVecs(mat,&v,NULL);
387: PetscObjectTypeCompareAny((PetscObject)v,&cuda,VECSEQCUDA,VECMPICUDA,"");
388: VecDestroy(&v);
389: }
390: }
391: #endif
392: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
393: MatGetLocalSize(mat,&mloc,&nloc);
394: MatGetSize(mat,&M,&N);
395: MatGetBlockSizes(mat,&rbs,&cbs);
397: if (right) {
398: if (cuda) {
399: #if defined(PETSC_HAVE_CUDA)
400: if (size>1) {
401: VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
402: } else {
403: VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
404: }
405: #endif
406: } else {
407: if (size>1) {
408: VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
409: } else {
410: VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
411: }
412: }
413: }
414: if (left) {
415: if (cuda) {
416: #if defined(PETSC_HAVE_CUDA)
417: if (size>1) {
418: VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
419: } else {
420: VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
421: }
422: #endif
423: } else {
424: if (size>1) {
425: VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
426: } else {
427: VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
428: }
429: }
430: }
431: return(0);
432: }
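
/*
   A minimal usage sketch (not part of the original file; the helper name MatMultOnUserArrays
   is hypothetical): wrap user-provided arrays, assumed to match the column and row layouts of
   the matrix, in array-less vectors via VecPlaceArray() and perform a matrix-vector product.
*/
static PetscErrorCode MatMultOnUserArrays(Mat A,PetscScalar *xarr,PetscScalar *yarr)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatCreateVecsEmpty(A,&x,&y);CHKERRQ(ierr);
  ierr = VecPlaceArray(x,xarr);CHKERRQ(ierr);   /* x borrows xarr, no copy is made */
  ierr = VecPlaceArray(y,yarr);CHKERRQ(ierr);   /* y borrows yarr */
  ierr = MatMult(A,x,y);CHKERRQ(ierr);          /* yarr <- A*xarr */
  ierr = VecResetArray(x);CHKERRQ(ierr);
  ierr = VecResetArray(y);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}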