Actual source code: fdaij.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sell/seq/sell.h>
  4: #include <petsc/private/isimpl.h>

  6: /*
  7:     This routine is shared by SeqAIJ and SeqBAIJ matrices,
  8:     since it operators only on the nonzero structure of the elements or blocks.
  9: */
 10: PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
 11: {
 12:   PetscInt       bs,nis=iscoloring->n,m=mat->rmap->n;
 13:   PetscBool      isBAIJ,isSELL;

 15:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
 16:   MatGetBlockSize(mat,&bs);
 17:   PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);
 18:   PetscObjectTypeCompare((PetscObject)mat,MATSEQSELL,&isSELL);
 19:   if (isBAIJ) {
 20:     c->brows = m;
 21:     c->bcols = 1;
 22:   } else { /* seqaij matrix */
 23:     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
 24:     PetscReal  mem;
 25:     PetscInt   nz,brows,bcols;
 26:     if (isSELL) {
 27:       Mat_SeqSELL *spA = (Mat_SeqSELL*)mat->data;
 28:       nz = spA->nz;
 29:     } else {
 30:       Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data;
 31:       nz = spA->nz;
 32:     }

 34:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
 35:     mem   = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt);
 36:     bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar)));
 37:     brows = 1000/bcols;
 38:     if (bcols > nis) bcols = nis;
 39:     if (brows == 0 || brows > m) brows = m;
 40:     c->brows = brows;
 41:     c->bcols = bcols;
 42:   }

 44:   c->M       = mat->rmap->N/bs;   /* set total rows, columns and local rows */
 45:   c->N       = mat->cmap->N/bs;
 46:   c->m       = mat->rmap->N/bs;
 47:   c->rstart  = 0;
 48:   c->ncolors = nis;
 49:   c->ctype   = iscoloring->ctype;
 50:   return 0;
 51: }

 53: /*
 54:  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
 55:    Input Parameters:
 56: +  mat - the matrix containing the nonzero structure of the Jacobian
 57: .  color - the coloring context
 58: -  nz - number of local non-zeros in mat
 59: */
 60: PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat,MatFDColoring c,PetscInt nz)
 61: {
 62:   PetscInt       i,j,nrows,nbcols,brows=c->brows,bcols=c->bcols,mbs=c->m,nis=c->ncolors;
 63:   PetscInt       *color_start,*row_start,*nrows_new,nz_new,row_end;

 65:   if (brows < 1 || brows > mbs) brows = mbs;
 66:   PetscMalloc2(bcols+1,&color_start,bcols,&row_start);
 67:   PetscCalloc1(nis,&nrows_new);
 68:   PetscMalloc1(bcols*mat->rmap->n,&c->dy);
 69:   PetscLogObjectMemory((PetscObject)c,bcols*mat->rmap->n*sizeof(PetscScalar));

 71:   nz_new = 0;
 72:   nbcols = 0;
 73:   color_start[bcols] = 0;

 75:   if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
 76:     MatEntry *Jentry_new,*Jentry=c->matentry;

 78:     PetscMalloc1(nz,&Jentry_new);
 79:     for (i=0; i<nis; i+=bcols) { /* loop over colors */
 80:       if (i + bcols > nis) {
 81:         color_start[nis - i] = color_start[bcols];
 82:         bcols                = nis - i;
 83:       }

 85:       color_start[0] = color_start[bcols];
 86:       for (j=0; j<bcols; j++) {
 87:         color_start[j+1] = c->nrows[i+j] + color_start[j];
 88:         row_start[j]     = 0;
 89:       }

 91:       row_end = brows;
 92:       if (row_end > mbs) row_end = mbs;

 94:       while (row_end <= mbs) {   /* loop over block rows */
 95:         for (j=0; j<bcols; j++) {       /* loop over block columns */
 96:           nrows = c->nrows[i+j];
 97:           nz    = color_start[j];
 98:           while (row_start[j] < nrows) {
 99:             if (Jentry[nz].row >= row_end) {
100:               color_start[j] = nz;
101:               break;
102:             } else { /* copy Jentry[nz] to Jentry_new[nz_new] */
103:               Jentry_new[nz_new].row     = Jentry[nz].row + j*mbs; /* index in dy-array */
104:               Jentry_new[nz_new].col     = Jentry[nz].col;
105:               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
106:               nz_new++; nz++; row_start[j]++;
107:             }
108:           }
109:         }
110:         if (row_end == mbs) break;
111:         row_end += brows;
112:         if (row_end > mbs) row_end = mbs;
113:       }
114:       nrows_new[nbcols++] = nz_new;
115:     }
116:     PetscFree(Jentry);
117:     c->matentry = Jentry_new;
118:   } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
119:     MatEntry2 *Jentry2_new,*Jentry2=c->matentry2;

121:     PetscMalloc1(nz,&Jentry2_new);
122:     for (i=0; i<nis; i+=bcols) { /* loop over colors */
123:       if (i + bcols > nis) {
124:         color_start[nis - i] = color_start[bcols];
125:         bcols                = nis - i;
126:       }

128:       color_start[0] = color_start[bcols];
129:       for (j=0; j<bcols; j++) {
130:         color_start[j+1] = c->nrows[i+j] + color_start[j];
131:         row_start[j]     = 0;
132:       }

134:       row_end = brows;
135:       if (row_end > mbs) row_end = mbs;

137:       while (row_end <= mbs) {   /* loop over block rows */
138:         for (j=0; j<bcols; j++) {       /* loop over block columns */
139:           nrows = c->nrows[i+j];
140:           nz    = color_start[j];
141:           while (row_start[j] < nrows) {
142:             if (Jentry2[nz].row >= row_end) {
143:               color_start[j] = nz;
144:               break;
145:             } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */
146:               Jentry2_new[nz_new].row     = Jentry2[nz].row + j*mbs; /* index in dy-array */
147:               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
148:               nz_new++; nz++; row_start[j]++;
149:             }
150:           }
151:         }
152:         if (row_end == mbs) break;
153:         row_end += brows;
154:         if (row_end > mbs) row_end = mbs;
155:       }
156:       nrows_new[nbcols++] = nz_new;
157:     }
158:     PetscFree(Jentry2);
159:     c->matentry2 = Jentry2_new;
160:   } /* ---------------------------------------------*/

162:   PetscFree2(color_start,row_start);

164:   for (i=nbcols-1; i>0; i--) nrows_new[i] -= nrows_new[i-1];
165:   PetscFree(c->nrows);
166:   c->nrows = nrows_new;
167:   return 0;
168: }

170: PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
171: {
172:   PetscInt          i,n,nrows,mbs=c->m,j,k,m,ncols,col,nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz,tmp;
173:   const PetscInt    *is,*row,*ci,*cj;
174:   PetscBool         isBAIJ,isSELL;
175:   const PetscScalar *A_val;
176:   PetscScalar       **valaddrhit;
177:   MatEntry          *Jentry;
178:   MatEntry2         *Jentry2;

180:   ISColoringGetIS(iscoloring,PETSC_OWN_POINTER,PETSC_IGNORE,&c->isa);

182:   MatGetBlockSize(mat,&bs);
183:   PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);
184:   PetscObjectTypeCompare((PetscObject)mat,MATSEQSELL,&isSELL);
185:   if (isBAIJ) {
186:     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ*)mat->data;

188:     A_val = spA->a;
189:     nz    = spA->nz;
190:   } else if (isSELL) {
191:     Mat_SeqSELL *spA = (Mat_SeqSELL*)mat->data;

193:     A_val = spA->val;
194:     nz    = spA->nz;
195:     bs    = 1; /* only bs=1 is supported for SeqSELL matrix */
196:   } else {
197:     Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data;

199:     A_val = spA->a;
200:     nz    = spA->nz;
201:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
202:   }

204:   PetscMalloc2(nis,&c->ncolumns,nis,&c->columns);
205:   PetscMalloc1(nis,&c->nrows); /* nrows is freeed separately from ncolumns and columns */
206:   PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));

208:   if (c->htype[0] == 'd') {
209:     PetscMalloc1(nz,&Jentry);
210:     PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));
211:     c->matentry = Jentry;
212:   } else if (c->htype[0] == 'w') {
213:     PetscMalloc1(nz,&Jentry2);
214:     PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));
215:     c->matentry2 = Jentry2;
216:   } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");

218:   if (isBAIJ) {
219:     MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
220:   } else if (isSELL) {
221:     MatGetColumnIJ_SeqSELL_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
222:   } else {
223:     MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
224:   }

226:   PetscCalloc1(c->m,&rowhit);
227:   PetscMalloc1(c->m,&valaddrhit);

229:   nz = 0;
230:   for (i=0; i<nis; i++) { /* loop over colors */
231:     ISGetLocalSize(c->isa[i],&n);
232:     ISGetIndices(c->isa[i],&is);

234:     c->ncolumns[i] = n;
235:     c->columns[i]  = (PetscInt*)is;
236:     /* note: we know that c->isa is going to be around as long at the c->columns values */
237:     ISRestoreIndices(c->isa[i],&is);

239:     /* fast, crude version requires O(N*N) work */
240:     bs2   = bs*bs;
241:     nrows = 0;
242:     for (j=0; j<n; j++) {  /* loop over columns */
243:       col    = is[j];
244:       tmp    = ci[col];
245:       row    = cj + tmp;
246:       m      = ci[col+1] - tmp;
247:       nrows += m;
248:       for (k=0; k<m; k++) {  /* loop over columns marking them in rowhit */
249:         rowhit[*row]       = col + 1;
250:         valaddrhit[*row++] = (PetscScalar*)&A_val[bs2*spidx[tmp + k]];
251:       }
252:     }
253:     c->nrows[i] = nrows; /* total num of rows for this color */

255:     if (c->htype[0] == 'd') {
256:       for (j=0; j<mbs; j++) { /* loop over rows */
257:         if (rowhit[j]) {
258:           Jentry[nz].row     = j;              /* local row index */
259:           Jentry[nz].col     = rowhit[j] - 1;  /* local column index */
260:           Jentry[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
261:           nz++;
262:           rowhit[j] = 0.0;                     /* zero rowhit for reuse */
263:         }
264:       }
265:     }  else { /* c->htype == 'wp' */
266:       for (j=0; j<mbs; j++) { /* loop over rows */
267:         if (rowhit[j]) {
268:           Jentry2[nz].row     = j;              /* local row index */
269:           Jentry2[nz].valaddr = valaddrhit[j];  /* address of mat value for this entry */
270:           nz++;
271:           rowhit[j] = 0.0;                     /* zero rowhit for reuse */
272:         }
273:       }
274:     }
275:   }

277:   if (c->bcols > 1) {  /* reorder Jentry for faster MatFDColoringApply() */
278:     MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);
279:   }

281:   if (isBAIJ) {
282:     MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
283:     PetscMalloc1(bs*mat->rmap->n,&c->dy);
284:     PetscLogObjectMemory((PetscObject)c,bs*mat->rmap->n*sizeof(PetscScalar));
285:   } else if (isSELL) {
286:     MatRestoreColumnIJ_SeqSELL_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
287:   } else {
288:     MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
289:   }
290:   PetscFree(rowhit);
291:   PetscFree(valaddrhit);
292:   ISColoringRestoreIS(iscoloring,PETSC_OWN_POINTER,&c->isa);

294:   VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);
295:   PetscInfo(c,"ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n",c->ncolors,c->brows,c->bcols);
296:   return 0;
297: }