Actual source code: matstash.c

petsc-3.8.4 2018-03-24

  2:  #include <petsc/private/matimpl.h>

  4: #define DEFAULT_STASH_SIZE   10000

  6: static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
  7: static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
  8: static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
  9: static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
 10: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
 11: static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
 12: static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

 14: /*
 15:   MatStashCreate_Private - Creates a stash, currently used for all the parallel
 16:   matrix implementations. The stash is where elements of a matrix destined
 17:   to be stored on other processors are kept until matrix assembly is done.

 19:   This is a simple-minded stash that simply adds entries to the end of the stash.

 21:   Input Parameters:
 22:   comm - communicator, required for scatters.
 23:   bs   - stash block size, used when stashing blocks of values

 25:   Output Parameters:
 26:   stash    - the newly created stash
 27: */
 28: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
 29: {
 31:   PetscInt       max,*opt,nopt,i;
 32:   PetscBool      flg;

 35:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 36:   stash->comm = comm;

 38:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 39:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 40:   MPI_Comm_size(stash->comm,&stash->size);
 41:   MPI_Comm_rank(stash->comm,&stash->rank);
 42:   PetscMalloc1(2*stash->size,&stash->flg_v);
 43:   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;


 46:   nopt = stash->size;
 47:   PetscMalloc1(nopt,&opt);
 48:   PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);
 49:   if (flg) {
 50:     if (nopt == 1)                max = opt[0];
 51:     else if (nopt == stash->size) max = opt[stash->rank];
 52:     else if (stash->rank < nopt)  max = opt[stash->rank];
 53:     else                          max = 0; /* Use default */
 54:     stash->umax = max;
 55:   } else {
 56:     stash->umax = 0;
 57:   }
 58:   PetscFree(opt);
 59:   if (bs <= 0) bs = 1;

 61:   stash->bs         = bs;
 62:   stash->nmax       = 0;
 63:   stash->oldnmax    = 0;
 64:   stash->n          = 0;
 65:   stash->reallocs   = -1;
 66:   stash->space_head = 0;
 67:   stash->space      = 0;

 69:   stash->send_waits  = 0;
 70:   stash->recv_waits  = 0;
 71:   stash->send_status = 0;
 72:   stash->nsends      = 0;
 73:   stash->nrecvs      = 0;
 74:   stash->svalues     = 0;
 75:   stash->rvalues     = 0;
 76:   stash->rindices    = 0;
 77:   stash->nprocessed  = 0;
 78:   stash->reproduce   = PETSC_FALSE;
 79:   stash->blocktype   = MPI_DATATYPE_NULL;

 81:   PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);
 82:   PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);
 83:   if (flg) {
 84:     stash->ScatterBegin   = MatStashScatterBegin_BTS;
 85:     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
 86:     stash->ScatterEnd     = MatStashScatterEnd_BTS;
 87:     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
 88:   } else {
 89:     stash->ScatterBegin   = MatStashScatterBegin_Ref;
 90:     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
 91:     stash->ScatterEnd     = MatStashScatterEnd_Ref;
 92:     stash->ScatterDestroy = NULL;
 93:   }
 94:   return(0);
 95: }
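
/* Illustrative sketch (not part of PETSc): the basic life cycle of a stash.  It assumes the
   include at the top of this file and an initialized PETSc/MPI environment; the function name
   DemoStashLifeCycle is hypothetical. */
static PetscErrorCode DemoStashLifeCycle(MPI_Comm comm)
{
  PetscErrorCode ierr;
  MatStash       stash;

  ierr = MatStashCreate_Private(comm,1,&stash);CHKERRQ(ierr);        /* bs = 1: a scalar (non-blocked) stash */
  ierr = MatStashSetInitialSize_Private(&stash,10000);CHKERRQ(ierr); /* same effect as -matstash_initial_size 10000 */
  /* ... MatStashValuesRow_Private() and the scatter routines below would be used here ... */
  ierr = MatStashDestroy_Private(&stash);CHKERRQ(ierr);
  return 0;
}
/* The options read above can also be set at run time, e.g. -matstash_initial_size 10000 to
   pre-size the stash, or -matstash_bts to select the BTS scatter implementation defined later
   in this file. */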

 97: /*
 98:    MatStashDestroy_Private - Destroy the stash
 99: */
100: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
101: {

105:   PetscMatStashSpaceDestroy(&stash->space_head);
106:   if (stash->ScatterDestroy) {(*stash->ScatterDestroy)(stash);}

108:   stash->space = 0;

110:   PetscFree(stash->flg_v);
111:   return(0);
112: }

114: /*
115:    MatStashScatterEnd_Private - This is called as the final stage of
116:    scatter. The final stages of message passing are completed here, and
117:    all the memory used for message passing is cleaned up. This
118:    routine also resets the stash, and deallocates the memory used
119:    for the stash. It also keeps track of the current memory usage
120:    so that the same value can be used the next time through.
121: */
122: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
123: {

127:   (*stash->ScatterEnd)(stash);
128:   return(0);
129: }

131: static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
132: {
134:   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
135:   MPI_Status     *send_status;

138:   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
139:   /* wait on sends */
140:   if (nsends) {
141:     PetscMalloc1(2*nsends,&send_status);
142:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
143:     PetscFree(send_status);
144:   }

146:   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that the
147:      wastage of space is reduced the next time this stash is used.
148:      Update oldnmax only if it increases. */
149:   if (stash->n) {
150:     bs2     = stash->bs*stash->bs;
151:     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
152:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
153:   }

155:   stash->nmax       = 0;
156:   stash->n          = 0;
157:   stash->reallocs   = -1;
158:   stash->nprocessed = 0;

160:   PetscMatStashSpaceDestroy(&stash->space_head);

162:   stash->space = 0;

164:   PetscFree(stash->send_waits);
165:   PetscFree(stash->recv_waits);
166:   PetscFree2(stash->svalues,stash->sindices);
167:   PetscFree(stash->rvalues[0]);
168:   PetscFree(stash->rvalues);
169:   PetscFree(stash->rindices[0]);
170:   PetscFree(stash->rindices);
171:   return(0);
172: }

174: /*
175:    MatStashGetInfo_Private - Gets the relevant statistics of the stash

177:    Input Parameter:
178:    stash    - the stash

179:    Output Parameters:
180:    nstash   - the size of the stash, i.e. the number of values currently stored
181:    reallocs - the number of additional mallocs incurred

182: */
183: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
184: {
185:   PetscInt bs2 = stash->bs*stash->bs;

188:   if (nstash) *nstash = stash->n*bs2;
189:   if (reallocs) {
190:     if (stash->reallocs < 0) *reallocs = 0;
191:     else                     *reallocs = stash->reallocs;
192:   }
193:   return(0);
194: }

196: /*
197:    MatStashSetInitialSize_Private - Sets the initial size of the stash

199:    Input Parameters:
200:    stash  - the stash
201:    max    - the value that is used as the max size of the stash;
202:             this value is used while allocating memory.
203: */
204: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
205: {
207:   stash->umax = max;
208:   return(0);
209: }

211: /* MatStashExpand_Private - Expand the stash. This function is called
212:    when the space in the stash is not sufficient to add the new values
213:    being inserted into the stash.

215:    Input Parameters:
216:    stash - the stash
217:    incr  - the minimum increase requested

219:    Notes:
220:    This routine doubles the currently used memory.
221:  */
222: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
223: {
225:   PetscInt       newnmax,bs2= stash->bs*stash->bs;

228:   /* allocate a larger stash */
229:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
230:     if (stash->umax)                  newnmax = stash->umax/bs2;
231:     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
232:   } else if (!stash->nmax) { /* reusing stash */
233:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
234:     else                              newnmax = stash->oldnmax/bs2;
235:   } else                              newnmax = stash->nmax*2;
236:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

238:   /* Get a MatStashSpace and attach it to stash */
239:   PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
240:   if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
241:     stash->space_head = stash->space;
242:   }

244:   stash->reallocs++;
245:   stash->nmax = newnmax;
246:   return(0);
247: }
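
/* Worked example of the sizing logic above (assuming bs = 1, so bs2 = 1, and no user-set umax):
   the first expansion of a fresh stash allocates DEFAULT_STASH_SIZE/bs2 = 10000 entries; once
   that space is exhausted, the next expansion doubles nmax to 20000; and if a single insertion
   request is still larger than that, a further 2*incr entries are added on top. */
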
248: /*
249:   MatStashValuesRow_Private - inserts values into the stash. This function
250:   expects the values to be row-oriented. Multiple columns belonging to the same row
251:   can be inserted with a single call to this function.

253:   Input Parameters:
254:   stash  - the stash
255:   row    - the global row corresponding to the values
256:   n      - the number of elements inserted. All elements belong to the above row.
257:   idxn   - the global column indices corresponding to each of the values.
258:   values - the values inserted
259: */
260: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
261: {
262:   PetscErrorCode     ierr;
263:   PetscInt           i,k,cnt = 0;
264:   PetscMatStashSpace space=stash->space;

267:   /* Check and see if we have sufficient memory */
268:   if (!space || space->local_remaining < n) {
269:     MatStashExpand_Private(stash,n);
270:   }
271:   space = stash->space;
272:   k     = space->local_used;
273:   for (i=0; i<n; i++) {
274:     if (ignorezeroentries && (values[i] == 0.0)) continue;
275:     space->idx[k] = row;
276:     space->idy[k] = idxn[i];
277:     space->val[k] = values[i];
278:     k++;
279:     cnt++;
280:   }
281:   stash->n               += cnt;
282:   space->local_used      += cnt;
283:   space->local_remaining -= cnt;
284:   return(0);
285: }
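
/* A minimal sketch (hypothetical, not from PETSc) of how a parallel MatSetValues-style routine
   might route one row of entries either into local storage or into the stash.  rstart/rend are
   assumed to be this process's ownership range; the local-insertion branch is left as a comment. */
static PetscErrorCode DemoStashRow(MatStash *stash,PetscInt rstart,PetscInt rend,
                                   PetscInt row,PetscInt n,const PetscInt cols[],const PetscScalar vals[])
{
  PetscErrorCode ierr;

  if (row >= rstart && row < rend) {
    /* locally owned row: a real implementation would insert directly into its local data structure */
  } else {
    /* off-process row: stash it; the values are shipped to the owner during assembly */
    ierr = MatStashValuesRow_Private(stash,row,n,cols,vals,PETSC_FALSE);CHKERRQ(ierr);
  }
  return 0;
}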

287: /*
288:   MatStashValuesCol_Private - inserts values into the stash. This function
289:   expects the values to be column-oriented. Multiple columns belonging to the same row
290:   can be inserted with a single call to this function.

292:   Input Parameters:
293:   stash   - the stash
294:   row     - the global row corresponding to the values
295:   n       - the number of elements inserted. All elements belong to the above row.
296:   idxn    - the global column indices corresponding to each of the values.
297:   values  - the values inserted
298:   stepval - consecutive values are separated by a distance of stepval;
299:             this happens because the input is column-oriented.
300: */
301: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
302: {
303:   PetscErrorCode     ierr;
304:   PetscInt           i,k,cnt = 0;
305:   PetscMatStashSpace space=stash->space;

308:   /* Check and see if we have sufficient memory */
309:   if (!space || space->local_remaining < n) {
310:     MatStashExpand_Private(stash,n);
311:   }
312:   space = stash->space;
313:   k     = space->local_used;
314:   for (i=0; i<n; i++) {
315:     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
316:     space->idx[k] = row;
317:     space->idy[k] = idxn[i];
318:     space->val[k] = values[i*stepval];
319:     k++;
320:     cnt++;
321:   }
322:   stash->n               += cnt;
323:   space->local_used      += cnt;
324:   space->local_remaining -= cnt;
325:   return(0);
326: }

328: /*
329:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
330:   This function expects the values to be row-oriented. Multiple columns belonging
331:   to the same block-row can be inserted with a single call to this function.
332:   This function extracts the sub-block of values based on the dimensions of
333:   the original input block, and the row,col values corresponding to the blocks.

335:   Input Parameters:
336:   stash  - the stash
337:   row    - the global block-row corresponding to the values
338:   n      - the number of elements inserted. All elements belong to the above row.
339:   idxn   - the global block-column indices corresponding to each of the blocks of
340:            values. Each block is of size bs*bs.
341:   values - the values inserted
342:   rmax   - the number of block-rows in the original block.
343:   cmax   - the number of block-columns in the original block.
344:   idx    - the index of the current block-row in the original block.
345: */
346: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
347: {
348:   PetscErrorCode     ierr;
349:   PetscInt           i,j,k,bs2,bs=stash->bs,l;
350:   const PetscScalar  *vals;
351:   PetscScalar        *array;
352:   PetscMatStashSpace space=stash->space;

355:   if (!space || space->local_remaining < n) {
356:     MatStashExpand_Private(stash,n);
357:   }
358:   space = stash->space;
359:   l     = space->local_used;
360:   bs2   = bs*bs;
361:   for (i=0; i<n; i++) {
362:     space->idx[l] = row;
363:     space->idy[l] = idxn[i];
364:     /* Now copy over the block of values. Store the values column-oriented.
365:        This enables inserting multiple blocks belonging to a row with a single
366:        function call. */
367:     array = space->val + bs2*l;
368:     vals  = values + idx*bs2*n + bs*i;
369:     for (j=0; j<bs; j++) {
370:       for (k=0; k<bs; k++) array[k*bs] = vals[k];
371:       array++;
372:       vals += cmax*bs;
373:     }
374:     l++;
375:   }
376:   stash->n               += n;
377:   space->local_used      += n;
378:   space->local_remaining -= n;
379:   return(0);
380: }
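
/* Example of the copy above for bs = 2: the row-oriented input block
     [a b]
     [c d]
   is stored in space->val as the column-oriented sequence {a, c, b, d}. */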

382: /*
383:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
384:   This function expects the values to be column-oriented. Multiple columns belonging
385:   to the same block-row can be inserted with a single call to this function.
386:   This function extracts the sub-block of values based on the dimensions of
387:   the original input block, and the row,col values corresponding to the blocks.

389:   Input Parameters:
390:   stash  - the stash
391:   row    - the global block-row corresponding to the values
392:   n      - the number of elements inserted. All elements belong to the above row.
393:   idxn   - the global block-column indices corresponding to each of the blocks of
394:            values. Each block is of size bs*bs.
395:   values - the values inserted
396:   rmax   - the number of block-rows in the original block.
397:   cmax   - the number of block-columns in the original block.
398:   idx    - the index of the current block-row in the original block.
399: */
400: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
401: {
402:   PetscErrorCode     ierr;
403:   PetscInt           i,j,k,bs2,bs=stash->bs,l;
404:   const PetscScalar  *vals;
405:   PetscScalar        *array;
406:   PetscMatStashSpace space=stash->space;

409:   if (!space || space->local_remaining < n) {
410:     MatStashExpand_Private(stash,n);
411:   }
412:   space = stash->space;
413:   l     = space->local_used;
414:   bs2   = bs*bs;
415:   for (i=0; i<n; i++) {
416:     space->idx[l] = row;
417:     space->idy[l] = idxn[i];
418:     /* Now copy over the block of values. Store the values column-oriented.
419:      This enables inserting multiple blocks belonging to a row with a single
420:      function call. */
421:     array = space->val + bs2*l;
422:     vals  = values + idx*bs2*n + bs*i;
423:     for (j=0; j<bs; j++) {
424:       for (k=0; k<bs; k++) array[k] = vals[k];
425:       array += bs;
426:       vals  += rmax*bs;
427:     }
428:     l++;
429:   }
430:   stash->n               += n;
431:   space->local_used      += n;
432:   space->local_remaining -= n;
433:   return(0);
434: }
435: /*
436:   MatStashScatterBegin_Private - Initiates the transfer of values to the
437:   correct owners. This function goes through the stash, checks the
438:   owner of each stashed value, and sends the values off to the owning
439:   processors.

441:   Input Parameters:
442:   stash  - the stash
443:   owners - an array of size 'no-of-procs'+1 which gives the ownership range
444:            for each process.

446:   Notes: In the case of the blocked stash, the 'owners' array has its
447:   ranges specified in blocked global indices, while for the regular stash
448:   they are in regular global indices.
449: */
450: PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
451: {

455:   (*stash->ScatterBegin)(mat,stash,owners);
456:   return(0);
457: }
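
/* Example (illustrative): with 3 processes owning rows 0..3, 4..7, and 8..9 respectively,
   owners[] = {0, 4, 8, 10}; a stashed row r then belongs to process j exactly when
   owners[j] <= r < owners[j+1], which is the test used in the reference implementation below. */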

459: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
460: {
461:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
462:   PetscInt           size=stash->size,nsends;
463:   PetscErrorCode     ierr;
464:   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
465:   PetscScalar        **rvalues,*svalues;
466:   MPI_Comm           comm = stash->comm;
467:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
468:   PetscMPIInt        *sizes,*nlengths,nreceives;
469:   PetscInt           *sp_idx,*sp_idy;
470:   PetscScalar        *sp_val;
471:   PetscMatStashSpace space,space_next;

474:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
475:     InsertMode addv;
476:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
477:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
478:     mat->insertmode = addv; /* in case this processor had no cache */
479:   }

481:   bs2 = stash->bs*stash->bs;

483:   /*  first count number of contributors to each processor */
484:   PetscCalloc1(size,&sizes);
485:   PetscCalloc1(size,&nlengths);
486:   PetscMalloc1(stash->n+1,&owner);

488:   i       = j    = 0;
489:   lastidx = -1;
490:   space   = stash->space_head;
491:   while (space) {
492:     space_next = space->next;
493:     sp_idx     = space->idx;
494:     for (l=0; l<space->local_used; l++) {
495:       /* if indices are NOT locally sorted, need to start search at the beginning */
496:       if (lastidx > (idx = sp_idx[l])) j = 0;
497:       lastidx = idx;
498:       for (; j<size; j++) {
499:         if (idx >= owners[j] && idx < owners[j+1]) {
500:           nlengths[j]++; owner[i] = j; break;
501:         }
502:       }
503:       i++;
504:     }
505:     space = space_next;
506:   }
507:   /* Now check what procs get messages - and compute nsends. */
508:   for (i=0, nsends=0; i<size; i++) {
509:     if (nlengths[i]) {
510:       sizes[i] = 1; nsends++;
511:     }
512:   }

514:   {PetscMPIInt *onodes,*olengths;
515:    /* Determine the number of messages to expect, their lengths, and the from-ids */
516:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
517:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
518:    /* since row and col indices are sent together, lengths are multiplied by 2 */
519:    for (i=0; i<nreceives; i++) olengths[i] *=2;
520:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
521:    /* values are of length 'bs2' per entry (remove the earlier factor of 2 and scale by bs2) */
522:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
523:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
524:    PetscFree(onodes);
525:    PetscFree(olengths);}

527:   /* do sends:
528:       1) startv[i] gives the starting index in svalues for data going to
529:          the ith processor
530:   */
531:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
532:   PetscMalloc1(2*nsends,&send_waits);
533:   PetscMalloc2(size,&startv,size,&starti);
534:   /* use 2 sends per processor: one with the values (all_a), the other with the row and column indices (all_i and all_j) */
535:   startv[0] = 0; starti[0] = 0;
536:   for (i=1; i<size; i++) {
537:     startv[i] = startv[i-1] + nlengths[i-1];
538:     starti[i] = starti[i-1] + 2*nlengths[i-1];
539:   }

541:   i     = 0;
542:   space = stash->space_head;
543:   while (space) {
544:     space_next = space->next;
545:     sp_idx     = space->idx;
546:     sp_idy     = space->idy;
547:     sp_val     = space->val;
548:     for (l=0; l<space->local_used; l++) {
549:       j = owner[i];
550:       if (bs2 == 1) {
551:         svalues[startv[j]] = sp_val[l];
552:       } else {
553:         PetscInt    k;
554:         PetscScalar *buf1,*buf2;
555:         buf1 = svalues+bs2*startv[j];
556:         buf2 = space->val + bs2*l;
557:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
558:       }
559:       sindices[starti[j]]             = sp_idx[l];
560:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
561:       startv[j]++;
562:       starti[j]++;
563:       i++;
564:     }
565:     space = space_next;
566:   }
567:   startv[0] = 0;
568:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

570:   for (i=0,count=0; i<size; i++) {
571:     if (sizes[i]) {
572:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
573:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
574:     }
575:   }
576: #if defined(PETSC_USE_INFO)
577:   PetscInfo1(NULL,"No of messages: %d \n",nsends);
578:   for (i=0; i<size; i++) {
579:     if (sizes[i]) {
580:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
581:     }
582:   }
583: #endif
584:   PetscFree(nlengths);
585:   PetscFree(owner);
586:   PetscFree2(startv,starti);
587:   PetscFree(sizes);

589:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
590:   PetscMalloc1(2*nreceives,&recv_waits);

592:   for (i=0; i<nreceives; i++) {
593:     recv_waits[2*i]   = recv_waits1[i];
594:     recv_waits[2*i+1] = recv_waits2[i];
595:   }
596:   stash->recv_waits = recv_waits;

598:   PetscFree(recv_waits1);
599:   PetscFree(recv_waits2);

601:   stash->svalues         = svalues;
602:   stash->sindices        = sindices;
603:   stash->rvalues         = rvalues;
604:   stash->rindices        = rindices;
605:   stash->send_waits      = send_waits;
606:   stash->nsends          = nsends;
607:   stash->nrecvs          = nreceives;
608:   stash->reproduce_count = 0;
609:   return(0);
610: }

612: /*
613:    MatStashScatterGetMesg_Private - This function waits on the receives posted
614:    in the function MatStashScatterBegin_Private() and returns one message at
615:    a time to the calling function. If no messages are left, it indicates this
616:    by setting flg = 0, else it sets flg = 1.

618:    Input Parameters:
619:    stash - the stash

621:    Output Parameters:
622:    nvals - the number of entries in the current message.
623:    rows  - an array of row indices (or blocked indices) corresponding to the values
624:    cols  - an array of column indices (or blocked indices) corresponding to the values
625:    vals  - the values
626:    flg   - 0 indicates no more messages are left, and the current call has no values associated with it.
627:            1 indicates that the current call successfully received a message, and the
628:              other output parameters nvals,rows,cols,vals are set appropriately.
629: */
630: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
631: {

635:   (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);
636:   return(0);
637: }
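
/* A minimal sketch (hypothetical helper, not from PETSc) of the loop a matrix assembly routine
   typically runs around this function: keep pulling messages until flg comes back 0, then call
   MatStashScatterEnd_Private().  The per-entry insertion is left as a placeholder comment. */
static PetscErrorCode DemoDrainStash(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       *row,*col,flg,i;
  PetscScalar    *val;

  while (1) {
    ierr = MatStashScatterGetMesg_Private(stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;                 /* no more messages */
    for (i=0; i<n; i++) {
      /* a real implementation inserts the entry (row[i], col[i]), whose value(s) start at
         &val[i*bs*bs] with bs = stash->bs, into its local storage here */
    }
  }
  ierr = MatStashScatterEnd_Private(stash);CHKERRQ(ierr);
  return 0;
}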

639: static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
640: {
642:   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
643:   PetscInt       bs2;
644:   MPI_Status     recv_status;
645:   PetscBool      match_found = PETSC_FALSE;

648:   *flg = 0; /* When a message is discovered this is reset to 1 */
649:   /* Return if no more messages to process */
650:   if (stash->nprocessed == stash->nrecvs) return(0);

652:   bs2 = stash->bs*stash->bs;
653:   /* If a matching pair of receives is found, process them and return the data to
654:      the calling function. Until then, keep receiving messages. */
655:   while (!match_found) {
656:     if (stash->reproduce) {
657:       i    = stash->reproduce_count++;
658:       MPI_Wait(stash->recv_waits+i,&recv_status);
659:     } else {
660:       MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
661:     }
662:     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

664:     /* Now pack the received message into a structure which is usable by others */
665:     if (i % 2) {
666:       MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);

668:       flg_v[2*recv_status.MPI_SOURCE] = i/2;

670:       *nvals = *nvals/bs2;
671:     } else {
672:       MPI_Get_count(&recv_status,MPIU_INT,nvals);

674:       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

676:       *nvals = *nvals/2; /* This message has both row indices and col indices */
677:     }

679:     /* Check if we have both messages from this proc */
680:     i1 = flg_v[2*recv_status.MPI_SOURCE];
681:     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
682:     if (i1 != -1 && i2 != -1) {
683:       *rows = stash->rindices[i2];
684:       *cols = *rows + *nvals;
685:       *vals = stash->rvalues[i1];
686:       *flg  = 1;
687:       stash->nprocessed++;
688:       match_found = PETSC_TRUE;
689:     }
690:   }
691:   return(0);
692: }

694: typedef struct {
695:   PetscInt row;
696:   PetscInt col;
697:   PetscScalar vals[1];          /* Actually an array of length bs2 */
698: } MatStashBlock;

700: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
701: {
703:   PetscMatStashSpace space;
704:   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
705:   PetscScalar **valptr;

708:   PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);
709:   for (space=stash->space_head,cnt=0; space; space=space->next) {
710:     for (i=0; i<space->local_used; i++) {
711:       row[cnt] = space->idx[i];
712:       col[cnt] = space->idy[i];
713:       valptr[cnt] = &space->val[i*bs2];
714:       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
715:       cnt++;
716:     }
717:   }
718:   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
719:   PetscSortIntWithArrayPair(n,row,col,perm);
720:   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
721:   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
722:     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
723:       PetscInt colstart;
724:       PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);
725:       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
726:         PetscInt j,l;
727:         MatStashBlock *block;
728:         PetscSegBufferGet(stash->segsendblocks,1,&block);
729:         block->row = row[rowstart];
730:         block->col = col[colstart];
731:         PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));
732:         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
733:           if (insertmode == ADD_VALUES) {
734:             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
735:           } else {
736:             PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));
737:           }
738:         }
739:         colstart = j;
740:       }
741:       rowstart = i;
742:     }
743:   }
744:   PetscFree4(row,col,valptr,perm);
745:   return(0);
746: }

748: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
749: {

753:   if (stash->blocktype == MPI_DATATYPE_NULL) {
754:     PetscInt     bs2 = PetscSqr(stash->bs);
755:     PetscMPIInt  blocklens[2];
756:     MPI_Aint     displs[2];
757:     MPI_Datatype types[2],stype;
758:     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
759:      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
760:      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
761:      */
762:     struct DummyBlock {PetscInt row,col; PetscReal vals;};

764:     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
765:     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
766:       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
767:     }
768:     PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);
769:     PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);
770:     PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);
771:     blocklens[0] = 2;
772:     blocklens[1] = bs2;
773:     displs[0] = offsetof(struct DummyBlock,row);
774:     displs[1] = offsetof(struct DummyBlock,vals);
775:     types[0] = MPIU_INT;
776:     types[1] = MPIU_SCALAR;
777:     MPI_Type_create_struct(2,blocklens,displs,types,&stype);
778:     MPI_Type_commit(&stype);
779:     MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype); /* MPI-2 */
780:     MPI_Type_commit(&stash->blocktype);
781:     MPI_Type_free(&stype);
782:   }
783:   return(0);
784: }
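
/* Standalone sketch (plain MPI + C, not PETSc-specific) of the datatype construction used above:
   a struct of two integers followed by bs2 values, resized so that consecutive blocks packed in a
   byte buffer are exactly `stride` bytes apart.  Uses int/double in place of PetscInt/PetscScalar;
   DemoBuildBlockType and DemoBlock are hypothetical names. */
#include <mpi.h>
#include <stddef.h>

static void DemoBuildBlockType(int bs2,MPI_Datatype *blocktype,size_t *stride)
{
  struct DemoBlock {int row,col; double vals;};   /* vals stands in for an array of length bs2 */
  int          blocklens[2] = {2,bs2};
  MPI_Aint     displs[2]    = {offsetof(struct DemoBlock,row),offsetof(struct DemoBlock,vals)};
  MPI_Datatype types[2]     = {MPI_INT,MPI_DOUBLE},stype;

  *stride = offsetof(struct DemoBlock,vals) + bs2*sizeof(double);
  if (*stride % sizeof(int)) *stride += sizeof(int) - *stride % sizeof(int); /* pad so integer fields stay aligned */
  MPI_Type_create_struct(2,blocklens,displs,types,&stype);
  MPI_Type_create_resized(stype,0,(MPI_Aint)*stride,blocktype);
  MPI_Type_commit(blocktype);
  MPI_Type_free(&stype);
}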

786: /* Callback invoked after the target rank has initiated the receive of the rendezvous message.
787:  * Here we post the main sends.
788:  */
789: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
790: {
791:   MatStash *stash = (MatStash*)ctx;
792:   MatStashHeader *hdr = (MatStashHeader*)sdata;

796:   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
797:   MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
798:   stash->sendframes[rankid].count = hdr->count;
799:   stash->sendframes[rankid].pending = 1;
800:   return(0);
801: }

803: /* Callback invoked by the target after receiving the rendezvous message.
804:  * Here we post the main recvs.
805:  */
806: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
807: {
808:   MatStash *stash = (MatStash*)ctx;
809:   MatStashHeader *hdr = (MatStashHeader*)rdata;
810:   MatStashFrame *frame;

814:   PetscSegBufferGet(stash->segrecvframe,1,&frame);
815:   PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);
816:   MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
817:   frame->count = hdr->count;
818:   frame->pending = 1;
819:   return(0);
820: }

822: /*
823:  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
824:  */
825: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
826: {
828:   size_t nblocks;
829:   char *sendblocks;

832: #if defined(PETSC_USE_DEBUG)
833:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
834:     InsertMode addv;
835:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
836:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837:   }
838: #endif

840:   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
841:     MatStashScatterDestroy_BTS(stash);
842:   }

844:   MatStashBlockTypeSetUp(stash);
845:   MatStashSortCompress_Private(stash,mat->insertmode);
846:   PetscSegBufferGetSize(stash->segsendblocks,&nblocks);
847:   PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);
848:   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
849:     PetscInt i;
850:     size_t b;
851:     for (i=0,b=0; i<stash->nsendranks; i++) {
852:       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
853:       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
854:       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
855:       for ( ; b<nblocks; b++) {
856:         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
857:         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
858:         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
859:         stash->sendhdr[i].count++;
860:       }
861:     }
862:   } else {                      /* Dynamically count and pack (first time) */
863:     PetscInt sendno;
864:     size_t i,rowstart;

866:     /* Count number of send ranks and allocate for sends */
867:     stash->nsendranks = 0;
868:     for (rowstart=0; rowstart<nblocks; ) {
869:       PetscInt owner;
870:       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
871:       PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
872:       if (owner < 0) owner = -(owner+2);
873:       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
874:         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
875:         if (sendblock_i->row >= owners[owner+1]) break;
876:       }
877:       stash->nsendranks++;
878:       rowstart = i;
879:     }
880:     PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);

882:     /* Set up sendhdrs and sendframes */
883:     sendno = 0;
884:     for (rowstart=0; rowstart<nblocks; ) {
885:       PetscInt owner;
886:       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
887:       PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
888:       if (owner < 0) owner = -(owner+2);
889:       stash->sendranks[sendno] = owner;
890:       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
891:         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
892:         if (sendblock_i->row >= owners[owner+1]) break;
893:       }
894:       stash->sendframes[sendno].buffer = sendblock_rowstart;
895:       stash->sendframes[sendno].pending = 0;
896:       stash->sendhdr[sendno].count = i - rowstart;
897:       sendno++;
898:       rowstart = i;
899:     }
900:     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
901:   }

903:   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
904:    * message or a dummy entry of some sort. */
905:   if (mat->insertmode == INSERT_VALUES) {
906:     size_t i;
907:     for (i=0; i<nblocks; i++) {
908:       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
909:       sendblock_i->row = -(sendblock_i->row+1);
910:     }
911:   }

913:   if (stash->subset_off_proc && mat->subsetoffprocentries) {
914:     PetscMPIInt i,tag;
915:     PetscCommGetNewTag(stash->comm,&tag);
916:     for (i=0; i<stash->nrecvranks; i++) {
917:       MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);
918:     }
919:     for (i=0; i<stash->nsendranks; i++) {
920:       MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);
921:     }
922:     stash->use_status = PETSC_TRUE; /* Use count from message status. */
923:   } else {
924:     PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
925:                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
926:                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);
927:     PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);
928:     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
929:   }

931:   PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);
932:   stash->recvframe_active = NULL;
933:   stash->recvframe_i      = 0;
934:   stash->some_i           = 0;
935:   stash->some_count       = 0;
936:   stash->recvcount        = 0;
937:   stash->subset_off_proc  = mat->subsetoffprocentries;
938:   stash->insertmode       = &mat->insertmode;
939:   return(0);
940: }

942: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
943: {
945:   MatStashBlock *block;

948:   *flg = 0;
949:   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
950:     if (stash->some_i == stash->some_count) {
951:       if (stash->recvcount == stash->nrecvranks) return(0); /* Done */
952:       MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);
953:       stash->some_i = 0;
954:     }
955:     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
956:     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
957:     if (stash->use_status) { /* Count what was actually sent */
958:       MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);
959:     }
960:     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
961:       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
962:       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
963:       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
964:       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
965:     }
966:     stash->some_i++;
967:     stash->recvcount++;
968:     stash->recvframe_i = 0;
969:   }
970:   *n = 1;
971:   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
972:   if (block->row < 0) block->row = -(block->row + 1);
973:   *row = &block->row;
974:   *col = &block->col;
975:   *val = block->vals;
976:   stash->recvframe_i++;
977:   *flg = 1;
978:   return(0);
979: }

981: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
982: {

986:   MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);
987:   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
988:     void *dummy;
989:     PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);
990:   } else {                      /* No reuse, so collect everything. */
991:     MatStashScatterDestroy_BTS(stash);
992:   }

994:   /* Now update oldnmax to be approximately 10% more than the maximum n used, so that the
995:      wastage of space is reduced the next time this stash is used.
996:      Update oldnmax only if it increases. */
997:   if (stash->n) {
998:     PetscInt bs2     = stash->bs*stash->bs;
999:     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1000:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1001:   }

1003:   stash->nmax       = 0;
1004:   stash->n          = 0;
1005:   stash->reallocs   = -1;
1006:   stash->nprocessed = 0;

1008:   PetscMatStashSpaceDestroy(&stash->space_head);

1010:   stash->space = 0;

1012:   return(0);
1013: }

1015: static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1016: {

1020:   PetscSegBufferDestroy(&stash->segsendblocks);
1021:   PetscSegBufferDestroy(&stash->segrecvframe);
1022:   stash->recvframes = NULL;
1023:   PetscSegBufferDestroy(&stash->segrecvblocks);
1024:   if (stash->blocktype != MPI_DATATYPE_NULL) {
1025:     MPI_Type_free(&stash->blocktype);
1026:   }
1027:   stash->nsendranks = 0;
1028:   stash->nrecvranks = 0;
1029:   PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);
1030:   PetscFree(stash->sendreqs);
1031:   PetscFree(stash->recvreqs);
1032:   PetscFree(stash->recvranks);
1033:   PetscFree(stash->recvhdr);
1034:   PetscFree2(stash->some_indices,stash->some_statuses);
1035:   return(0);
1036: }