#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash: entries are simply appended to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size. used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt  max,*opt,nopt,i;
  PetscBool flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);
  PetscMalloc1(2*stash->size,&stash->flg_v);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  PetscMalloc1(nopt,&opt);
  PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);
  PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);
  if (flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
  }
  return(0);
}

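/*
   Illustrative sketch (not part of the formal PETSc documentation): the stash behavior can be
   tuned at runtime through the options read above.  The program name 'ex1' is just a placeholder.

     mpiexec -n 4 ./ex1 -matstash_initial_size 10000   (preallocate stash entries per rank)
     mpiexec -n 4 ./ex1 -matstash_bts                  (select the BTS scatter implementation)
     mpiexec -n 4 ./ex1 -matstash_reproduce            (process incoming messages in a fixed order)
*/
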
/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscMatStashSpaceDestroy(&stash->space_head);
  if (stash->ScatterDestroy) {(*stash->ScatterDestroy)(stash);}

  stash->space = 0;

  PetscFree(stash->flg_v);
  return(0);
}

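/*
   Sketch of the intended calling sequence, inferred from the routines in this file (the actual
   callers are the parallel Mat implementations during MatSetValues()/MatAssemblyBegin()/End()):

     MatStashCreate_Private(comm,bs,&stash);
     ...
     MatStashValuesRow_Private(&stash,row,n,cols,vals,ignorezero);   (repeated while setting values)
     ...
     MatStashScatterBegin_Private(mat,&stash,owners);
     while (1) {
       MatStashScatterGetMesg_Private(&stash,&nvals,&rows,&cols,&vals,&flg);
       if (!flg) break;
       ...  insert the received values into the local matrix ...
     }
     MatStashScatterEnd_Private(&stash);
     ...
     MatStashDestroy_Private(&stash);
*/
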
/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  (*stash->ScatterEnd)(stash);
  return(0);
}

static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscInt   nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status *send_status;

  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscMalloc1(2*nsends,&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }
  /* Now update oldnmax to be about 10% more than the maximum n used; this way the
     wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscMatStashSpaceDestroy(&stash->space_head);

  stash->space = 0;

  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree(stash->rvalues[0]);
  PetscFree(stash->rvalues);
  PetscFree(stash->rindices[0]);
  PetscFree(stash->rindices);
  return(0);
}

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash. Indicates the number of values stored.
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt newnmax,bs2 = stash->bs*stash->bs;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  return(0);
}

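/*
   Worked example of the sizing policy above (a sketch; exact numbers depend on bs and options):
   with bs = 2 (so bs2 = 4) and no -matstash_initial_size given, the first expansion of a fresh
   stash allocates DEFAULT_STASH_SIZE/bs2 = 10000/4 = 2500 block entries.  A later expansion while
   the stash is in use doubles that to 5000, and if the doubled size still cannot hold the
   requested 'incr' additional entries, 2*incr more are added on top.
*/
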
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}

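/*
   Example (a hypothetical call, for illustration only): stashing the off-process row 10 with
   column indices {3,7} and values {1.0,2.0}, keeping zero entries:

     PetscInt    cols[2] = {3,7};
     PetscScalar v[2]    = {1.0,2.0};
     MatStashValuesRow_Private(&mat->stash,10,2,cols,v,PETSC_FALSE);

   This appends two (row,col,val) triples to the current MatStashSpace chunk.
*/
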
/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval.
            This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}

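/*
   Indexing sketch for the copy loop above (assuming bs = 2, so bs2 = 4): the caller passes a
   row-oriented array 'values' holding an rmax-by-cmax grid of 2x2 blocks.  For block-column i of
   block-row idx, the source pointer starts at values + idx*bs2*n + bs*i and advances by cmax*bs
   per scalar row, while the destination writes array[k*bs], so the stashed copy ends up
   column-oriented, as noted in the comment inside the loop.
*/
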
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}

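/* Note: the Row and Col blocked variants differ only in how they walk the caller's 'values'
   array: the Row variant reads contiguous rows and strides by cmax*bs, transposing into the
   stash, while the Col variant reads contiguous columns and strides by rmax*bs, copying them
   directly; both store the stashed block column-oriented. */
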
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the owner
  of each stashed value, and sends the values off to the owner processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
  In the case of the blocked stash the 'owners' array has the ranges
  specified in blocked global indices, while for the regular stash it is
  in the usual global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  (*stash->ScatterBegin)(mat,stash,owners);
  return(0);
}

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }
  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscCalloc1(size,&sizes);
  PetscCalloc1(size,&nlengths);
  PetscMalloc1(stash->n+1,&owner);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }
  {
    PetscMPIInt *onodes,*olengths;
    /* Determine the number of messages to expect, their lengths, and from-ids */
    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
    /* since the row and column indices are sent together, lengths are multiplied by 2 */
    for (i=0; i<nreceives; i++) olengths[i] *= 2;
    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
    /* values have length bs2 per entry (and remove the earlier factor of 2) */
    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
    PetscFree(onodes);
    PetscFree(olengths);
  }
  /* do sends:
      1) startv[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc2(size,&startv,size,&starti);
  /* use 2 sends per destination: the first with all_i and all_j (the indices), the next with all_a (the values) */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_INFO)
  PetscInfo1(NULL,"No of messages: %d \n",nsends);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
    }
  }
#endif
  PetscFree(nlengths);
  PetscFree(owner);
  PetscFree2(startv,starti);
  PetscFree(sizes);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc1(2*nreceives,&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  return(0);
}

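/*
   Wire format used by the reference (non-BTS) scatter above, per destination rank j (a summary
   of the code, not a separate protocol definition): two messages are sent with consecutive tags,
     tag1: 2*nlengths[j] PetscInt      -- all row indices first, then all column indices
     tag2: bs2*nlengths[j] PetscScalar -- the corresponding values, bs2 scalars per entry
   MatStashScatterGetMesg_Ref() pairs the two messages from each source rank before handing the
   data back to the caller.
*/
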
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);
  return(0);
}

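/*
   Typical receive loop on the caller side (a sketch; the real loops live in the MatAssemblyEnd
   implementations of the parallel matrix types):

     PetscMPIInt nvals;
     PetscInt    *rows,*cols,flg;
     PetscScalar *vals;
     MatStashScatterGetMesg_Private(&mat->stash,&nvals,&rows,&cols,&vals,&flg);
     while (flg) {
       ...  insert the nvals received entries (rows[k],cols[k]) and their values locally ...
       MatStashScatterGetMesg_Private(&mat->stash,&nvals,&rows,&cols,&vals,&flg);
     }
*/
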
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i = stash->reproduce_count++;
      MPI_Wait(stash->recv_waits+i,&recv_status);
    } else {
      MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    } else {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}

typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

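/* Because vals[] really holds bs2 scalars, MatStashBlock objects are never indexed as a plain
   C array; the code below always computes byte offsets of the form
   (MatStashBlock*)&buffer[i*stash->blocktype_size]. */
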
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  PetscSortIntWithArrayPair(n,row,col,perm);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        PetscSegBufferGet(stash->segsendblocks,1,&block);
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  PetscFree4(row,col,valptr,perm);
  return(0);
}

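/*
   Small worked example of the sort/compress step (bs = 1, ADD_VALUES): stashed entries
   (5,3)=1.0, (2,7)=2.0, (5,3)=4.0 are sorted by (row,col) to (2,7),(5,3),(5,3) and packed into
   two send blocks: (2,7)=2.0 and (5,3)=5.0.  With INSERT_VALUES the duplicate encountered last
   after sorting overwrites the block instead of being added.
*/
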
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);
    PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);
    PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock,row);
    displs[1]    = offsetof(struct DummyBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    MPI_Type_create_struct(2,blocklens,displs,types,&stype);
    MPI_Type_commit(&stype);
    MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype); /* MPI-2 */
    MPI_Type_commit(&stash->blocktype);
    MPI_Type_free(&stype);
  }
  return(0);
}

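/*
   Size arithmetic sketch (assuming a 4-byte PetscInt and an 8-byte real PetscScalar; both are
   configuration dependent): for bs = 1, offsetof(DummyBlock,vals) = 8, so blocktype_size =
   8 + 1*8 = 16 bytes, which is already a multiple of sizeof(PetscInt) and needs no padding.
   The resized MPI datatype then has extent 16, so consecutive blocks in the segmented buffers
   can be sent with a single count in MPI_Isend.
*/
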
/* Callback invoked after the target rank has initiated its receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;

  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  return(0);
}

/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;

  PetscSegBufferGet(stash->segrecvframe,1,&frame);
  PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);
  MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
  frame->count   = hdr->count;
  frame->pending = 1;
  return(0);
}

/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  size_t nblocks;
  char   *sendblocks;

#if defined(PETSC_USE_DEBUG)
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    MatStashScatterDestroy_BTS(stash);
  }
  MatStashBlockTypeSetUp(stash);
  MatStashSortCompress_Private(stash,mat->insertmode);
  PetscSegBufferGetSize(stash->segsendblocks,&nblocks);
  PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t   b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t   i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count      = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }
  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    PetscMPIInt i,tag;
    PetscCommGetNewTag(stash->comm,&tag);
    for (i=0; i<stash->nrecvranks; i++) {
      MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);
    }
    for (i=0; i<stash->nsendranks; i++) {
      MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                               &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                               MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);
    PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  return(0);
}

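/* Note on the encoding used above: with INSERT_VALUES every outgoing row index r is sent as
   -(r+1), so the receiver in MatStashScatterGetMesg_BTS() can infer the sender's InsertMode
   from the sign of the first row and then restores the index with row = -(row+1).  This avoids
   an extra message solely to communicate the insert mode. */
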
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  MatStashBlock *block;

  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) return(0); /* Done */
      MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n    = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  return(0);
}

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);
  if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);
  } else { /* No reuse, so collect everything. */
    MatStashScatterDestroy_BTS(stash);
  }

  /* Now update oldnmax to be about 10% more than the maximum n used; this way the
     wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscMatStashSpaceDestroy(&stash->space_head);

  stash->space = 0;

  return(0);
}

static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscSegBufferDestroy(&stash->segsendblocks);
  PetscSegBufferDestroy(&stash->segrecvframe);
  stash->recvframes = NULL;
  PetscSegBufferDestroy(&stash->segrecvblocks);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    MPI_Type_free(&stash->blocktype);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);
  PetscFree(stash->sendreqs);
  PetscFree(stash->recvreqs);
  PetscFree(stash->recvranks);
  PetscFree(stash->recvhdr);
  PetscFree2(stash->some_indices,stash->some_statuses);
  return(0);
}