Actual source code: hem.c


  2: #include <petsc/private/matimpl.h>
  3: #include <../src/mat/impls/aij/seq/aij.h>
  4: #include <../src/mat/impls/aij/mpi/mpiaij.h>

  6: /* linked list methods for PetscCoarsenData: an array of singly linked lists of integer IDs backed by a chunked node pool
  7:  *
  8:  *  PetscCDCreate - create a PetscCoarsenData object with a_size empty lists
  9:  */
 10: PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
 11: {
 12:   PetscCoarsenData *ail;

 14:   /* allocate pool, partially */
 15:   PetscNew(&ail);
 16:   *a_out               = ail;
 17:   ail->pool_list.next  = NULL;
 18:   ail->pool_list.array = NULL;
 19:   ail->chk_sz          = 0;
 20:   /* allocate array */
 21:   ail->size            = a_size;
 22:   PetscCalloc1(a_size, &ail->array);
 23:   ail->extra_nodes     = NULL;
 24:   ail->mat             = NULL;
 25:   return 0;
 26: }
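/* A minimal usage sketch of the PetscCD list API implemented in this file (illustrative
 * comment only, not part of the original source; 'nloc' and 'gid' are placeholders):
 *
 *   PetscCoarsenData *agg;
 *   PetscCDCreate(nloc, &agg);        -- one empty list per index 0..nloc-1
 *   PetscCDSetChuckSize(agg, 100);    -- optional: node-pool chunk size
 *   PetscCDAppendID(agg, 0, gid);     -- append global id 'gid' to list 0
 *   PetscCDDestroy(agg);              -- frees all pool chunks and the object
 */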

 28: /* PetscCDDestroy - free the pool chunks, the list-head array, and the object itself
 29:  */
 30: PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
 31: {
 32:   PetscCDArrNd   *n = &ail->pool_list;

 34:   n = n->next;
 35:   while (n) {
 36:     PetscCDArrNd *lstn = n;
 37:     n    = n->next;
 38:     PetscFree(lstn);
 39:   }
 40:   if (ail->pool_list.array) {
 41:     PetscFree(ail->pool_list.array);
 42:   }
 43:   PetscFree(ail->array);
 44:   /* delete this (+agg+pool array) */
 45:   PetscFree(ail);
 46:   return 0;
 47: }

 49: /* PetscCDSetChuckSize - set the allocation chunk size of the node pool
 50:  */
 51: PetscErrorCode PetscCDSetChuckSize(PetscCoarsenData *ail, PetscInt a_sz)
 52: {
 53:   ail->chk_sz = a_sz;
 54:   return 0;
 55: }

 57: /*  PetscCDGetNewNode - return a node from the free list or the pool, allocating a new chunk when needed
 58:  */
 59: PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
 60: {
 61:   *a_out = NULL;                /* squelch -Wmaybe-uninitialized */
 62:   if (ail->extra_nodes) {
 63:     PetscCDIntNd *node = ail->extra_nodes;
 64:     ail->extra_nodes = node->next;
 65:     node->gid        = a_id;
 66:     node->next       = NULL;
 67:     *a_out           = node;
 68:   } else {
 69:     if (!ail->pool_list.array) {
 70:       if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
 71:       PetscMalloc1(ail->chk_sz, &ail->pool_list.array);
 72:       ail->new_node       = ail->pool_list.array;
 73:       ail->new_left       = ail->chk_sz;
 74:       ail->new_node->next = NULL;
 75:     } else if (!ail->new_left) {
 76:       PetscCDArrNd *node;
 77:       PetscMalloc(ail->chk_sz*sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node);
 78:       node->array         = (PetscCDIntNd*)(node + 1);
 79:       node->next          = ail->pool_list.next;
 80:       ail->pool_list.next = node;
 81:       ail->new_left       = ail->chk_sz;
 82:       ail->new_node       = node->array;
 83:     }
 84:     ail->new_node->gid  = a_id;
 85:     ail->new_node->next = NULL;
 86:     *a_out              = ail->new_node++; ail->new_left--;
 87:   }
 88:   return 0;
 89: }
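/* Note on the pool allocator above (descriptive comment, not in the original source):
 * nodes are handed out from chunks of 'chk_sz' PetscCDIntNd entries. The first chunk
 * lives in pool_list.array; each later chunk is one allocation holding a PetscCDArrNd
 * header immediately followed by its node array, pushed onto pool_list.next.
 * 'new_node'/'new_left' track the next free slot in the current chunk, and nodes
 * recycled by PetscCDRemoveAll are reused from the 'extra_nodes' free list first. */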

 91: /* PetscCDIntNdSetID
 92:  */
 93: PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
 94: {
 95:   a_this->gid = a_id;
 96:   return 0;
 97: }

 99: /* PetscCDIntNdGetID
100:  */
101: PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
102: {
103:   *a_gid = a_this->gid;
104:   return 0;
105: }

107: /* PetscCDGetHeadPos
108:  */
109: PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
110: {
112:   *pos = ail->array[a_idx];
113:   return 0;
114: }

116: /* PetscCDGetNextPos - advance pos to the next node (ail and l_idx are unused, kept for a uniform interface)
117:  */
118: PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
119: {
121:   *pos = (*pos)->next;
122:   return 0;
123: }
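/* Iteration idiom used with the two routines above (sketch only; assumes a populated
 * PetscCoarsenData *ail and a valid list index 'idx'):
 *
 *   PetscCDIntNd *pos;
 *   PetscInt      id;
 *   PetscCDGetHeadPos(ail, idx, &pos);
 *   while (pos) {
 *     PetscCDIntNdGetID(pos, &id);        -- read the stored global id
 *     PetscCDGetNextPos(ail, idx, &pos);  -- advance to the next node
 *     ... use id ...
 *   }
 */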

125: /* PetscCDAppendID - append a new node with global id a_id to the end of list a_idx
126:  */
127: PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
128: {
129:   PetscCDIntNd   *n,*n2;

131:   PetscCDGetNewNode(ail, &n, a_id);
133:   if (!(n2=ail->array[a_idx])) ail->array[a_idx] = n;
134:   else {
135:     do {
136:       if (!n2->next) {
137:         n2->next = n;
139:         break;
140:       }
141:       n2 = n2->next;
142:     } while (n2);
144:   }
145:   return 0;
146: }

148: /* PetscCDAppendNode - append the existing node a_n to the end of list a_idx
149:  */
150: PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx,  PetscCDIntNd *a_n)
151: {
152:   PetscCDIntNd *n2;

155:   if (!(n2=ail->array[a_idx])) ail->array[a_idx] = a_n;
156:   else {
157:     do {
158:       if (!n2->next) {
159:         n2->next  = a_n;
160:         a_n->next = NULL;
161:         break;
162:       }
163:       n2 = n2->next;
164:     } while (n2);
166:   }
167:   return 0;
168: }

170: /* PetscCDRemoveNextNode - remove a_last->next from list a_idx; this exposes the singly linked list structure to the API
171:  */
172: PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx,  PetscCDIntNd *a_last)
173: {
174:   PetscCDIntNd *del;

178:   del          = a_last->next;
179:   a_last->next = del->next;
 180:   /* del->next = NULL; -- the removed node may still be referenced by an iterator, so keep its next pointer intact; fixing this properly needs a doubly linked list */
181:   /* could reuse n2 but PetscCDAppendNode sometimes uses it */
182:   return 0;
183: }

185: /* PetscCDPrint
186:  */
187: PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, MPI_Comm comm)
188: {
189:   PetscCDIntNd   *n;
190:   PetscInt       ii,kk;
191:   PetscMPIInt    rank;

193:   MPI_Comm_rank(comm, &rank);
194:   for (ii=0; ii<ail->size; ii++) {
195:     kk = 0;
196:     n  = ail->array[ii];
 197:     if (n) PetscPrintf(comm,"[%d]%s list %" PetscInt_FMT ":\n",rank,PETSC_FUNCTION_NAME,ii);
198:     while (n) {
199:       PetscPrintf(comm,"\t[%d] %" PetscInt_FMT ") id %" PetscInt_FMT "\n",rank,++kk,n->gid);
200:       n = n->next;
201:     }
202:   }
203:   return 0;
204: }

206: /* PetscCDAppendRemove - append list a_srcidx to the end of list a_destidx and leave the source list empty
207:  */
208: PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
209: {
210:   PetscCDIntNd *n;

215:   n = ail->array[a_destidx];
216:   if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
217:   else {
218:     do {
219:       if (!n->next) {
220:         n->next = ail->array[a_srcidx];
221:         break;
222:       }
223:       n = n->next;
224:     } while (1);
225:   }
226:   ail->array[a_srcidx] = NULL;
227:   return 0;
228: }

230: /* PetscCDRemoveAll - empty list a_idx, returning its nodes to the 'extra_nodes' free list
231:  */
232: PetscErrorCode PetscCDRemoveAll(PetscCoarsenData *ail, PetscInt a_idx)
233: {
234:   PetscCDIntNd *rem,*n1;

237:   rem               = ail->array[a_idx];
238:   ail->array[a_idx] = NULL;
239:   if (!(n1=ail->extra_nodes)) ail->extra_nodes = rem;
240:   else {
241:     while (n1->next) n1 = n1->next;
242:     n1->next = rem;
243:   }
244:   return 0;
245: }

247: /* PetscCDSizeAt
248:  */
249: PetscErrorCode PetscCDSizeAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
250: {
251:   PetscCDIntNd *n1;
252:   PetscInt     sz = 0;

255:   n1 = ail->array[a_idx];
256:   while (n1) {
257:     n1 = n1->next;
258:     sz++;
259:   }
260:   *a_sz = sz;
261:   return 0;
262: }

264: /* PetscCDEmptyAt
265:  */
266: PetscErrorCode PetscCDEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
267: {
269:   *a_e = (PetscBool)(ail->array[a_idx]==NULL);
270:   return 0;
271: }

273: /* PetscCDGetMIS - create an IS of the indices whose lists are non-empty (the selected vertices)
274:  */
275: PetscErrorCode PetscCDGetMIS(PetscCoarsenData *ail, IS *a_mis)
276: {
277:   PetscCDIntNd   *n;
278:   PetscInt       ii,kk;
279:   PetscInt       *permute;

281:   for (ii=kk=0; ii<ail->size; ii++) {
282:     n = ail->array[ii];
283:     if (n) kk++;
284:   }
285:   PetscMalloc1(kk, &permute);
286:   for (ii=kk=0; ii<ail->size; ii++) {
287:     n = ail->array[ii];
288:     if (n) permute[kk++] = ii;
289:   }
290:   ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis);
291:   return 0;
292: }

294: /* PetscCDGetMat
295:  */
296: PetscErrorCode PetscCDGetMat(const PetscCoarsenData *ail, Mat *a_mat)
297: {
298:   *a_mat = ail->mat;
299:   return 0;
300: }

302: /* PetscCDSetMat
303:  */
304: PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
305: {
306:   ail->mat = a_mat;
307:   return 0;
308: }

310: /* PetscCDGetASMBlocks - create one IS per non-empty aggregate (indices expanded by block size a_bs), plus one IS of rows with fewer than two nonzeros (boundary conditions), if any
311:  */
312: PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, Mat mat, PetscInt *a_sz, IS **a_local_is)
313: {
314:   PetscCDIntNd   *n;
315:   PetscInt       lsz,ii,kk,*idxs,jj,s,e,gid;
316:   IS             *is_loc,is_bcs;

318:   for (ii=kk=0; ii<ail->size; ii++) {
319:     if (ail->array[ii]) kk++;
320:   }
321:   /* count BCs */
322:   MatGetOwnershipRange(mat, &s, &e);
323:   for (gid=s,lsz=0; gid<e; gid++) {
324:     MatGetRow(mat,gid,&jj,NULL,NULL);
325:     if (jj<2) lsz++;
326:     MatRestoreRow(mat,gid,&jj,NULL,NULL);
327:   }
328:   if (lsz) {
329:     PetscMalloc1(a_bs*lsz, &idxs);
330:     for (gid=s,lsz=0; gid<e; gid++) {
331:       MatGetRow(mat,gid,&jj,NULL,NULL);
332:       if (jj<2) {
333:         for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
334:       }
335:       MatRestoreRow(mat,gid,&jj,NULL,NULL);
336:     }
337:     ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_bcs);
338:     *a_sz = kk + 1; /* out */
339:   } else {
340:     is_bcs=NULL;
341:     *a_sz = kk; /* out */
342:   }
343:   PetscMalloc1(*a_sz, &is_loc);

345:   for (ii=kk=0; ii<ail->size; ii++) {
346:     for (lsz=0, n=ail->array[ii]; n; lsz++, n=n->next) /* void */;
347:     if (lsz) {
348:       PetscMalloc1(a_bs*lsz, &idxs);
349:       for (lsz = 0, n=ail->array[ii]; n; n = n->next) {
350:         PetscCDIntNdGetID(n, &gid);
351:         for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
352:       }
353:       ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]);
354:     }
355:   }
356:   if (is_bcs) {
357:     is_loc[kk++] = is_bcs;
358:   }
360:   *a_local_is = is_loc; /* out */

362:   return 0;
363: }

365: /* ********************************************************************** */
 366: /* weighted edge for the greedy matching (kept in an array sorted by weight) */
367: typedef struct edge_tag {
368:   PetscReal weight;
369:   PetscInt  lid0,gid1,cpid1;
370: } Edge;
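/* Field meanings (descriptive comment, not in the original source): lid0 is the local
 * index of the owned endpoint, gid1 the global index of the other endpoint, and cpid1
 * its index in the compressed (ghost) column space of the off-diagonal block, or -1
 * when the edge is purely local. */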

372: static int gamg_hem_compare(const void *a, const void *b)
373: {
374:   PetscReal va = ((Edge*)a)->weight, vb = ((Edge*)b)->weight;
375:   return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* 0 for equal */
376: }
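/* Comparator note (not in the original source): returning 1 when va < vb makes qsort()
 * order edges by decreasing weight, so the greedy loop below always sees the heaviest
 * remaining edge first; e.g. weights {0.5, 2.0, 1.0} sort to {2.0, 1.0, 0.5}. */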

378: /* -------------------------------------------------------------------------- */
379: /*
 380:    heavyEdgeMatchAgg - parallel heavy edge matching (HEM). MATAIJ specific!

 382:    Input Parameters:
383:    . perm - permutation
384:    . a_Gmat - global matrix of graph (data not defined)

386:    Output Parameter:
387:    . a_locals_llist - array of list of local nodes rooted at local node
388: */
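/* Outline of the routine below (summary comment, not in the original source):
 *   1. copy the graph into cMat and run up to n_iter coarsening passes;
 *   2. per pass, compute each vertex's heaviest incident edge weight (locMaxEdge) and a
 *      tie-breaking neighbor rank (locMaxPE), and scatter both to ghost copies;
 *   3. sort the locally owned edges by decreasing weight and greedily match vertex pairs,
 *      deferring to the higher-rank process on ties across the partition;
 *   4. exchange the aggregate lists of ghost vertices matched (deleted) from other ranks;
 *   5. build a piecewise-constant projector P and replace cMat by the Jacobi-scaled
 *      Galerkin product P^T cMat P before the next pass.
 */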
389: static PetscErrorCode heavyEdgeMatchAgg(IS perm,Mat a_Gmat,PetscCoarsenData **a_locals_llist)
390: {
391:   PetscBool        isMPI;
392:   MPI_Comm         comm;
393:   PetscInt         sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
394:   PetscMPIInt      rank,size;
395:   const PetscInt   nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
396:   PetscInt         *lid_cprowID,*lid_gid;
397:   PetscBool        *lid_matched;
398:   Mat_SeqAIJ       *matA, *matB=NULL;
399:   Mat_MPIAIJ       *mpimat     =NULL;
400:   PetscScalar      one         =1.;
401:   PetscCoarsenData *agg_llists = NULL,*deleted_list = NULL;
402:   Mat              cMat,tMat,P;
403:   MatScalar        *ap;
404:   PetscMPIInt      tag1,tag2;

406:   PetscObjectGetComm((PetscObject)a_Gmat,&comm);
407:   MPI_Comm_rank(comm, &rank);
408:   MPI_Comm_size(comm, &size);
409:   MatGetOwnershipRange(a_Gmat, &my0, &Iend);
410:   PetscCommGetNewTag(comm, &tag1);
411:   PetscCommGetNewTag(comm, &tag2);

413:   PetscMalloc1(nloc, &lid_gid); /* explicit array needed */
414:   PetscMalloc1(nloc, &lid_cprowID);
415:   PetscMalloc1(nloc, &lid_matched);

417:   PetscCDCreate(nloc, &agg_llists);
418:   /* PetscCDSetChuckSize(agg_llists, nloc+1); */
419:   *a_locals_llist = agg_llists;
420:   PetscCDCreate(size, &deleted_list);
421:   PetscCDSetChuckSize(deleted_list, 100);
422:   /* setup 'lid_gid' for scatters and add self to all lists */
423:   for (kk=0; kk<nloc; kk++) {
424:     lid_gid[kk] = kk + my0;
425:     PetscCDAppendID(agg_llists, kk, my0+kk);
426:   }

 428:   /* make a copy of the graph; it gets modified and destroyed over the iterations */
429:   MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);
430:   PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);
431:   iter = 0;
432:   while (iter++ < n_iter) {
433:     PetscScalar    *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
434:     PetscBool      *cpcol_matched;
435:     PetscMPIInt    *cpcol_pe,proc;
436:     Vec            locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
437:     PetscInt       nEdges,n_nz_row,jj;
438:     Edge           *Edges;
439:     PetscInt       gid;
440:     const PetscInt *perm_ix, n_sub_its = 120;

442:     /* get submatrices of cMat */
443:     if (isMPI) {
444:       mpimat = (Mat_MPIAIJ*)cMat->data;
445:       matA   = (Mat_SeqAIJ*)mpimat->A->data;
446:       matB   = (Mat_SeqAIJ*)mpimat->B->data;
447:       if (!matB->compressedrow.use) {
448:         /* force construction of compressed row data structure since code below requires it */
449:         MatCheckCompressedRow(mpimat->B,matB->nonzerorowcnt,&matB->compressedrow,matB->i,mpimat->B->rmap->n,-1.0);
450:       }
451:     } else {
452:       matA = (Mat_SeqAIJ*)cMat->data;
453:     }

455:     /* set max edge on nodes */
456:     MatCreateVecs(cMat, &locMaxEdge, NULL);
457:     MatCreateVecs(cMat, &locMaxPE, NULL);

459:     /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
460:     if (mpimat) {
461:       Vec         vec;
462:       PetscScalar vval;

464:       MatCreateVecs(cMat, &vec, NULL);
465:       /* cpcol_pe */
466:       vval = (PetscScalar)(rank);
467:       for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
468:         VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
469:       }
470:       VecAssemblyBegin(vec);
471:       VecAssemblyEnd(vec);
472:       VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
473:       VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
474:       VecGetArray(mpimat->lvec, &cpcol_gid); /* get proc ID in 'cpcol_gid' */
475:       VecGetLocalSize(mpimat->lvec, &n);
476:       PetscMalloc1(n, &cpcol_pe);
477:       for (kk=0; kk<n; kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
478:       VecRestoreArray(mpimat->lvec, &cpcol_gid);

480:       /* cpcol_gid */
481:       for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
482:         vval = (PetscScalar)(gid);
483:         VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
484:       }
485:       VecAssemblyBegin(vec);
486:       VecAssemblyEnd(vec);
487:       VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
488:       VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
489:       VecDestroy(&vec);
 490:       VecGetArray(mpimat->lvec, &cpcol_gid); /* get GIDs in 'cpcol_gid' */

492:       /* cpcol_matched */
493:       VecGetLocalSize(mpimat->lvec, &n);
494:       PetscMalloc1(n, &cpcol_matched);
495:       for (kk=0; kk<n; kk++) cpcol_matched[kk] = PETSC_FALSE;
496:     }

498:     /* need an inverse map - locals */
499:     for (kk=0; kk<nloc; kk++) lid_cprowID[kk] = -1;
500:     /* set index into compressed row 'lid_cprowID' */
501:     if (matB) {
502:       for (ix=0; ix<matB->compressedrow.nrows; ix++) {
503:         lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
504:       }
505:     }

 507:     /* compute 'locMaxEdge' & 'locMaxPE', and create the list of edges, counting edges */
508:     for (nEdges=0,kk=0,gid=my0; kk<nloc; kk++,gid++) {
509:       PetscReal   max_e = 0., tt;
510:       PetscScalar vval;
511:       PetscInt    lid   = kk;
512:       PetscMPIInt max_pe=rank,pe;

514:       ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
515:       ap = matA->a + ii[lid];
516:       for (jj=0; jj<n; jj++) {
517:         PetscInt lidj = idx[jj];
518:         if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
519:         if (lidj > lid) nEdges++;
520:       }
521:       if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
522:         ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
523:         ap  = matB->a + ii[ix];
524:         idx = matB->j + ii[ix];
525:         for (jj=0; jj<n; jj++) {
526:           if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
527:           nEdges++;
528:           if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
529:         }
530:       }
531:       vval = max_e;
532:       VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);

534:       vval = (PetscScalar)max_pe;
535:       VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
536:     }
537:     VecAssemblyBegin(locMaxEdge);
538:     VecAssemblyEnd(locMaxEdge);
539:     VecAssemblyBegin(locMaxPE);
540:     VecAssemblyEnd(locMaxPE);

542:     /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
543:     if (mpimat) {
544:       VecDuplicate(mpimat->lvec, &ghostMaxEdge);
545:       VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
546:       VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
547:       VecGetArray(ghostMaxEdge, &cpcol_max_ew);

549:       VecDuplicate(mpimat->lvec, &ghostMaxPE);
550:       VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
551:       VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
552:       VecGetArray(ghostMaxPE, &cpcol_max_pe);
553:     }

 555:     /* set up the sorted list of edges */
556:     PetscMalloc1(nEdges, &Edges);
557:     ISGetIndices(perm, &perm_ix);
558:     for (nEdges=n_nz_row=kk=0; kk<nloc; kk++) {
559:       PetscInt nn, lid = perm_ix[kk];
560:       ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
561:       ap = matA->a + ii[lid];
562:       for (jj=0; jj<n; jj++) {
563:         PetscInt lidj = idx[jj];
564:         if (lidj > lid) {
565:           Edges[nEdges].lid0   = lid;
566:           Edges[nEdges].gid1   = lidj + my0;
567:           Edges[nEdges].cpid1  = -1;
568:           Edges[nEdges].weight = PetscRealPart(ap[jj]);
569:           nEdges++;
570:         }
571:       }
572:       if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
573:         ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
574:         ap  = matB->a + ii[ix];
575:         idx = matB->j + ii[ix];
576:         nn += n;
577:         for (jj=0; jj<n; jj++) {
578:           Edges[nEdges].lid0   = lid;
579:           Edges[nEdges].gid1   = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
580:           Edges[nEdges].cpid1  = idx[jj];
581:           Edges[nEdges].weight = PetscRealPart(ap[jj]);
582:           nEdges++;
583:         }
584:       }
585:       if (nn > 1) n_nz_row++;
586:       else if (iter == 1) {
 587:         /* should select this because it is technically in the MIS, but let's not */
588:         PetscCDRemoveAll(agg_llists, lid);
589:       }
590:     }
591:     ISRestoreIndices(perm,&perm_ix);

593:     qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

595:     /* projection matrix */
596:     MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, NULL, 1, NULL, &P);

598:     /* clear matched flags */
599:     for (kk=0; kk<nloc; kk++) lid_matched[kk] = PETSC_FALSE;
600:     /* process - communicate - process */
601:     for (sub_it=0; sub_it<n_sub_its; sub_it++) {
602:       PetscInt nactive_edges;

604:       VecGetArray(locMaxEdge, &lid_max_ew);
605:       for (kk=nactive_edges=0; kk<nEdges; kk++) {
606:         /* HEM */
607:         const Edge     *e   = &Edges[kk];
608:         const PetscInt lid0 =e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
609:         PetscBool      isOK = PETSC_TRUE;

611:         /* skip if either (local) vertex is done already */
612:         if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) continue;

614:         /* skip if ghost vertex is done */
615:         if (cpid1 != -1 && cpcol_matched[cpid1]) continue;

617:         nactive_edges++;
618:         /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
619:         if (PetscRealPart(lid_max_ew[lid0]) > e->weight + PETSC_SMALL) continue;

621:         if (cpid1 == -1) {
622:           if (PetscRealPart(lid_max_ew[lid1]) > e->weight + PETSC_SMALL) continue;
623:         } else {
624:           /* see if edge might get matched on other proc */
625:           PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
626:           if (g_max_e > e->weight + PETSC_SMALL) continue;
627:           else if (e->weight > g_max_e - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
 628:             /* the edge ties the ghost's max edge and a higher-rank process will deal with it */
629:             continue;
630:           }
631:         }

633:         /* check ghost for v0 */
634:         if (isOK) {
635:           PetscReal max_e,ew;
636:           if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
637:             ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
638:             ap  = matB->a + ii[ix];
639:             idx = matB->j + ii[ix];
640:             for (jj=0; jj<n && isOK; jj++) {
641:               PetscInt lidj = idx[jj];
642:               if (cpcol_matched[lidj]) continue;
643:               ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
 644:               /* check whether this edge ties the neighbor's max edge and a higher-rank process will deal with it */
645:               if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid0]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
646:                 isOK = PETSC_FALSE;
647:               }
648:             }
649:           }

651:           /* for v1 */
652:           if (cpid1 == -1 && isOK) {
653:             if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
654:               ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
655:               ap  = matB->a + ii[ix];
656:               idx = matB->j + ii[ix];
657:               for (jj=0; jj<n && isOK; jj++) {
658:                 PetscInt lidj = idx[jj];
659:                 if (cpcol_matched[lidj]) continue;
660:                 ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
 661:                 /* check whether this edge ties the neighbor's max edge and a higher-rank process will deal with it */
662:                 if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid1]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
663:                   isOK = PETSC_FALSE;
664:                 }
665:               }
666:             }
667:           }
668:         }

670:         /* do it */
671:         if (isOK) {
672:           if (cpid1 == -1) {
673:             lid_matched[lid1] = PETSC_TRUE;  /* keep track of what we've done this round */
674:             PetscCDAppendRemove(agg_llists, lid0, lid1);
675:           } else if (sub_it != n_sub_its-1) {
 676:             /* add gid1 to the list of ghosts deleted by me -- I need their children */
677:             proc = cpcol_pe[cpid1];

 679:             cpcol_matched[cpid1] = PETSC_TRUE; /* modifying the array obtained from VecGetArray -- needed??? */

681:             PetscCDAppendID(deleted_list, proc, cpid1); /* cache to send messages */
682:             PetscCDAppendID(deleted_list, proc, lid0);
683:           } else continue;

685:           lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
686:           /* set projection */
687:           MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);
688:           MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);
689:         } /* matched */
690:       } /* edge loop */
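      /* Message protocol used below (summary comment, not in the original source):
       *   tag1 request, one per owner of a ghost vertex matched from this rank:
       *     [ n, sender rank, n x (gid1, lid0) ]   -- gid1: deleted ghost, lid0: local root
       *   tag2 reply to each request:
       *     repeated (lid0, m, m x gid)            -- the m aggregate members of gid1,
       *                                               appended here to local list lid0
       */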

 692:       /* deal with ghosts deleted this sub-iteration (runs on all but the last sub-iteration) */
693:       if (size>1 && sub_it != n_sub_its-1) {
694: #define REQ_BF_SIZE 100
695:         PetscCDIntNd *pos;
696:         PetscBool    ise = PETSC_FALSE;
697:         PetscInt     nSend1, **sbuffs1,nSend2;
698:         MPI_Request  *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
699:         MPI_Status   status;

701:         /* send request */
702:         for (proc=0,nSend1=0; proc<size; proc++) {
703:           PetscCDEmptyAt(deleted_list,proc,&ise);
704:           if (!ise) nSend1++;
705:         }
706:         PetscMalloc1(nSend1, &sbuffs1);
707:         for (proc=0,nSend1=0; proc<size; proc++) {
708:           /* count ghosts */
709:           PetscCDSizeAt(deleted_list,proc,&n);
710:           if (n>0) {
711: #define CHUNCK_SIZE 100
712:             PetscInt    *sbuff,*pt;
713:             MPI_Request *request;
714:             n   /= 2;
715:             PetscMalloc1(2 + 2*n + n*CHUNCK_SIZE + 2, &sbuff);
716:             /* save requests */
717:             sbuffs1[nSend1] = sbuff;
718:             request         = (MPI_Request*)sbuff;
719:             sbuff           = pt = (PetscInt*)(request+1);
720:             *pt++           = n; *pt++ = rank;

722:             PetscCDGetHeadPos(deleted_list,proc,&pos);
723:             while (pos) {
724:               PetscInt lid0, cpid, gid;
725:               PetscCDIntNdGetID(pos, &cpid);
726:               gid   = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
727:               PetscCDGetNextPos(deleted_list,proc,&pos);
728:               PetscCDIntNdGetID(pos, &lid0);
729:               PetscCDGetNextPos(deleted_list,proc,&pos);
730:               *pt++ = gid; *pt++ = lid0;
731:             }
732:             /* send request tag1 [n, proc, n*[gid1,lid0] ] */
733:             MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, comm, request);
734:             /* post receive */
735:             request        = (MPI_Request*)pt;
736:             rreqs2[nSend1] = request; /* cache recv request */
737:             pt             = (PetscInt*)(request+1);
738:             MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, comm, request);
739:             /* clear list */
740:             PetscCDRemoveAll(deleted_list, proc);
741:             nSend1++;
742:           }
743:         }
744:         /* receive requests, send response, clear lists */
745:         kk     = nactive_edges;
 746:         MPIU_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,comm); /* not correct synchronization and global */
747:         nSend2 = 0;
748:         while (1) {
749: #define BF_SZ 10000
750:           PetscMPIInt flag,count;
751:           PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
752:           MPI_Request *request;

754:           MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &status);
755:           if (!flag) break;
756:           MPI_Get_count(&status, MPIU_INT, &count);
758:           proc = status.MPI_SOURCE;
759:           /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
760:           MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, comm, &status);
761:           /* count sends */
762:           pt = rbuff; count3 = count2 = 0;
763:           n  = *pt++; kk = *pt++;
764:           while (n--) {
765:             PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;
767:             lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
768:             PetscCDSizeAt(agg_llists, lid1, &kk);
769:             count2           += kk + 2;
770:             count3++; /* number of verts requested (n) */
771:           }
773:           /* send tag2 *[lid0, n, n*[gid] ] */
774:           PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);
775:           request          = (MPI_Request*)sbuff;
776:           sreqs2[nSend2++] = request; /* cache request */
778:           pt2 = sbuff = (PetscInt*)(request+1);
779:           pt  = rbuff;
780:           n   = *pt++; kk = *pt++;
781:           while (n--) {
782:             /* read [n, proc, n*[gid1,lid0] */
783:             PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;
784:             /* write [lid0, n, n*[gid] ] */
785:             *pt2++ = lid0;
786:             pt3    = pt2++; /* save pointer for later */
787:             PetscCDGetHeadPos(agg_llists,lid1,&pos);
788:             while (pos) {
789:               PetscInt gid;
790:               PetscCDIntNdGetID(pos, &gid);
791:               PetscCDGetNextPos(agg_llists,lid1,&pos);
792:               *pt2++ = gid;
793:             }
794:             *pt3 = (pt2-pt3)-1;
795:             /* clear list */
796:             PetscCDRemoveAll(agg_llists, lid1);
797:           }
798:           /* send requested data tag2 *[lid0, n, n*[gid1] ] */
799:           MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, comm, request);
800:         }

802:         /* receive tag2 *[lid0, n, n*[gid] ] */
803:         for (kk=0; kk<nSend1; kk++) {
804:           PetscMPIInt count;
805:           MPI_Request *request;
806:           PetscInt    *pt, *pt2;

808:           request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
809:           MPI_Wait(request, &status);
810:           MPI_Get_count(&status, MPIU_INT, &count);
811:           pt      = pt2 = (PetscInt*)(request+1);
812:           while (pt-pt2 < count) {
813:             PetscInt lid0 = *pt++, n = *pt++;
814:             while (n--) {
815:               PetscInt gid1 = *pt++;
816:               PetscCDAppendID(agg_llists, lid0, gid1);
817:             }
818:           }
819:         }

821:         /* wait for tag1 isends */
822:         while (nSend1--) {
823:           MPI_Request *request;
824:           request = (MPI_Request*)sbuffs1[nSend1];
825:           MPI_Wait(request, &status);
826:           PetscFree(request);
827:         }
828:         PetscFree(sbuffs1);

830:         /* wait for tag2 isends */
831:         while (nSend2--) {
832:           MPI_Request *request = sreqs2[nSend2];
833:           MPI_Wait(request, &status);
834:           PetscFree(request);
835:         }

837:         VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
838:         VecRestoreArray(ghostMaxPE, &cpcol_max_pe);

840:         /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
841:         for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
842:           PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
843:           VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
844:         }
845:         VecAssemblyBegin(locMaxPE);
846:         VecAssemblyEnd(locMaxPE);
847:         VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
848:         VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
849:         VecGetArray(ghostMaxEdge, &cpcol_max_ew);
850:         VecGetLocalSize(mpimat->lvec, &n);
851:         for (kk=0; kk<n; kk++) {
852:           cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
853:         }
854:         VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
855:       } /* size > 1 */

857:       /* compute 'locMaxEdge' */
858:       VecRestoreArray(locMaxEdge, &lid_max_ew);
859:       for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
860:         PetscReal max_e = 0.,tt;
861:         PetscScalar vval;
862:         PetscInt lid = kk;

864:         if (lid_matched[lid]) vval = 0.;
865:         else {
866:           ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
867:           ap = matA->a + ii[lid];
868:           for (jj=0; jj<n; jj++) {
869:             PetscInt lidj = idx[jj];
870:             if (lid_matched[lidj]) continue; /* this is new - can change local max */
871:             if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
872:           }
873:           if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
874:             ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
875:             ap  = matB->a + ii[ix];
876:             idx = matB->j + ii[ix];
877:             for (jj=0; jj<n; jj++) {
878:               PetscInt lidj = idx[jj];
879:               if (cpcol_matched[lidj]) continue;
880:               if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
881:             }
882:           }
883:         }
884:         vval = (PetscScalar)max_e;
885:         VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
886:       }
887:       VecAssemblyBegin(locMaxEdge);
888:       VecAssemblyEnd(locMaxEdge);

890:       if (size>1 && sub_it != n_sub_its-1) {
891:         /* compute 'cpcol_max_ew' */
892:         VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
893:         VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
894:         VecGetArray(ghostMaxEdge, &cpcol_max_ew);
895:         VecGetArray(locMaxEdge, &lid_max_ew);

897:         /* compute 'cpcol_max_pe' */
898:         for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
899:           PetscInt lid = kk;
900:           PetscReal ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
 901:           PetscScalar vval = (PetscScalar)rank; /* default so the value is defined when this vertex has no ghost neighbors */
902:           PetscMPIInt max_pe=rank,pe;

904:           if (lid_matched[lid]) vval = (PetscScalar)rank;
905:           else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
906:             ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
907:             ap  = matB->a + ii[ix];
908:             idx = matB->j + ii[ix];
909:             for (jj=0; jj<n; jj++) {
910:               PetscInt lidj = idx[jj];
911:               if (cpcol_matched[lidj]) continue;
912:               ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
913:               /* get max pe that has a max_e == to this edge w */
914:               if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - PETSC_SMALL && ew > v0_max_e - PETSC_SMALL) max_pe = pe;
915:             }
916:             vval = (PetscScalar)max_pe;
917:           }
918:           VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
919:         }
920:         VecAssemblyBegin(locMaxPE);
921:         VecAssemblyEnd(locMaxPE);

923:         VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
924:         VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
925:         VecGetArray(ghostMaxPE, &cpcol_max_pe);
926:         VecRestoreArray(locMaxEdge, &lid_max_ew);
927:       } /* deal with deleted ghost */
928:       PetscInfo(a_Gmat,"\t %" PetscInt_FMT ".%" PetscInt_FMT ": %" PetscInt_FMT " active edges.\n",iter,sub_it,nactive_edges);
929:       if (!nactive_edges) break;
930:     } /* sub_it loop */

932:     /* clean up iteration */
933:     PetscFree(Edges);
934:     if (mpimat) {
935:       VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
936:       VecDestroy(&ghostMaxEdge);
937:       VecRestoreArray(ghostMaxPE, &cpcol_max_pe);
938:       VecDestroy(&ghostMaxPE);
939:       PetscFree(cpcol_pe);
940:       PetscFree(cpcol_matched);
941:     }

943:     VecDestroy(&locMaxEdge);
944:     VecDestroy(&locMaxPE);

946:     if (mpimat) {
947:       VecRestoreArray(mpimat->lvec, &cpcol_gid);
948:     }

950:     /* create next G if needed */
 951:     if (iter == n_iter) { /* hard-wired test - need to look at fully surrounded nodes or something */
952:       MatDestroy(&P);
953:       MatDestroy(&cMat);
954:       break;
955:     } else {
956:       Vec diag;
957:       /* add identity for unmatched vertices so they stay alive */
958:       for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
959:         if (!lid_matched[kk]) {
960:           gid  = kk+my0;
961:           MatGetRow(cMat,gid,&n,NULL,NULL);
962:           if (n>1) {
963:             MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);
964:           }
965:           MatRestoreRow(cMat,gid,&n,NULL,NULL);
966:         }
967:       }
968:       MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);
969:       MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);

 971:       /* project to make the new graph with collapsed edges */
972:       MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);
973:       MatDestroy(&P);
974:       MatDestroy(&cMat);
975:       cMat = tMat;
976:       MatCreateVecs(cMat, &diag, NULL);
977:       MatGetDiagonal(cMat, diag); /* effectively PCJACOBI */
978:       VecReciprocal(diag);
979:       VecSqrtAbs(diag);
980:       MatDiagonalScale(cMat, diag, diag);
981:       VecDestroy(&diag);
982:     }
983:   } /* coarsen iterator */
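  /* Note (not in the original source): each pass above replaces the working graph by
   *   cMat <- D^{-1/2} (P^T cMat P) D^{-1/2},  with D = diag(P^T cMat P),
   * i.e. the Galerkin coarse graph with symmetric Jacobi scaling, which keeps edge
   * weights comparable from one pass to the next. */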

 985:   /* make an auxiliary ("fake") matrix recording, for each local aggregate, its off-processor members */
986:   if (size>1) {
987:     Mat          mat;
988:     PetscCDIntNd *pos;
989:     PetscInt     gid, NN, MM, jj = 0, mxsz = 0;

991:     for (kk=0; kk<nloc; kk++) {
992:       PetscCDSizeAt(agg_llists, kk, &jj);
993:       if (jj > mxsz) mxsz = jj;
994:     }
995:     MatGetSize(a_Gmat, &MM, &NN);
996:     if (mxsz > MM-nloc) mxsz = MM-nloc;

998:     MatCreateAIJ(comm, nloc, nloc,PETSC_DETERMINE, PETSC_DETERMINE,0, NULL, mxsz, NULL, &mat);

1000:     for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
1001:       /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)) { */
1002:       PetscCDGetHeadPos(agg_llists,kk,&pos);
1003:       while (pos) {
1004:         PetscInt gid1;
1005:         PetscCDIntNdGetID(pos, &gid1);
1006:         PetscCDGetNextPos(agg_llists,kk,&pos);

1008:         if (gid1 < my0 || gid1 >= my0+nloc) {
1009:           MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);
1010:         }
1011:       }
1012:     }
1013:     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1014:     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1015:     PetscCDSetMat(agg_llists, mat);
1016:   }

1018:   PetscFree(lid_cprowID);
1019:   PetscFree(lid_gid);
1020:   PetscFree(lid_matched);
1021:   PetscCDDestroy(deleted_list);
1022:   return 0;
1023: }

1025: /*
1026:    HEM coarsen, simple greedy.
1027: */
1028: static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1029: {
1030:   Mat            mat = coarse->graph;

1032:   if (!coarse->perm) {
1033:     IS       perm;
1034:     PetscInt n,m;

1036:     MatGetLocalSize(mat, &m, &n);
1037:     ISCreateStride(PetscObjectComm((PetscObject)mat), m, 0, 1, &perm);
1038:     heavyEdgeMatchAgg(perm, mat, &coarse->agg_lists);
1039:     ISDestroy(&perm);
1040:   } else {
1041:     heavyEdgeMatchAgg(coarse->perm, mat, &coarse->agg_lists);
1042:   }
1043:   return 0;
1044: }

1046: static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse,PetscViewer viewer)
1047: {
1048:   PetscMPIInt    rank;
1049:   PetscBool      iascii;

1051:   MPI_Comm_rank(PetscObjectComm((PetscObject)coarse),&rank);
1052:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1053:   if (iascii) {
1054:     PetscViewerASCIIPushSynchronized(viewer);
1055:     PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] HEM aggregator\n",rank);
1056:     PetscViewerFlush(viewer);
1057:     PetscViewerASCIIPopSynchronized(viewer);
1058:   }
1059:   return 0;
1060: }

1062: /*MC
1063:    MATCOARSENHEM - A coarsener that uses heavy edge matching (HEM), a simple greedy coarsener

1065:    Level: beginner

1067: .seealso: MatCoarsenSetType(), MatCoarsenType, MatCoarsenCreate()

1069: M*/
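/* A minimal selection sketch (illustrative only, assuming the standard MatCoarsen API;
 * 'Gmat' is a placeholder for the user's adjacency/graph matrix):
 *
 *   MatCoarsen crs;
 *   MatCoarsenCreate(PetscObjectComm((PetscObject)Gmat), &crs);
 *   MatCoarsenSetType(crs, MATCOARSENHEM);   -- or -mat_coarsen_type hem via MatCoarsenSetFromOptions()
 *   MatCoarsenSetAdjacency(crs, Gmat);
 *   MatCoarsenApply(crs);                    -- fills the aggregate lists via MatCoarsenApply_HEM() above
 *   MatCoarsenDestroy(&crs);
 */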

1071: PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1072: {
1073:   coarse->ops->apply   = MatCoarsenApply_HEM;
1074:   coarse->ops->view    = MatCoarsenView_HEM;
1075:   return 0;
1076: }