dune-istl  2.2.0
repartition.hh
1 #ifndef DUNE_REPARTITION_HH
2 #define DUNE_REPARTITION_HH
3 
4 #include <cassert>
5 #include <map>
6 #include <utility>
7 
8 #if HAVE_PARMETIS
9 #include <parmetis.h>
10 #endif
11 
12 #include <dune/common/timer.hh>
13 #include <dune/common/enumset.hh>
14 #include <dune/common/mpitraits.hh>
15 #include <dune/common/stdstreams.hh>
16 #include <dune/common/parallel/communicator.hh>
17 #include <dune/common/parallel/indexset.hh>
18 #include <dune/common/parallel/indicessyncer.hh>
19 #include <dune/common/parallel/remoteindices.hh>
20 
21 #include <dune/istl/owneroverlapcopy.hh>
22 #include <dune/istl/paamg/graph.hh>
23 
32 namespace Dune
33 {
34 #if HAVE_MPI
35 
48  template<class G, class T1, class T2>
49  void fillIndexSetHoles(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm)
50  {
51  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
52  typedef typename IndexSet::LocalIndex::Attribute Attribute;
53 
54  IndexSet& indexSet = oocomm.indexSet();
55  const typename Dune::OwnerOverlapCopyCommunication<T1,T2>::GlobalLookupIndexSet& lookup = oocomm.globalLookup();
56 
57  // The type of the const vertex iterator.
58  typedef typename G::ConstVertexIterator VertexIterator;
59 
60 
61  std::size_t sum=0, needed = graph.noVertices()-indexSet.size();
62  std::vector<std::size_t> neededall(oocomm.communicator().size(), 0);
63 
64  MPI_Allgather(&needed, 1, MPITraits<std::size_t>::getType() , &(neededall[0]), 1, MPITraits<std::size_t>::getType(), oocomm.communicator());
65  for(int i=0; i<oocomm.communicator().size(); ++i)
66  sum=sum+neededall[i]; // TODO: make this more generic
67 
68  if(sum==0)
69  // Nothing to do
70  return;
71 
72  //Compute Maximum Global Index
73  T1 maxgi=0;
74  typedef typename IndexSet::const_iterator Iterator;
75  Iterator end;
76  end = indexSet.end();
77  for(Iterator it = indexSet.begin(); it != end; ++it)
78  maxgi=std::max(maxgi,it->global());
79 
80  // Process p creates global indices consecutively,
81  // starting at maxgi + 1 + \sum_{i=0}^{p-1} neededall[i].
82  // All created indices are owned by the creating process.
83  maxgi=oocomm.communicator().max(maxgi);
84  ++maxgi;// Start with the next free index.
85 
86  for(int i=0; i<oocomm.communicator().rank(); ++i)
87  maxgi=maxgi+neededall[i]; // TODO: make this more generic
88 
89  // Store the global index information for repairing the remote index information
90  std::map<int,SLList<std::pair<T1,Attribute> > > globalIndices;
91  storeGlobalIndicesOfRemoteIndices(globalIndices, oocomm.remoteIndices(), indexSet);
92  indexSet.beginResize();
93 
94  for(VertexIterator vertex = graph.begin(), vend=graph.end(); vertex != vend; ++vertex){
95  const typename IndexSet::IndexPair* pair=lookup.pair(*vertex);
96  if(pair==0){
97  // No index yet, add new one
98  indexSet.add(maxgi, typename IndexSet::LocalIndex(*vertex, OwnerOverlapCopyAttributeSet::owner, false));
99  ++maxgi;
100  }
101  }
102 
103  indexSet.endResize();
104 
105  repairLocalIndexPointers(globalIndices, oocomm.remoteIndices(), indexSet);
106 
107  oocomm.freeGlobalLookup();
108  oocomm.buildGlobalLookup();
109 #ifdef DEBUG_REPART
110  std::cout<<"Holes are filled!"<<std::endl;
111  std::cout<<oocomm.communicator().rank()<<": "<<oocomm.indexSet()<<std::endl;
112 #endif
113  }
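// Editorial sketch (not part of the original file) of the offset scheme used
// above: every rank announces how many new global indices it needs, and rank p
// then starts numbering at maxgi + 1 + the sum of the requests of all lower
// ranks. The names `needed` and `comm` below are placeholders.
//
//   std::size_t needed = /* number of new indices wanted locally */;
//   std::vector<std::size_t> all(comm.size());
//   MPI_Allgather(&needed, 1, MPITraits<std::size_t>::getType(),
//                 &(all[0]), 1, MPITraits<std::size_t>::getType(), comm);
//   std::size_t offset = 0;
//   for(int i = 0; i < comm.rank(); ++i)
//     offset += all[i];               // indices claimed by lower ranks
//   // first new global index on this rank: maxgi + 1 + offset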
114 
115  namespace
116  {
117 
118  class ParmetisDuneIndexMap
119  {
120  public:
121  template<class Graph, class OOComm>
122  ParmetisDuneIndexMap(const Graph& graph, const OOComm& com);
123  int toParmetis(int i) const
124  {
125  return duneToParmetis[i];
126  }
127  int toLocalParmetis(int i) const
128  {
129  return duneToParmetis[i]-base_;
130  }
131  int operator[](int i) const
132  {
133  return duneToParmetis[i];
134  }
135  int toDune(int i) const
136  {
137  return parmetisToDune[i];
138  }
139  std::vector<int>::size_type numOfOwnVtx() const
140  {
141  return parmetisToDune.size();
142  }
143  int* vtxDist()
144  {
145  return &vtxDist_[0];
146  }
148  private:
149  int base_;
150  std::vector<int> duneToParmetis;
151  std::vector<int> parmetisToDune;
152  // range of vertices for processor i: vtxdist[i] to vtxdist[i+1] (parmetis global)
153  std::vector<int> vtxDist_;
154  };
155 
156  template<class G, class OOComm>
157  ParmetisDuneIndexMap::ParmetisDuneIndexMap(const G& graph, const OOComm& oocomm)
158  : duneToParmetis(graph.noVertices(), -1), vtxDist_(oocomm.communicator().size()+1)
159  {
160  int npes=oocomm.communicator().size(), mype=oocomm.communicator().rank();
161 
162  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
163  typedef typename OOComm::OwnerSet OwnerSet;
164 
165  int numOfOwnVtx=0;
166  Iterator end = oocomm.indexSet().end();
167  for(Iterator index = oocomm.indexSet().begin(); index != end; ++index) {
168  if (OwnerSet::contains(index->local().attribute())) {
169  numOfOwnVtx++;
170  }
171  }
172  parmetisToDune.resize(numOfOwnVtx);
173  std::vector<int> globalNumOfVtx(npes);
174  // make this number available to all processes
175  MPI_Allgather(&numOfOwnVtx, 1, MPI_INT, &(globalNumOfVtx[0]), 1, MPI_INT, oocomm.communicator());
176 
177  int base=0;
178  vtxDist_[0] = 0;
179  for(int i=0; i<npes; i++) {
180  if (i<mype) {
181  base += globalNumOfVtx[i];
182  }
183  vtxDist_[i+1] = vtxDist_[i] + globalNumOfVtx[i];
184  }
186  base_=base;
187 
188  // The type of the const vertex iterator.
189  typedef typename G::ConstVertexIterator VertexIterator;
190 #ifdef DEBUG_REPART
191  std::cout << oocomm.communicator().rank()<<" vtxDist: ";
192  for(int i=0; i<= npes;++i)
193  std::cout << vtxDist_[i]<<" ";
194  std::cout<<std::endl;
195 #endif
196 
197  // Traverse the graph and assign a new consecutive number/index,
198  // starting at "base", to all owner vertices.
199  // The new index is used as the ParMETIS global index and is
200  // stored in the vector "duneToParmetis".
201  VertexIterator vend = graph.end();
202  for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex) {
203  const typename OOComm::ParallelIndexSet::IndexPair* index=oocomm.globalLookup().pair(*vertex);
204  assert(index);
205  if (OwnerSet::contains(index->local().attribute())) {
206  // assign and count the index
207  parmetisToDune[base-base_]=index->local();
208  duneToParmetis[index->local()] = base++;
209  }
210  }
211 
212  // At this point, every process knows the ParMETIS global index
213  of its owner vertices. The next step is to get the
214  // ParMETIS global index of the overlap vertices from the
215  // associated processes. To do this, the Dune::Interface class
216  // is used.
217 #ifdef DEBUG_REPART
218  std::cout <<oocomm.communicator().rank()<<": before ";
219  for(std::size_t i=0; i<duneToParmetis.size(); ++i)
220  std::cout<<duneToParmetis[i]<<" ";
221  std::cout<<std::endl;
222 #endif
223  oocomm.copyOwnerToAll(duneToParmetis,duneToParmetis);
224 #ifdef DEBUG_REPART
225  std::cout <<oocomm.communicator().rank()<<": after ";
226  for(std::size_t i=0; i<duneToParmetis.size(); ++i)
227  std::cout<<duneToParmetis[i]<<" ";
228  std::cout<<std::endl;
229 #endif
230  }
231  }
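// Editorial note: vtxDist_ follows the usual ParMETIS vtxdist convention. With
// three processes owning 4, 2 and 3 vertices respectively, the constructor
// above produces vtxDist_ = {0, 4, 6, 9}: process i owns the ParMETIS global
// vertices [vtxDist_[i], vtxDist_[i+1]), base_ equals vtxDist_[rank], and
// vtxDist_[rank+1]-vtxDist_[rank] == numOfOwnVtx() holds on every rank.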
232 
233  class RedistributeInterface
234  : public Interface
235  {
236  void setCommunicator(MPI_Comm comm)
237  {
238  communicator_=comm;
239  }
240  template<class Flags,class IS>
241  void buildSendInterface(const std::vector<int>& toPart, const IS& idxset)
242  {
243  typedef std::vector<int>::const_iterator Iter;
244  std::map<int,int> sizes;
245 
246  typedef typename IS::const_iterator IIter;
247  for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
248  if(Flags::contains(i->local().attribute()))
249  ++sizes[toPart[i->local()]];
250 
251  // Allocate the necessary space
252  typedef std::map<int,int>::const_iterator MIter;
253  for(MIter i=sizes.begin(), end=sizes.end(); i!=end; ++i)
254  interfaces()[i->first].first.reserve(i->second);
255 
256  //Insert the interface information
257  typedef typename IS::const_iterator IIter;
258  for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i)
259  if(Flags::contains(i->local().attribute()))
260  interfaces()[toPart[i->local()]].first.add(i->local());
261  }
262 
263  void reserveSpaceForReceiveInterface(int proc, int size)
264  {
265  interfaces()[proc].second.reserve(size);
266  }
267  void addReceiveIndex(int proc, std::size_t idx)
268  {
269  interfaces()[proc].second.add(idx);
270  }
271  template<typename TG>
272  void buildReceiveInterface(std::vector<std::pair<TG,int> >& indices)
273  {
274  typedef typename std::vector<std::pair<TG,int> >::const_iterator VIter;
275  std::size_t i=0;
276  for(VIter idx=indices.begin(); idx!= indices.end(); ++idx){
277  interfaces()[idx->second].second.add(i++);
278  }
279  }
280 
281  ~RedistributeInterface()
282  {
283  }
284 
285  };
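// Usage sketch (assumption, not from the original file): once a partition
// vector has been computed (toPart[local] = destination rank for every local
// index), the send side of the redistribution interface is built from the
// owner entries of the current index set, e.g.
//
//   RedistributeInterface redist;
//   redist.buildSendInterface<OwnerSet>(toPart, oocomm.indexSet());
//   redist.setCommunicator(oocomm.communicator());
//
// The receive side is filled later, either entry by entry via
// addReceiveIndex() or in one go via buildReceiveInterface().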
286 
287  namespace
288  {
298  template<class GI>
299  void createSendBuf(std::vector<GI>& ownerVec, std::set<GI>& overlapVec, std::set<int>& neighbors, char *sendBuf, int buffersize, MPI_Comm comm) {
300  // Pack owner vertices
301  std::size_t s=ownerVec.size();
302  int pos=0;
303  if(s==0)
304  ownerVec.resize(1); // otherwise would read beyond the memory bound
305  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
306  MPI_Pack(&(ownerVec[0]), s, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
307  s = overlapVec.size();
308  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
309  typedef typename std::set<GI>::iterator Iter;
310  for(Iter i=overlapVec.begin(), end= overlapVec.end(); i != end; ++i)
311  MPI_Pack(const_cast<GI*>(&(*i)), 1, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
312 
313  s=neighbors.size();
314  MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
315  typedef typename std::set<int>::iterator IIter;
316 
317  for(IIter i=neighbors.begin(), end= neighbors.end(); i != end; ++i)
318  MPI_Pack(const_cast<int*>(&(*i)), 1, MPI_INT, sendBuf, buffersize, &pos, comm);
319  }
328  template<class GI>
329  void saveRecvBuf(char *recvBuf, int bufferSize, std::vector<std::pair<GI,int> >& ownerVec,
330  std::set<GI>& overlapVec, std::set<int>& neighbors, RedistributeInterface& inf, int from, MPI_Comm comm) {
331  std::size_t size;
332  int pos=0;
333  // unpack owner vertices
334  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
335  inf.reserveSpaceForReceiveInterface(from, size);
336  ownerVec.reserve(ownerVec.size()+size);
337  for(;size!=0;--size){
338  GI gi;
339  MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
340  ownerVec.push_back(std::make_pair(gi,from));
341  }
342  // unpack overlap vertices
343  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
344  typename std::set<GI>::iterator ipos = overlapVec.begin();
345  Dune::dverb << "unpacking "<<size<<" overlap"<<std::endl;
346  for(;size!=0;--size){
347  GI gi;
348  MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
349  ipos=overlapVec.insert(ipos, gi);
350  }
351  //unpack neighbors
352  MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
353  Dune::dverb << "unpacking "<<size<<" neighbors"<<std::endl;
354  typename std::set<int>::iterator npos = neighbors.begin();
355  for(;size!=0;--size){
356  int n;
357  MPI_Unpack(recvBuf, bufferSize, &pos, &n, 1, MPI_INT, comm);
358  npos=neighbors.insert(npos, n);
359  }
360  }
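// Editorial sketch of the message layout shared by createSendBuf and
// saveRecvBuf: [#owners][owner ids][#overlap][overlap ids][#neighbors][ranks],
// packed and unpacked in exactly the same order with matching MPI datatypes.
// `buf`, `bufsize`, `data` and `comm` below are placeholders.
//
//   int pos = 0;
//   std::size_t n = data.size();
//   MPI_Pack(&n, 1, MPITraits<std::size_t>::getType(), buf, bufsize, &pos, comm);
//   MPI_Pack(&(data[0]), n, MPITraits<GI>::getType(), buf, bufsize, &pos, comm);
//   // the receiver mirrors this with MPI_Unpack in the same order:
//   int rpos = 0;
//   MPI_Unpack(buf, bufsize, &rpos, &n, 1, MPITraits<std::size_t>::getType(), comm);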
361 
375  template<typename T>
376  void getDomain(const MPI_Comm& comm, T *part, int numOfOwnVtx, int nparts, int *myDomain, int domainMapping[]) {
377  int npes, mype;
378  MPI_Comm_size(comm, &npes);
379  MPI_Comm_rank(comm, &mype);
380  MPI_Status status;
381 
382  *myDomain = -1;
383  int i=0;
384  int j=0;
385 
386  int domain[nparts];
387  int assigned[npes];
388  // init
389  for (i=0; i<nparts; i++) {
390  domainMapping[i] = -1;
391  domain[i] = 0;
392  }
393  for (i=0; i<npes; i++) {
394  assigned[i] = 0;
395  }
396  // count the occurrences of each domain
397  for (i=0; i<numOfOwnVtx; i++) {
398  domain[part[i]]++;
399  }
400 
401  int *domainMatrix = new int[npes * nparts];
402  // init
403  for(i=0; i<npes*nparts; i++) {
404  domainMatrix[i]=-1;
405  }
406 
407  // init buffer with the own domain
408  int *buf = new int[nparts];
409  for (i=0; i<nparts; i++) {
410  buf[i] = domain[i];
411  domainMatrix[mype*nparts+i] = domain[i];
412  }
413  int pe=0;
414  int src = (mype-1+npes)%npes;
415  int dest = (mype+1)%npes;
416  // ring communication, we need n-1 communications for n processors
417  for (i=0; i<npes-1; i++) {
418  MPI_Sendrecv_replace(buf, nparts, MPI_INT, dest, 0, src, 0, comm, &status);
420  // pe is the rank whose buffer we currently hold
420  pe = ((mype-1-i)+npes)%npes;
421  for(j=0; j<nparts; j++) {
422  // save the values to the domain matrix
423  domainMatrix[pe*nparts+j] = buf[j];
424  }
425  }
426  delete[] buf;
427 
428  // Start the domain calculation.
429  // The process which contains the maximum number of vertices of a
430  // particular domain (and is not yet assigned) is given that domain.
431  int maxOccurance = 0;
432  pe = -1;
433  for(i=0; i<nparts; i++) {
434  for(j=0; j<npes; j++) {
435  // process has no domain assigned
436  if (assigned[j]==0) {
437  if (maxOccurance < domainMatrix[j*nparts+i]) {
438  maxOccurance = domainMatrix[j*nparts+i];
439  pe = j;
440  }
441  }
442 
443  }
444  if (pe!=-1) {
445  // process got a domain, ...
446  domainMapping[i] = pe;
447  // ...mark as assigned
448  assigned[pe] = 1;
449  if (pe==mype) {
450  *myDomain = i;
451  }
452  pe = -1;
453  }
454  maxOccurance = 0;
455  }
456 
457  delete[] domainMatrix;
458 
459  }
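// Worked example (editorial): with 2 processes and 2 domains, a domain matrix
//
//                 domain 0   domain 1
//   process 0        5          1
//   process 1        2          4
//
// yields domainMapping = {0 -> process 0, 1 -> process 1}: for every domain the
// not yet assigned process holding most of its vertices is chosen, which keeps
// the data movement during the subsequent redistribution small.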
460 
461  struct SortFirst
462  {
463  template<class T>
464  bool operator()(const T& t1, const T& t2) const
465  {
466  return t1<t2;
467  }
468  };
469 
470 
481  template<class GI>
482  void mergeVec(std::vector<std::pair<GI, int> >& ownerVec, std::set<GI>& overlapSet) {
483 
484  typedef typename std::vector<std::pair<GI,int> >::const_iterator VIter;
485 #ifdef DEBUG_REPART
486  // Safety check for duplicates.
487  if(ownerVec.size()>0)
488  {
489  VIter old=ownerVec.begin();
490  for(VIter i=old+1, end=ownerVec.end(); i != end; old=i++)
491  {
492  if(i->first==old->first)
493  {
494  std::cerr<<"Value at index "<<old-ownerVec.begin()<<" is the same as at index "
495  <<i-ownerVec.begin()<<" ["<<old->first<<","<<old->second<<"]==["
496  <<i->first<<","<<i->second<<"]"<<std::endl;
497  throw "Huch!";
498  }
499  }
500  }
501 
502 #endif
503 
504  typedef typename std::set<GI>::iterator SIter;
505  VIter v=ownerVec.begin(), vend=ownerVec.end();
506  for(SIter s=overlapSet.begin(), send=overlapSet.end(); s!=send;)
507  {
508  while(v!=vend && v->first<*s) ++v;
509  if(v!=vend && v->first==*s){
510  // Move to the next element before erasing
511  // thus s stays valid!
512  SIter tmp=s;
513  ++s;
514  overlapSet.erase(tmp);
515  }else
516  ++s;
517  }
518  }
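// Worked example (editorial): if ownerVec contains the global indices
// {1, 3, 5} (already sorted) and overlapSet is {2, 3, 6}, the loop above
// erases 3 from overlapSet because it is now owned locally, leaving {2, 6}.
// Both containers are traversed once, so the merge costs O(|owner|+|overlap|).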
519 
520 
534  template<class OwnerSet, class Graph, class IS, class GI>
535  void getNeighbor(const Graph& g, std::vector<int>& part,
536  typename Graph::VertexDescriptor vtx, const IS& indexSet,
537  int toPe, std::set<GI>& neighbor, std::set<int>& neighborProcs) {
538  typedef typename Graph::ConstEdgeIterator Iter;
539  for(Iter edge=g.beginEdges(vtx), end=g.endEdges(vtx); edge!=end; ++edge)
540  {
541  const typename IS::IndexPair* pindex = indexSet.pair(edge.target());
542  assert(pindex);
543  if(part[pindex->local()]!=toPe || !OwnerSet::contains(pindex->local().attribute()))
544  {
545  // is sent to another process and therefore becomes overlap
546  neighbor.insert(pindex->global());
547  neighborProcs.insert(part[pindex->local()]);
548  }
549  }
550  }
551 
552  template<class T, class I>
553  void my_push_back(std::vector<T>& ownerVec, const I& index, int proc)
554  {
555  ownerVec.push_back(index);
556  }
557 
558  template<class T, class I>
559  void my_push_back(std::vector<std::pair<T,int> >& ownerVec, const I& index, int proc)
560  {
561  ownerVec.push_back(std::make_pair(index,proc));
562  }
563  template<class T>
564  void reserve(std::vector<T>&, RedistributeInterface&, int)
565  {
566  }
567  template<class T>
568  void reserve(std::vector<std::pair<T,int> >& ownerVec, RedistributeInterface& redist, int proc)
569  {
570  redist.reserveSpaceForReceiveInterface(proc, ownerVec.size());
571  }
572 
573 
591  template<class OwnerSet, class G, class IS, class T, class GI>
592  void getOwnerOverlapVec(const G& graph, std::vector<int>& part, IS& indexSet,
593  int myPe, int toPe, std::vector<T>& ownerVec, std::set<GI>& overlapSet,
594  RedistributeInterface& redist, std::set<int>& neighborProcs) {
595 
596  //typedef typename IndexSet::const_iterator Iterator;
597  typedef typename IS::const_iterator Iterator;
598  for(Iterator index = indexSet.begin(); index != indexSet.end(); ++index) {
599  // Only process owner vertices; the others are not in the ParMETIS graph.
600  if(OwnerSet::contains(index->local().attribute()))
601  {
602  if(part[index->local()]==toPe)
603  {
604  getNeighbor<OwnerSet>(graph, part, index->local(), indexSet,
605  toPe, overlapSet, neighborProcs);
606  my_push_back(ownerVec, index->global(), toPe);
607  }
608  }
609  }
610  reserve(ownerVec, redist, toPe);
611 
612  }
613 
614 
621  template<class F, class IS>
622  inline bool isOwner(IS& indexSet, int index) {
623 
624  const typename IS::IndexPair* pindex=indexSet.pair(index);
625 
626  assert(pindex);
627  return F::contains(pindex->local().attribute());
628  }
629 
630 
631  class BaseEdgeFunctor
632  {
633  public:
634  BaseEdgeFunctor(int* adj,const ParmetisDuneIndexMap& data)
635  :i_(), adj_(adj), data_(data)
636  {}
637 
638  template<class T>
639  void operator()(const T& edge)
640  {
641  // Get the edge weight
642  // const Weight& weight=edge.weight();
643  adj_[i_] = data_.toParmetis(edge.target());
644  i_++;
645  }
646  std::size_t index()
647  {
648  return i_;
649  }
650 
651  private:
652  std::size_t i_;
653  int* adj_;
654  const ParmetisDuneIndexMap& data_;
655  };
656 
657  template<typename G>
658  struct EdgeFunctor
659  : public BaseEdgeFunctor
660  {
661  EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
662  : BaseEdgeFunctor(adj, data)
663  {}
664 
665  int* getWeights()
666  {
667  return NULL;
668  }
669  void free(){}
670  };
671 
672  template<class G, class V, class E, class VM, class EM>
673  class EdgeFunctor<Dune::Amg::PropertiesGraph<G,V,E,VM,EM> >
674  : public BaseEdgeFunctor
675  {
676  public:
677  EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s)
678  :BaseEdgeFunctor(adj, data)
679  {
680  weight_=new int[s];
681  }
682 
683  template<class T>
684  void operator()(const T& edge)
685  {
686  weight_[index()]=edge.properties().depends()?3:1;
687  BaseEdgeFunctor::operator()(edge);
688  }
689  int* getWeights()
690  {
691  return weight_;
692  }
693  void free(){
694  if(weight_!=0){
695  delete[] weight_; // allocated with new[]
696  weight_=0;
697  }
698  }
699  private:
700  int* weight_;
701  };
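// Editorial note: the specialization above weights edges whose properties
// report a strong coupling (depends()) with 3 and all other edges with 1, so
// ParMETIS preferably keeps strongly coupled vertices on the same process.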
702 
703 
704 
718  template<class F, class G, class IS, class EW>
719  void getAdjArrays(G& graph, IS& indexSet, int *xadj,
720  EW& ew)
721  {
722  int j=0;
723 
724  // The type of the const vertex iterator.
725  typedef typename G::ConstVertexIterator VertexIterator;
726  //typedef typename IndexSet::const_iterator Iterator;
727  typedef typename IS::const_iterator Iterator;
728 
729  VertexIterator vend = graph.end();
730  Iterator end;
731 
732  for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex){
733  if (isOwner<F>(indexSet,*vertex)) {
734  // The type of const edge iterator.
735  typedef typename G::ConstEdgeIterator EdgeIterator;
736  EdgeIterator eend = vertex.end();
737  xadj[j] = ew.index();
738  j++;
739  for(EdgeIterator edge = vertex.begin(); edge != eend; ++edge){
740  ew(edge);
741  }
742  }
743  }
744  xadj[j] = ew.index();
745  }
746  }// end anonymous namespace
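// Editorial example of the CSR arrays produced by getAdjArrays for a path
// graph with three owner vertices 0 - 1 - 2:
//
//   xadj   = {0, 1, 3, 4};   // vertex i has edges adjncy[xadj[i]..xadj[i+1])
//   adjncy = {1, 0, 2, 1};   // ParMETIS global indices of the edge targets
//
// This is exactly the (local part of the) graph format that ParMETIS expects.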
747 
748  template<class G, class T1, class T2>
749  bool buildCommunication(const G& graph, std::vector<int>& realparts,
750  Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
751  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
752  RedistributeInterface& redistInf,
753  bool verbose=false);
754 #if HAVE_PARMETIS
755  extern "C"{
756  void METIS_PartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt,
757  idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts,
758  int *options, int *edgecut, idxtype *part);
759 
760  void METIS_PartGraphRecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt,
761  idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts,
762  int *options, int *edgecut, idxtype *part);
763  }
764 #endif
765 
766  template<class S, class T>
767  inline void print_carray(S& os, T* array, std::size_t l)
768  {
769  for(T *cur=array, *end=array+l; cur!=end; ++cur)
770  os<<*cur<<" ";
771  }
772 #if !HAVE_PARMETIS
773  typedef std::size_t idxtype;
774 #endif
775 
776  inline bool isValidGraph(std::size_t noVtx, std::size_t gnoVtx, idxtype noEdges, idxtype* xadj,
777  idxtype* adjncy, bool checkSymmetry)
778  {
779  bool correct=true;
780 
781  for(idxtype vtx=0; vtx<(idxtype)noVtx; ++vtx){
782  if(xadj[vtx]>noEdges||xadj[vtx]<0){
783  std::cerr <<"Check graph: xadj["<<vtx<<"]="<<xadj[vtx]<<" (>"
784  <<noEdges<<") out of range!"<<std::endl;
785  correct=false;
786  }
787  if(xadj[vtx+1]>noEdges||xadj[vtx+1]<0){
788  std::cerr <<"Check graph: xadj["<<vtx+1<<"]="<<xadj[vtx+1]<<" (>"
789  <<noEdges<<") out of range!"<<std::endl;
790  correct=false;
791  }
792  // Check numbers in adjncy
793  for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){
794  if(adjncy[i]<0||((std::size_t)adjncy[i])>gnoVtx){
795  std::cerr<<" Edge "<<adjncy[i]<<" out of range ["<<0<<","<<noVtx<<")"
796  <<std::endl;
797  correct=false;
798  }
799  }
800  if(checkSymmetry){
801  for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){
802  idxtype target=adjncy[i];
803  // search for symmetric edge
804  int found=0;
805  for(idxtype j=xadj[target]; j< xadj[target+1];++j)
806  if(adjncy[j]==vtx)
807  found++;
808  if(found!=1){
809  std::cerr<<"Edge ("<<target<<","<<vtx<<") occurs "<<found<<" times instead of once!"<<std::endl;
810  correct=false;
811  }
812  }
813  }
814  }
815  return correct;
816  }
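// Usage sketch (editorial): checking a small symmetric CSR graph, here the
// three-vertex path 0 - 1 - 2 stored in local arrays.
//
//   idxtype xadj[]   = {0, 1, 3, 4};
//   idxtype adjncy[] = {1, 0, 2, 1};
//   bool ok = isValidGraph(3, 3, 4, xadj, adjncy, true); // also check symmetry
//   assert(ok);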
817 
818  template<class M, class T1, class T2>
819  bool commGraphRepartition(const M& mat, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts,
820  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
821  RedistributeInterface& redistInf,
822  bool verbose=false)
823  {
824  if(verbose && oocomm.communicator().rank()==0)
825  std::cout<<"Repartitioning from "<<oocomm.communicator().size()
826  <<" to "<<nparts<<" parts"<<std::endl;
827  Timer time;
828  int rank = oocomm.communicator().rank();
829 #if !HAVE_PARMETIS
830  int* part = new int[1];
831  part[0]=0;
832 #else
833  idxtype* part = new idxtype[1]; // where all our data moves to
834 
835  if(nparts>1){
836 
837  part[0]=rank;
838 
839  { // sublock for automatic memory deletion
840 
841  // Build the graph of the communication scheme and create an appropriate indexset.
842  // calculate the neighbour vertices
843  int noNeighbours = oocomm.remoteIndices().neighbours();
844  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::RemoteIndices RemoteIndices;
845  typedef typename RemoteIndices::const_iterator
846  NeighbourIterator;
847 
848  for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
849  ++n)
850  if(n->first==rank){
851  //do not include ourselves.
852  --noNeighbours;
853  break;
854  }
855 
856  // A parmetis graph representing the communication graph.
857  // The diagonal entries are the number of nodes on the process.
858  // The offdiagonal entries are the number of edges leading to other processes.
859 
860  idxtype *xadj=new idxtype[2], *vwgt = 0;
861  idxtype *vtxdist=new idxtype[oocomm.communicator().size()+1];
862  idxtype * adjncy=new idxtype[noNeighbours], *adjwgt = 0;
863 
864  // each process has exactly one vertex!
865  for(int i=0; i<oocomm.communicator().size(); ++i)
866  vtxdist[i]=i;
867  vtxdist[oocomm.communicator().size()]=oocomm.communicator().size();
868 
869  xadj[0]=0;
870  xadj[1]=noNeighbours;
871 
872  // count edges to other processor
873  // a vector mapping the index to the owner
874  // std::vector<int> owner(mat.N(), oocomm.communicator().rank());
875  // for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
876  // ++n)
877  // {
878  // if(n->first!=oocomm.communicator().rank()){
879  // typedef typename RemoteIndices::RemoteIndexList RIList;
880  // const RIList& rlist = *(n->second.first);
881  // typedef typename RIList::const_iterator LIter;
882  // for(LIter entry=rlist.begin(); entry!=rlist.end(); ++entry){
883  // if(entry->attribute()==OwnerOverlapCopyAttributeSet::owner)
884  // owner[entry->localIndexPair().local()] = n->first;
885  // }
886  // }
887  // }
888 
889  // std::map<int,idxtype> edgecount; // edges to other processors
890  // typedef typename M::ConstRowIterator RIter;
891  // typedef typename M::ConstColIterator CIter;
892 
893  // // calculate edge count
894  // for(RIter row=mat.begin(), endr=mat.end(); row != endr; ++row)
895  // if(owner[row.index()]==OwnerOverlapCopyAttributeSet::owner)
896  // for(CIter entry= row->begin(), ende = row->end(); entry != ende; ++entry)
897  // ++edgecount[owner[entry.index()]];
898 
899  // setup edge and weight pattern
900  typedef typename RemoteIndices::const_iterator NeighbourIterator;
901  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
902  typedef typename IndexSet::LocalIndex LocalIndex;
903 
904  idxtype* adjp=adjncy;
905 
906 #ifdef USE_WEIGHTS
907  vwgt = new idxtype[1];
908  vwgt[0]= mat.N(); // weight is the number of rows; TODO: should actually be the number of nonzeros.
909 
910  adjwgt = new idxtype[noNeighbours];
911  idxtype* adjwp=adjwgt;
912 #endif
913 
914  for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end();
915  ++n)
916  if(n->first != rank){
917  *adjp=n->first;
918  ++adjp;
919 #ifdef USE_WEIGHTS
920  *adjwp=1;//edgecount[n->first];
921  ++adjwp;
922 #endif
923  }
924  assert(isValidGraph(vtxdist[rank+1]-vtxdist[rank],
925  vtxdist[oocomm.communicator().size()],
926  noNeighbours, xadj, adjncy, false));
927 
928  int wgtflag=0, numflag=0, edgecut;
929 #ifdef USE_WEIGHTS
930  wgtflag=3;
931 #endif
932  float *tpwgts = new float[nparts];
933  for(int i=0; i<nparts; ++i)
934  tpwgts[i]=1.0/nparts;
935  int options[5] ={ 0,1,15,0,0};
936  MPI_Comm comm=oocomm.communicator();
937 
938  Dune::dinfo<<rank<<" vtxdist: ";
939  print_carray(Dune::dinfo, vtxdist, oocomm.communicator().size()+1);
940  Dune::dinfo<<std::endl<<rank<<" xadj: ";
941  print_carray(Dune::dinfo, xadj, 2);
942  Dune::dinfo<<std::endl<<rank<<" adjncy: ";
943  print_carray(Dune::dinfo, adjncy, noNeighbours);
944 
945 #ifdef USE_WEIGHTS
946  Dune::dinfo<<std::endl<<rank<<" vwgt: ";
947  print_carray(Dune::dinfo, vwgt, 1);
948  Dune::dinfo<<std::endl<<rank<<" adjwgt: ";
949  print_carray(Dune::dinfo, adjwgt, noNeighbours);
950 #endif
951  Dune::dinfo<<std::endl;
952  oocomm.communicator().barrier();
953  if(verbose && oocomm.communicator().rank()==0)
954  std::cout<<"Creating comm graph took "<<time.elapsed()<<std::endl;
955  time.reset();
956 
957 #ifdef PARALLEL_PARTITION
958  float ubvec = 1.15;
959  int ncon=1;
960 
961  //=======================================================
962  // ParMETIS_V3_PartKway
963  //=======================================================
964  ParMETIS_V3_PartKway(vtxdist, xadj, adjncy,
965  vwgt, adjwgt, &wgtflag,
966  &numflag, &ncon, &nparts, tpwgts, &ubvec, options, &edgecut, part,
967  &comm);
968  if(verbose && oocomm.communicator().rank()==0)
969  std::cout<<"ParMETIS took "<<time.elapsed()<<std::endl;
970  time.reset();
971 #else
972  Timer time1;
973  std::size_t gnoedges=0;
974  int* noedges = 0;
975  noedges = new int[oocomm.communicator().size()];
976  Dune::dverb<<"noNeighbours: "<<noNeighbours<<std::endl;
977  // gather number of edges for each vertex.
978  MPI_Allgather(&noNeighbours,1,MPI_INT,noedges,1, MPI_INT,oocomm.communicator());
979 
980  if(verbose && oocomm.communicator().rank()==0)
981  std::cout<<"Gathering noedges took "<<time1.elapsed()<<std::endl;
982  time1.reset();
983 
984  int noVertices = vtxdist[oocomm.communicator().size()];
985  idxtype *gxadj = 0;
986  idxtype *gvwgt = 0;
987  idxtype *gadjncy = 0;
988  idxtype *gadjwgt = 0;
989  idxtype *gpart = 0;
990  int* displ = 0;
991  int* noxs = 0;
992  int* xdispl = 0; // displacement for xadj
993  int* novs = 0;
994  int* vdispl=0; // real vertex displacement
995  std::size_t localNoVtx=vtxdist[rank+1]-vtxdist[rank];
996  std::size_t gxadjlen = vtxdist[oocomm.communicator().size()]-vtxdist[0]+oocomm.communicator().size();
997 
998  {
999  Dune::dinfo<<"noedges: ";
1000  print_carray(Dune::dinfo, noedges, oocomm.communicator().size());
1001  Dune::dinfo<<std::endl;
1002  displ = new int[oocomm.communicator().size()];
1003  xdispl = new int[oocomm.communicator().size()];
1004  noxs = new int[oocomm.communicator().size()];
1005  vdispl = new int[oocomm.communicator().size()];
1006  novs = new int[oocomm.communicator().size()];
1007 
1008  for(int i=0; i < oocomm.communicator().size(); ++i){
1009  noxs[i]=vtxdist[i+1]-vtxdist[i]+1;
1010  novs[i]=vtxdist[i+1]-vtxdist[i];
1011  }
1012 
1013  idxtype *so= vtxdist;
1014  int offset = 0;
1015  for(int *xcurr = xdispl, *vcurr = vdispl, *end=vdispl+oocomm.communicator().size();
1016  vcurr!=end; ++vcurr, ++xcurr, ++so, ++offset){
1017  *vcurr = *so;
1018  *xcurr = offset + *so;
1019  }
1020 
1021  int *pdispl =displ;
1022  int cdispl = 0;
1023  *pdispl = 0;
1024  for(int *curr=noedges, *end=noedges+oocomm.communicator().size()-1;
1025  curr!=end; ++curr){
1026  ++pdispl; // next displacement
1027  cdispl += *curr; // next value
1028  *pdispl = cdispl;
1029  }
1030  Dune::dinfo<<"displ: ";
1031  print_carray(Dune::dinfo, displ, oocomm.communicator().size());
1032  Dune::dinfo<<std::endl;
1033 
1034  // calculate global number of edges
1035  // It is bigger than the actual one as we have size-1 additional end entries
1036  for(int *curr=noedges, *end=noedges+oocomm.communicator().size();
1037  curr!=end; ++curr)
1038  gnoedges += *curr;
1039 
1040  // allocate global graph
1041  Dune::dinfo<<"gxadjlen: "<<gxadjlen<<" noVertices: "<<noVertices
1042  <<" gnoedges: "<<gnoedges<<std::endl;
1043  gxadj = new idxtype[gxadjlen];
1044  gpart = new idxtype[noVertices];
1045 #ifdef USE_WEIGHTS
1046  gvwgt = new idxtype[noVertices];
1047  gadjwgt = new idxtype[gnoedges];
1048 #endif
1049  gadjncy = new idxtype[gnoedges];
1050  }
1051 
1052  if(verbose && oocomm.communicator().rank()==0)
1053  std::cout<<"Preparing global graph took "<<time1.elapsed()<<std::endl;
1054  time1.reset();
1055  // Communicate data
1056 
1057  MPI_Allgatherv(xadj,2,MPITraits<idxtype>::getType(),
1058  gxadj,noxs,xdispl,MPITraits<idxtype>::getType(),
1059  comm);
1060  MPI_Allgatherv(adjncy,noNeighbours,MPITraits<idxtype>::getType(),
1061  gadjncy,noedges,displ,MPITraits<idxtype>::getType(),
1062  comm);
1063 #ifdef USE_WEIGHTS
1064  MPI_Allgatherv(adjwgt,noNeighbours,MPITraits<idxtype>::getType(),
1065  gadjwgt,noedges,displ,MPITraits<idxtype>::getType(),
1066  comm);
1067  MPI_Allgatherv(vwgt,localNoVtx,MPITraits<idxtype>::getType(),
1068  gvwgt,novs,vdispl,MPITraits<idxtype>::getType(),
1069  comm);
1070 #endif
1071  if(verbose && oocomm.communicator().rank()==0)
1072  std::cout<<"Gathering global graph data took "<<time1.elapsed()<<std::endl;
1073  time1.reset();
1074 
1075  {
1076  // create the real gxadj array
1077  // i.e. shift entries and add displacements.
1078 
1079  print_carray(Dune::dinfo, gxadj, gxadjlen);
1080 
1081  int offset = 0;
1082  idxtype increment = vtxdist[1];
1083  idxtype *start=gxadj+1;
1084  for(int i=1; i<oocomm.communicator().size(); ++i){
1085  offset+=1;
1086  int lprev = vtxdist[i]-vtxdist[i-1];
1087  int l = vtxdist[i+1]-vtxdist[i];
1088  start+=lprev;
1089  assert((start+l+offset)-gxadj<=static_cast<idxtype>(gxadjlen));
1090  increment = *(start-1);
1091  std::transform(start+offset, start+l+offset, start, std::bind2nd(std::plus<idxtype>(), increment));
1092  }
1093  Dune::dinfo<<std::endl<<"shifted xadj:";
1094  print_carray(Dune::dinfo, gxadj, noVertices+1);
1095  Dune::dinfo<<std::endl<<" gadjncy: ";
1096  print_carray(Dune::dinfo, gadjncy, gnoedges);
1097 #ifdef USE_WEIGHTS
1098  Dune::dinfo<<std::endl<<" gvwgt: ";
1099  print_carray(Dune::dinfo, gvwgt, noVertices);
1100  Dune::dinfo<<std::endl<<"adjwgt: ";
1101  print_carray(Dune::dinfo, gadjwgt, gnoedges);
1102  Dune::dinfo<<std::endl;
1103 #endif
1104  // everything should be fine now!!!
1105  if(verbose && oocomm.communicator().rank()==0)
1106  std::cout<<"Postprocessing global graph data took "<<time1.elapsed()<<std::endl;
1107  time1.reset();
1108 #ifndef NDEBUG
1109  assert(isValidGraph(noVertices, noVertices, gnoedges,
1110  gxadj, gadjncy, true));
1111 #endif
1112 
1113  if(verbose && oocomm.communicator().rank()==0)
1114  std::cout<<"Creating graph on 1 process took "<<time.elapsed()<<std::endl;
1115  time.reset();
1116  options[0]=0; options[1]=1; options[2]=1; options[3]=3; options[4]=3;
1117  // Call metis
1118  METIS_PartGraphRecursive(&noVertices, gxadj, gadjncy, gvwgt, gadjwgt, &wgtflag,
1119  &numflag, &nparts, options, &edgecut, gpart);
1120 
1121  if(verbose && oocomm.communicator().rank()==0)
1122  std::cout<<"METIS took "<<time.elapsed()<<std::endl;
1123  time.reset();
1124 
1125  Dune::dinfo<<std::endl<<"part:";
1126  print_carray(Dune::dinfo, gpart, noVertices);
1127 
1128  delete[] gxadj;
1129  delete[] gadjncy;
1130 #ifdef USE_WEIGHTS
1131  delete[] gvwgt;
1132  delete[] gadjwgt;
1133 #endif
1134  }
1135  // Scatter result
1136  MPI_Scatter(gpart, 1, MPITraits<idxtype>::getType(), part, 1,
1137  MPITraits<idxtype>::getType(), 0, comm);
1138 
1139  {
1140  // release remaining memory
1141  delete[] gpart;
1142  delete[] noedges;
1143  delete[] displ;
1144  }
1145 
1146 
1147 #endif
1148  delete[] xadj;
1149  delete[] vtxdist;
1150  delete[] adjncy;
1151 #ifdef USE_WEIGHTS
1152  delete[] vwgt;
1153  delete[] adjwgt;
1154 #endif
1155  delete[] tpwgts;
1156  }
1157  }else{
1158  part[0]=0;
1159  }
1160 #endif
1161  Dune::dinfo<<" repart "<<rank <<" -> "<< part[0]<<std::endl;
1162 
1163  std::vector<int> realpart(mat.N(), part[0]);
1164  delete[] part;
1165 
1166  oocomm.copyOwnerToAll(realpart, realpart);
1167 
1168  if(verbose && oocomm.communicator().rank()==0)
1169  std::cout<<"Scattering repartitioning took "<<time.elapsed()<<std::endl;
1170  time.reset();
1171 
1172 
1173  oocomm.buildGlobalLookup(mat.N());
1174  Dune::Amg::MatrixGraph<M> graph(const_cast<M&>(mat));
1175  fillIndexSetHoles(graph, oocomm);
1176  if(verbose && oocomm.communicator().rank()==0)
1177  std::cout<<"Filling index set took "<<time.elapsed()<<std::endl;
1178  time.reset();
1179 
1180  if(verbose){
1181  int noNeighbours=oocomm.remoteIndices().neighbours();
1182  noNeighbours = oocomm.communicator().sum(noNeighbours)
1183  / oocomm.communicator().size();
1184  if(oocomm.communicator().rank()==0)
1185  std::cout<<"Average no neighbours was "<<noNeighbours<<std::endl;
1186  }
1187  bool ret = buildCommunication(graph, realpart, oocomm, outcomm, redistInf,
1188  verbose);
1189  if(verbose && oocomm.communicator().rank()==0)
1190  std::cout<<"Building index sets took "<<time.elapsed()<<std::endl;
1191  time.reset();
1192 
1193 
1194  return ret;
1195 
1196  }
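// Usage sketch (editorial, all names are placeholders): repartition the
// process graph of a parallel matrix A onto `parts` processes and obtain the
// objects needed for the actual data redistribution.
//
//   typedef Dune::OwnerOverlapCopyCommunication<int,int> Comm;
//   Comm* newComm = 0;
//   RedistributeInterface redist;
//   bool stillActive = commGraphRepartition(A, comm, parts, newComm, redist, true);
//   // stillActive is false on ranks that hold no data in the new partition;
//   // newComm and redist are subsequently used to move matrix and vectors.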
1197 
1212  template<class G, class T1, class T2>
1213  bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts,
1214  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
1215  RedistributeInterface& redistInf,
1216  bool verbose=false)
1217  {
1218  Timer time;
1219 
1220  MPI_Comm comm=oocomm.communicator();
1221  oocomm.buildGlobalLookup(graph.noVertices());
1222  fillIndexSetHoles(graph, oocomm);
1223 
1224  if(verbose && oocomm.communicator().rank()==0)
1225  std::cout<<"Filling holes took "<<time.elapsed()<<std::endl;
1226  time.reset();
1227 
1228  // simple precondition checks
1229 
1230 #ifdef PERF_REPART
1231  // Profiling variables
1232  double t1=0.0, t2=0.0, t3=0.0, t4=0.0, tSum=0.0;
1233 #endif
1234 
1235 
1236  // MPI variables
1237  int mype = oocomm.communicator().rank();
1238 
1239  assert(nparts<=oocomm.communicator().size());
1240 
1241  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet::GlobalIndex GI;
1242  typedef std::vector<GI> GlobalVector;
1243  int myDomain;
1244 
1245  //
1246  // 1) Prepare the required parameters for using ParMETIS
1247  // Especially the arrays that represent the graph must be
1248  // generated by the DUNE Graph and IndexSet input variables.
1249  // These are the arrays:
1250  // - vtxdist
1251  // - xadj
1252  // - adjncy
1253  //
1254  //
1255 #ifdef PERF_REPART
1256  // reset timer for step 1)
1257  t1=MPI_Wtime();
1258 #endif
1259 
1260 
1261  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
1262  typedef typename OOComm::OwnerSet OwnerSet;
1263 
1264  // Create the vtxdist array and parmetisVtxMapping.
1265  // Global communications are necessary
1266  // The parmetis global identifiers for the owner vertices.
1267  ParmetisDuneIndexMap indexMap(graph,oocomm);
1268 #if HAVE_PARMETIS
1269  idxtype *part = new idxtype[indexMap.numOfOwnVtx()];
1270 #else
1271  std::size_t *part = new std::size_t[indexMap.numOfOwnVtx()];
1272 #endif
1273  for(std::size_t i=0; i < indexMap.numOfOwnVtx(); ++i)
1274  part[i]=mype;
1275 
1276 #if !HAVE_PARMETIS
1277  if(oocomm.communicator().rank()==0 && nparts>1)
1278  std::cerr<<"ParMETIS not activated. Will repartition to 1 domain instead of requested "
1279  <<nparts<<" domains."<<std::endl;
1280  nparts=1; // No parmetis available, fallback to agglomerating to 1 process
1281  typedef std::size_t idxtype;
1282 
1283 #else
1284 
1285  if(nparts>1){
1286  // Create the xadj and adjncy arrays
1287  idxtype *xadj = new idxtype[indexMap.numOfOwnVtx()+1];
1288  idxtype *adjncy = new idxtype[graph.noEdges()];
1289  EdgeFunctor<G> ef(adjncy, indexMap, graph.noEdges());
1290  getAdjArrays<OwnerSet>(graph, oocomm.globalLookup(), xadj, ef);
1291 
1292  //
1293  // 2) Call ParMETIS
1294  //
1295  //
1296  int numflag=0, wgtflag=0, options[3], edgecut=0, ncon=1;
1297  float *tpwgts = NULL;
1298  float ubvec[1];
1299  options[0] = 0; // 0=default, 1=options are defined in [1]+[2]
1300 #ifdef DEBUG_REPART
1301  options[1] = 3; // show info: 0=no message
1302 #else
1303  options[1] = 0; // show info: 0=no message
1304 #endif
1305  options[2] = 1; // random number seed, default is 15
1306  wgtflag = (ef.getWeights()!=NULL)?1:0;
1307  numflag = 0;
1308  edgecut = 0;
1309  ncon=1;
1310  ubvec[0]=1.05; // recommended by ParMETIS
1311 
1312 #ifdef DEBUG_REPART
1313  if (mype == 0) {
1314  std::cout<<std::endl;
1315  std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
1316  <<options[1]<<" "<<options[2]<<"}, Ncon: "
1317  <<ncon<<", Nparts: "<<nparts<<std::endl;
1318  }
1319 #endif
1320 #ifdef PERF_REPART
1321  // stop the time for step 1)
1322  t1=MPI_Wtime()-t1;
1323  // reset timer for step 2)
1324  t2=MPI_Wtime();
1325 #endif
1326 
1327  if(verbose){
1328  oocomm.communicator().barrier();
1329  if(oocomm.communicator().rank()==0)
1330  std::cout<<"Preparing for parmetis took "<<time.elapsed()<<std::endl;
1331  }
1332  time.reset();
1333 
1334  //=======================================================
1335  // ParMETIS_V3_PartKway
1336  //=======================================================
1337  ParMETIS_V3_PartKway(indexMap.vtxDist(), xadj, adjncy,
1338  NULL, ef.getWeights(), &wgtflag,
1339  &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &const_cast<MPI_Comm&>(comm));
1340 
1341 
1342  delete[] xadj;
1343  delete[] adjncy;
1344  ef.free();
1345 
1346 #ifdef DEBUG_REPART
1347  if (mype == 0) {
1348  std::cout<<std::endl;
1349  std::cout<<"ParMETIS_V3_PartKway reported a cut of "<<edgecut<<std::endl;
1350  std::cout<<std::endl;
1351  }
1352  std::cout<<mype<<": PARMETIS-Result: ";
1353  for(int i=0; i < indexMap.vtxDist()[mype+1]-indexMap.vtxDist()[mype]; ++i) {
1354  std::cout<<part[i]<<" ";
1355  }
1356  std::cout<<std::endl;
1357  std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
1358  <<options[1]<<" "<<options[2]<<"}, Ncon: "
1359  <<ncon<<", Nparts: "<<nparts<<std::endl;
1360 #endif
1361 #ifdef PERF_REPART
1362  // stop the time for step 2)
1363  t2=MPI_Wtime()-t2;
1364  // reset timer for step 3)
1365  t3=MPI_Wtime();
1366 #endif
1367 
1368 
1369  if(verbose){
1370  oocomm.communicator().barrier();
1371  if(oocomm.communicator().rank()==0)
1372  std::cout<<"Parmetis took "<<time.elapsed()<<std::endl;
1373  }
1374  time.reset();
1375  }else
1376 #endif
1377  {
1378  // Everything goes to process 0!
1379  for(std::size_t i=0; i<indexMap.numOfOwnVtx();++i)
1380  part[i]=0;
1381  }
1382 
1383 
1384  //
1385  // 3) Find an optimal domain mapping based on the ParMETIS
1386  // repartitioning result
1387  //
1388 
1389  int domainMapping[nparts];
1390  if(nparts>1)
1391  getDomain(comm, part, indexMap.numOfOwnVtx(), nparts, &myDomain, domainMapping);
1392  else
1393  domainMapping[0]=0;
1394 
1395 #ifdef DEBUG_REPART
1396  std::cout<<mype<<": myDomain: "<<myDomain<<std::endl;
1397  std::cout<<mype<<": DomainMapping: ";
1398  for(int j=0; j<nparts; j++) {
1399  std::cout<<" do: "<<j<<" pe: "<<domainMapping[j]<<" ";
1400  }
1401  std::cout<<std::endl;
1402 #endif
1403 
1404  // Make a domain mapping for the index set and translate
1405  // domain numbers to real process numbers.
1406  // domainMapping is the ParMETIS one, i.e. without
1407  // the overlap/copy vertices.
1408  std::vector<int> setPartition(oocomm.indexSet().size(), -1);
1409 
1410  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
1411  std::size_t i=0; // parmetis index
1412  for(Iterator index = oocomm.indexSet().begin(); index != oocomm.indexSet().end(); ++index)
1413  if(OwnerSet::contains(index->local().attribute())){
1414  setPartition[index->local()]=domainMapping[part[i++]];
1415  }
1416 
1417  delete[] part;
1418  oocomm.copyOwnerToAll(setPartition, setPartition);
1419  // communication only needed for ALU
1420  // (ghosts with same global id as owners on the same process)
1421  if (oocomm.getSolverCategory() ==
1422  static_cast<int>(SolverCategory::nonoverlapping))
1423  oocomm.copyCopyToAll(setPartition, setPartition);
1424  bool ret = buildCommunication(graph, setPartition, oocomm, outcomm, redistInf,
1425  verbose);
1426  if(verbose){
1427  oocomm.communicator().barrier();
1428  if(oocomm.communicator().rank()==0)
1429  std::cout<<"Creating indexsets took "<<time.elapsed()<<std::endl;
1430  }
1431  return ret;
1432  }
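// Usage sketch (editorial, names are placeholders): repartitioning via the
// matrix graph of a BCRSMatrix-like type `Mat` held in `A`, onto `parts`
// processes (parts <= comm.communicator().size()).
//
//   Dune::Amg::MatrixGraph<Mat> mgraph(A);
//   Dune::OwnerOverlapCopyCommunication<int,int>* newComm = 0;
//   RedistributeInterface redist;
//   bool stillActive = graphRepartition(mgraph, comm, parts, newComm, redist, false);
//
// graphRepartition builds the global lookup itself, calls ParMETIS (if
// available) and returns whether this rank is part of the new partition.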
1433 
1434 
1435 
1436  template<class G, class T1, class T2>
1437  bool buildCommunication(const G& graph,
1438  std::vector<int>& setPartition, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
1439  Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm,
1440  RedistributeInterface& redistInf,
1441  bool verbose)
1442  {
1443  typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
1444  typedef typename OOComm::OwnerSet OwnerSet;
1445 
1446  Timer time;
1447 
1448  // Build the send interface
1449  redistInf.buildSendInterface<OwnerSet>(setPartition, oocomm.indexSet());
1450 
1451 #ifdef PERF_REPART
1452  // stop the time for step 3)
1453  t3=MPI_Wtime()-t3;
1454  // reset timer for step 4)
1455  t4=MPI_Wtime();
1456 #endif
1457 
1458 
1459  //
1460  // 4) Create the output IndexSet and RemoteIndices
1461  // 4.1) Determine the "send to" and "receive from" relation
1462  // according to the new partition using a MPI ring
1463  // communication.
1464  //
1465  // 4.2) Depends on the "send to" and "receive from" vector,
1466  // the processes will exchange the vertices each other
1467  //
1468  // 4.3) Create the IndexSet, RemoteIndices and the new MPI
1469  // communicator
1470  //
1471 
1472  //
1473  // 4.1) Let's start...
1474  //
1475  int npes = oocomm.communicator().size();
1476  int *sendTo = 0;
1477  int noSendTo = 0;
1478  std::set<int> recvFrom;
1479 
1480  // the max number of vertices is stored in the sendTo buffer,
1481  // not the number of vertices to send! Because the max number of Vtx
1482  // is used as the fixed buffer size by the MPI send/receive calls
1483 
1484  typedef typename std::vector<int>::const_iterator VIter;
1485  int mype = oocomm.communicator().rank();
1486 
1487  {
1488  std::set<int> tsendTo;
1489  for(VIter i=setPartition.begin(), iend = setPartition.end(); i!=iend; ++i)
1490  tsendTo.insert(*i);
1491 
1492  noSendTo = tsendTo.size();
1493  sendTo = new int[noSendTo];
1494  typedef std::set<int>::const_iterator iterator;
1495  int idx=0;
1496  for(iterator i=tsendTo.begin(); i != tsendTo.end(); ++i, ++idx)
1497  sendTo[idx]=*i;
1498  }
1499 
1500  //
1501  int* gnoSend= new int[oocomm.communicator().size()];
1502  int* gsendToDispl = new int[oocomm.communicator().size()+1];
1503 
1504  MPI_Allgather(&noSendTo, 1, MPI_INT, gnoSend, 1,
1505  MPI_INT, oocomm.communicator());
1506 
1507  // calculate total receive message size
1508  int totalNoRecv = 0;
1509  for(int i=0; i<npes; ++i)
1510  totalNoRecv += gnoSend[i];
1511 
1512  int *gsendTo = new int[totalNoRecv];
1513 
1514  // calculate displacement for allgatherv
1515  gsendToDispl[0]=0;
1516  for(int i=0; i<npes; ++i)
1517  gsendToDispl[i+1]=gsendToDispl[i]+gnoSend[i];
1518 
1519  // gather the data
1520  MPI_Allgatherv(sendTo, noSendTo, MPI_INT, gsendTo, gnoSend, gsendToDispl,
1521  MPI_INT, oocomm.communicator());
1522 
1523  // Extract from which processes we will receive data
1524  for(int proc=0; proc < npes; ++proc)
1525  for(int i=gsendToDispl[proc]; i < gsendToDispl[proc+1]; ++i)
1526  if(gsendTo[i]==mype)
1527  recvFrom.insert(proc);
1528 
1529  bool existentOnNextLevel = recvFrom.size()>0;
1530 
1531  // Delete memory
1532  delete[] gnoSend;
1533  delete[] gsendToDispl;
1534  delete[] gsendTo;
1535 
1536 
1537 #ifdef DEBUG_REPART
1538  if(recvFrom.size()){
1539  std::cout<<mype<<": recvFrom: ";
1540  typedef typename std::set<int>::const_iterator siter;
1541  for(siter i=recvFrom.begin(); i!= recvFrom.end(); ++i) {
1542  std::cout<<*i<<" ";
1543  }
1544  }
1545 
1546  std::cout<<std::endl<<std::endl;
1547  std::cout<<mype<<": sendTo: ";
1548  for(int i=0; i<noSendTo; i++) {
1549  std::cout<<sendTo[i]<<" ";
1550  }
1551  std::cout<<std::endl<<std::endl;
1552 #endif
1553 
1554  if(verbose)
1555  if(oocomm.communicator().rank()==0)
1556  std::cout<<" Communicating the receive information took "<<
1557  time.elapsed()<<std::endl;
1558  time.reset();
1559 
1560  //
1561  // 4.2) Start the communication
1562  //
1563 
1564  // Get all the owner and overlap vertices for myself and save
1565  // them in the vectors myOwnerVec and myOverlapVec.
1566  // The vertices received from the other processes are simply
1567  // added to these vectors.
1568  //
1569 
1570 
1571  typedef typename OOComm::ParallelIndexSet::GlobalIndex GI;
1572  typedef std::vector<GI> GlobalVector;
1573  std::vector<std::pair<GI,int> > myOwnerVec;
1574  std::set<GI> myOverlapSet;
1575  GlobalVector sendOwnerVec;
1576  std::set<GI> sendOverlapSet;
1577  std::set<int> myNeighbors;
1578 
1579  // getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
1580  // mype, mype, myOwnerVec, myOverlapSet, redistInf, myNeighbors);
1581 
1582  char **sendBuffers=new char*[noSendTo];
1583  MPI_Request *requests = new MPI_Request[noSendTo];
1584 
1585  // Create all messages to be sent
1586  for(int i=0; i < noSendTo; ++i){
1587  // clear the vector for sending
1588  sendOwnerVec.clear();
1589  sendOverlapSet.clear();
1590  // get all owner and overlap vertices for process sendTo[i] and save these
1591  // in the vectors sendOwnerVec and sendOverlapSet
1592  std::set<int> neighbors;
1593  getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
1594  mype, sendTo[i], sendOwnerVec, sendOverlapSet, redistInf,
1595  neighbors);
1596  // Additional integers are needed for the lengths of the owner,
1597  // overlap and neighbor parts of the packed message.
1598  int buffersize=0;
1599  int tsize;
1600  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &buffersize);
1601  MPI_Pack_size(sendOwnerVec.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
1602  buffersize +=tsize;
1603  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
1604  buffersize +=tsize;
1605  MPI_Pack_size(sendOverlapSet.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
1606  buffersize += tsize;
1607  MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
1608  buffersize += tsize;
1609  MPI_Pack_size(neighbors.size(), MPI_INT, oocomm.communicator(), &tsize);
1610  buffersize += tsize;
1611 
1612  sendBuffers[i] = new char[buffersize];
1613 
1614 #ifdef DEBUG_REPART
1615  std::cout<<mype<<" sending "<<sendOwnerVec.size()<<" owner and "<<
1616  sendOverlapSet.size()<<" overlap to "<<sendTo[i]<<" buffersize="<<buffersize<<std::endl;
1617 #endif
1618  createSendBuf(sendOwnerVec, sendOverlapSet, neighbors, sendBuffers[i], buffersize, oocomm.communicator());
1619  MPI_Issend(sendBuffers[i], buffersize, MPI_PACKED, sendTo[i], 99, oocomm.communicator(), requests+i);
1620  }
1621 
1622  if(verbose){
1623  oocomm.communicator().barrier();
1624  if(oocomm.communicator().rank()==0)
1625  std::cout<<" Creating sends took "<<
1626  time.elapsed()<<std::endl;
1627  }
1628  time.reset();
1629 
1630  // Receive Messages
1631  int noRecv = recvFrom.size();
1632  int oldbuffersize=0;
1633  char* recvBuf = 0;
1634  while(noRecv>0){
1635  // probe for an incoming message
1636  MPI_Status stat;
1637  MPI_Probe(MPI_ANY_SOURCE, 99, oocomm.communicator(), &stat);
1638  int buffersize;
1639  MPI_Get_count(&stat, MPI_PACKED, &buffersize);
1640 
1641  if(oldbuffersize<buffersize){
1642  // buffer too small, reallocate
1643  delete[] recvBuf;
1644  recvBuf = new char[buffersize];
1645  oldbuffersize = buffersize;
1646  }
1647  MPI_Recv(recvBuf, buffersize, MPI_PACKED, stat.MPI_SOURCE, 99, oocomm.communicator(), &stat);
1648  saveRecvBuf(recvBuf, buffersize, myOwnerVec, myOverlapSet, myNeighbors, redistInf,
1649  stat.MPI_SOURCE, oocomm.communicator());
1650  --noRecv;
1651  }
1652 
1653  if(recvBuf)
1654  delete[] recvBuf;
1655 
1656  time.reset();
1657  // Wait for sending messages to complete
1658  MPI_Status *statuses = new MPI_Status[noSendTo];
1659  int send = MPI_Waitall(noSendTo, requests, statuses);
1660 
1661  // check for errors
1662  if(send==MPI_ERR_IN_STATUS){
1663  std::cerr<<mype<<": Error in sending :"<<std::endl;
1664  // Search for the error
1665  for(int i=0; i< noSendTo; i++)
1666  if(statuses[i].MPI_ERROR!=MPI_SUCCESS){
1667  char message[300];
1668  int messageLength;
1669  MPI_Error_string(statuses[i].MPI_ERROR, message, &messageLength);
1670  std::cerr<<" source="<<statuses[i].MPI_SOURCE<<" message: ";
1671  for(int i=0; i< messageLength; i++)
1672  std::cerr<<message[i];
1673  }
1674  std::cerr<<std::endl;
1675  }
1676 
1677  if(verbose){
1678  oocomm.communicator().barrier();
1679  if(oocomm.communicator().rank()==0)
1680  std::cout<<" Receiving and saving took "<<
1681  time.elapsed()<<std::endl;
1682  }
1683  time.reset();
1684 
1685  for(int i=0; i < noSendTo; ++i)
1686  delete[] sendBuffers[i];
1687 
1688  delete[] sendBuffers;
1689  delete[] statuses;
1690  delete[] requests;
1691 
1692  redistInf.setCommunicator(oocomm.communicator());
1693 
1694  //
1695  // 4.3) Create the IndexSet etc.
1696  //
1697 
1698  // build the new outputIndexSet
1699 
1700 
1701  int color=0;
1702 
1703  if (!existentOnNextLevel) {
1704  // this process is not used anymore
1705  color= MPI_UNDEFINED;
1706  }
1707  MPI_Comm outputComm;
1708 
1709  MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm);
1710  outcomm = new OOComm(outputComm,oocomm.getSolverCategory(),true);
1711 
1712  // translate neighbor ranks.
1713  int newrank=outcomm->communicator().rank();
1714  int *newranks=new int[oocomm.communicator().size()];
1715  std::vector<int> tneighbors;
1716  tneighbors.reserve(myNeighbors.size());
1717 
1718  typename OOComm::ParallelIndexSet& outputIndexSet = outcomm->indexSet();
1719 
1720  MPI_Allgather(&newrank, 1, MPI_INT, newranks, 1,
1721  MPI_INT, oocomm.communicator());
1722  typedef typename std::set<int>::const_iterator IIter;
1723 
1724 #ifdef DEBUG_REPART
1725  std::cout<<oocomm.communicator().rank()<<" ";
1726  for(IIter i=myNeighbors.begin(), end=myNeighbors.end();
1727  i!=end; ++i){
1728  assert(newranks[*i]>=0);
1729  std::cout<<*i<<"->"<<newranks[*i]<<" ";
1730  tneighbors.push_back(newranks[*i]);
1731  }
1732  std::cout<<std::endl;
1733 #else
1734  for(IIter i=myNeighbors.begin(), end=myNeighbors.end();
1735  i!=end; ++i){
1736  tneighbors.push_back(newranks[*i]);
1737  }
1738 #endif
1739  delete[] newranks;
1740  myNeighbors.clear();
1741 
1742  if(verbose){
1743  oocomm.communicator().barrier();
1744  if(oocomm.communicator().rank()==0)
1745  std::cout<<" Calculating new neighbours ("<<tneighbors.size()<<") took "<<
1746  time.elapsed()<<std::endl;
1747  }
1748  time.reset();
1749 
1750 
1751  outputIndexSet.beginResize();
1752  // 1) add the owner vertices
1753  // Sort the owners
1754  std::sort(myOwnerVec.begin(), myOwnerVec.end(), SortFirst());
1755  // The owners are sorted according to their global index.
1756  // Therefore the entries of ownerVec are the same as the
1757  // ones in the resulting index set.
1758  typedef typename OOComm::ParallelIndexSet::LocalIndex LocalIndex;
1759  typedef typename std::vector<std::pair<GI,int> >::const_iterator VPIter;
1760  int i=0;
1761  for(VPIter g=myOwnerVec.begin(), end =myOwnerVec.end(); g!=end; ++g, ++i ) {
1762  outputIndexSet.add(g->first,LocalIndex(i, OwnerOverlapCopyAttributeSet::owner, true));
1763  redistInf.addReceiveIndex(g->second, i);
1764  }
1765 
1766  if(verbose){
1767  oocomm.communicator().barrier();
1768  if(oocomm.communicator().rank()==0)
1769  std::cout<<" Adding owner indices took "<<
1770  time.elapsed()<<std::endl;
1771  }
1772  time.reset();
1773 
1774 
1775  // After all the vertices are received, the vectors must
1776  // be "merged" together to create the final vectors.
1777  // because some vertices that were sent as overlap may now
1778  // already be included as owner vertices in the new partition.
1779  mergeVec(myOwnerVec, myOverlapSet);
1780 
1781  // Trick to free the memory held by myOwnerVec: swap with an
1782  // empty temporary so that the capacity is actually released.
1783  std::vector<std::pair<GI,int> >().swap(myOwnerVec);
1784 
1785  if(verbose){
1786  oocomm.communicator().barrier();
1787  if(oocomm.communicator().rank()==0)
1788  std::cout<<" Merging indices took "<<
1789  time.elapsed()<<std::endl;
1790  }
1791  time.reset();
1792 
1793 
1794  // 2) add the overlap vertices
1795  typedef typename std::set<GI>::const_iterator SIter;
1796  for(SIter g=myOverlapSet.begin(), end=myOverlapSet.end(); g!=end; ++g, i++) {
1797  outputIndexSet.add(*g,LocalIndex(i, OwnerOverlapCopyAttributeSet::copy, true));
1798  }
1799  myOverlapSet.clear();
1800  outputIndexSet.endResize();
1801 
1802 #ifdef DUNE_ISTL_WITH_CHECKING
1803  int numOfOwnVtx =0;
1804  typedef typename OOComm::ParallelIndexSet::const_iterator Iterator;
1805  Iterator end = outputIndexSet.end();
1806  for(Iterator index = outputIndexSet.begin(); index != end; ++index) {
1807  if (OwnerSet::contains(index->local().attribute())) {
1808  numOfOwnVtx++;
1809  }
1810  }
1811  numOfOwnVtx = oocomm.communicator().sum(numOfOwnVtx);
1812  // if(numOfOwnVtx!=indexMap.globalOwnerVertices)
1813  // {
1814  // std::cerr<<numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones!"<<std::endl;
1815  // DUNE_THROW(ISTLError, numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones"
1816  // <<" during repartitioning.");
1817  // }
1818  Iterator index=outputIndexSet.begin();
1819  if(index!=end){
1820  ++index;
1821  for(Iterator old = outputIndexSet.begin(); index != end; old=index++) {
1822  if(old->global()>index->global())
1823  DUNE_THROW(ISTLError, "Index set's globalindex not sorted correctly");
1824  }
1825  }
1826 #endif
1827  if(verbose){
1828  oocomm.communicator().barrier();
1829  if(oocomm.communicator().rank()==0)
1830  std::cout<<" Adding overlap indices took "<<
1831  time.elapsed()<<std::endl;
1832  }
1833  time.reset();
1834 
1835 
1836  if(color != MPI_UNDEFINED){
1837  outcomm->remoteIndices().setNeighbours(tneighbors);
1838  outcomm->remoteIndices().template rebuild<true>();
1839 
1840  }
1841 
1842  // release the memory
1843  delete[] sendTo;
1844 
1845  if(verbose){
1846  oocomm.communicator().barrier();
1847  if(oocomm.communicator().rank()==0)
1848  std::cout<<" Storing indexsets took "<<
1849  time.elapsed()<<std::endl;
1850  }
1851 
1852 #ifdef PERF_REPART
1853  // stop the time for step 4) and print the results
1854  t4=MPI_Wtime()-t4;
1855  tSum = t1 + t2 + t3 + t4;
1856  std::cout<<std::endl
1857  <<mype<<": WTime for step 1): "<<t1
1858  <<" 2): "<<t2
1859  <<" 3): "<<t3
1860  <<" 4): "<<t4
1861  <<" total: "<<tSum
1862  <<std::endl;
1863 #endif
1864 
1865  return color!=MPI_UNDEFINED;
1866 
1867  }
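// Editorial note: the boolean returned by buildCommunication mirrors the
// colour used in MPI_Comm_split above. Ranks that receive nothing in the new
// partition get MPI_UNDEFINED, are excluded from the new communicator and
// should not take part in further work on the redistributed data.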
1868 #else
1869  template<class G, class P,class T1, class T2, class R>
1870  bool graphRepartition(const G& graph, P& oocomm, int nparts,
1871  P*& outcomm,
1872  R& redistInf,
1873  bool v=false)
1874  {
1875  if(nparts!=oocomm.size())
1876  DUNE_THROW(NotImplemented, "only available for MPI programs");
1877  }
1878 
1879 
1880  template<class G, class P,class T1, class T2, class R>
1881  bool commGraphRepartition(const G& graph, P& oocomm, int nparts,
1882  P*& outcomm,
1883  R& redistInf,
1884  bool v=false)
1885  {
1886  if(nparts!=oocomm.size())
1887  DUNE_THROW(NotImplemented, "only available for MPI programs");
1888  }
1889 #endif // HAVE_MPI
1890 } // end of namespace Dune
1891 #endif