ViennaCL - The Vienna Computing Library  1.2.0
amg.hpp
#ifndef VIENNACL_LINALG_AMG_HPP_
#define VIENNACL_LINALG_AMG_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

   -----------------
   ViennaCL - The Vienna Computing Library
   -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file amg.hpp
    @brief Main include file for algebraic multigrid (AMG) preconditioners.
*/

#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/operation.hpp>
#include <boost/numeric/ublas/vector_proxy.hpp>
#include <boost/numeric/ublas/matrix_proxy.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <vector>
#include <cmath>
#include "viennacl/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/prod.hpp"

// Coarsening, interpolation and debug helpers used below
// (detail::amg::amg_coarse, detail::amg::amg_interpol, printmatrix/printvector, ...)
#include "viennacl/linalg/detail/amg/amg_base.hpp"
#include "viennacl/linalg/detail/amg/amg_coarse.hpp"
#include "viennacl/linalg/detail/amg/amg_interpol.hpp"
#include "viennacl/linalg/detail/amg/amg_debug.hpp"

#include <map>

#ifdef _OPENMP
  #include <omp.h>
#endif

#define VIENNACL_AMG_COARSE_LIMIT 50
#define VIENNACL_AMG_MAX_LEVELS 100
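// Coarsening stops as soon as a level has at most VIENNACL_AMG_COARSE_LIMIT coarse points
// (when the number of levels is determined automatically, i.e. coarselevels == 0),
// or after VIENNACL_AMG_MAX_LEVELS levels at the latest.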

namespace viennacl
{
  namespace linalg
  {
    /** @brief Sets up the AMG hierarchy: classifies points, builds the interpolation
    *          and coarse grid operators level by level.
    *
    * @param A            Operator matrices on all levels
    * @param P            Prolongation/interpolation matrices on all levels
    * @param Pointvector  Vector of C/F point classifications on all levels
    * @param tag          AMG preconditioner tag holding the configuration
    */
    template <typename InternalType1, typename InternalType2>
    void amg_setup(InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
    {
      typedef typename InternalType1::value_type SparseMatrixType;
      typedef typename InternalType2::value_type PointVectorType;
      typedef typename SparseMatrixType::value_type ScalarType;
      typedef typename SparseMatrixType::iterator1 InternalRowIterator;
      typedef typename SparseMatrixType::iterator2 InternalColIterator;

      unsigned int i, iterations, c_points, f_points;
      detail::amg::amg_slicing<InternalType1, InternalType2> Slicing;

      // Set the number of iterations. If automatic coarse grid construction is chosen (0),
      // use the maximum number of levels and stop during the process.
      iterations = tag.get_coarselevels();
      if (iterations == 0)
        iterations = VIENNACL_AMG_MAX_LEVELS;

      // For parallel coarsenings, build the slicing data structures
      // (the number of threads is set automatically).
      Slicing.init(iterations);

      for (i=0; i<iterations; ++i)
      {
        // Initialize Pointvector on level i and construct the points.
        Pointvector[i] = PointVectorType(A[i].size1());
        Pointvector[i].init_points();

        // Construct C and F points (i is the fine level, i+1 the coarse level).
        detail::amg::amg_coarse (i, A, Pointvector, Slicing, tag);

        // Calculate the number of C and F points on level i.
        c_points = Pointvector[i].get_cpoints();
        f_points = Pointvector[i].get_fpoints();

        #if defined (DEBUG) //or defined(DEBUGBENCH)
        std::cout << "Level " << i << ": ";
        std::cout << "No of C points = " << c_points << ", ";
        std::cout << "No of F points = " << f_points << std::endl;
        #endif

        // Stop when the maximal coarse level is reached (no C or F points). The coarsest level is level i.
        if (c_points == 0 || f_points == 0)
          break;

        // Construct the interpolation matrix for level i.
        detail::amg::amg_interpol (i, A, P, Pointvector, tag);

        // Compute the coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
        detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);

        // Test of the triple matrix product; very slow for large matrices (ublas).
        // test_triplematprod(A[i],P[i],A[i+1]);

        Pointvector[i].delete_points();

        #ifdef DEBUG
        std::cout << "Coarse Grid Operator Matrix:" << std::endl;
        printmatrix (A[i+1]);
        #endif

        // If the limit of coarse points is reached, stop. The coarsest level is level i+1.
        if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
        {
          tag.set_coarselevels(i+1);
          return;
        }
      }
      tag.set_coarselevels(i);
    }
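
    // For illustration: amg_galerkin_prod above forms the coarse operator A_coarse = trans(P) * A * P.
    // A minimal uBLAS sketch of the same triple product (A_fine and P_fine are hypothetical
    // placeholders; ViennaCL computes this on its internal sparse format instead):
    //
    //   boost::numeric::ublas::compressed_matrix<double> AP =
    //       boost::numeric::ublas::prod(A_fine, P_fine);                                // A * P
    //   boost::numeric::ublas::compressed_matrix<double> A_coarse =
    //       boost::numeric::ublas::prod(boost::numeric::ublas::trans(P_fine), AP);      // trans(P) * (A * P)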

    /** @brief Initializes the AMG data structures and installs the system matrix
    *          as the operator on the finest level.
    *
    * @param mat          System matrix
    * @param A            Operator matrices on all levels
    * @param P            Prolongation/interpolation matrices on all levels
    * @param Pointvector  Vector of C/F point classifications on all levels
    * @param tag          AMG preconditioner tag holding the configuration
    */
    template <typename MatrixType, typename InternalType1, typename InternalType2>
    void amg_init(MatrixType const & mat, InternalType1 & A, InternalType1 & P, InternalType2 & Pointvector, amg_tag & tag)
    {
      typedef typename MatrixType::value_type ScalarType;
      typedef typename InternalType1::value_type SparseMatrixType;

      if (tag.get_coarselevels() > 0)
      {
        A.resize(tag.get_coarselevels()+1);
        P.resize(tag.get_coarselevels());
        Pointvector.resize(tag.get_coarselevels());
      }
      else
      {
        A.resize(VIENNACL_AMG_MAX_LEVELS+1);
        P.resize(VIENNACL_AMG_MAX_LEVELS);
        Pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
      }

      // Insert the system matrix as the operator on the finest level.
      SparseMatrixType A0 (mat);
      A.insert_element (0, A0);
    }

    /** @brief Saves the operator, prolongation and restriction matrices in the CPU matrix type
    *          used during the solution phase.
    *
    * @param A        Operator matrices on all levels in the solver matrix type
    * @param P        Prolongation matrices on all levels in the solver matrix type
    * @param R        Restriction matrices on all levels in the solver matrix type
    * @param A_setup  Operator matrices from the setup phase
    * @param P_setup  Prolongation matrices from the setup phase
    * @param tag      AMG preconditioner tag holding the configuration
    */
    template <typename InternalType1, typename InternalType2>
    void amg_transform_cpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
    {
      typedef typename InternalType1::value_type MatrixType;

      // Resize internal data structures to the actual size.
      A.resize(tag.get_coarselevels()+1);
      P.resize(tag.get_coarselevels());
      R.resize(tag.get_coarselevels());

      // Transform into the solver matrix type.
      for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
      {
        A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
        A[i] = A_setup[i];
      }
      for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
      {
        P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
        P[i] = P_setup[i];
      }
      for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
      {
        R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
        P_setup[i].set_trans(true);
        R[i] = P_setup[i];
        P_setup[i].set_trans(false);
      }
    }
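
    // Note: R[i] = trans(P_setup[i]) is realized above by temporarily setting the transpose
    // flag of P_setup[i] for the assignment and resetting it afterwards, so that P_setup
    // continues to describe the plain (non-transposed) prolongation.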

    /** @brief Copies the operator, prolongation and restriction matrices to the GPU matrix type
    *          used during the solution phase.
    *
    * @param A        Operator matrices on all levels in the solver matrix type
    * @param P        Prolongation matrices on all levels in the solver matrix type
    * @param R        Restriction matrices on all levels in the solver matrix type
    * @param A_setup  Operator matrices from the setup phase
    * @param P_setup  Prolongation matrices from the setup phase
    * @param tag      AMG preconditioner tag holding the configuration
    */
    template <typename InternalType1, typename InternalType2>
    void amg_transform_gpu (InternalType1 & A, InternalType1 & P, InternalType1 & R, InternalType2 & A_setup, InternalType2 & P_setup, amg_tag & tag)
    {
      typedef typename InternalType1::value_type MatrixType;
      typedef typename InternalType2::value_type::value_type ScalarType;

      // Resize internal data structures to the actual size.
      A.resize(tag.get_coarselevels()+1);
      P.resize(tag.get_coarselevels());
      R.resize(tag.get_coarselevels());

      // Copy to the GPU using the internal sparse matrix structure: std::vector<std::map>.
      for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
      {
        A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
        viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
      }
      for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
      {
        P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
        viennacl::copy(*(P_setup[i].get_internal_pointer()),P[i]);
        //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]);
      }
      for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
      {
        R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
        P_setup[i].set_trans(true);
        viennacl::copy(*(P_setup[i].get_internal_pointer()),R[i]);
        P_setup[i].set_trans(false);
      }
    }

    /** @brief Sets up the data structures (result, RHS and residual vectors on every level)
    *          needed in the solution phase.
    *
    * @param result    Solution/correction vectors on all levels
    * @param rhs       RHS vectors on all levels
    * @param residual  Residual vectors on all levels
    * @param A         Operator matrices on all levels
    * @param tag       AMG preconditioner tag holding the configuration
    */
    template <typename InternalVectorType, typename SparseMatrixType>
    void amg_setup_apply (InternalVectorType & result, InternalVectorType & rhs, InternalVectorType & residual, SparseMatrixType const & A, amg_tag const & tag)
    {
      typedef typename InternalVectorType::value_type VectorType;

      result.resize(tag.get_coarselevels()+1);
      rhs.resize(tag.get_coarselevels()+1);
      residual.resize(tag.get_coarselevels());

      for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
      {
        result[level] = VectorType(A[level].size1());
        result[level].clear();
        rhs[level] = VectorType(A[level].size1());
        rhs[level].clear();
      }
      for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
      {
        residual[level] = VectorType(A[level].size1());
        residual[level].clear();
      }
    }

    /** @brief Computes the LU factorization of the coarsest-level operator for the direct solve.
    *
    * @param op           Operator matrix; holds the LU factors after the call
    * @param Permutation  Permutation matrix of the factorization
    * @param A            Operator matrix on the coarsest level
    */
    template <typename ScalarType, typename SparseMatrixType>
    void amg_lu(boost::numeric::ublas::compressed_matrix<ScalarType> & op, boost::numeric::ublas::permutation_matrix<ScalarType> & Permutation, SparseMatrixType const & A)
    {
      typedef typename SparseMatrixType::const_iterator1 ConstRowIterator;
      typedef typename SparseMatrixType::const_iterator2 ConstColIterator;

      // Copy to the operator matrix; needed because lu_factorize works in place.
      op.resize(A.size1(),A.size2(),false);
      for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
        for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
          op (col_iter.index1(), col_iter.index2()) = *col_iter;

      // The permutation matrix has to be reinitialized with the actual size. Do not clear() or resize()!
      Permutation = boost::numeric::ublas::permutation_matrix<ScalarType> (op.size1());
      boost::numeric::ublas::lu_factorize(op,Permutation);
    }
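
    // Usage sketch for the factorization computed here (b is a hypothetical uBLAS vector
    // of size op.size1()); lu_substitute() then solves the system in place:
    //
    //   boost::numeric::ublas::vector<double> x = b;
    //   boost::numeric::ublas::lu_substitute(op, Permutation, x);   // x now solves A * x = b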

    /** @brief AMG preconditioner class; can be supplied to the solve()-routines.
    */
    template <typename MatrixType>
    class amg_precond
    {
      typedef typename MatrixType::value_type ScalarType;
      typedef boost::numeric::ublas::vector<ScalarType> VectorType;
      typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
      typedef detail::amg::amg_pointvector PointVectorType;

      typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
      typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
      typedef typename SparseMatrixType::iterator1 InternalRowIterator;
      typedef typename SparseMatrixType::iterator2 InternalColIterator;

      boost::numeric::ublas::vector <SparseMatrixType> A_setup;
      boost::numeric::ublas::vector <SparseMatrixType> P_setup;
      boost::numeric::ublas::vector <MatrixType> A;
      boost::numeric::ublas::vector <MatrixType> P;
      boost::numeric::ublas::vector <MatrixType> R;
      boost::numeric::ublas::vector <PointVectorType> Pointvector;

      mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
      mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;

      mutable boost::numeric::ublas::vector <VectorType> result;
      mutable boost::numeric::ublas::vector <VectorType> rhs;
      mutable boost::numeric::ublas::vector <VectorType> residual;

      mutable bool done_init_apply;

      amg_tag _tag;

    public:
      amg_precond(): Permutation(0) {}

      /** @brief Constructor: initializes the AMG data structures for the given system matrix.
      *
      * @param mat  System matrix
      * @param tag  AMG preconditioner tag holding the configuration
      */
      amg_precond(MatrixType const & mat, amg_tag const & tag): Permutation(0)
      {
        _tag = tag;
        // Initialize data structures.
        amg_init (mat,A_setup,P_setup,Pointvector,_tag);

        done_init_apply = false;
      }

      /** @brief Starts the setup phase for this AMG preconditioner. */
      void setup()
      {
        // Run the setup phase.
        amg_setup(A_setup,P_setup,Pointvector,_tag);
        // Transform to the CPU matrix type for the solution phase.
        amg_transform_cpu(A,P,R,A_setup,P_setup,_tag);

        done_init_apply = false;
      }

      /** @brief Prepares the solution phase: builds the level vectors and the LU
      *          factorization for the direct solve on the coarsest level.
      *          Called once before the first application of the preconditioner.
      */
      void init_apply() const
      {
        // Set up the data structures for the solution phase.
        amg_setup_apply(result,rhs,residual,A_setup,_tag);
        // LU factorization for the direct solve on the coarsest level.
        amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);

        done_init_apply = true;
      }

      /** @brief Returns the operator complexity of the AMG hierarchy.
      *
      * @param avgstencil  Holds the average stencil size (nonzeros per row) on each level after the call
      * @return  Ratio of the nonzeros on all levels to the nonzeros of the system matrix
      */
      template <typename VectorType>
      ScalarType calc_complexity(VectorType & avgstencil)
      {
        avgstencil = VectorType (_tag.get_coarselevels()+1);
        unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;

        for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
        {
          level_coefficients = 0;
          for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
          {
            for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
            {
              // Count every stored coefficient; those of the finest level separately.
              if (level == 0)
                systemmat_nonzero++;
              nonzero++;
              level_coefficients++;
            }
          }
          avgstencil[level] = level_coefficients/(double)A_setup[level].size1();
        }
        return nonzero/static_cast<double>(systemmat_nonzero);
      }
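
      // In the notation of the code above, the returned operator complexity is
      //   C_op = (nnz(A_0) + nnz(A_1) + ... + nnz(A_L)) / nnz(A_0),
      // a standard measure of the memory and work overhead of an AMG hierarchy;
      // avgstencil[level] holds the average number of nonzeros per row on that level.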

      /** @brief Applies the preconditioner: one AMG V-cycle on the vector.
      *
      * @param vec  The vector to which the preconditioner is applied
      */
      template <typename VectorType>
      void apply(VectorType & vec) const
      {
        // Build the data structures and the LU factorization before the first iteration step.
        if (!done_init_apply)
          init_apply();

        int level;

        // Preconditioning operation (Yang, p. 3).
        rhs[0] = vec;
        for (level=0; level <(signed)_tag.get_coarselevels(); level++)
        {
          result[level].clear();

          // Apply the smoother _presmooth times.
          smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);

          #ifdef DEBUG
          std::cout << "After presmooth:" << std::endl;
          printvector(result[level]);
          #endif

          // Compute the residual.
          residual[level] = rhs[level] - boost::numeric::ublas::prod (A[level],result[level]);

          #ifdef DEBUG
          std::cout << "Residual:" << std::endl;
          printvector(residual[level]);
          #endif

          // Restrict to the coarse level. The restricted residual is the RHS of the coarse level.
          rhs[level+1] = boost::numeric::ublas::prod (R[level],residual[level]);

          #ifdef DEBUG
          std::cout << "Restricted Residual: " << std::endl;
          printvector(rhs[level+1]);
          #endif
        }

        // On the coarsest level, solve the system directly via the LU factorization.
        result[level] = rhs[level];
        boost::numeric::ublas::lu_substitute(op,Permutation,result[level]);

        #ifdef DEBUG
        std::cout << "After direct solve: " << std::endl;
        printvector (result[level]);
        #endif

        for (level=_tag.get_coarselevels()-1; level >= 0; level--)
        {
          #ifdef DEBUG
          std::cout << "Coarse Error: " << std::endl;
          printvector(result[level+1]);
          #endif

          // Interpolate the error to the fine level and correct the solution by adding it.
          result[level] += boost::numeric::ublas::prod (P[level], result[level+1]);

          #ifdef DEBUG
          std::cout << "Corrected Result: " << std::endl;
          printvector (result[level]);
          #endif

          // Apply the smoother _postsmooth times.
          smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);

          #ifdef DEBUG
          std::cout << "After postsmooth: " << std::endl;
          printvector (result[level]);
          #endif
        }
        vec = result[0];
      }
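
      // Taken together, apply() performs one V-cycle: presmoothing and restriction on the
      // way down, a direct LU solve on the coarsest level, then interpolation, correction
      // and postsmoothing on the way back up.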

      /** @brief Damped Jacobi smoother (CPU).
      *
      * @param level       Level on which to smooth
      * @param iterations  Number of smoother sweeps
      * @param x           The vector being smoothed
      * @param rhs         The right-hand side on this level
      */
      template <typename VectorType>
      void smooth_jacobi(int level, int const iterations, VectorType & x, VectorType const & rhs) const
      {
        VectorType old_result (x.size());
        unsigned int index;
        ScalarType sum = 0, diag = 1;

        for (int i=0; i<iterations; ++i)
        {
          old_result = x;
          x.clear();
#ifdef _OPENMP
          #pragma omp parallel for private (sum,diag) shared (rhs,x)
#endif
          for (index=0; index<A_setup[level].size1(); ++index)
          {
            InternalConstRowIterator row_iter = A_setup[level].begin1();
            row_iter += index;
            sum = 0;
            diag = 1;
            for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
            {
              if (col_iter.index1() == col_iter.index2())
                diag = *col_iter;
              else
                sum += *col_iter * old_result[col_iter.index2()];
            }
            x[index] = _tag.get_jacobiweight() * (rhs[index] - sum) / diag + (1-_tag.get_jacobiweight()) * old_result[index];
          }
        }
      }
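
      // With D = diag(A) and weight w = _tag.get_jacobiweight(), each sweep above computes
      //   x_new[j] = w * (rhs[j] - sum_{k != j} A(j,k) * x_old[k]) / A(j,j) + (1 - w) * x_old[j],
      // i.e. damped (weighted) Jacobi smoothing.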

      amg_tag & tag() { return _tag; }
    };
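
    // Typical use of the uBLAS-based preconditioner together with ViennaCL's iterative
    // solvers (a minimal sketch; the matrix and vector setup is a placeholder):
    //
    //   boost::numeric::ublas::compressed_matrix<double> A;    // assembled elsewhere
    //   boost::numeric::ublas::vector<double> rhs, result;     // assembled elsewhere
    //
    //   viennacl::linalg::amg_tag my_amg_tag;                  // default AMG settings
    //   viennacl::linalg::amg_precond<boost::numeric::ublas::compressed_matrix<double> >
    //       amg(A, my_amg_tag);
    //   amg.setup();                                           // build the level hierarchy
    //   result = viennacl::linalg::solve(A, rhs, viennacl::linalg::cg_tag(), amg);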

    /** @brief AMG preconditioner class for viennacl::compressed_matrix; can be supplied to the solve()-routines.
    */
    template <typename ScalarType, unsigned int MAT_ALIGNMENT>
    class amg_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT> >
    {
      typedef viennacl::compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType;
      typedef viennacl::vector<ScalarType> VectorType;
      typedef detail::amg::amg_sparsematrix<ScalarType> SparseMatrixType;
      typedef detail::amg::amg_pointvector PointVectorType;

      typedef typename SparseMatrixType::const_iterator1 InternalConstRowIterator;
      typedef typename SparseMatrixType::const_iterator2 InternalConstColIterator;
      typedef typename SparseMatrixType::iterator1 InternalRowIterator;
      typedef typename SparseMatrixType::iterator2 InternalColIterator;

      boost::numeric::ublas::vector <SparseMatrixType> A_setup;
      boost::numeric::ublas::vector <SparseMatrixType> P_setup;
      boost::numeric::ublas::vector <MatrixType> A;
      boost::numeric::ublas::vector <MatrixType> P;
      boost::numeric::ublas::vector <MatrixType> R;
      boost::numeric::ublas::vector <PointVectorType> Pointvector;

      mutable boost::numeric::ublas::compressed_matrix<ScalarType> op;
      mutable boost::numeric::ublas::permutation_matrix<ScalarType> Permutation;

      mutable boost::numeric::ublas::vector <VectorType> result;
      mutable boost::numeric::ublas::vector <VectorType> rhs;
      mutable boost::numeric::ublas::vector <VectorType> residual;

      mutable bool done_init_apply;

      amg_tag _tag;

    public:

      amg_precond(): Permutation(0) {}

      /** @brief Constructor: copies the system matrix to the CPU and initializes the AMG data structures.
      *
      * @param mat  System matrix
      * @param tag  AMG preconditioner tag holding the configuration
      */
      amg_precond(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat, amg_tag const & tag): Permutation(0)
      {
        _tag = tag;

        // Copy to the CPU. The internal structure of the sparse matrix is used for the copy operation.
        std::vector<std::map<unsigned int, ScalarType> > mat2 = std::vector<std::map<unsigned int, ScalarType> >(mat.size1());
        viennacl::copy(mat, mat2);

        // Initialize data structures.
        amg_init (mat2,A_setup,P_setup,Pointvector,_tag);

        done_init_apply = false;
      }

      /** @brief Starts the setup phase for this AMG preconditioner. */
      void setup()
      {
        // Run the setup phase.
        amg_setup(A_setup,P_setup,Pointvector,_tag);
        // Transform to the GPU matrix type for the solution phase.
        amg_transform_gpu(A,P,R,A_setup,P_setup,_tag);

        done_init_apply = false;
      }

      /** @brief Prepares the solution phase: builds the level vectors and the LU
      *          factorization for the direct solve on the coarsest level.
      *          Called once before the first application of the preconditioner.
      */
      void init_apply() const
      {
        // Set up the data structures for the solution phase.
        amg_setup_apply(result,rhs,residual,A_setup,_tag);
        // LU factorization for the direct solve on the coarsest level.
        amg_lu(op,Permutation,A_setup[_tag.get_coarselevels()]);

        done_init_apply = true;
      }
613 
619  template <typename VectorType>
620  ScalarType calc_complexity(VectorType & avgstencil)
621  {
622  avgstencil = VectorType (_tag.get_coarselevels()+1);
623  unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
624 
625  for (unsigned int level=0; level < _tag.get_coarselevels()+1; ++level)
626  {
627  level_coefficients = 0;
628  for (InternalRowIterator row_iter = A_setup[level].begin1(); row_iter != A_setup[level].end1(); ++row_iter)
629  {
630  for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
631  {
632  if (level == 0)
633  systemmat_nonzero++;
634  nonzero++;
635  level_coefficients++;
636  }
637  }
638  avgstencil[level] = level_coefficients/(double)A[level].size1();
639  }
640  return nonzero/static_cast<double>(systemmat_nonzero);
641  }

      /** @brief Applies the preconditioner: one AMG V-cycle on the (GPU) vector.
      *
      * @param vec  The vector to which the preconditioner is applied
      */
      template <typename VectorType>
      void apply(VectorType & vec) const
      {
        // Build the data structures and the LU factorization before the first iteration step.
        if (!done_init_apply)
          init_apply();

        int level;

        // Preconditioning operation (Yang, p. 3).
        rhs[0] = vec;
        for (level=0; level <(signed)_tag.get_coarselevels(); level++)
        {
          result[level].clear();

          // Apply the smoother _presmooth times.
          smooth_jacobi (level, _tag.get_presmooth(), result[level], rhs[level]);

          #ifdef DEBUG
          std::cout << "After presmooth: " << std::endl;
          printvector(result[level]);
          #endif

          // Compute the residual.
          residual[level] = rhs[level] - viennacl::linalg::prod (A[level],result[level]);

          #ifdef DEBUG
          std::cout << "Residual: " << std::endl;
          printvector(residual[level]);
          #endif

          // Restrict to the coarse level. The result is the RHS of the coarse level equation.
          //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
          rhs[level+1] = viennacl::linalg::prod(R[level],residual[level]);

          #ifdef DEBUG
          std::cout << "Restricted Residual: " << std::endl;
          printvector(rhs[level+1]);
          #endif
        }

        // On the coarsest level, solve the system directly (on the CPU).
        //TODO: Use GPU direct solve!
        result[level] = rhs[level];
        boost::numeric::ublas::vector <ScalarType> result_cpu (result[level].size());

        copy (result[level],result_cpu);
        boost::numeric::ublas::lu_substitute(op,Permutation,result_cpu);
        copy (result_cpu, result[level]);

        #ifdef DEBUG
        std::cout << "After direct solve: " << std::endl;
        printvector (result[level]);
        #endif

        for (level=_tag.get_coarselevels()-1; level >= 0; level--)
        {
          #ifdef DEBUG
          std::cout << "Coarse Error: " << std::endl;
          printvector(result[level+1]);
          #endif

          // Interpolate the error to the fine level and correct the solution.
          result[level] += viennacl::linalg::prod(P[level],result[level+1]);

          #ifdef DEBUG
          std::cout << "Corrected Result: " << std::endl;
          printvector (result[level]);
          #endif

          // Apply the smoother _postsmooth times.
          smooth_jacobi (level, _tag.get_postsmooth(), result[level], rhs[level]);

          #ifdef DEBUG
          std::cout << "After postsmooth: " << std::endl;
          printvector (result[level]);
          #endif
        }
        vec = result[0];
      }

      /** @brief Damped Jacobi smoother (GPU, via the OpenCL "jacobi" kernel).
      *
      * @param level       Level on which to smooth
      * @param iterations  Number of smoother sweeps
      * @param x           The vector being smoothed
      * @param rhs         The right-hand side on this level
      */
      template <typename VectorType>
      void smooth_jacobi(int level, unsigned int iterations, VectorType & x, VectorType const & rhs) const
      {
        VectorType old_result (x.size());

        //viennacl::ocl::program & p = viennacl::ocl::current_context().add_program
        //  (viennacl::tools::make_double_kernel(jacobi_kernel,viennacl::ocl::current_device().info()), "jacobi_kernel");
        //viennacl::ocl::kernel & k = p.add_kernel("jacobi");

        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<ScalarType, MAT_ALIGNMENT>::program_name(),
                                                              "jacobi");

        for (unsigned int i=0; i<iterations; ++i)
        {
          old_result = x;
          x.clear();
          viennacl::ocl::enqueue(k(A[level].handle1(), A[level].handle2(), A[level].handle(),
                                   static_cast<ScalarType>(_tag.get_jacobiweight()),
                                   old_result,
                                   x,
                                   rhs,
                                   static_cast<cl_uint>(rhs.size())));
        }
      }
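
      // The OpenCL "jacobi" kernel enqueued above performs the same damped Jacobi sweep as
      // the CPU implementation, with one kernel launch per iteration; handle1(), handle2()
      // and handle() are the CSR row pointer, column index and value buffers of A[level].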

      amg_tag & tag() { return _tag; }
    };
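
    // GPU variant, analogous to the uBLAS sketch above (again a minimal sketch; the
    // matrix and vector setup is a placeholder):
    //
    //   viennacl::compressed_matrix<double> vcl_A;       // filled via viennacl::copy()
    //   viennacl::vector<double> vcl_rhs, vcl_result;    // filled via viennacl::copy()
    //
    //   viennacl::linalg::amg_tag my_amg_tag;
    //   viennacl::linalg::amg_precond<viennacl::compressed_matrix<double> > amg(vcl_A, my_amg_tag);
    //   amg.setup();
    //   vcl_result = viennacl::linalg::solve(vcl_A, vcl_rhs, viennacl::linalg::cg_tag(), amg);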

  } //namespace linalg
} //namespace viennacl

#endif