ViennaCL - The Vienna Computing Library  1.2.0
fspai.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
20 #include <utility>
21 #include <iostream>
22 #include <fstream>
23 #include <string>
24 #include <algorithm>
25 #include <vector>
26 #include <math.h>
27 #include <map>
28 
29 //boost includes
30 #include "boost/numeric/ublas/vector.hpp"
31 #include "boost/numeric/ublas/matrix.hpp"
32 #include "boost/numeric/ublas/matrix_proxy.hpp"
33 #include "boost/numeric/ublas/vector_proxy.hpp"
34 #include "boost/numeric/ublas/storage.hpp"
35 #include "boost/numeric/ublas/io.hpp"
36 #include "boost/numeric/ublas/lu.hpp"
37 #include "boost/numeric/ublas/triangular.hpp"
38 #include "boost/numeric/ublas/matrix_expression.hpp"
39 
40 // ViennaCL includes
41 #include "viennacl/linalg/prod.hpp"
42 #include "viennacl/matrix.hpp"
46 #include "viennacl/scalar.hpp"
47 #include "viennacl/linalg/cg.hpp"
49 #include "viennacl/linalg/ilu.hpp"
50 //#include <omp.h>
51 
56 namespace viennacl
57 {
58  namespace linalg
59  {
60  namespace detail
61  {
62  namespace spai
63  {
64 
69  class fspai_tag{
76  public:
78  double residual_norm_threshold = 1e-3,
79  unsigned int iteration_limit = 5,
80  bool is_static = false,
81  bool is_right = false) :
82  _residual_norm_threshold(residual_norm_threshold),
83  _iteration_limit(iteration_limit),
84  _is_static(is_static),
85  _is_right(is_right){};
86 
87  inline const double getResidualNormThreshold() const
88  { return _residual_norm_threshold; }
89  inline const unsigned long getIterationLimit () const
90  { return _iteration_limit; }
91  inline const bool getIsStatic() const
92  { return _is_static; }
93  inline const bool getIsRight() const
94  { return _is_right; }
95  inline void setResidualNormThreshold(double residual_norm_threshold){
96  if(residual_norm_threshold > 0)
97  _residual_norm_threshold = residual_norm_threshold;
98  }
99  inline void setIterationLimit(unsigned long iteration_limit){
100  if(iteration_limit > 0)
101  _iteration_limit = iteration_limit;
102  }
103  inline void setIsRight(bool is_right){
104  _is_right = is_right;
105  }
106  inline void setIsStatic(bool is_static){
107  _is_static = is_static;
108  }
109 
110  private:
111  double _residual_norm_threshold;
112  unsigned long _iteration_limit;
113  bool _is_static;
114  bool _is_right;
115  };
116 
117 
118  //
119  // Helper: Store A in an STL container of type, exploiting symmetry
120  // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
121  //
122  template <typename MatrixType, typename ScalarType>
123  void sym_sparse_matrix_to_stl(MatrixType const & A, std::vector<std::map<unsigned int, ScalarType> > & STL_A)
124  {
125  STL_A.resize(A.size1());
126  for (typename MatrixType::const_iterator1 row_it = A.begin1();
127  row_it != A.end1();
128  ++row_it)
129  {
130  for (typename MatrixType::const_iterator2 col_it = row_it.begin();
131  col_it != row_it.end();
132  ++col_it)
133  {
134  if (col_it.index1() >= col_it.index2())
135  STL_A[col_it.index1()][col_it.index2()] = *col_it;
136  else
137  break; //go to next row
138  }
139  }
140  }
141 
142 
143  //
144  // Generate index sets J_k, k=0,...,N-1
145  //
146  template <typename MatrixType>
147  void generateJ(MatrixType const & A, std::vector<std::vector<size_t> > & J)
148  {
149  for (typename MatrixType::const_iterator1 row_it = A.begin1();
150  row_it != A.end1();
151  ++row_it)
152  {
153  for (typename MatrixType::const_iterator2 col_it = row_it.begin();
154  col_it != row_it.end();
155  ++col_it)
156  {
157  if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
158  {
159  J[col_it.index2()].push_back(col_it.index1());
160  J[col_it.index1()].push_back(col_it.index2());
161  }
162  else
163  break; //go to next row
164  }
165  }
166  }
167 
168 
169  //
170  // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
171  // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
172  //
173  template <typename ScalarType, typename MatrixType, typename VectorType>
174  void fill_blocks(std::vector< std::map<unsigned int, ScalarType> > & A,
175  std::vector<MatrixType> & blocks,
176  std::vector<std::vector<size_t> > const & J,
177  std::vector<VectorType> & Y)
178  {
179  for (size_t k=0; k<A.size(); ++k)
180  {
181  std::vector<size_t> const & Jk = J[k];
182  VectorType & yk = Y[k];
183  MatrixType & block_k = blocks[k];
184 
185  yk.resize(Jk.size());
186  block_k.resize(Jk.size(), Jk.size());
187  block_k.clear();
188 
189  for (size_t i=0; i<Jk.size(); ++i)
190  {
191  size_t row_index = Jk[i];
192  std::map<unsigned int, ScalarType> & A_row = A[row_index];
193 
194  //fill y_k:
195  yk[i] = A_row[k];
196 
197  for (size_t j=0; j<Jk.size(); ++j)
198  {
199  size_t col_index = Jk[j];
200  if (col_index <= row_index && A_row.find(col_index) != A_row.end()) //block is symmetric, thus store only lower triangular part
201  block_k(i, j) = A_row[col_index];
202  }
203  }
204  }
205  }
206 
207 
208  //
209  // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
210  //
211  template <typename MatrixType>
212  void cholesky_decompose(MatrixType & A)
213  {
214  for (size_t k=0; k<A.size2(); ++k)
215  {
216  if (A(k,k) <= 0)
217  {
218  std::cout << "k: " << k << std::endl;
219  std::cout << "A(k,k): " << A(k,k) << std::endl;
220  }
221 
222  assert(A(k,k) > 0);
223 
224  A(k,k) = std::sqrt(A(k,k));
225 
226  for (size_t i=k+1; i<A.size1(); ++i)
227  {
228  A(i,k) /= A(k,k);
229  for (size_t j=k+1; j<=i; ++j)
230  A(i,j) -= A(i,k) * A(j,k);
231  }
232  }
233  }
234 
235 
236  //
237  // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
238  //
239  template <typename MatrixType, typename VectorType>
240  void cholesky_solve(MatrixType const & L, VectorType & b)
241  {
242  typedef typename VectorType::value_type ScalarType;
243 
244  // inplace forward solve L x = b
245  for (size_t i=0; i<L.size1(); ++i)
246  {
247  for (size_t j=0; j<i; ++j)
248  b[i] -= L(i,j) * b[j];
249  b[i] /= L(i,i);
250  }
251 
252  // inplace backward solve L^T x = b:
253  for (size_t i=L.size1()-1; ; --i)
254  {
255  for (size_t k=i+1; k<L.size1(); ++k)
256  b[i] -= L(k,i) * b[k];
257  b[i] /= L(i,i);
258 
259  if (i==0) //size_t might be unsigned, therefore manual check for equality with zero here
260  break;
261  }
262  }
263 
264 
265 
266  //
267  // Compute the Cholesky factor L from the sparse vectors y_k
268  //
269  template <typename MatrixType, typename VectorType1>
270  void computeL(MatrixType const & A,
271  MatrixType & L,
272  MatrixType & L_trans,
273  std::vector<VectorType1> & Y,
274  std::vector<std::vector<size_t> > & J)
275  {
276  typedef typename VectorType1::value_type ScalarType;
277  typedef std::vector<std::map<unsigned int, ScalarType> > STLSparseMatrixType;
278 
279  STLSparseMatrixType L_temp(A.size1());
280 
281  for (size_t k=0; k<A.size1(); ++k)
282  {
283  std::vector<size_t> const & Jk = J[k];
284  VectorType1 const & yk = Y[k];
285 
286  //compute L(k,k):
287  ScalarType Lkk = A(k,k);
288  for (size_t i=0; i<Jk.size(); ++i)
289  Lkk -= A(Jk[i],k) * yk[i];
290 
291  Lkk = 1.0 / sqrt(Lkk);
292  L_temp[k][k] = Lkk;
293  L_trans(k,k) = Lkk;
294 
295  //write lower diagonal entries:
296  for (size_t i=0; i<Jk.size(); ++i)
297  {
298  L_temp[Jk[i]][k] = -Lkk * yk[i];
299  L_trans(k, Jk[i]) = -Lkk * yk[i];
300  }
301  } //for k
302 
303 
304  //build L from L_temp
305  for (size_t i=0; i<L_temp.size(); ++i)
306  for (typename std::map<unsigned int, ScalarType>::const_iterator it = L_temp[i].begin();
307  it != L_temp[i].end();
308  ++it)
309  L(i, it->first) = it->second;
310  }
311 
312 
313  //
314  // Top level FSPAI function
315  //
316  template <typename MatrixType>
317  void computeFSPAI(MatrixType const & A,
318  MatrixType const & PatternA,
319  MatrixType & L,
320  MatrixType & L_trans,
321  fspai_tag const & tag)
322  {
323  typedef typename MatrixType::value_type ScalarType;
324  typedef boost::numeric::ublas::matrix<ScalarType> DenseMatrixType;
325  typedef std::vector<std::map<unsigned int, ScalarType> > SparseMatrixType;
326 
327  //
328  // preprocessing: Store A in a STL container:
329  //
330  //std::cout << "Transferring to STL container:" << std::endl;
331  std::vector<std::vector<ScalarType> > y_k(A.size1());
332  SparseMatrixType STL_A(A.size1());
333  sym_sparse_matrix_to_stl(A, STL_A);
334 
335 
336  //
337  // Step 1: Generate pattern indices
338  //
339  //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
340  std::vector<std::vector<size_t> > J(A.size1());
341  generateJ(PatternA, J);
342 
343  //
344  // Step 2: Set up matrix blocks
345  //
346  //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
347  std::vector<DenseMatrixType> subblocks_A(A.size1());
348  fill_blocks(STL_A, subblocks_A, J, y_k);
349  STL_A.clear(); //not needed anymore
350 
351  //
352  // Step 3: Cholesky-factor blocks
353  //
354  //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
355  for (size_t i=0; i<subblocks_A.size(); ++i)
356  {
357  //std::cout << "Block before: " << subblocks_A[i] << std::endl;
358  cholesky_decompose(subblocks_A[i]);
359  //std::cout << "Block after: " << subblocks_A[i] << std::endl;
360  }
361 
362 
363  /*size_t num_bytes = 0;
364  for (size_t i=0; i<subblocks_A.size(); ++i)
365  num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
366  //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
367 
368  //
369  // Step 4: Solve for y_k
370  //
371  //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
372  for (size_t i=0; i<y_k.size(); ++i)
373  {
374  if (subblocks_A[i].size1() > 0) //block might be empty...
375  {
376  //y_k[i].resize(subblocks_A[i].size1());
377  //std::cout << "y_k[" << i << "]: ";
378  //for (size_t j=0; j<y_k[i].size(); ++j)
379  // std::cout << y_k[i][j] << " ";
380  //std::cout << std::endl;
381  cholesky_solve(subblocks_A[i], y_k[i]);
382  }
383  }
384 
385 
386  //
387  // Step 5: Set up Cholesky factors L and L_trans
388  //
389  //std::cout << "computeFSPAI(): Computing L..." << std::endl;
390  L.resize(A.size1(), A.size2(), false);
391  L.reserve(A.nnz(), false);
392  L_trans.resize(A.size1(), A.size2(), false);
393  L_trans.reserve(A.nnz(), false);
394  computeL(A, L, L_trans, y_k, J);
395 
396  //std::cout << "L: " << L << std::endl;
397  }
398 
399 
400 
401  }
402  }
403  }
404 }
405 
406 #endif