ViennaCL - The Vienna Computing Library  1.2.0
qr.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_QR_HPP
2 #define VIENNACL_LINALG_QR_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include <utility>
25 #include <iostream>
26 #include <fstream>
27 #include <string>
28 #include <algorithm>
29 #include <vector>
30 #include <math.h>
31 #include <cmath>
32 #include "boost/numeric/ublas/vector.hpp"
33 #include "boost/numeric/ublas/matrix.hpp"
34 #include "boost/numeric/ublas/matrix_proxy.hpp"
35 #include "boost/numeric/ublas/io.hpp"
36 #include "boost/numeric/ublas/matrix_expression.hpp"
37 
38 #include "viennacl/matrix.hpp"
39 #include "viennacl/linalg/prod.hpp"
40 
41 namespace viennacl
42 {
43  namespace linalg
44  {
45 
46  // orthogonalises j-th column of A
47  template <typename MatrixType, typename VectorType>
48  typename MatrixType::value_type setup_householder_vector(MatrixType const & A, VectorType & v, size_t j)
49  {
50  typedef typename MatrixType::value_type ScalarType;
51 
52  //compute norm of column below diagonal:
53  ScalarType sigma = 0;
54  ScalarType beta = 0;
55  for (size_t k = j+1; k<A.size1(); ++k)
56  sigma += A(k, j) * A(k, j);
57 
58  //get v from A:
59  for (size_t k = j+1; k<A.size1(); ++k)
60  v[k] = A(k, j);
61 
62  if (sigma == 0)
63  return 0;
64  else
65  {
66  ScalarType mu = sqrt(sigma + A(j,j)*A(j,j));
67  //std::cout << "mu: " << mu << std::endl;
68  //std::cout << "sigma: " << sigma << std::endl;
69 
70  ScalarType v1;
71  if (A(j,j) <= 0)
72  v1 = A(j,j) - mu;
73  else
74  v1 = -sigma / (A(j,j) + mu);
75 
76  beta = 2.0 * v1 * v1 / (sigma + v1 * v1);
77 
78  //divide v by its diagonal element v[j]
79  v[j] = 1;
80  for (size_t k = j+1; k<A.size1(); ++k)
81  v[k] /= v1;
82  }
83 
84  return beta;
85  }
86 
87  // Apply (I - beta v v^T) to the k-th column of A, where v is the reflector starting at j-th row/column
88  template <typename MatrixType, typename VectorType, typename ScalarType>
89  void householder_reflect(MatrixType & A, VectorType & v, ScalarType beta, size_t j, size_t k)
90  {
91  ScalarType v_in_col = A(j,k);
92  for (size_t i=j+1; i<A.size1(); ++i)
93  v_in_col += v[i] * A(i,k);
94 
95  for (size_t i=j; i<A.size1(); ++i)
96  A(i,k) -= beta * v_in_col * v[i];
97  }
98 
99  // Apply (I - beta v v^T) to A, where v is the reflector starting at j-th row/column
100  template <typename MatrixType, typename VectorType, typename ScalarType>
101  void householder_reflect(MatrixType & A, VectorType & v, ScalarType beta, size_t j)
102  {
103  size_t column_end = A.size2();
104 
105  for (size_t k=j; k<column_end; ++k) //over columns
106  householder_reflect(A, v, beta, j, k);
107  }
108 
109 
110  template <typename MatrixType, typename VectorType>
111  void write_householder_to_A(MatrixType & A, VectorType const & v, size_t j)
112  {
113  for (size_t i=j+1; i<A.size1(); ++i)
114  A(i,j) = v[i];
115  }
116 
117 
118  //takes an inplace QR matrix A and generates Q and R explicitly
119  template <typename MatrixType, typename VectorType>
120  void recoverQ(MatrixType const & A, VectorType const & betas, MatrixType & Q, MatrixType & R)
121  {
122  typedef typename MatrixType::value_type ScalarType;
123 
124  std::vector<ScalarType> v(A.size1());
125 
126  Q.clear();
127  R.clear();
128 
129  //
130  // Recover R from upper-triangular part of A:
131  //
132  size_t i_max = std::min(R.size1(), R.size2());
133  for (size_t i=0; i<i_max; ++i)
134  for (size_t j=i; j<R.size2(); ++j)
135  R(i,j) = A(i,j);
136 
137  //
138  // Recover Q by applying all the Householder reflectors to the identity matrix:
139  //
140  for (size_t i=0; i<Q.size1(); ++i)
141  Q(i,i) = 1.0;
142 
143  size_t j_max = std::min(A.size1(), A.size2());
144  for (size_t j=0; j<j_max; ++j)
145  {
146  size_t col_index = j_max - j - 1;
147  v[col_index] = 1.0;
148  for (size_t i=col_index+1; i<A.size1(); ++i)
149  v[i] = A(i, col_index);
150 
151  /*std::cout << "Recovery with beta = " << betas[col_index] << ", j=" << col_index << std::endl;
152  std::cout << "v: ";
153  for (size_t i=0; i<v.size(); ++i)
154  std::cout << v[i] << ", ";
155  std::cout << std::endl;*/
156 
157  if (betas[col_index] != 0)
158  householder_reflect(Q, v, betas[col_index], col_index);
159  }
160  }
161 
162  /*template<typename MatrixType>
163  std::vector<typename MatrixType::value_type> qr(MatrixType & A)
164  {
165  typedef typename MatrixType::value_type ScalarType;
166 
167  std::vector<ScalarType> betas(A.size2());
168  std::vector<ScalarType> v(A.size1());
169 
170  //copy A to Q:
171  for (size_t j=0; j<A.size2(); ++j)
172  {
173  betas[j] = setup_householder_vector(A, v, j);
174  householder_reflect(A, v, betas[j], j);
175  write_householder_to_A(A, v, j);
176  }
177 
178  return betas;
179  }*/
180 
181 
182 
183  class range
184  {
185  public:
186  range(size_t start, size_t end) : start_(start), end_(end) {}
187 
188  size_t lower() const { return start_; }
189  size_t upper() const { return end_; }
190 
191  private:
192  size_t start_;
193  size_t end_;
194  };
195 
196  template <typename MatrixType>
198  {
199  public:
200  typedef typename MatrixType::value_type value_type;
201 
202  sub_matrix(MatrixType & mat,
203  range row_range,
204  range col_range) : mat_(mat), row_range_(row_range), col_range_(col_range) {}
205 
206  value_type operator()(size_t row, size_t col) const
207  {
208  assert(row < size1());
209  assert(col < size2());
210  return mat_(row + row_range_.lower(), col + col_range_.lower());
211  }
212 
213  size_t size1() const { return row_range_.upper() - row_range_.lower(); }
214  size_t size2() const { return col_range_.upper() - col_range_.lower(); }
215 
216  private:
217  MatrixType & mat_;
218  range row_range_;
219  range col_range_;
220  };
221 
222 
223  //computes C = prod(A, B)
224  template <typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
225  void prod_AA(MatrixTypeA const & A, MatrixTypeB const & B, MatrixTypeC & C)
226  {
227  assert(C.size1() == A.size1());
228  assert(A.size2() == B.size1());
229  assert(B.size2() == C.size2());
230 
231  typedef typename MatrixTypeC::value_type ScalarType;
232 
233  for (size_t i=0; i<C.size1(); ++i)
234  {
235  for (size_t j=0; j<C.size2(); ++j)
236  {
237  ScalarType val = 0;
238  for (size_t k=0; k<A.size2(); ++k)
239  val += A(i, k) * B(k, j);
240  C(i, j) = val;
241  }
242  }
243  }
244 
245  //computes C = prod(A^T, B)
246  template <typename MatrixTypeA, typename MatrixTypeB, typename MatrixTypeC>
247  void prod_TA(MatrixTypeA const & A, MatrixTypeB const & B, MatrixTypeC & C)
248  {
249  assert(C.size1() == A.size2());
250  assert(A.size1() == B.size1());
251  assert(B.size2() == C.size2());
252 
253  typedef typename MatrixTypeC::value_type ScalarType;
254 
255  for (size_t i=0; i<C.size1(); ++i)
256  {
257  for (size_t j=0; j<C.size2(); ++j)
258  {
259  ScalarType val = 0;
260  for (size_t k=0; k<A.size1(); ++k)
261  val += A(k, i) * B(k, j);
262  C(i, j) = val;
263  }
264  }
265  }
266 
267 
268 
269  template<typename MatrixType>
270  std::vector<typename MatrixType::value_type> inplace_qr(MatrixType & A, std::size_t block_size = 32)
271  {
272  typedef typename MatrixType::value_type ScalarType;
273 
274  if ( A.size2() % block_size != 0 )
275  std::cout << "ViennaCL: Warning in inplace_qr(): Matrix columns are not divisible by block_size!" << std::endl;
276 
277  std::vector<ScalarType> betas(A.size2());
278  std::vector<ScalarType> v(A.size1());
279 
280  //size_t block_size = 90;
281  MatrixType Y(A.size1(), block_size); Y.clear();
282  MatrixType W(A.size1(), block_size); W.clear();
283 
284  //run over A in a block-wise manner:
285  for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
286  {
287  //determine Householder vectors:
288  for (size_t k = 0; k < block_size; ++k)
289  {
290  betas[j+k] = setup_householder_vector(A, v, j+k);
291  for (size_t l = k; l < block_size; ++l)
292  householder_reflect(A, v, betas[j+k], j+k, j+l);
293 
294  write_householder_to_A(A, v, j+k);
295  }
296 
297  //
298  // Setup Y:
299  //
300  for (size_t k = 0; k < block_size; ++k)
301  {
302  //write Householder to Y:
303  Y(k,k) = 1.0;
304  for (size_t l=k+1; l<A.size1(); ++l)
305  Y(l,k) = A(l, j+k);
306  }
307 
308  //
309  // Setup W:
310  //
311 
312  //first vector:
313  W(j, 0) = -betas[j];
314  for (size_t l=j+1; l<A.size1(); ++l)
315  W(l,0) = -betas[j] * A(l, j);
316 
317  //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
318  for (size_t k = 1; k < block_size; ++k)
319  {
320  //compute Y^T v_k:
321  std::vector<ScalarType> temp(k); //actually of size (k \times 1)
322  for (size_t l=0; l<k; ++l)
323  for (size_t n=j; n<A.size1(); ++n)
324  temp[l] += Y(n, l) * Y(n, k);
325 
326  //compute W * temp and add to z, which is directly written to W:
327  for (size_t n=0; n<A.size1(); ++n)
328  {
329  ScalarType val = 0;
330  for (size_t l=0; l<k; ++l)
331  val += temp[l] * W(n, l);
332  W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
333  }
334  }
335 
336  //
337  //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
338  //
339 
340  if (A.size2() - j - block_size > 0)
341  {
342  //temp = prod(W^T, A)
343 
344  MatrixType temp(block_size, A.size2() - j - block_size);
345 
346  boost::numeric::ublas::range A_rows(j, A.size1());
347  boost::numeric::ublas::range A_cols(j+block_size, A.size2());
348  boost::numeric::ublas::matrix_range<MatrixType> A_part(A, A_rows, A_cols);
349 
350  viennacl::matrix<ScalarType, viennacl::column_major> gpu_A_part(A_part.size1(), A_part.size2());
351  viennacl::copy(A_part, gpu_A_part);
352 
353  //transfer W
354  boost::numeric::ublas::range W_cols(0, block_size);
355  boost::numeric::ublas::matrix_range<MatrixType> W_part(W, A_rows, W_cols);
356  viennacl::matrix<ScalarType, viennacl::column_major> gpu_W(W_part.size1(), W_part.size2());
357  viennacl::copy(W_part, gpu_W);
358 
359  viennacl::matrix<ScalarType, viennacl::column_major> gpu_temp(gpu_W.size2(), gpu_A_part.size2());
360  gpu_temp = viennacl::linalg::prod(trans(gpu_W), gpu_A_part);
361 
362 
363 
364  //A += Y * temp:
365  boost::numeric::ublas::range Y_cols(0, Y.size2());
366  boost::numeric::ublas::matrix_range<MatrixType> Y_part(Y, A_rows, Y_cols);
367 
368  viennacl::matrix<ScalarType, viennacl::column_major> gpu_Y(Y_part.size1(), Y_part.size2());
369  viennacl::copy(Y_part, gpu_Y);
370 
371  //A_part += prod(Y_part, temp);
372  gpu_A_part += prod(gpu_Y, gpu_temp);
373 
374  MatrixType A_part_back(A_part.size1(), A_part.size2());
375  viennacl::copy(gpu_A_part, A_part_back);
376 
377  A_part = A_part_back;
378  //A_part += prod(Y_part, temp);
379  }
380  }
381 
382  return betas;
383  }
384 
385 
386  template<typename MatrixType>
387  std::vector<typename MatrixType::value_type> inplace_qr_ublas(MatrixType & A)
388  {
389  typedef typename MatrixType::value_type ScalarType;
390 
391  std::vector<ScalarType> betas(A.size2());
392  std::vector<ScalarType> v(A.size1());
393 
394  size_t block_size = 3;
395  MatrixType Y(A.size1(), block_size); Y.clear();
396  MatrixType W(A.size1(), block_size); W.clear();
397 
398  //run over A in a block-wise manner:
399  for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
400  {
401  //determine Householder vectors:
402  for (size_t k = 0; k < block_size; ++k)
403  {
404  betas[j+k] = setup_householder_vector(A, v, j+k);
405  for (size_t l = k; l < block_size; ++l)
406  householder_reflect(A, v, betas[j+k], j+k, j+l);
407 
408  write_householder_to_A(A, v, j+k);
409  }
410 
411  //
412  // Setup Y:
413  //
414  for (size_t k = 0; k < block_size; ++k)
415  {
416  //write Householder to Y:
417  Y(k,k) = 1.0;
418  for (size_t l=k+1; l<A.size1(); ++l)
419  Y(l,k) = A(l, j+k);
420  }
421 
422  //
423  // Setup W:
424  //
425 
426  //first vector:
427  W(j, 0) = -betas[j];
428  for (size_t l=j+1; l<A.size1(); ++l)
429  W(l,0) = -betas[j] * A(l, j);
430 
431  //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
432  for (size_t k = 1; k < block_size; ++k)
433  {
434  //compute Y^T v_k:
435  std::vector<ScalarType> temp(k); //actually of size (k \times 1)
436  for (size_t l=0; l<k; ++l)
437  for (size_t n=j; n<A.size1(); ++n)
438  temp[l] += Y(n, l) * Y(n, k);
439 
440  //compute W * temp and add to z, which is directly written to W:
441  for (size_t n=0; n<A.size1(); ++n)
442  {
443  ScalarType val = 0;
444  for (size_t l=0; l<k; ++l)
445  val += temp[l] * W(n, l);
446  W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
447  }
448  }
449 
450  //
451  //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
452  //
453 
454  if (A.size2() - j - block_size > 0)
455  {
456  //temp = prod(W^T, A)
457  MatrixType temp(block_size, A.size2() - j - block_size);
458 
459  boost::numeric::ublas::range A_rows(j, A.size1());
460  boost::numeric::ublas::range A_cols(j+block_size, A.size2());
461  boost::numeric::ublas::matrix_range<MatrixType> A_part(A, A_rows, A_cols);
462 
463  boost::numeric::ublas::range W_cols(0, block_size);
464  boost::numeric::ublas::matrix_range<MatrixType> W_part(W, A_rows, W_cols);
465 
466  temp = boost::numeric::ublas::prod(trans(W_part), A_part);
467 
468 
469  //A += Y * temp:
470  boost::numeric::ublas::range Y_cols(0, Y.size2());
471  boost::numeric::ublas::matrix_range<MatrixType> Y_part(Y, A_rows, Y_cols);
472 
473  A_part += prod(Y_part, temp);
474  }
475  }
476 
477  return betas;
478  }
479 
480 
481  template<typename MatrixType>
482  std::vector<typename MatrixType::value_type> inplace_qr_pure(MatrixType & A)
483  {
484  typedef typename MatrixType::value_type ScalarType;
485 
486  std::vector<ScalarType> betas(A.size2());
487  std::vector<ScalarType> v(A.size1());
488 
489  size_t block_size = 5;
490  MatrixType Y(A.size1(), block_size); Y.clear();
491  MatrixType W(A.size1(), block_size); W.clear();
492 
493  //run over A in a block-wise manner:
494  for (size_t j = 0; j < std::min(A.size1(), A.size2()); j += block_size)
495  {
496  //determine Householder vectors:
497  for (size_t k = 0; k < block_size; ++k)
498  {
499  betas[j+k] = setup_householder_vector(A, v, j+k);
500  for (size_t l = k; l < block_size; ++l)
501  householder_reflect(A, v, betas[j+k], j+k, j+l);
502 
503  write_householder_to_A(A, v, j+k);
504  }
505 
506  //
507  // Setup Y:
508  //
509  for (size_t k = 0; k < block_size; ++k)
510  {
511  //write Householder to Y:
512  Y(k,k) = 1.0;
513  for (size_t l=k+1; l<A.size1(); ++l)
514  Y(l,k) = A(l, j+k);
515  }
516 
517  //
518  // Setup W:
519  //
520 
521  //first vector:
522  W(j, 0) = -betas[j];
523  for (size_t l=j+1; l<A.size1(); ++l)
524  W(l,0) = -betas[j] * A(l, j);
525 
526  //k-th column of W is given by -beta * (Id + W*Y^T) v_k, where W and Y have k-1 columns
527  for (size_t k = 1; k < block_size; ++k)
528  {
529  //compute Y^T v_k:
530  std::vector<ScalarType> temp(k); //actually of size (k \times 1)
531  for (size_t l=0; l<k; ++l)
532  for (size_t n=j; n<A.size1(); ++n)
533  temp[l] += Y(n, l) * Y(n, k);
534 
535  //compute W * temp and add to z, which is directly written to W:
536  for (size_t n=0; n<A.size1(); ++n)
537  {
538  ScalarType val = 0;
539  for (size_t l=0; l<k; ++l)
540  val += temp[l] * W(n, l);
541  W(n, k) = -1.0 * betas[j+k] * (Y(n, k) + val);
542  }
543  }
544 
545  //
546  //apply (I+WY^T)^T = I + Y W^T to the remaining columns of A:
547  //
548 
549  if (A.size2() - j - block_size > 0)
550  {
551  //temp = prod(W^T, A)
552  MatrixType temp(block_size, A.size2() - j - block_size);
553  ScalarType entry = 0;
554  for (size_t l = 0; l < temp.size2(); ++l)
555  {
556  for (size_t k = 0; k < temp.size1(); ++k)
557  {
558  entry = 0;
559  for (size_t n = j; n < A.size1(); ++n)
560  entry += W(n, k) * A(n, j + block_size + l);
561  temp(k,l) = entry;
562  }
563  }
564 
565  //A += Y * temp:
566  for (size_t l = j+block_size; l < A.size2(); ++l)
567  {
568  for (size_t k = j; k<A.size1(); ++k)
569  {
570  ScalarType val = 0;
571  for (size_t n=0; n<block_size; ++n)
572  val += Y(k, n) * temp(n, l-j-block_size);
573  A(k, l) += val;
574  }
575  }
576  }
577  }
578 
579  return betas;
580  }
581 
582  } //linalg
583 } //viennacl
584 
585 
586 #endif