ViennaCL - The Vienna Computing Library  1.2.0
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #include "viennacl/forwards.h"
25 #include "viennacl/ocl/device.hpp"
26 #include "viennacl/ocl/handle.hpp"
27 #include "viennacl/ocl/kernel.hpp"
28 #include "viennacl/scalar.hpp"
29 #include "viennacl/vector.hpp"
30 #include "viennacl/tools/tools.hpp"
32 #include "viennacl/traits/size.hpp"
40 
45 
50 
51 namespace viennacl
52 {
53  namespace linalg
54  {
55 
64  template<class TYPE, typename F, unsigned int ALIGNMENT>
68  {
69  assert(result.size1() == mat1.size1());
70  assert(result.size2() == mat1.size2());
71  assert(result.size1() == mat2.size1());
72  assert(result.size2() == mat2.size2());
73 
74  typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass;
75 
76  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "add");
77  assert( (mat1.internal_size() == mat2.internal_size())
78  && "Operands must have same dimension and memory layout in this version of ViennaCL!");
79  cl_uint size = std::min(mat1.internal_size(), mat2.internal_size());
80 
81  viennacl::ocl::enqueue(k(mat1, mat2, result, size));
82  }
83 
91  template <typename M1, typename M2>
94  >::type
95  inplace_add(M1 & result, M2 const & mat2)
96  {
97  assert(viennacl::traits::size1(result) == viennacl::traits::size1(mat2));
98  assert(viennacl::traits::size2(result) == viennacl::traits::size2(mat2));
99 
101 
102  size_t block_size = 15;
103 
104  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_add");
105  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(viennacl::traits::size1(result), block_size));
106  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(viennacl::traits::size2(result), block_size));
107  k.local_work_size(0, block_size);
108  k.local_work_size(1, block_size);
109 
111  cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)),
112  cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)),
113  cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)),
115  cl_uint(viennacl::traits::start1(mat2)), cl_uint(viennacl::traits::start2(mat2)),
116  cl_uint(viennacl::traits::size1(mat2)), cl_uint(viennacl::traits::size2(mat2)),
118  )
119  );
120  }
121 
130  /*
131  template <typename MatrixType>
132  void inplace_add(viennacl::matrix_range<MatrixType> & result,
133  const viennacl::matrix_range<MatrixType> & mat2)
134  {
135  assert(result.size1() == mat2.size1());
136  assert(result.size2() == mat2.size2());
137 
138  typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< MatrixType >::ResultType KernelClass;
139 
140  size_t block_size = 15;
141 
142  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_add");
143  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(result.size1(), block_size));
144  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(result.size2(), block_size));
145  k.local_work_size(0, block_size);
146  k.local_work_size(1, block_size);
147 
148  viennacl::ocl::enqueue(k(result.get(), cl_uint(result.start1()), cl_uint(result.start2()),
149  cl_uint(result.size1()), cl_uint(result.size2()),
150  cl_uint(result.get().internal_size1()), cl_uint(result.get().internal_size2()),
151  mat2.get(), cl_uint(mat2.start1()), cl_uint(mat2.start2()),
152  cl_uint(mat2.size1()), cl_uint(mat2.size2()),
153  cl_uint(mat2.get().internal_size1()), cl_uint(mat2.get().internal_size2())
154  )
155  );
156  } */
157 
166  /*
167  template<class TYPE, typename F, unsigned int ALIGNMENT>
168  void inplace_add(viennacl::matrix<TYPE, F, ALIGNMENT> & result,
169  const viennacl::matrix_range<viennacl::matrix<TYPE, F, ALIGNMENT> > & mat2)
170  {
171  viennacl::range r1(0, result.size1());
172  viennacl::range r2(0, result.size2());
173  viennacl::matrix_range<viennacl::matrix<TYPE, F, ALIGNMENT> > result_wrap(result, r1, r2);
174  inplace_add(result_wrap, mat2);
175  } */
176 
177 
178 
179 
188  template<class TYPE, typename F, unsigned int ALIGNMENT>
192  {
193  assert(result.size1() == mat1.size1());
194  assert(result.size2() == mat1.size2());
195  assert(result.size1() == mat2.size1());
196  assert(result.size2() == mat2.size2());
197 
198  typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass;
199 
200  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "sub");
201  assert( (mat1.internal_size() == mat2.internal_size())
202  && "Operands must have same dimension and memory layout in this version of ViennaCL!");
203  cl_uint size = std::min(mat1.internal_size(), mat2.internal_size());
204 
205  viennacl::ocl::enqueue(k(mat1, mat2, result, size));
206  }
207 
215  template<class TYPE, typename F, unsigned int ALIGNMENT>
218  {
219  assert(result.size1() == mat2.size1());
220  assert(result.size2() == mat2.size2());
221 
222  typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass;
223 
224  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_sub");
225  assert( (result.internal_size() == mat2.internal_size())
226  && "Operands must have same dimension and memory layout in this version of ViennaCL!");
227  cl_uint size = std::min(result.internal_size(), mat2.internal_size());
228 
229  viennacl::ocl::enqueue(k(result, mat2, size));
230  }
231 
239  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
241  SCALARTYPE val)
242  {
244 
245  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "cpu_inplace_mult");
246  viennacl::ocl::enqueue(k(result, val, cl_uint(result.internal_size())));
247  }
248 
249 
257  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
259  viennacl::scalar<SCALARTYPE> const & val)
260  {
262 
263  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_mult");
264  viennacl::ocl::enqueue(k(result, val, cl_uint(result.internal_size())));
265  }
266 
267 
268 
276  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
278  viennacl::scalar<SCALARTYPE> const & val)
279  {
281 
282  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_divide");
283  unsigned int size = result.internal_size();
284 
285  viennacl::ocl::enqueue(k(result, val, size));
286  }
287 
288  // A * x
296  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
301  {
304  op_prod >(mat, vec);
305  }
306 
315  template<class TYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
319  {
320  assert(mat.size2() == vec.size());
321  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
322  assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!");
323  result.resize(mat.size1());
324 
325  typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType KernelClass;
326 
327  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "vec_mul");
329  k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
330  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2()), vec, result));
331  }
332 
333 
334 
335  // trans(A) * x
343  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
346  op_trans>,
350  op_trans> & proxy,
352  {
355  op_trans>,
357  op_prod >(proxy, vec);
358  }
359 
368  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
372  {
373  assert(mat.size1() == vec.size()); //remember: mat is transposed!
374  // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
375  assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!");
376  result.resize(mat.size2());
377 
379 
380  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "trans_vec_mul");
381 
382  viennacl::ocl::enqueue(k(mat, cl_uint(mat.size1()), cl_uint(mat.size2()),
383  cl_uint(mat.internal_size1()), cl_uint(mat.internal_size2()), vec, result));
384  }
385 
388  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
391  op_trans> & mat,
394  {
395  trans_prod_impl(mat.lhs(), vec, result);
396  }
397 
398 
399 
405  template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
409  int block_size = 15) // [JW] added ability to set block size from outside ..
410  {
411  assert(A.size1() == C.size1());
412  assert(A.size2() == B.size1());
413  assert(B.size2() == C.size2());
414  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
415  assert(C.handle() != A.handle()
416  && C.handle() != B.handle()
417  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
418 
421  viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass;
422  KernelClass::init();
423 
424  //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl;
425  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AA");
426 
427  /*k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1() / 2, block_size / 2));
428  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2() / 2, block_size / 2));
429  k.local_work_size(0, block_size / 2);
430  k.local_work_size(1, block_size / 2);*/
431 
432  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
433  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
434  k.local_work_size(0, block_size);
435  k.local_work_size(1, block_size);
436 
438  k(A, cl_uint(0), cl_uint(0),
439  cl_uint(A.size1()), cl_uint(A.size2()),
440  cl_uint(A.internal_size1()), cl_uint(A.internal_size2()),
441  B, cl_uint(0), cl_uint(0),
442  cl_uint(B.size1()), cl_uint(B.size2()),
443  cl_uint(B.internal_size1()), cl_uint(B.internal_size2()),
444  C, cl_uint(0), cl_uint(0),
445  cl_uint(C.size1()), cl_uint(C.size2()),
446  cl_uint(C.internal_size1()), cl_uint(C.internal_size2()),
447  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
448  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) ));
449  }
450 
451 
457  template<typename T1, typename T2, typename T3>
459  const viennacl::matrix_range<T2> & B,
461  int block_size = 15) // [JW] added ability to set block size from outside ..
462  {
463  typedef typename T1::value_type::value_type value_type;
464 
465  assert(A.size1() == C.size1());
466  assert(A.size2() == B.size1());
467  assert(B.size2() == C.size2());
468  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
469  assert(C.get().handle() != A.get().handle()
470  && C.get().handle() != B.get().handle()
471  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
472 
474  KernelClass::init();
475 
476  //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl;
477  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AA");
478 
479  /*k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1() / 2, block_size / 2));
480  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2() / 2, block_size / 2));
481  k.local_work_size(0, block_size / 2);
482  k.local_work_size(1, block_size / 2);*/
483 
484  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
485  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
486  k.local_work_size(0, block_size);
487  k.local_work_size(1, block_size);
488 
490  k(A.get(), cl_uint(A.start1()), cl_uint(A.start2()),
491  cl_uint(A.size1()), cl_uint(A.size2()),
492  cl_uint(A.get().internal_size1()), cl_uint(A.get().internal_size2()),
493  B.get(), cl_uint(B.start1()), cl_uint(B.start2()),
494  cl_uint(B.size1()), cl_uint(B.size2()),
495  cl_uint(B.get().internal_size1()), cl_uint(B.get().internal_size2()),
496  C.get(), cl_uint(C.start1()), cl_uint(C.start2()),
497  cl_uint(C.size1()), cl_uint(C.size2()),
498  cl_uint(C.get().internal_size1()), cl_uint(C.get().internal_size2()),
499  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size),
500  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size) ));
501  }
502 
503 
504 
510  template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
513  op_trans> & A,
516  {
517  assert(A.size2() == C.size1());
518  assert(A.size1() == B.size1());
519  assert(B.size2() == C.size2());
520  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
521  assert(C.handle() != A.lhs().handle()
522  && C.handle() != B.handle()
523  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
524 
525  int block_size = 15;
526 
529  viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass;
530  KernelClass::init();
531 
532  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TA");
533 
534  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
535  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
536 
537  k.local_work_size(0, block_size);
538  k.local_work_size(1, block_size);
540  k(A.lhs(), cl_uint(0), cl_uint(0),
541  cl_uint(A.lhs().size1()), cl_uint(A.lhs().size2()),
542  cl_uint(A.lhs().internal_size1()), cl_uint(A.lhs().internal_size2()),
543  B, cl_uint(0), cl_uint(0),
544  cl_uint(B.size1()), cl_uint(B.size2()),
545  cl_uint(B.internal_size1()), cl_uint(B.internal_size2()),
546  C, cl_uint(0), cl_uint(0),
547  cl_uint(C.size1()), cl_uint(C.size2()),
548  cl_uint(C.internal_size1()), cl_uint(C.internal_size2()),
549  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
550  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )
551  );
552  }
553 
554 
560  template <typename M1, typename M2, typename M3>
562  const matrix_range<M1>,
563  op_trans> & A_trans,
564  const viennacl::matrix_range<M2> & B,
566  {
567  typedef typename M1::value_type::value_type value_type;
568  assert(A_trans.size2() == C.size1());
569  assert(A_trans.size1() == B.size1());
570  assert(B.size2() == C.size2());
571  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
572  assert(C.get().handle() != A_trans.lhs().get().handle()
573  && C.get().handle() != B.get().handle()
574  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
575 
576  int block_size = 15;
577 
579  KernelClass::init();
580 
581  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TA");
582 
583  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
584  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
585 
586  k.local_work_size(0, block_size);
587  k.local_work_size(1, block_size);
588 
589  const matrix_range<M1> & A = A_trans.lhs();
591  k(A.get(), cl_uint(A.start1()), cl_uint(A.start2()),
592  cl_uint(A.size1()), cl_uint(A.size2()),
593  cl_uint(A.get().internal_size1()), cl_uint(A.get().internal_size2()),
594  B.get(), cl_uint(B.start1()), cl_uint(B.start2()),
595  cl_uint(B.size1()), cl_uint(B.size2()),
596  cl_uint(B.get().internal_size1()), cl_uint(B.get().internal_size2()),
597  C.get(), cl_uint(C.start1()), cl_uint(C.start2()),
598  cl_uint(C.size1()), cl_uint(C.size2()),
599  cl_uint(C.get().internal_size1()), cl_uint(C.get().internal_size2()),
600  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size),
601  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size) )
602  );
603  }
604 
605 
606 
607 
613  template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
617  op_trans> & B,
619  {
620  assert(A.size1() == C.size1());
621  assert(A.size2() == B.size2());
622  assert(B.size1() == C.size2());
623  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
624  assert(C.handle() != A.handle()
625  && C.handle() != B.lhs().handle()
626  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
627 
628  int block_size = 15;
629 
632  viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass;
633  KernelClass::init();
634 
635  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AT");
636 
637  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
638  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
639 
640  k.local_work_size(0, block_size);
641  k.local_work_size(1, block_size);
643  k(A, cl_uint(0), cl_uint(0),
644  cl_uint(A.size1()), cl_uint(A.size2()),
645  cl_uint(A.internal_size1()), cl_uint(A.internal_size2()),
646  B.lhs(), cl_uint(0), cl_uint(0),
647  cl_uint(B.lhs().size1()), cl_uint(B.lhs().size2()),
648  cl_uint(B.lhs().internal_size1()), cl_uint(B.lhs().internal_size2()),
649  C, cl_uint(0), cl_uint(0),
650  cl_uint(C.size1()), cl_uint(C.size2()),
651  cl_uint(C.internal_size1()), cl_uint(C.internal_size2()),
652  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
653  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )
654  );
655  }
656 
657 
663  template <typename M1, typename M2, typename M3>
666  const matrix_range<M2>,
667  op_trans> & B_trans,
669  {
670  typedef typename M1::value_type::value_type value_type;
671  assert(A.size1() == C.size1());
672  assert(A.size2() == B_trans.size2());
673  assert(B_trans.size1() == C.size2());
674  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
675  assert(C.get().handle() != A.get().handle()
676  && C.get().handle() != B_trans.lhs().get().handle()
677  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
678 
679  int block_size = 15;
680 
682  KernelClass::init();
683 
684  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AT");
685 
686  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
687  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
688 
689  k.local_work_size(0, block_size);
690  k.local_work_size(1, block_size);
691  const matrix_range<M2> & B = B_trans.lhs();
693  k(A.get(), cl_uint(A.start1()), cl_uint(A.start2()),
694  cl_uint(A.size1()), cl_uint(A.size2()),
695  cl_uint(A.get().internal_size1()), cl_uint(A.get().internal_size2()),
696  B.get(), cl_uint(B.start1()), cl_uint(B.start2()),
697  cl_uint(B.size1()), cl_uint(B.size2()),
698  cl_uint(B.get().internal_size1()), cl_uint(B.get().internal_size2()),
699  C.get(), cl_uint(C.start1()), cl_uint(C.start2()),
700  cl_uint(C.size1()), cl_uint(C.size2()),
701  cl_uint(C.get().internal_size1()), cl_uint(C.get().internal_size2()),
702  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size),
703  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size) )
704  );
705  }
706 
707 
708 
709 
710 
711 
712 
713 
714 
720  template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
723  op_trans> & A,
726  op_trans> & B,
728  {
729  assert(A.size2() == C.size1());
730  assert(A.size1() == B.size2());
731  assert(B.size1() == C.size2());
732  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
733  assert(C.handle() != A.lhs().handle()
734  && C.handle() != B.lhs().handle()
735  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
736 
737  int block_size = 15;
738 
741  viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType KernelClass;
742  KernelClass::init();
743 
744  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TT");
745 
746  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
747  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
748 
749  k.local_work_size(0, block_size);
750  k.local_work_size(1, block_size);
752  k(A.lhs(), cl_uint(0), cl_uint(0),
753  cl_uint(A.lhs().size1()), cl_uint(A.lhs().size2()),
754  cl_uint(A.lhs().internal_size1()), cl_uint(A.lhs().internal_size2()),
755  B.lhs(), cl_uint(0), cl_uint(0),
756  cl_uint(B.lhs().size1()), cl_uint(B.lhs().size2()),
757  cl_uint(B.lhs().internal_size1()), cl_uint(B.lhs().internal_size2()),
758  C, cl_uint(0), cl_uint(0),
759  cl_uint(C.size1()), cl_uint(C.size2()),
760  cl_uint(C.internal_size1()), cl_uint(C.internal_size2()),
761  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
762  viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) )
763  );
764  }
765 
766 
772  template <typename M1, typename M2, typename M3>
774  const matrix_range<M1>,
775  op_trans> & A_trans,
777  const matrix_range<M2>,
778  op_trans> & B_trans,
780  {
781  typedef typename M1::value_type::value_type value_type;
782  assert(A_trans.size2() == C.size1());
783  assert(A_trans.size1() == B_trans.size2());
784  assert(B_trans.size1() == C.size2());
785  // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
786  assert(C.get().handle() != A_trans.lhs().get().handle()
787  && C.get().handle() != B_trans.lhs().get().handle()
788  && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
789 
790  int block_size = 15;
791 
793  KernelClass::init();
794 
795  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TT");
796 
797  k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
798  k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
799 
800  k.local_work_size(0, block_size);
801  k.local_work_size(1, block_size);
802  const matrix_range<M1> & A = A_trans.lhs();
803  const matrix_range<M2> & B = B_trans.lhs();
805  k(A.get(), cl_uint(A.start1()), cl_uint(A.start2()),
806  cl_uint(A.size1()), cl_uint(A.size2()),
807  cl_uint(A.get().internal_size1()), cl_uint(A.get().internal_size2()),
808  B.get(), cl_uint(B.start1()), cl_uint(B.start2()),
809  cl_uint(B.size1()), cl_uint(B.size2()),
810  cl_uint(B.get().internal_size1()), cl_uint(B.get().internal_size2()),
811  C.get(), cl_uint(C.start1()), cl_uint(C.start2()),
812  cl_uint(C.size1()), cl_uint(C.size2()),
813  cl_uint(C.get().internal_size1()), cl_uint(C.get().internal_size2()),
814  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size),
815  viennacl::ocl::local_mem(sizeof(value_type) * block_size * block_size) )
816  );
817  }
818 
819 
820 
821 
822 
823 
824 
825 
826 
832  template<class SCALARTYPE, unsigned int VA1, unsigned int VA2>
837  {
840  op_prod>(vec1, vec2);
841  }
842 
843 
844 
853  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
857  {
858  assert(mat1.size1() == vec1.size());
859  assert(mat1.size2() == vec2.size());
860 
862 
863  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "rank1_update");
864 
865  viennacl::ocl::enqueue(k(mat1, cl_uint(mat1.size1()), cl_uint(mat1.size2()),
866  cl_uint(mat1.internal_size1()), cl_uint(mat1.internal_size2()), vec1, vec2));
867  }
868 
869 
879  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
881  SCALARTYPE val,
884  {
885  assert(mat1.size1() == vec1.size());
886  assert(mat1.size2() == vec2.size());
887 
889 
890  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "scaled_rank1_update");
891 
892  viennacl::ocl::enqueue(k(mat1, cl_uint(mat1.size1()), cl_uint(mat1.size2()),
893  cl_uint(mat1.internal_size1()), cl_uint(mat1.internal_size2()),
894  val, vec1, vec2));
895  }
896 
897  } //namespace linalg
898 
899 
900  //v = A * x
905  template <typename SCALARTYPE, unsigned int ALIGNMENT>
906  template <typename F, unsigned int MAT_ALIGNMENT>
910  viennacl::op_prod> & proxy)
911  {
912  // check for the special case x = A * x
913  if (proxy.rhs().handle() == this->handle())
914  {
915  viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size());
916  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
917  *this = result;
918  return *this;
919  }
920  else
921  {
922  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
923  return *this;
924  }
925  return *this;
926  }
927 
928  //v += A * x
933  template <typename SCALARTYPE, unsigned int ALIGNMENT>
934  template <typename F, unsigned int MAT_ALIGNMENT>
938  op_prod> & proxy)
939  {
940  vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
941  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
942  *this += result;
943  return *this;
944  }
945 
950  template <typename SCALARTYPE, unsigned int ALIGNMENT>
951  template <typename F, unsigned int MAT_ALIGNMENT>
955  op_prod> & proxy)
956  {
957  vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
958  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
959  *this -= result;
960  return *this;
961  }
962 
963 
964  //free functions:
969  template <typename SCALARTYPE, unsigned int ALIGNMENT>
970  template <typename F, unsigned int MAT_ALIGNMENT>
974  op_prod> & proxy)
975  {
976  assert(proxy.lhs().size1() == size());
978  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
979  result += *this;
980  return result;
981  }
982 
987  template <typename SCALARTYPE, unsigned int ALIGNMENT>
988  template <typename F, unsigned int MAT_ALIGNMENT>
992  op_prod> & proxy)
993  {
994  assert(proxy.lhs().size1() == size());
996  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
997  result = *this - result;
998  return result;
999  }
1000 
1001 
1003 
1004 
1005  //v = trans(A) * x
1010  template <typename SCALARTYPE, unsigned int ALIGNMENT>
1011  template <typename F, unsigned int MAT_ALIGNMENT>
1015  op_trans>,
1017  viennacl::op_prod> & proxy)
1018  {
1019  // check for the special case x = trans(A) * x
1020  if (proxy.rhs().handle() == this->handle())
1021  {
1022  viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size());
1023  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1024  *this = result;
1025  return *this;
1026  }
1027  else
1028  {
1029  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
1030  return *this;
1031  }
1032  return *this;
1033  }
1034 
1035  //v += A * x
1040  template <typename SCALARTYPE, unsigned int ALIGNMENT>
1041  template <typename F, unsigned int MAT_ALIGNMENT>
1045  op_trans>,
1047  op_prod> & proxy)
1048  {
1049  vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
1050  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1051  *this += result;
1052  return *this;
1053  }
1054 
1059  template <typename SCALARTYPE, unsigned int ALIGNMENT>
1060  template <typename F, unsigned int MAT_ALIGNMENT>
1064  op_trans>,
1066  op_prod> & proxy)
1067  {
1068  vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
1069  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1070  *this -= result;
1071  return *this;
1072  }
1073 
1074 
1075  //free functions:
1080  template <typename SCALARTYPE, unsigned int ALIGNMENT>
1081  template <typename F, unsigned int MAT_ALIGNMENT>
1085  op_trans>,
1087  op_prod> & proxy)
1088  {
1089  assert(proxy.lhs().size1() == size());
1091  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1092  result += *this;
1093  return result;
1094  }
1095 
1100  template <typename SCALARTYPE, unsigned int ALIGNMENT>
1101  template <typename F, unsigned int MAT_ALIGNMENT>
1105  op_trans>,
1107  op_prod> & proxy)
1108  {
1109  assert(proxy.lhs().size1() == size());
1111  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1112  result = *this - result;
1113  return result;
1114  }
1115 
1116 
1117 } //namespace viennacl
1118 
1119 
1120 #endif