#ifndef VIENNACL_VECTOR_HPP_
#define VIENNACL_VECTOR_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file vector.hpp
    @brief The vector type with operator-overloads and proxy classes is defined here.
*/

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/tools/entry_proxy.hpp"          //provides entry_proxy<>, used by the iterators and element access below
#include "viennacl/linalg/vector_operations.hpp"   //provides the viennacl::linalg:: functions used below

namespace viennacl
{

  /** @brief An expression template class that represents a binary operation that yields a vector
  *
  * @tparam LHS   left hand side operand
  * @tparam RHS   right hand side operand
  * @tparam OP    the operator
  */
  template <typename LHS, typename RHS, typename OP>
  class vector_expression
  {
    public:
      /** @brief Extracts the vector type from the two operands. */
      typedef typename viennacl::tools::VECTOR_EXTRACTOR<LHS, RHS>::ResultType    VectorType;

      vector_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}

      /** @brief Get left hand side operand */
      LHS & lhs() const { return _lhs; }
      /** @brief Get right hand side operand */
      RHS & rhs() const { return _rhs; }

      /** @brief Returns the size of the result vector */
      std::size_t size() const { return viennacl::tools::VECTOR_SIZE_DEDUCER<LHS, RHS, OP>::size(_lhs, _rhs); }

    private:
      LHS & _lhs;
      RHS & _rhs;
  };

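  /* Usage sketch (editorial illustration, not part of the upstream header; assumes
     an initialized ViennaCL OpenCL context):

       viennacl::vector<float> v1(100), v2(100), v3(100);
       v3 = v1 + v2;  //operator+ only builds a vector_expression<..., op_add> proxy;
                      //vector::operator= then launches a single OpenCL kernel
  */
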
  /** @brief An STL-type const-iterator for vector elements. Elements can be accessed, but cannot be manipulated. VERY SLOW!!
  *
  * Every dereference operation initiates a transfer from the GPU to the CPU. The overhead of such a transfer is considerable,
  * so bulk transfers via copy() are strongly preferred. Increments and comparisons of iterators, however, are as fast as for CPU types.
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  class const_vector_iterator
  {
      typedef const_vector_iterator<SCALARTYPE, ALIGNMENT>   self_type;
    public:
      typedef scalar<SCALARTYPE>   value_type;
      typedef long                 difference_type;

      /** @brief Constructor
      *
      * @param elements  The OpenCL memory buffer the iterator refers to
      * @param index     The index of the entry the iterator points to
      */
      const_vector_iterator(viennacl::ocl::handle<cl_mem> const & elements, cl_uint index) : elements_(elements), index_(index) {};

      value_type operator*(void) const
      {
         value_type result;
         result = entry_proxy<SCALARTYPE>(index_, elements_);
         return result;
      }
      self_type operator++(void) { ++index_; return *this; }
      self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }

      bool operator==(self_type const & other) const { return index_ == other.index_; }
      bool operator!=(self_type const & other) const { return index_ != other.index_; }

      //self_type & operator=(self_type const & other)
      //{
      //   index_ = other.index_;
      //   elements_ = other.elements_;
      //   return *this;
      //}

      difference_type operator-(self_type const & other) const { difference_type result = index_; return result - other.index_; }
      self_type operator+(difference_type diff) const { return self_type(elements_, index_ + diff); }

      std::size_t index() const { return index_; }
      viennacl::ocl::handle<cl_mem> const & handle() const { return elements_; }

    protected:
      /** @brief  The memory buffer the iterator operates on */
      viennacl::ocl::handle<cl_mem> elements_;
      /** @brief  The index of the entry the iterator points to */
      std::size_t index_;
  };


  /** @brief An STL-type iterator for vector elements. Elements can be accessed and manipulated. VERY SLOW!!
  *
  * Every dereference operation initiates a transfer from the GPU to the CPU, so bulk transfers via copy() are strongly preferred.
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  class vector_iterator : public const_vector_iterator<SCALARTYPE, ALIGNMENT>
  {
      typedef const_vector_iterator<SCALARTYPE, ALIGNMENT>   base_type;
      typedef vector_iterator<SCALARTYPE, ALIGNMENT>         self_type;
    public:
      vector_iterator(viennacl::ocl::handle<cl_mem> const & elements, std::size_t index) : base_type(elements, index) {};
      /** @brief Constructor from a const_vector_iterator */
      vector_iterator(base_type const & b) : base_type(b) {};

      typename base_type::value_type operator*(void)
      {
         typename base_type::value_type result;
         result = entry_proxy<SCALARTYPE>(base_type::index_, base_type::elements_);
         return result;
      }

      viennacl::ocl::handle<cl_mem> handle() { return base_type::elements_; }

      operator base_type() const
      {
        return base_type(base_type::elements_, base_type::index_);
      }
  };

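  /* Usage sketch (editorial illustration, not part of the upstream header): every
     dereference of these iterators triggers a separate GPU transfer, so prefer bulk
     transfers via viennacl::copy():

       viennacl::vector<float> v(1000);
       std::vector<float> host(1000);
       viennacl::copy(v.begin(), v.end(), host.begin());     //one single transfer
       //for (std::size_t i=0; i<1000; ++i) host[i] = v[i];  //1000 transfers - avoid!
  */
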
  // forward declaration in viennacl/forwards.h!
  /** @brief A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::vector
  *
  * @tparam SCALARTYPE  The floating point type, either 'float' or 'double'
  * @tparam ALIGNMENT   The internal memory size is padded up to a multiple of 'ALIGNMENT'. ALIGNMENT must be a power of two.
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  class vector
  {
    public:
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
      typedef std::size_t                                     size_type;
      typedef long                                            difference_type;
      typedef const_vector_iterator<SCALARTYPE, ALIGNMENT>    const_iterator;
      typedef vector_iterator<SCALARTYPE, ALIGNMENT>          iterator;

      static const int alignment = ALIGNMENT;

      /** @brief Default constructor in order to be compatible with various containers.
      */
      vector() : size_(0) { viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::init(); }

      /** @brief An explicit constructor for the vector, allocating the given amount of memory (plus a padding specified by 'ALIGNMENT')
      *
      * @param vec_size   The length (i.e. size) of the vector.
      */
      explicit vector(size_type vec_size) : size_(vec_size)
      {
        viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::init();

        if (size_ > 0)
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());

        //force entries above size_ to zero:
        if (size_ < internal_size())
        {
          std::vector<SCALARTYPE> temp(internal_size() - size_);
          cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(), elements_, CL_TRUE, sizeof(SCALARTYPE)*size_, sizeof(SCALARTYPE)*(internal_size() - size_), &(temp[0]), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
      }

      /** @brief Creates the vector from existing OpenCL memory
      *
      * @param existing_mem   An OpenCL handle representing the memory
      * @param vec_size       The size of the vector
      */
      explicit vector(cl_mem existing_mem, size_type vec_size) : size_(vec_size), elements_(existing_mem)
      {
        elements_.inc();  //prevents the user-provided memory from being deleted once the vector object is destroyed
      }

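      /* Usage sketch (editorial illustration, not part of the upstream header;
         'my_buffer' is a hypothetical cl_mem buffer of 42 floats created elsewhere
         via clCreateBuffer()):

           viennacl::vector<float> v(my_buffer, 42);  //elements_.inc() keeps the
                                                      //buffer alive while v exists
      */
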
      /** @brief Creates the vector from a vector_expression and immediately evaluates it.
      */
      template <typename LHS, typename RHS, typename OP>
      vector(vector_expression<LHS, RHS, OP> const & other) : size_(other.size())
      {
        elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*other.size());
        *this = other;
      }

      /** @brief The copy constructor: entries are copied directly on the GPU (no host round-trip).
      */
      vector(const vector<SCALARTYPE, ALIGNMENT> & vec) :
        size_(vec.size())
      {
        viennacl::linalg::kernels::vector<SCALARTYPE, 1>::init();

        if (size() != 0)
        {
          elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
          cl_int err;
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), vec.handle(), elements_, 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
      }

      /** @brief Assignment operator: the vector is resized if necessary and the entries are copied directly on the GPU.
      */
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector<SCALARTYPE, ALIGNMENT> & vec)
      {
        resize(vec.size());
        if (size() != 0)
        {
          cl_int err;
          err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), vec.handle(), elements_, 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);
        }
        return *this;
      }

      /** @brief Implementation of v1 = v2 * gpu_scalar
      */
      template <typename VectorType>   //use template to cover const/non-const of VectorType:
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< VectorType,
                                                                           const scalar<SCALARTYPE>,
                                                                           op_prod> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::mult(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Implementation of v1 = v2 * cpu_scalar
      */
      template <typename VectorType>   //use template to cover const/non-const of VectorType:
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< VectorType,
                                                                           const SCALARTYPE,
                                                                           op_prod> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::mult(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Implementation of v1 = v2 / gpu_scalar
      */
      template <typename VectorType>   //use template to cover const/non-const of VectorType:
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< VectorType,
                                                                           const scalar<SCALARTYPE>,
                                                                           op_div> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::divide(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Implementation of v1 = v2 / cpu_scalar (multiplication by the reciprocal value)
      */
      template <typename VectorType>   //use template to cover const/non-const of VectorType:
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< VectorType,
                                                                           const SCALARTYPE,
                                                                           op_div> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::mult(proxy.lhs(), static_cast<SCALARTYPE>(1.0) / proxy.rhs(), *this);
        return *this;
      }

      //v1 = v2 + v3;
      /** @brief Implementation of v1 = v2 + v3
      */
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                           const vector<SCALARTYPE, ALIGNMENT>,
                                                                           op_add> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      //v1 = v2 - v3;
      /** @brief Implementation of v1 = v2 - v3
      */
      vector<SCALARTYPE, ALIGNMENT> & operator = (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                           const vector<SCALARTYPE, ALIGNMENT>,
                                                                           op_sub> & proxy)
      {
        resize(proxy.lhs().size());
        viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }


      //Note: The following operator overloads are defined in matrix_operations.hpp, compressed_matrix_operations.hpp and coordinate_matrix_operations.hpp
      //This is certainly not the nicest approach and will most likely be changed in the future, but it works :-)

      //matrix<>
      /** @brief Operator overload for v1 = A * v2, where A is a dense matrix */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //transposed_matrix_proxy:
      /** @brief Operator overload for v1 = trans(A) * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                   const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                   op_trans >,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += trans(A) * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                    const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                    op_trans >,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= trans(A) * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                    const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                    op_trans >,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + trans(A) * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                 op_trans >,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - trans(A) * v2 */
      template <typename F, unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                 const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
                                                                                                 op_trans >,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //
      // compressed_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a sparse matrix in CSR format */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //
      // coordinate_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a sparse matrix in coordinate format */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //
      // circulant_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a circulant matrix */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const circulant_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const circulant_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const circulant_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const circulant_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const circulant_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);


      //
      // hankel_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a Hankel matrix */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const hankel_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const hankel_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const hankel_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const hankel_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const hankel_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //
      // toeplitz_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a Toeplitz matrix */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const toeplitz_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const toeplitz_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const toeplitz_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const toeplitz_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const toeplitz_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      //
      // vandermonde_matrix<>
      //
      /** @brief Operator overload for v1 = A * v2, where A is a Vandermonde matrix */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator=(const vector_expression< const vandermonde_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                         const vector<SCALARTYPE, ALIGNMENT>,
                                                                         op_prod> & proxy);

      /** @brief Operator overload for v1 += A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator+=(const vector_expression< const vandermonde_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 -= A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> & operator-=(const vector_expression< const vandermonde_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                          const vector<SCALARTYPE, ALIGNMENT>,
                                                                          op_prod> & proxy);

      /** @brief Operator overload for v1 + A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator+(const vector_expression< const vandermonde_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);

      /** @brief Operator overload for v1 - A * v2 */
      template <unsigned int MAT_ALIGNMENT>
      vector<SCALARTYPE, ALIGNMENT> operator-(const vector_expression< const vandermonde_matrix<SCALARTYPE, MAT_ALIGNMENT>,
                                                                       const vector<SCALARTYPE, ALIGNMENT>,
                                                                       op_prod> & proxy);


      //enlarge or reduce allocated memory and set unused memory to zero
      /** @brief Resizes the allocated memory for the vector. Pads the memory to be a multiple of 'ALIGNMENT'.
      *
      * @param new_size  The new size of the vector
      * @param preserve  If true, the old entries are copied over to the resized vector
      */
      void resize(size_type new_size, bool preserve = true)
      {
        assert(new_size > 0);

        if (new_size != size_)
        {
          std::size_t new_internal_size = viennacl::tools::roundUpToNextMultiple<std::size_t>(new_size, ALIGNMENT);

          std::vector<SCALARTYPE> temp(size_);
          if (preserve && size_ > 0)
            fast_copy(*this, temp);
          temp.resize(new_size);           //drop all entries above new_size
          temp.resize(new_internal_size);  //enlarge to fit the new internal size

          if (new_internal_size != internal_size())
          {
            elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*new_internal_size);
          }

          fast_copy(temp, *this);
          size_ = new_size;
        }
      }

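      /* Usage sketch (editorial illustration, not part of the upstream header):

           viennacl::vector<float> v(100);
           v.resize(200);         //entries 0..99 are preserved, 100..199 are zero
           v.resize(50, false);   //no transfer of old data; with this implementation,
                                  //the zero-initialized temporary resets all entries
      */
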

      //read-write access to an element of the vector
      /** @brief Read-write access to a single element of the vector
      */
      entry_proxy<SCALARTYPE> operator()(size_type index)
      {
        return entry_proxy<SCALARTYPE>(index, elements_);
      }

      /** @brief Read-write access to a single element of the vector
      */
      entry_proxy<SCALARTYPE> operator[](size_type index)
      {
        return entry_proxy<SCALARTYPE>(index, elements_);
      }

      /** @brief Read access to a single element of the vector
      */
      scalar<SCALARTYPE> operator()(size_type index) const
      {
        scalar<SCALARTYPE> tmp;
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_, tmp.handle(), sizeof(SCALARTYPE)*index, 0, sizeof(SCALARTYPE), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        return tmp;
      }

      /** @brief Read access to a single element of the vector
      */
      scalar<SCALARTYPE> operator[](size_type index) const
      {
        return operator()(index);
      }

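      /* Usage sketch (editorial illustration, not part of the upstream header):
         operator[] and operator() return an entry_proxy, so every single-element
         read or write is a separate transfer over the bus:

           viennacl::vector<float> v(10);
           v[0] = 1.0f;   //one host-to-device transfer
           //for bulk initialization, fill a std::vector and use viennacl::copy()
      */
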
      /** @brief Inplace addition of a vector
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector<SCALARTYPE, ALIGNMENT> & vec)
      {
        viennacl::linalg::inplace_add(*this, vec);
        return *this;
      }

      /** @brief Inplace addition of a scaled vector, i.e. v1 += v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace addition of a scaled vector (non-const operand version), i.e. v1 += v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace addition of a scaled vector, i.e. v1 += v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const SCALARTYPE,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace addition of a scaled vector (non-const operand version), i.e. v1 += v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                            const SCALARTYPE,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace addition of a scaled vector, i.e. v1 += v2 / gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator += (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_div> & proxy)
      {
        viennacl::linalg::inplace_div_add(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace subtraction of a vector
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector<SCALARTYPE, ALIGNMENT> & vec)
      {
        viennacl::linalg::inplace_sub(*this, vec);
        return *this;
      }

      /** @brief Inplace subtraction of a scaled vector, i.e. v1 -= v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_sub(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace subtraction of a scaled vector (non-const operand version), i.e. v1 -= v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_sub(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Inplace subtraction of a scaled vector, i.e. v1 -= v2 * cpu_scalar (implemented as v1 += v2 * (-cpu_scalar))
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const SCALARTYPE,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), -proxy.rhs());
        return *this;
      }

      /** @brief Inplace subtraction of a scaled vector (non-const operand version), i.e. v1 -= v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                            const SCALARTYPE,
                                                                            op_prod> & proxy)
      {
        viennacl::linalg::inplace_mul_add(*this, proxy.lhs(), -proxy.rhs());
        return *this;
      }

      /** @brief Inplace subtraction of a scaled vector, i.e. v1 -= v2 / gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> & operator -= (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                            const scalar<SCALARTYPE>,
                                                                            op_div> & proxy)
      {
        viennacl::linalg::inplace_div_sub(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      /** @brief Scales this vector by a CPU scalar value
      */
      vector<SCALARTYPE, ALIGNMENT> & operator *= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      /** @brief Scales this vector by a GPU scalar value
      */
      vector<SCALARTYPE, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & gpu_val)
      {
        viennacl::linalg::inplace_mult(*this, gpu_val);
        return *this;
      }

      /** @brief Scales this vector down by a CPU scalar value (multiplication by the reciprocal)
      */
      vector<SCALARTYPE, ALIGNMENT> & operator /= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, static_cast<SCALARTYPE>(1) / val);
        return *this;
      }

      /** @brief Scales this vector down by a GPU scalar value
      */
      vector<SCALARTYPE, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & gpu_val)
      {
        viennacl::linalg::inplace_divide(*this, gpu_val);
        return *this;
      }

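      /* Usage sketch (editorial illustration, not part of the upstream header):
         the in-place operators map to single kernel launches, e.g. an AXPY:

           viennacl::vector<float> x(100), y(100);
           float alpha = 2.0f;
           x += alpha * y;   //'alpha * y' yields a vector_expression proxy;
                             //operator+= dispatches to viennacl::linalg::inplace_mul_add()
      */
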

      // free addition

      /** @brief Addition of two vectors; the result is allocated as a new temporary
      */
      vector<SCALARTYPE, ALIGNMENT> operator + (const vector<SCALARTYPE, ALIGNMENT> & vec) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::add(*this, vec, result);
        return result;
      }

      /** @brief Addition of a scaled vector, i.e. v1 + v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator + (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                         const scalar<SCALARTYPE>,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::mul_add(proxy.lhs(), proxy.rhs(), *this, result);
        return result;
      }

      /** @brief Addition of a scaled vector (non-const operand version), i.e. v1 + v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator + (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                         const scalar<SCALARTYPE>,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::mul_add(proxy.lhs(), proxy.rhs(), *this, result);
        return result;
      }

      /** @brief Addition of a scaled vector, i.e. v1 + v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator + (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                         const SCALARTYPE,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::mul_add(proxy.lhs(), proxy.rhs(), *this, result);
        return result;
      }

      /** @brief Addition of a scaled vector (non-const operand version), i.e. v1 + v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator + (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                         const SCALARTYPE,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::mul_add(proxy.lhs(), proxy.rhs(), *this, result);
        return result;
      }


      //free subtraction:
      /** @brief Subtraction of two vectors; the result is allocated as a new temporary
      */
      vector<SCALARTYPE, ALIGNMENT> operator - (const vector<SCALARTYPE, ALIGNMENT> & vec) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        viennacl::linalg::sub(*this, vec, result);
        return result;
      }

      /** @brief Subtraction of a scaled vector, i.e. v1 - v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator - (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                         const scalar<SCALARTYPE>,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        result = *this;
        viennacl::linalg::inplace_mul_sub(result, proxy.lhs(), proxy.rhs());
        return result;
      }

      /** @brief Subtraction of a scaled vector (non-const operand version), i.e. v1 - v2 * gpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator - (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                         const scalar<SCALARTYPE>,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        result = *this;
        viennacl::linalg::inplace_mul_sub(result, proxy.lhs(), proxy.rhs());
        return result;
      }

      /** @brief Subtraction of a scaled vector, i.e. v1 - v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator - (const vector_expression< const vector<SCALARTYPE, ALIGNMENT>,
                                                                         const SCALARTYPE,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        result = *this;
        viennacl::linalg::inplace_mul_add(result, proxy.lhs(), -proxy.rhs());
        return result;
      }

      /** @brief Subtraction of a scaled vector (non-const operand version), i.e. v1 - v2 * cpu_scalar
      */
      vector<SCALARTYPE, ALIGNMENT> operator - (const vector_expression< vector<SCALARTYPE, ALIGNMENT>,
                                                                         const SCALARTYPE,
                                                                         op_prod> & proxy) const
      {
        vector<SCALARTYPE, ALIGNMENT> result(size_);
        result = *this;
        viennacl::linalg::inplace_mul_add(result, proxy.lhs(), -proxy.rhs());
        return result;
      }


      //free multiplication
      /** @brief Scales the vector by a CPU scalar 'alpha' and returns an expression template
      */
      vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const SCALARTYPE, op_prod>
      operator * (SCALARTYPE value) const
      {
        return vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const SCALARTYPE, op_prod>(*this, value);
      }

      /** @brief Scales the vector by a GPU scalar 'alpha' and returns an expression template
      */
      vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const scalar<SCALARTYPE>, op_prod>
      operator * (scalar<SCALARTYPE> const & value) const
      {
        return vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const scalar<SCALARTYPE>, op_prod>(*this, value);
      }

      //free division
      /** @brief Scales the vector down by a CPU scalar 'alpha' and returns an expression template
      */
      vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const SCALARTYPE, op_div>
      operator / (SCALARTYPE value) const
      {
        return vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const SCALARTYPE, op_div>(*this, value);
      }

      /** @brief Scales the vector down by a GPU scalar 'alpha' and returns an expression template
      */
      vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const scalar<SCALARTYPE>, op_div>
      operator / (scalar<SCALARTYPE> const & value) const
      {
        return vector_expression< const vector<SCALARTYPE, ALIGNMENT>, const scalar<SCALARTYPE>, op_div>(*this, value);
      }

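      /* Usage sketch (editorial illustration, not part of the upstream header):
         scaling returns an unevaluated expression template:

           viennacl::vector<float> v(100), w(100);
           w = v * 2.0f;   //vector_expression<..., op_prod>, evaluated by operator=
           w = v / 2.0f;   //vector_expression<..., op_div>,  evaluated by operator=
      */
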

      /** @brief Returns an iterator pointing to the beginning of the vector (STL like) */
      iterator begin()
      {
        return iterator(*this, 0);
      }

      /** @brief Returns an iterator pointing to the end of the vector (STL like) */
      iterator end()
      {
        return iterator(*this, size());
      }

      /** @brief Returns a const-iterator pointing to the beginning of the vector (STL like) */
      const_iterator begin() const
      {
        return const_iterator(*this, 0);
      }

      /** @brief Returns a const-iterator pointing to the end of the vector (STL like) */
      const_iterator end() const
      {
        return const_iterator(*this, size());
      }

      /** @brief Swaps the entries of the two vectors
      */
      vector<SCALARTYPE, ALIGNMENT> & swap(vector<SCALARTYPE, ALIGNMENT> & other)
      {
        swap(*this, other);
        return *this;
      };

      /** @brief Swaps the handles of two vectors by swapping the OpenCL handles only, NO data is copied
      */
      vector<SCALARTYPE, ALIGNMENT> & fast_swap(vector<SCALARTYPE, ALIGNMENT> & other)
      {
        assert(this->size_ == other.size_);
        this->elements_.swap(other.elements_);
        return *this;
      };

      /** @brief Returns the length of the vector (cf. std::vector) */
      size_type size() const { return size_; }

      /** @brief Returns the maximum possible size of the vector */
      size_type max_size() const
      {
        return (128*1024*1024) / sizeof(SCALARTYPE);  //128 MB is the maximum size of memory chunks in OpenCL!
      }
      /** @brief Returns the internal length of the vector: size() padded with zeros up to a multiple of 'ALIGNMENT' */
      size_type internal_size() const { return viennacl::tools::roundUpToNextMultiple<size_type>(size_, ALIGNMENT); }

      /** @brief Returns true if the size of the vector is zero */
      bool empty() { return size_ == 0; }

      /** @brief Returns the OpenCL memory handle */
      const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }

      /** @brief Resets all entries to zero. Does not change the size of the vector.
      */
      void clear()
      {
        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "clear");

        viennacl::ocl::enqueue(k(elements_,
                                 cl_uint(0),
                                 cl_uint(internal_size()))
                              );
      }

      //TODO: Think about implementing the following public member functions
      //void insert_element(unsigned int i, SCALARTYPE val){}
      //void erase_element(unsigned int i){}

    private:
      cl_uint size_;
      viennacl::ocl::handle<cl_mem> elements_;
  }; //vector


  //
  //////////////////// Copy from GPU to CPU ////////////////////
  //

  /** @brief STL-like transfer of a GPU vector to the CPU. The cpu type does not need to lie in a linear piece of memory.
  *
  * @param gpu_begin  GPU iterator pointing to the beginning of the gpu vector (STL-like)
  * @param gpu_end    GPU iterator pointing to the end of the gpu vector (STL-like)
  * @param cpu_begin  Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector!
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin,
            const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end,
            CPU_ITERATOR cpu_begin )
  {
    assert(gpu_end - gpu_begin >= 0);
    if (gpu_end - gpu_begin != 0)
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_end - gpu_begin);
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                       gpu_begin.handle(), CL_TRUE, 0,
                                       sizeof(SCALARTYPE)*(gpu_end - gpu_begin),
                                       &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //now copy entries to cpu_vec:
      std::copy(temp_buffer.begin(), temp_buffer.end(), cpu_begin);
    }
  }

  /** @brief STL-like transfer of a GPU vector to the CPU; overload for non-const GPU iterators.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void copy(const vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin,
            const vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end,
            CPU_ITERATOR cpu_begin )
  {
    viennacl::copy(const_vector_iterator<SCALARTYPE, ALIGNMENT>(gpu_begin),
                   const_vector_iterator<SCALARTYPE, ALIGNMENT>(gpu_end),
                   cpu_begin);
  }

  /** @brief Transfer from a GPU vector to a CPU vector, using begin()/end() of both.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPUVECTOR>
  void copy(vector<SCALARTYPE, ALIGNMENT> const & gpu_vec,
            CPUVECTOR & cpu_vec )
  {
    viennacl::copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin());
  }

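  /* Usage sketch (editorial illustration, not part of the upstream header):

       viennacl::vector<float> gpu_v(100);
       std::vector<float> host_v(100);
       viennacl::copy(gpu_v, host_v);                       //whole vector
       viennacl::copy(gpu_v.begin(), gpu_v.begin() + 50,
                      host_v.begin());                      //first 50 entries only
  */
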
  //from gpu to cpu. Type assumption: cpu_vec lies in a linear memory chunk
  /** @brief STL-like transfer of a GPU vector to the CPU. The cpu type is assumed to reside in a linear piece of memory, such as e.g. for std::vector.
  *
  * This method is faster than the plain copy() function above, because entries are written directly to &(*cpu_begin).
  *
  * @param gpu_begin  GPU iterator pointing to the beginning of the gpu vector (STL-like)
  * @param gpu_end    GPU iterator pointing to the end of the gpu vector (STL-like)
  * @param cpu_begin  Iterator to the beginning of the cpu vector. The cpu vector must be at least as long as the gpu vector!
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void fast_copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin,
                 const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end,
                 CPU_ITERATOR cpu_begin )
  {
    if (gpu_begin != gpu_end)
    {
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                       gpu_begin.handle(), CL_TRUE, 0,
                                       sizeof(SCALARTYPE)*(gpu_end - gpu_begin),
                                       &(*cpu_begin), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
    }
  }

  /** @brief Transfer from a GPU vector to a CPU vector, where the CPU vector is assumed to reside in linear memory.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPUVECTOR>
  void fast_copy(vector<SCALARTYPE, ALIGNMENT> const & gpu_vec,
                 CPUVECTOR & cpu_vec )
  {
    viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin());
  }


  #ifdef VIENNACL_HAVE_EIGEN
  template <unsigned int ALIGNMENT>
  void copy(vector<float, ALIGNMENT> const & gpu_vec,
            Eigen::VectorXf & eigen_vec)
  {
    viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), &(eigen_vec[0]));
  }

  template <unsigned int ALIGNMENT>
  void copy(vector<double, ALIGNMENT> const & gpu_vec,   //'const' added for consistency with the float overload
            Eigen::VectorXd & eigen_vec)
  {
    viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), &(eigen_vec[0]));
  }
  #endif

  //
  //////////////////// Copy from CPU to GPU ////////////////////
  //

  //from cpu to gpu. Safe assumption: cpu_vector does not necessarily occupy a linear memory segment, but is not larger than the allocated memory on the GPU
  /** @brief STL-like transfer of a CPU vector to the GPU. The cpu type does not need to lie in a linear piece of memory.
  *
  * @param cpu_begin  CPU iterator pointing to the beginning of the cpu vector (STL-like)
  * @param cpu_end    CPU iterator pointing to the end of the cpu vector (STL-like)
  * @param gpu_begin  Output iterator for the gpu vector. The gpu vector must be at least as long as the cpu vector!
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void copy(CPU_ITERATOR const & cpu_begin,
            CPU_ITERATOR const & cpu_end,
            vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin)
  {
    assert(cpu_end - cpu_begin > 0);
    if (cpu_begin != cpu_end)
    {
      //we require that the size of the gpu_vector is larger or equal to the cpu-size
      std::vector<SCALARTYPE> temp_buffer(cpu_end - cpu_begin);
      std::copy(cpu_begin, cpu_end, temp_buffer.begin());
      cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                        gpu_begin.handle(), CL_TRUE, sizeof(SCALARTYPE)*gpu_begin.index(),
                                        sizeof(SCALARTYPE)*(cpu_end - cpu_begin),
                                        &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
    }
  }

  // for things like copy(std_vec.begin(), std_vec.end(), vcl_vec.begin() + 1);
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void copy(CPU_ITERATOR const & cpu_begin,
            CPU_ITERATOR const & cpu_end,
            const_vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin)
  {
    copy(cpu_begin, cpu_end, vector_iterator<SCALARTYPE, ALIGNMENT>(gpu_begin));
  }

  /** @brief Transfer from a CPU vector to a GPU vector, using begin()/end() of both.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPUVECTOR>
  void copy(const CPUVECTOR & cpu_vec, vector<SCALARTYPE, ALIGNMENT> & gpu_vec)
  {
    viennacl::copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin());
  }

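  /* Usage sketch (editorial illustration, not part of the upstream header): this
     copy() variant buffers through a temporary std::vector, so the host container
     only needs random-access iterators, not contiguous storage:

       std::deque<float> host_vals(100, 1.0f);
       viennacl::vector<float> gpu_v(100);
       viennacl::copy(host_vals.begin(), host_vals.end(), gpu_v.begin());
  */
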
  /** @brief STL-like transfer of a CPU vector to the GPU. The cpu type is assumed to reside in a linear piece of memory, such as e.g. for std::vector.
  *
  * @param cpu_begin  Iterator to the beginning of the cpu vector
  * @param cpu_end    Iterator to the end of the cpu vector
  * @param gpu_begin  Output iterator for the gpu vector. The gpu vector must be at least as long as the cpu vector!
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
  void fast_copy(CPU_ITERATOR const & cpu_begin,
                 CPU_ITERATOR const & cpu_end,
                 vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin)
  {
    if (cpu_begin != cpu_end)
    {
      //we require that the size of the gpu_vector is larger or equal to the cpu-size
      cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                        gpu_begin.handle(), CL_TRUE, 0,
                                        sizeof(SCALARTYPE)*(cpu_end - cpu_begin), &(*cpu_begin), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
    }
  }


  /** @brief Transfer from a CPU vector to a GPU vector, where the CPU vector is assumed to reside in linear memory.
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT, typename CPUVECTOR>
  void fast_copy(const CPUVECTOR & cpu_vec, vector<SCALARTYPE, ALIGNMENT> & gpu_vec)
  {
    viennacl::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin());
  }

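  /* Usage sketch (editorial illustration, not part of the upstream header):
     fast_copy() reads directly from &(*cpu_begin) without a temporary buffer,
     so the host data must be contiguous (std::vector, plain arrays):

       std::vector<float> host_v(100, 1.0f);
       viennacl::vector<float> gpu_v(100);
       viennacl::fast_copy(host_v.begin(), host_v.end(), gpu_v.begin());
  */
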
  #ifdef VIENNACL_HAVE_EIGEN
  template <unsigned int ALIGNMENT>
  void copy(Eigen::VectorXf const & eigen_vec,
            vector<float, ALIGNMENT> & gpu_vec)
  {
    std::vector<float> entries(eigen_vec.size());
    for (size_t i = 0; i<entries.size(); ++i)
      entries[i] = eigen_vec(i);
    viennacl::fast_copy(entries.begin(), entries.end(), gpu_vec.begin());
  }

  template <unsigned int ALIGNMENT>
  void copy(Eigen::VectorXd const & eigen_vec,
            vector<double, ALIGNMENT> & gpu_vec)
  {
    std::vector<double> entries(eigen_vec.size());
    for (size_t i = 0; i<entries.size(); ++i)
      entries[i] = eigen_vec(i);
    viennacl::fast_copy(entries.begin(), entries.end(), gpu_vec.begin());
  }
  #endif


  //
  //////////////////// Copy from GPU to GPU ////////////////////
  //
  /** @brief Copy (parts of a) GPU vector to another GPU vector
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST>
  void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin,
            const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end,
            vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin)
  {
    assert(gpu_src_end - gpu_src_begin >= 0);
    if (gpu_src_begin != gpu_src_end)
    {
      cl_int err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
                                       gpu_src_begin.handle(),  //src handle
                                       gpu_dest_begin.handle(), //dest handle
                                       sizeof(SCALARTYPE) * gpu_src_begin.index(),  //src offset
                                       sizeof(SCALARTYPE) * gpu_dest_begin.index(), //dest offset
                                       sizeof(SCALARTYPE) * (gpu_src_end.index() - gpu_src_begin.index()), //data length
                                       0,            //number of events in the wait list (none here)
                                       NULL, NULL);  //event wait list and returned event
      VIENNACL_ERR_CHECK(err);
    }
  }

  /** @brief Copy (parts of a) GPU vector to another GPU vector; the destination may be given by a const_vector_iterator
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST>
  void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin,
            const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end,
            const_vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin)
  {
    copy(gpu_src_begin, gpu_src_end, vector_iterator<SCALARTYPE, ALIGNMENT_DEST>(gpu_dest_begin));
  }

  /** @brief Transfer from one GPU vector to another GPU vector
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST>
  void copy(vector<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_vec,
            vector<SCALARTYPE, ALIGNMENT_DEST> & gpu_dest_vec )
  {
    viennacl::copy(gpu_src_vec.begin(), gpu_src_vec.end(), gpu_dest_vec.begin());
  }

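  /* Usage sketch (editorial illustration, not part of the upstream header):
     device-to-device copies stay on the GPU (clEnqueueCopyBuffer), no host round-trip:

       viennacl::vector<float> a(100), b(100);
       viennacl::copy(a, b);                                       //whole vector
       viennacl::copy(a.begin(), a.begin() + 50, b.begin() + 25);  //sub-range
  */
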
  //global functions for handling vectors:
  /** @brief Output stream. Output format is ublas compatible.
  *
  * @param s    STL output stream
  * @param val  The vector that should be printed
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  std::ostream & operator<<(std::ostream & s, vector<SCALARTYPE, ALIGNMENT> const & val)
  {
    std::vector<SCALARTYPE> tmp(val.size());
    copy(val.begin(), val.end(), tmp.begin());
    s << "[" << val.size() << "](";   //was std::cout in the original, which ignored the stream argument
    for (typename std::vector<SCALARTYPE>::size_type i=0; i<val.size(); ++i)
    {
      if (i > 0)
        s << ",";
      s << tmp[i];
    }
    s << ")";
    return s;
  }

  /** @brief Swaps the contents of two vectors, data is copied
  *
  * @param vec1  The first vector
  * @param vec2  The second vector
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  void swap(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
            viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2)
  {
    assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2)
           && "Incompatible vector sizes in swap()");

    viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "swap");

    viennacl::ocl::enqueue(k(viennacl::traits::handle(vec1), cl_uint(viennacl::traits::start(vec1)), cl_uint(viennacl::traits::size(vec1)),
                             viennacl::traits::handle(vec2), cl_uint(viennacl::traits::start(vec2)), cl_uint(viennacl::traits::size(vec2)))
                          );
  }

  /** @brief Swaps the content of two vectors by swapping OpenCL handles only, NO data is copied */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  vector<SCALARTYPE, ALIGNMENT> & fast_swap(vector<SCALARTYPE, ALIGNMENT> & v1,
                                            vector<SCALARTYPE, ALIGNMENT> & v2)
  {
    return v1.fast_swap(v2);
  }

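  /* Usage sketch (editorial illustration, not part of the upstream header):

       viennacl::vector<float> v1(100), v2(100);
       viennacl::swap(v1, v2);        //kernel exchanges the data element-wise
       viennacl::fast_swap(v1, v2);   //O(1): only the OpenCL handles are swapped
  */
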

  /** @brief Operator overload for the expression alpha * v1, where alpha is a host scalar (float or double) and v1 is a ViennaCL vector.
  */
  template <typename SCALARTYPE, unsigned int A>
  vector_expression< const vector<SCALARTYPE, A>, const SCALARTYPE, op_prod>
  operator * (SCALARTYPE const & value, vector<SCALARTYPE, A> const & vec)
  {
    return vector_expression< const vector<SCALARTYPE, A>, const SCALARTYPE, op_prod>(vec, value);
  }

  /** @brief Operator overload for the expression alpha * v1, where alpha is a GPU scalar
  */
  template <typename SCALARTYPE, unsigned int A>
  vector_expression< const vector<SCALARTYPE, A>, const scalar<SCALARTYPE>, op_prod>
  operator * (scalar<SCALARTYPE> const & value, vector<SCALARTYPE, A> const & vec)
  {
    return vector_expression< const vector<SCALARTYPE, A>, const scalar<SCALARTYPE>, op_prod>(vec, value);
  }

  //addition and subtraction of two vector_expressions:
  /** @brief Operator overload for the addition of two vector expressions.
  *
  * @param proxy1  Left hand side vector expression
  * @param proxy2  Right hand side vector expression
  */
  template <typename LHS1, typename RHS1, typename OP1,
            typename LHS2, typename RHS2, typename OP2>
  typename vector_expression< LHS1, RHS1, OP1>::VectorType
  operator + (vector_expression< LHS1, RHS1, OP1> const & proxy1,
              vector_expression< LHS2, RHS2, OP2> const & proxy2)
  {
    assert(proxy1.size() == proxy2.size());
    typename vector_expression< LHS1, RHS1, OP1>::VectorType result(proxy1.size());
    result = proxy1;
    result += proxy2;
    return result;
  }

  /** @brief Operator overload for the subtraction of two vector expressions.
  */
  template <typename LHS1, typename RHS1, typename OP1,
            typename LHS2, typename RHS2, typename OP2>
  typename vector_expression< LHS1, RHS1, OP1>::VectorType
  operator - (vector_expression< LHS1, RHS1, OP1> const & proxy1,
              vector_expression< LHS2, RHS2, OP2> const & proxy2)
  {
    assert(proxy1.size() == proxy2.size());
    typename vector_expression< LHS1, RHS1, OP1>::VectorType result(proxy1.size());
    result = proxy1;
    result -= proxy2;
    return result;
  }

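  /* Usage sketch (editorial illustration, not part of the upstream header):
     combining two expression templates materializes a temporary vector, so chained
     expressions are not fused into a single kernel:

       viennacl::vector<float> u(100), v(100), w(100);
       w = 2.0f * u + 3.0f * v;  //evaluates 2*u into a temporary via operator=,
                                 //then adds 3*v in-place, then assigns to w
  */
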

  /** @brief Operator overload for the addition of a vector to a vector expression, e.g. (alpha * vec1) + vec2.
  */
  template <typename SCALARTYPE, unsigned int A, typename LHS, typename RHS, typename OP>
  vector<SCALARTYPE, A> operator + (vector_expression< LHS, RHS, OP> const & proxy,
                                    vector<SCALARTYPE, A> const & vec)
  {
    assert(proxy.size() == vec.size());
    vector<SCALARTYPE, A> result(vec.size());
    result = proxy;
    result += vec;
    return result;
  }

  /** @brief Operator overload for the subtraction of a vector from a vector expression, e.g. (alpha * vec1) - vec2.
  */
  template <typename SCALARTYPE, unsigned int A, typename LHS, typename RHS, typename OP>
  vector<SCALARTYPE, A> operator - (vector_expression< LHS, RHS, OP> const & proxy,
                                    vector<SCALARTYPE, A> const & vec)
  {
    assert(proxy.size() == vec.size());
    vector<SCALARTYPE, A> result(vec.size());
    result = proxy;
    result -= vec;
    return result;
  }


  /** @brief Operator overload for the multiplication of a vector expression with a ViennaCL scalar from the right, e.g. (alpha * vec1) * beta.
  */
  template <typename SCALARTYPE, typename LHS, typename RHS, typename OP>
  vector<SCALARTYPE> operator * (vector_expression< LHS, RHS, OP> const & proxy,
                                 scalar<SCALARTYPE> const & val)
  {
    vector<SCALARTYPE> result(proxy.size());
    result = proxy;
    result *= val;
    return result;
  }

  /** @brief Operator overload for the division of a vector expression by a ViennaCL scalar from the right, e.g. (alpha * vec1) / beta.
  */
  template <typename SCALARTYPE, typename LHS, typename RHS, typename OP>
  vector<SCALARTYPE> operator / (vector_expression< LHS, RHS, OP> const & proxy,
                                 scalar<SCALARTYPE> const & val)
  {
    vector<SCALARTYPE> result(proxy.size());
    result = proxy;
    result /= val;
    return result;
  }


  //multiplication of a vector expression by a scalar from the left:
  /** @brief Operator overload for the multiplication of a vector expression with a ViennaCL scalar from the left, e.g. alpha * (beta * vec1).
  */
  template <typename SCALARTYPE, typename LHS, typename RHS, typename OP>
  vector<SCALARTYPE> operator * (scalar<SCALARTYPE> const & val,
                                 vector_expression< LHS, RHS, OP> const & proxy)
  {
    vector<SCALARTYPE> result(proxy.size());
    result = proxy;
    result *= val;
    return result;
  }

  /** @brief Operator overload for the multiplication of a vector expression with a host scalar (float or double) from the left, e.g. alpha * (beta * vec1).
  */
  template <typename SCALARTYPE, typename LHS, typename RHS, typename OP>
  viennacl::vector<SCALARTYPE> operator * (SCALARTYPE val,
                                           vector_expression< LHS, RHS, OP> const & proxy)
  {
    viennacl::vector<SCALARTYPE> result(proxy.size());
    result = proxy;
    result *= val;
    return result;
  }

1746 }
1747 
1748 #endif