matrix_proxy.hpp
#ifndef VIENNACL_MATRIX_PROXY_HPP_
#define VIENNACL_MATRIX_PROXY_HPP_

/* =========================================================================
   Copyright (c) 2010-2011, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.

                            -----------------
                ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file matrix_proxy.hpp
    @brief Proxy classes for sub-matrices (ranges of matrices).
*/

#include "viennacl/forwards.h"
#include "viennacl/range.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/linalg/matrix_operations.hpp"   // prod_impl() and inplace_add() used below

namespace viennacl
{

  /** @brief A proxy class holding a reference to a matrix together with a row range and a column range. */
  template <typename MatrixType>
  class matrix_range
  {
    public:
      typedef typename MatrixType::value_type     value_type;
      typedef range::size_type                    size_type;
      typedef const value_type &                  const_reference;

      matrix_range(MatrixType & A,
                   range const & row_range,
                   range const & col_range) : A_(A), row_range_(row_range), col_range_(col_range) {}

      size_type start1() const { return row_range_.start(); }
      size_type size1() const { return row_range_.size(); }

      size_type start2() const { return col_range_.start(); }
      size_type size2() const { return col_range_.size(); }

      /** @brief Assigns the result of a matrix-matrix product to this sub-matrix. */
      template <typename MatrixType1, typename MatrixType2>
      matrix_range<MatrixType> & operator = (const matrix_expression< MatrixType1,
                                                                      MatrixType2,
                                                                      op_prod > & proxy)
      {
        viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Adds another sub-matrix of the same size to this sub-matrix. */
      matrix_range<MatrixType> & operator += (matrix_range<MatrixType> const & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      /** @brief Adds the result of a matrix-matrix product to this sub-matrix. */
      template <typename MatrixType1, typename MatrixType2>
      matrix_range<MatrixType> & operator += (const matrix_expression< MatrixType1,
                                                                       MatrixType2,
                                                                       op_prod > & proxy)
      {
        MatrixType1 temp = proxy;
        viennacl::range r1(0, temp.size1());
        viennacl::range r2(0, temp.size2());
        viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
        viennacl::linalg::inplace_add(*this, temp2);
        return *this;
      }

      /** @brief Adds the result of a matrix-matrix product to this sub-matrix (variant for const operands in the expression). */
      template <typename MatrixType1, typename MatrixType2>
      matrix_range<MatrixType> & operator += (const matrix_expression< const MatrixType1,
                                                                       const MatrixType2,
                                                                       op_prod > & proxy)
      {
        MatrixType1 temp(proxy.size1(), proxy.size2());
        viennacl::range r1(0, temp.size1());
        viennacl::range r2(0, temp.size2());
        viennacl::matrix_range<MatrixType> temp2(temp, r1, r2);
        temp2 = proxy;
        viennacl::linalg::inplace_add(*this, temp2);
        return *this;
      }

      //const_reference operator()(size_type i, size_type j) const { return A_(start1() + i, start2() + j); }
      //reference operator()(size_type i, size_type j) { return A_(start1() + i, start2() + j); }

      /** @brief Returns the wrapped matrix. */
      MatrixType & get() { return A_; }
      const MatrixType & get() const { return A_; }

    private:
      MatrixType & A_;
      range row_range_;
      range col_range_;
  };

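  /** A minimal usage sketch, assuming the default template arguments of
      viennacl::matrix (row_major layout, alignment 1): wrapping a 4x5
      sub-block of a 10x10 GPU matrix in a matrix_range.
      \code
      viennacl::matrix<float> M(10, 10);      // full GPU matrix
      viennacl::range rows(2, 6);             // half-open row interval [2, 6)
      viennacl::range cols(3, 8);             // half-open column interval [3, 8)
      viennacl::matrix_range<viennacl::matrix<float> > sub(M, rows, cols);
      // sub.start1() == 2, sub.size1() == 4, sub.start2() == 3, sub.size2() == 5
      \endcode
  */
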
  /** @brief Returns an expression template representing the transpose of a matrix range. */
  template <typename MatrixType>
  matrix_expression< const matrix_range<MatrixType>,
                     const matrix_range<MatrixType>,
                     op_trans> trans(const matrix_range<MatrixType> & mat)
  {
    return matrix_expression< const matrix_range<MatrixType>,
                              const matrix_range<MatrixType>,
                              op_trans>(mat, mat);
  }

  //
  // Convenience functions for copying a dense CPU matrix into a matrix_range (CPU -> GPU):
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );

    if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each stride separately:
      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[j] = cpu_matrix(i,j);

        size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
        size_t num_entries = gpu_matrix_range.size2();
        cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                          gpu_matrix_range.get().handle(), CL_TRUE,
                                          sizeof(SCALARTYPE)*start_offset,
                                          sizeof(SCALARTYPE)*num_entries,
                                          &(entries[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        //std::cout << "Strided copy worked!" << std::endl;
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());

      //copy each stride separately:
      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i*gpu_matrix_range.get().internal_size2() + j] = cpu_matrix(i,j);

      size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
      size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
      //std::cout << "start_offset: " << start_offset << std::endl;
      cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                        gpu_matrix_range.get().handle(), CL_TRUE,
                                        sizeof(SCALARTYPE)*start_offset,
                                        sizeof(SCALARTYPE)*num_entries,
                                        &(entries[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      //std::cout << "Block copy worked!" << std::endl;
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );

    if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each stride separately:
      for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
          entries[i] = cpu_matrix(i,j);

        size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
        size_t num_entries = gpu_matrix_range.size1();
        cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                          gpu_matrix_range.get().handle(), CL_TRUE,
                                          sizeof(SCALARTYPE)*start_offset,
                                          sizeof(SCALARTYPE)*num_entries,
                                          &(entries[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        //std::cout << "Strided copy worked!" << std::endl;
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());

      //copy each stride separately:
      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i + j*gpu_matrix_range.get().internal_size1()] = cpu_matrix(i,j);

      size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
      size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
      //std::cout << "start_offset: " << start_offset << std::endl;
      cl_int err = clEnqueueWriteBuffer(viennacl::ocl::get_queue().handle(),
                                        gpu_matrix_range.get().handle(), CL_TRUE,
                                        sizeof(SCALARTYPE)*start_offset,
                                        sizeof(SCALARTYPE)*num_entries,
                                        &(entries[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      //std::cout << "Block copy worked!" << std::endl;
    }

  }

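  /** A minimal copy sketch, assuming Boost.uBLAS supplies the CPU-side matrix
      (any type providing operator()(i,j), size1() and size2() works the same
      way): filling a 4x5 sub-block of a row-major GPU matrix from the host.
      \code
      #include <boost/numeric/ublas/matrix.hpp>

      boost::numeric::ublas::matrix<float> host(4, 5);
      for (std::size_t i = 0; i < host.size1(); ++i)
        for (std::size_t j = 0; j < host.size2(); ++j)
          host(i, j) = static_cast<float>(i + j);

      viennacl::matrix<float> M(10, 10);
      viennacl::matrix_range<viennacl::matrix<float> > sub(M,
                                                           viennacl::range(2, 6),
                                                           viennacl::range(3, 8));
      viennacl::copy(host, sub);   // blocking; writes one row of the range at a time
      \endcode
  */
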
  //
  // Convenience functions for copying a matrix_range back to a dense CPU matrix (GPU -> CPU):
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );

    if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.get().size2())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each stride separately:
      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.get().internal_size2() + gpu_matrix_range.start2();
        size_t num_entries = gpu_matrix_range.size2();
        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         gpu_matrix_range.get().handle(), CL_TRUE,
                                         sizeof(SCALARTYPE)*start_offset,
                                         sizeof(SCALARTYPE)*num_entries,
                                         &(entries[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        //std::cout << "Strided copy worked!" << std::endl;

        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[j];
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());

      size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.get().internal_size2();
      size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
      //std::cout << "start_offset: " << start_offset << std::endl;
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                       gpu_matrix_range.get().handle(), CL_TRUE,
                                       sizeof(SCALARTYPE)*start_offset,
                                       sizeof(SCALARTYPE)*num_entries,
                                       &(entries[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      //std::cout << "Block copy worked!" << std::endl;

      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i*gpu_matrix_range.get().internal_size2() + j];
    }

  }


  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
            && (cpu_matrix.size2() == gpu_matrix_range.size2()) );

    if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.get().size1())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each stride separately:
      for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.get().internal_size1() + gpu_matrix_range.start1();
        size_t num_entries = gpu_matrix_range.size1();
        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         gpu_matrix_range.get().handle(), CL_TRUE,
                                         sizeof(SCALARTYPE)*start_offset,
                                         sizeof(SCALARTYPE)*num_entries,
                                         &(entries[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        //std::cout << "Strided copy worked!" << std::endl;

        for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
          cpu_matrix(i,j) = entries[i];
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.size2());

      size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.get().internal_size1();
      size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.size2();
      //std::cout << "start_offset: " << start_offset << std::endl;
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                       gpu_matrix_range.get().handle(), CL_TRUE,
                                       sizeof(SCALARTYPE)*start_offset,
                                       sizeof(SCALARTYPE)*num_entries,
                                       &(entries[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      //std::cout << "Block copy worked!" << std::endl;

      for (size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.get().internal_size1()];
    }

  }

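  /** A minimal read-back sketch under the same assumptions as above (default
      row-major GPU matrix, Boost.uBLAS matrix on the host): retrieving a 4x5
      sub-block from the GPU.
      \code
      viennacl::matrix<float> M(10, 10);
      viennacl::matrix_range<viennacl::matrix<float> > sub(M,
                                                           viennacl::range(2, 6),
                                                           viennacl::range(3, 8));
      boost::numeric::ublas::matrix<float> host_result(4, 5);   // must match the range size
      viennacl::copy(sub, host_result);                          // blocking read of the sub-block
      \endcode
  */
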
/*
  template<typename MatrixType>
  std::ostream & operator<<(std::ostream & s, matrix_range<MatrixType> const & proxy)
  {
    MatrixType temp(proxy.size1(), proxy.size2());
    viennacl::range r1(0, proxy.size1());
    viennacl::range r2(0, proxy.size2());
    matrix_range<MatrixType> temp2(temp, r1, r2);
    viennacl::copy(proxy, temp2);
    s << temp;
    return s;
  }*/

}

#endif