ViennaCL - The Vienna Computing Library  1.2.0
kernel.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_OCL_KERNEL_HPP_
2 #define VIENNACL_OCL_KERNEL_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
24 #ifdef __APPLE__
25 #include <OpenCL/cl.h>
26 #else
27 #include <CL/cl.h>
28 #endif
29 
30 #include "viennacl/ocl/forwards.h"
31 #include "viennacl/ocl/backend.hpp"
32 #include "viennacl/ocl/handle.hpp"
33 #include "viennacl/ocl/program.hpp"
34 #include "viennacl/ocl/device.hpp"
36 
37 namespace viennacl
38 {
39  namespace ocl
40  {
41 
43  class kernel
44  {
45  template <typename KernelType>
46  friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue);
47 
48 
49  public:
50  kernel() : handle_(0)
51  {
52  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
53  std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl;
54  #endif
55  set_work_size_defaults();
56  }
57 
58  kernel(viennacl::ocl::handle<cl_program> const & prog, std::string const & name)
59  : handle_(0), program_(prog), name_(name), init_done_(false)
60  {
61  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
62  std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl;
63  #endif
64  set_work_size_defaults();
65  }
66 
67  kernel(kernel const & other)
68  : handle_(other.handle_), program_(other.program_), name_(other.name_), init_done_(other.init_done_)
69  {
70  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
71  std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
72  #endif
73  local_work_size_[0] = other.local_work_size_[0];
74  local_work_size_[1] = other.local_work_size_[1];
75 
76  global_work_size_[0] = other.global_work_size_[0];
77  global_work_size_[1] = other.global_work_size_[1];
78  }
79 
81  {
82  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
83  std::cout << "ViennaCL: Assigning kernel object" << std::endl;
84  #endif
85  handle_ = other.handle_;
86  program_ = other.program_;
87  name_ = other.name_;
88  init_done_ = other.init_done_;
89  local_work_size_[0] = other.local_work_size_[0];
90  local_work_size_[1] = other.local_work_size_[1];
91  global_work_size_[0] = other.global_work_size_[0];
92  global_work_size_[1] = other.global_work_size_[1];
93  return *this;
94  }
95 
96 
98  void arg(unsigned int pos, cl_uint val)
99  {
100  init();
101  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
102  std::cout << "ViennaCL: Setting unsigned long kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
103  #endif
104  cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_uint), (void*)&val);
105  VIENNACL_ERR_CHECK(err);
106  }
107 
109  void arg(unsigned int pos, float val)
110  {
111  init();
112  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
113  std::cout << "ViennaCL: Setting floating point kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
114  #endif
115  cl_int err = clSetKernelArg(handle_, pos, sizeof(float), (void*)&val);
116  VIENNACL_ERR_CHECK(err);
117  }
118 
120  void arg(unsigned int pos, double val)
121  {
122  init();
123  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
124  std::cout << "ViennaCL: Setting double precision kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
125  #endif
126  cl_int err = clSetKernelArg(handle_, pos, sizeof(double), (void*)&val);
127  VIENNACL_ERR_CHECK(err);
128  }
129 
130  //generic handling: call .handle() member
132  template<class VCL_TYPE>
133  void arg(unsigned int pos, VCL_TYPE const & val)
134  {
135  init();
136  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
137  std::cout << "ViennaCL: Setting generic kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
138  #endif
139  cl_mem temp = val.handle();
140  cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_mem), (void*)&temp);
141  VIENNACL_ERR_CHECK(err);
142  }
143 
144  //forward handles directly:
146  template<class CL_TYPE>
147  void arg(unsigned int pos, viennacl::ocl::handle<CL_TYPE> const & h)
148  {
149  //arg(pos, h);
150  init();
151  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
152  std::cout << "ViennaCL: Setting handle kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
153  #endif
154  CL_TYPE temp = h;
155  cl_int err = clSetKernelArg(handle_, pos, sizeof(CL_TYPE), (void*)&temp);
156  VIENNACL_ERR_CHECK(err);
157  }
158 
159 
160  //local buffer argument:
162  void arg(unsigned int pos, const local_mem & mem)
163  {
164  unsigned int size = mem.size();
165  init();
166  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
167  std::cout << "ViennaCL: Setting local memory kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
168  #endif
169  cl_int err = clSetKernelArg(handle_, pos, size, 0);
170  VIENNACL_ERR_CHECK(err);
171  }
172 
173 
174 
176  template <typename T0>
177  kernel & operator()(T0 const & t0)
178  {
179  arg(0, t0);
180  return *this;
181  }
182 
184  template <typename T0, typename T1>
185  kernel & operator()(T0 const & t0, T1 const & t1)
186  {
187  arg(0, t0); arg(1, t1);
188  return *this;
189  }
190 
192  template <typename T0, typename T1, typename T2>
193  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2)
194  {
195  arg(0, t0); arg(1, t1); arg(2, t2);
196  return *this;
197  }
198 
200  template <typename T0, typename T1, typename T2, typename T3>
201  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3)
202  {
203  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3);
204  return *this;
205  }
206 
208  template <typename T0, typename T1, typename T2, typename T3, typename T4>
209  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4)
210  {
211  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4);
212  return *this;
213  }
214 
216  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
217  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5)
218  {
219  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
220  return *this;
221  }
222 
224  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
225  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6)
226  {
227  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6);
228  return *this;
229  }
230 
232  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
233  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7)
234  {
235  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
236  return *this;
237  }
238 
240  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
241  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8)
242  {
243  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8);
244  return *this;
245  }
246 
248  template <typename T0, typename T1, typename T2, typename T3, typename T4,
249  typename T5, typename T6, typename T7, typename T8, typename T9>
250  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
251  T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9)
252  {
253  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9);
254  return *this;
255  }
256 
258  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
259  typename T6, typename T7, typename T8, typename T9, typename T10>
260  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
261  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10)
262  {
263  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10);
264  return *this;
265  }
266 
268  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
269  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11>
270  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
271  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11)
272  {
273  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
274  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
275  return *this;
276  }
277 
279  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
280  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11, typename T12>
281  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
282  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12)
283  {
284  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
285  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12);
286  return *this;
287  }
288 
290  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
291  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
292  typename T12, typename T13>
293  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
294  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
295  T12 const & t12, T13 const & t13)
296  {
297  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
298  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
299  arg(12, t12); arg(13, t13);
300  return *this;
301  }
302 
304  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
305  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
306  typename T12, typename T13, typename T14>
307  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
308  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
309  T12 const & t12, T13 const & t13, T14 const & t14)
310  {
311  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
312  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
313  arg(12, t12); arg(13, t13); arg(14, t14);
314  return *this;
315  }
316 
318  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
319  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
320  typename T12, typename T13, typename T14, typename T15>
321  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
322  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
323  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15)
324  {
325  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
326  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
327  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
328  return *this;
329  }
330 
332  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
333  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
334  typename T12, typename T13, typename T14, typename T15, typename T16>
335  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
336  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
337  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16)
338  {
339  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
340  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
341  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16);
342  return *this;
343  }
344 
346  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
347  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
348  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17>
349  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
350  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
351  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17)
352  {
353  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
354  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
355  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
356  return *this;
357  }
358 
360  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
361  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
362  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
363  typename T18>
364  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
365  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
366  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
367  T18 const & t18
368  )
369  {
370  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
371  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
372  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
373  arg(18, t18);
374  return *this;
375  }
376 
378  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
379  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
380  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
381  typename T18, typename T19>
382  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
383  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
384  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
385  T18 const & t18, T19 const & t19
386  )
387  {
388  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
389  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
390  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
391  arg(18, t18); arg(19, t19);
392  return *this;
393  }
394 
396  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
397  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
398  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
399  typename T18, typename T19, typename T20>
400  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
401  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
402  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
403  T18 const & t18, T19 const & t19, T20 const & t20
404  )
405  {
406  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
407  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
408  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
409  arg(18, t18); arg(19, t19); arg(20, t20);
410  return *this;
411  }
412 
414  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
415  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
416  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
417  typename T18, typename T19, typename T20, typename T21>
418  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
419  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
420  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
421  T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21
422  )
423  {
424  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
425  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
426  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
427  arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21);
428  return *this;
429  }
430 
432  template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
433  typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
434  typename T12, typename T13, typename T14, typename T15, typename T16, typename T17,
435  typename T18, typename T19, typename T20, typename T21, typename T22>
436  kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
437  T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
438  T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16, T17 const & t17,
439  T18 const & t18, T19 const & t19, T20 const & t20, T21 const & t21, T22 const & t22
440  )
441  {
442  arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
443  arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
444  arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16); arg(17, t17);
445  arg(18, t18); arg(19, t19); arg(20, t20); arg(21, t21); arg(22, t22);
446  return *this;
447  }
448 
453  size_t local_work_size(int index = 0) const
454  {
455  assert(index == 0 || index == 1);
456  return local_work_size_[index];
457  }
462  size_t global_work_size(int index = 0) const
463  {
464  assert(index == 0 || index == 1);
465  return global_work_size_[index];
466  }
467 
473  void local_work_size(int index, size_t s)
474  {
475  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
476  std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
477  #endif
478  assert(index == 0 || index == 1);
479  local_work_size_[index] = s;
480  }
486  void global_work_size(int index, size_t s)
487  {
488  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
489  std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
490  #endif
491  assert(index == 0 || index == 1);
492  global_work_size_[index] = s;
493  }
494 
495  std::string const & name() const { return name_; }
496 
497  viennacl::ocl::handle<cl_kernel> const & handle() const { return handle_; }
498 
499 
500  private:
501  void create_kernel()
502  {
503  cl_int err;
504  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
505  std::cout << "ViennaCL: Building kernel " << name_ << std::endl;
506  #endif
507  handle_ = clCreateKernel(program_, name_.c_str(), &err);
508 
509  if (err != CL_SUCCESS)
510  {
511  #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
512  std::cout << "ViennaCL: Could not create kernel '" << name_ << "'." << std::endl;
513  #endif
514  //std::cerr << "Could not build kernel '" << name_ << "'." << std::endl;
515  }
516  VIENNACL_ERR_CHECK(err);
517  }
518 
519  void set_work_size_defaults()
520  {
521  if (viennacl::ocl::current_device().type() == CL_DEVICE_TYPE_GPU)
522  {
523  local_work_size_[0] = 128; local_work_size_[1] = 0;
524  global_work_size_[0] = 128*128; global_work_size_[1] = 0;
525  }
526  else //assume CPU type:
527  {
528  //conservative assumption: one thread per CPU core:
529  local_work_size_[0] = 1; local_work_size_[1] = 0;
530  global_work_size_[0] = viennacl::ocl::current_device().max_compute_units(); global_work_size_[1] = 0;
531  }
532  }
533 
534  void init()
535  {
536  if (!init_done_)
537  {
538  create_kernel();
539  init_done_ = true;
540  }
541  }
542 
545  std::string name_;
546  bool init_done_;
547  size_t local_work_size_[2];
548  size_t global_work_size_[2];
549  };
550 
551  } //namespace ocl
552 } //namespace viennacl
553 
554 #endif