1 #ifndef VIENNACL_OCL_KERNEL_HPP_
2 #define VIENNACL_OCL_KERNEL_HPP_
25 #include <OpenCL/cl.h>
45 template <
typename KernelType>
// Default-constructor fragment: when kernel debugging is enabled it logs the
// construction, then it installs the default work sizes.
52 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
53 std::cout <<
"ViennaCL: Creating kernel object (default CTOR)" << std::endl;
// Populate local_work_size_ / global_work_size_ with their defaults.
55 set_work_size_defaults();
// Constructor fragment taking a program handle (prog) and a kernel name (name):
// the OpenCL kernel handle starts out as 0 and init_done_ as false, so no
// OpenCL kernel object is created by this initializer list itself.
59 : handle_(0), program_(prog), name_(name), init_done_(false)
61 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
62 std::cout <<
"ViennaCL: Creating kernel object (full CTOR)" << std::endl;
// Populate local_work_size_ / global_work_size_ with their defaults.
64 set_work_size_defaults();
// Copy-constructor fragment: copies handle, program, name and the init flag
// member-wise from `other`, then copies both entries of each work-size array.
// NOTE(review): the raw handle value is copied as-is; no reference-count
// adjustment is visible in this fragment - confirm ownership in the full source.
68 : handle_(other.handle_), program_(other.program_), name_(other.name_), init_done_(other.init_done_)
70 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
71 std::cout <<
"ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
// Work sizes are plain two-element arrays, so they are copied element by element.
73 local_work_size_[0] = other.local_work_size_[0];
74 local_work_size_[1] = other.local_work_size_[1];
76 global_work_size_[0] = other.global_work_size_[0];
77 global_work_size_[1] = other.global_work_size_[1];
// Assignment fragment: optionally logs, then copies handle, program, init flag
// and both entries of each work-size array from `other`.
// NOTE(review): no assignment of name_ is visible between program_ and
// init_done_ in this fragment - confirm it exists in the full source, otherwise
// the assigned-to kernel keeps its old name.
82 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
83 std::cout <<
"ViennaCL: Assigning kernel object" << std::endl;
85 handle_ = other.handle_;
86 program_ = other.program_;
88 init_done_ = other.init_done_;
// Work sizes are plain two-element arrays, so they are copied element by element.
89 local_work_size_[0] = other.local_work_size_[0];
90 local_work_size_[1] = other.local_work_size_[1];
91 global_work_size_[0] = other.global_work_size_[0];
92 global_work_size_[1] = other.global_work_size_[1];
// Sets the kernel argument at index `pos` to the unsigned integer `val`.
//   pos - zero-based index of the kernel argument
//   val - value to pass; cl_uint, so sizeof(cl_uint) bytes are handed to OpenCL
// The debug message now names the type as "unsigned int" to match the cl_uint
// parameter (it previously claimed "unsigned long").
// The status returned by clSetKernelArg is stored in `err`.
98 void arg(
unsigned int pos, cl_uint val)
101 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
102 std::cout <<
"ViennaCL: Setting unsigned int kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
// The address of the by-value parameter is passed; OpenCL copies the value.
104 cl_int err = clSetKernelArg(handle_, pos,
sizeof(cl_uint), (
void*)&val);
// Sets the kernel argument at index `pos` to the single-precision value `val`.
//   pos - zero-based index of the kernel argument
//   val - value to pass; sizeof(float) bytes are handed to OpenCL
// The status returned by clSetKernelArg is stored in `err`.
109 void arg(
unsigned int pos,
float val)
112 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
113 std::cout <<
"ViennaCL: Setting floating point kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
// The address of the by-value parameter is passed to clSetKernelArg.
115 cl_int err = clSetKernelArg(handle_, pos,
sizeof(
float), (
void*)&val);
// Sets the kernel argument at index `pos` to the double-precision value `val`.
//   pos - zero-based index of the kernel argument
//   val - value to pass; sizeof(double) bytes are handed to OpenCL
// The status returned by clSetKernelArg is stored in `err`.
120 void arg(
unsigned int pos,
double val)
123 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
124 std::cout <<
"ViennaCL: Setting double precision kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
// The address of the by-value parameter is passed to clSetKernelArg.
126 cl_int err = clSetKernelArg(handle_, pos,
sizeof(
double), (
void*)&val);
// Generic overload: sets the kernel argument at index `pos` from any object
// whose handle() result is convertible to cl_mem.
//   pos - zero-based index of the kernel argument
//   val - object providing handle(); only that handle is passed to OpenCL
// The status returned by clSetKernelArg is stored in `err`.
132 template<
class VCL_TYPE>
133 void arg(
unsigned int pos, VCL_TYPE
const & val)
136 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
137 std::cout <<
"ViennaCL: Setting generic kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
// Copy the handle into a local so that its address can be given to OpenCL.
139 cl_mem temp = val.handle();
140 cl_int err = clSetKernelArg(handle_, pos,
sizeof(cl_mem), (
void*)&temp);
// Handle overload fragment, templated on the OpenCL handle type CL_TYPE: passes
// sizeof(CL_TYPE) bytes read from the local `temp` to clSetKernelArg at `pos`.
// NOTE(review): the function signature and the definition of `temp` are not
// visible in this fragment - confirm them against the full source.
146 template<
class CL_TYPE>
151 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
152 std::cout <<
"ViennaCL: Setting handle kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
155 cl_int err = clSetKernelArg(handle_, pos,
sizeof(CL_TYPE), (
void*)&temp);
// Local-memory argument fragment: calls clSetKernelArg with a byte count `size`
// and a null value pointer for the argument at index `pos`.
// NOTE(review): the signature is not visible here; per the debug message this is
// the overload used for local memory kernel arguments.
166 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
167 std::cout <<
"ViennaCL: Setting local memory kernel argument at pos " << pos <<
" for kernel " << name_ << std::endl;
// Only a size is supplied; the value pointer is 0.
169 cl_int err = clSetKernelArg(handle_, pos, size, 0);
176 template <
typename T0>
184 template <
typename T0,
typename T1>
192 template <
typename T0,
typename T1,
typename T2>
200 template <
typename T0,
typename T1,
typename T2,
typename T3>
// operator() overload for five kernel arguments: takes t0..t4 by const reference
// and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
208 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4>
209 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4)
// operator() overload for six kernel arguments: takes t0..t5 by const reference
// and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
216 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5>
217 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5)
// operator() overload for seven kernel arguments: takes t0..t6 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
224 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6>
225 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6)
// operator() overload for eight kernel arguments: takes t0..t7 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
232 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7>
233 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7)
// operator() overload for nine kernel arguments: takes t0..t8 by const reference
// and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
240 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7,
typename T8>
241 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8)
// operator() overload for ten kernel arguments: takes t0..t9 by const reference
// and returns kernel&. The visible body forwards every argument to the matching
// arg(position, value) overload, positions 0 through 9 in order.
248 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
249 typename T5,
typename T6,
typename T7,
typename T8,
typename T9>
250 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4,
251 T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9)
// Argument position i receives ti.
253 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
// operator() overload for eleven kernel arguments: takes t0..t10 by const
// reference and returns kernel&. The visible body forwards every argument to the
// matching arg(position, value) overload, positions 0 through 10 in order.
258 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
259 typename T6,
typename T7,
typename T8,
typename T9,
typename T10>
260 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
261 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10)
// Argument position i receives ti.
263 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
arg(10, t10);
// operator() overload for twelve kernel arguments: takes t0..t11 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
268 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
269 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11>
270 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
271 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11)
// operator() overload for thirteen kernel arguments: takes t0..t12 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
279 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
280 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
typename T12>
281 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
282 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11, T12
const & t12)
// operator() overload for fourteen kernel arguments: takes t0..t13 by const
// reference and returns kernel&.
// NOTE(review): only the arg() calls for positions 12 and 13 are visible in this
// fragment; the calls for positions 0..11 are presumably on lines not shown.
290 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
291 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
292 typename T12,
typename T13>
293 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
294 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
295 T12
const & t12, T13
const & t13)
// Argument position i receives ti.
299 arg(12, t12);
arg(13, t13);
// operator() overload for fifteen kernel arguments: takes t0..t14 by const
// reference and returns kernel&.
// NOTE(review): only the arg() calls for positions 12 to 14 are visible in this
// fragment; the calls for positions 0..11 are presumably on lines not shown.
304 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
305 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
306 typename T12,
typename T13,
typename T14>
307 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
308 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
309 T12
const & t12, T13
const & t13, T14
const & t14)
// Argument position i receives ti.
313 arg(12, t12);
arg(13, t13);
arg(14, t14);
// operator() overload for sixteen kernel arguments: takes t0..t15 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
318 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
319 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
320 typename T12,
typename T13,
typename T14,
typename T15>
321 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
322 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
323 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15)
// operator() overload for seventeen kernel arguments: takes t0..t16 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
332 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
333 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
334 typename T12,
typename T13,
typename T14,
typename T15,
typename T16>
335 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
336 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
337 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16)
// operator() overload for eighteen kernel arguments: takes t0..t17 by const
// reference and returns kernel&.
// NOTE(review): the body is not visible here; presumably it forwards each
// argument to arg(i, ti) like the overloads whose bodies are visible - confirm.
346 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
347 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
348 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17>
349 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
350 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
351 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17)
// operator() overload fragment returning kernel& and taking t0..t17 by const
// reference, followed by further parameters that are not visible.
// NOTE(review): both the template parameter list and the function parameter list
// end in a trailing comma here, so at least one more parameter follows T17/t17
// on lines not shown; the body is not visible either.
360 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
361 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
362 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
364 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
365 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
366 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
// operator() overload for twenty kernel arguments: takes t0..t19 by const
// reference and returns kernel&.
// NOTE(review): the closing parenthesis of the parameter list is not visible,
// and only the arg() calls for positions 18 and 19 are shown; the calls for
// positions 0..17 are presumably on lines not shown.
378 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
379 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
380 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
381 typename T18,
typename T19>
382 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
383 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
384 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
385 T18
const & t18, T19
const & t19
391 arg(18, t18);
arg(19, t19);
// operator() overload for twenty-one kernel arguments: takes t0..t20 by const
// reference and returns kernel&.
// NOTE(review): the closing parenthesis of the parameter list is not visible,
// and only the arg() calls for positions 18 to 20 are shown; the calls for
// positions 0..17 are presumably on lines not shown.
396 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
397 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
398 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
399 typename T18,
typename T19,
typename T20>
400 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
401 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
402 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
403 T18
const & t18, T19
const & t19, T20
const & t20
409 arg(18, t18);
arg(19, t19);
arg(20, t20);
// operator() overload for twenty-two kernel arguments: takes t0..t21 by const
// reference and returns kernel&.
// NOTE(review): neither the closing parenthesis of the parameter list nor the
// body is visible here; presumably each argument is forwarded to arg(i, ti) as
// in the overloads whose bodies are visible - confirm.
414 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
415 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
416 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
417 typename T18,
typename T19,
typename T20,
typename T21>
418 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
419 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
420 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
421 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21
// operator() overload for twenty-three kernel arguments: takes t0..t22 by const
// reference and returns kernel&.
// NOTE(review): neither the closing parenthesis of the parameter list nor the
// body is visible here; presumably each argument is forwarded to arg(i, ti) as
// in the overloads whose bodies are visible - confirm.
432 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
433 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
434 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
435 typename T18,
typename T19,
typename T20,
typename T21,
typename T22>
436 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
437 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
438 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
439 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22
// Accessor fragment: returns the stored local work size for dimension `index`.
// Only indices 0 and 1 are valid; this is checked with assert, so the check
// disappears when NDEBUG is defined.
455 assert(index == 0 || index == 1);
456 return local_work_size_[index];
// Accessor fragment: returns the stored global work size for dimension `index`.
// Only indices 0 and 1 are valid; this is checked with assert, so the check
// disappears when NDEBUG is defined.
464 assert(index == 0 || index == 1);
465 return global_work_size_[index];
// Setter fragment: stores `s` as the local work size for dimension `index`,
// logging the change first when kernel debugging is enabled.
475 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
476 std::cout <<
"ViennaCL: Setting local work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
// Only two dimensions are stored; the check is an assert and vanishes under NDEBUG.
478 assert(index == 0 || index == 1);
479 local_work_size_[index] = s;
// Setter fragment: stores `s` as the global work size for dimension `index`,
// logging the change first when kernel debugging is enabled.
488 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
489 std::cout <<
"ViennaCL: Setting global work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
// Only two dimensions are stored; the check is an assert and vanishes under NDEBUG.
491 assert(index == 0 || index == 1);
492 global_work_size_[index] = s;
// Returns a const reference to the stored kernel name (name_); no copy is made,
// so the reference is only valid while this kernel object is alive.
495 std::string
const &
name()
const {
return name_; }
// Kernel-creation fragment: asks OpenCL to create the kernel called name_ from
// program_ and stores the result in handle_.
// NOTE(review): the declaration of `err` and whatever follows the failure
// message (e.g. error propagation, setting init_done_) are not visible here.
504 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
505 std::cout <<
"ViennaCL: Building kernel " << name_ << std::endl;
// clCreateKernel reports its status through the `err` out-parameter.
507 handle_ = clCreateKernel(program_, name_.c_str(), &err);
// On failure, a diagnostic naming the kernel is printed in debug builds.
509 if (err != CL_SUCCESS)
511 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
512 std::cout <<
"ViennaCL: Could not create kernel '" << name_ <<
"'." << std::endl;
// Installs default work sizes. Two alternatives are visible: a local size of 128
// with a global size of 128*128, and a local size of 1. The second dimension is
// set to 0 in every visible assignment.
// NOTE(review): the condition selecting between the alternatives, and the global
// size paired with the local size of 1, are not visible in this fragment.
519 void set_work_size_defaults()
523 local_work_size_[0] = 128; local_work_size_[1] = 0;
524 global_work_size_[0] = 128*128; global_work_size_[1] = 0;
529 local_work_size_[0] = 1; local_work_size_[1] = 0;
// Work sizes for up to two dimensions, indexed 0 and 1 by the accessors above.
547 size_t local_work_size_[2];
548 size_t global_work_size_[2];