Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb::flow::interface11::opencl_factory< DeviceFilter > Class Template Reference

#include <flow_graph_opencl_node.h>

Inheritance diagram for tbb::flow::interface11::opencl_factory< DeviceFilter >:
Collaboration diagram for tbb::flow::interface11::opencl_factory< DeviceFilter >:

Classes

struct  finalize_fn
 
struct  finalize_fn_leaf
 
class  kernel
 

Public Types

template<typename T >
using async_msg_type = opencl_async_msg< T, opencl_factory< DeviceFilter > >
 
typedef opencl_device device_type
 
typedef kernel kernel_type
 
typedef opencl_range range_type
 

Public Member Functions

 opencl_factory ()
 
 ~opencl_factory ()
 
bool init (const opencl_device_list &device_list)
 
template<typename ... Args>
void send_kernel (opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
 
template<typename T , typename ... Rest>
void send_data (opencl_device device, T &t, Rest &... args)
 
void send_data (opencl_device)
 
template<typename FinalizeFn , typename ... Args>
void finalize (opencl_device device, FinalizeFn fn, Args &... args)
 
const opencl_device_list & devices ()
 

Private Member Functions

template<typename Factory >
void enqueue_map_buffer (opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
 
template<typename Factory >
void enqueue_unmap_buffer (opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
 
template<size_t NUM_ARGS, typename T >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
 
template<size_t NUM_ARGS, typename T , typename F >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
 
template<size_t NUM_ARGS, typename T , typename ... Rest>
void process_arg_list (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
 
template<size_t NUM_ARGS>
void process_arg_list (const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
 
template<typename T >
void update_one_arg (cl_event, T &)
 
template<typename T , typename F >
void update_one_arg (cl_event e, opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
void update_arg_list (cl_event e, T &t, Rest &... args)
 
void update_arg_list (cl_event)
 
cl_event send_kernel_impl (opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
 
template<typename T >
bool get_event_from_one_arg (cl_event &, const T &)
 
template<typename T , typename F >
bool get_event_from_one_arg (cl_event &e, const opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
bool get_event_from_args (cl_event &e, const T &t, const Rest &... args)
 
bool get_event_from_args (cl_event &)
 
bool is_same_context (opencl_device::device_id_type d1, opencl_device::device_id_type d2)
 
 opencl_factory (const opencl_factory &)
 
opencl_factory & operator= (const opencl_factory &)
 
cl_context context ()
 
void init_once ()
 

Static Private Member Functions

static void CL_CALLBACK finalize_callback (cl_event, cl_int event_command_exec_status, void *data)
 

Private Attributes

std::once_flag my_once_flag
 
opencl_device_list my_devices
 
cl_context my_cl_context
 
tbb::spin_mutex my_devices_mutex
 

Friends

template<typename Factory >
class opencl_program
 
template<typename Factory >
class opencl_buffer_impl
 
template<typename Factory >
class opencl_memory
 

Detailed Description

template<typename DeviceFilter>
class tbb::flow::interface11::opencl_factory< DeviceFilter >

Definition at line 48 of file flow_graph_opencl_node.h.

Member Typedef Documentation

◆ async_msg_type

template<typename DeviceFilter>
template<typename T >
using tbb::flow::interface11::opencl_factory< DeviceFilter >::async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter> >

Definition at line 812 of file flow_graph_opencl_node.h.

◆ device_type

template<typename DeviceFilter>
typedef opencl_device tbb::flow::interface11::opencl_factory< DeviceFilter >::device_type

Definition at line 813 of file flow_graph_opencl_node.h.

◆ kernel_type

template<typename DeviceFilter>
typedef kernel tbb::flow::interface11::opencl_factory< DeviceFilter >::kernel_type

Definition at line 855 of file flow_graph_opencl_node.h.

◆ range_type

template<typename DeviceFilter>
typedef opencl_range tbb::flow::interface11::opencl_factory< DeviceFilter >::range_type

Definition at line 859 of file flow_graph_opencl_node.h.

Constructor & Destructor Documentation

◆ opencl_factory() [1/2]

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::opencl_factory ( )
inline

Definition at line 861 of file flow_graph_opencl_node.h.

861 {}

◆ ~opencl_factory()

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::~opencl_factory ( )
inline

Definition at line 862 of file flow_graph_opencl_node.h.

862  {
863  if ( my_devices.size() ) {
864  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
865  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
866  }
867  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
868  }
869  }
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d

◆ opencl_factory() [2/2]

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::opencl_factory ( const opencl_factory< DeviceFilter > &  )
private

Member Function Documentation

◆ context()

template<typename DeviceFilter>
cl_context tbb::flow::interface11::opencl_factory< DeviceFilter >::context ( )
inlineprivate

◆ devices()

template<typename DeviceFilter>
const opencl_device_list& tbb::flow::interface11::opencl_factory< DeviceFilter >::devices ( )
inline

Definition at line 1061 of file flow_graph_opencl_node.h.

◆ enqueue_map_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::enqueue_map_buffer ( opencl_device  device,
opencl_buffer_impl< Factory > &  buffer,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 883 of file flow_graph_opencl_node.h.

883  {
884  cl_event const* e1 = dmsg.get_event();
885  cl_event e2;
886  cl_int err;
887  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
888  e1 == NULL ? 0 : 1, e1, &e2, &err );
889  enforce_cl_retcode( err, "Failed to map a buffer" );
890  dmsg.data( false ) = ptr;
891  dmsg.set_event( e2 );
892  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
893  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ enqueue_unmap_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::enqueue_unmap_buffer ( opencl_device  device,
opencl_memory< Factory > &  memory,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 897 of file flow_graph_opencl_node.h.

897  {
898  cl_event const* e1 = dmsg.get_event();
899  cl_event e2;
900  enforce_cl_retcode(
901  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
902  "Failed to unmap a buffer" );
903  dmsg.set_event( e2 );
904  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
905  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize()

template<typename DeviceFilter>
template<typename FinalizeFn , typename ... Args>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::finalize ( opencl_device  device,
FinalizeFn  fn,
Args &...  args 
)
inline

Definition at line 1050 of file flow_graph_opencl_node.h.

1050  {
1051  cl_event e;
1052 
1053  if ( get_event_from_args( e, args... ) ) {
1054  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1055  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1056  }
1057 
1058  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1059  }
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p 
const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)

◆ finalize_callback()

template<typename DeviceFilter>
static void CL_CALLBACK tbb::flow::interface11::opencl_factory< DeviceFilter >::finalize_callback ( cl_event  ,
cl_int  event_command_exec_status,
void *  data 
)
inlinestaticprivate

Definition at line 1037 of file flow_graph_opencl_node.h.

1037  {
1038  tbb::internal::suppress_unused_warning(event_command_exec_status);
1039  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1040 
1041  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1042  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1043  (*fn_ptr)();
1044 
1045  // Function pointer was created by 'new' & this callback must be called once only
1046  delete fn_ptr;
1047  }
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:398
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data

◆ get_event_from_args() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  e,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 1012 of file flow_graph_opencl_node.h.

1012  {
1013  if ( get_event_from_one_arg( e, t ) ) {
1014  return true;
1015  }
1016 
1017  return get_event_from_args( e, args... );
1018  }
bool get_event_from_one_arg(cl_event &, const T &)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)

◆ get_event_from_args() [2/2]

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  )
inlineprivate

Definition at line 1020 of file flow_graph_opencl_node.h.

1020  {
1021  return false;
1022  }

◆ get_event_from_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  ,
const T &   
)
inlineprivate

Definition at line 995 of file flow_graph_opencl_node.h.

995  {
996  return false;
997  }

◆ get_event_from_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  e,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 1000 of file flow_graph_opencl_node.h.

1000  {
1001  cl_event const *e_ptr = msg.get_event();
1002 
1003  if ( e_ptr != NULL ) {
1004  e = *e_ptr;
1005  return true;
1006  }
1007 
1008  return false;
1009  }

◆ init()

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::init ( const opencl_device_list &  device_list )
inline

Definition at line 871 of file flow_graph_opencl_node.h.

871  {
872  tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
873  if ( !my_devices.size() ) {
874  my_devices = device_list;
875  return true;
876  }
877  return false;
878  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
Represents acquisition of a mutex.
Definition: spin_mutex.h:53

◆ init_once()

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::init_once ( )
inlineprivate

Definition at line 1083 of file flow_graph_opencl_node.h.

Referenced by tbb::flow::interface11::opencl_factory< default_device_filter >::context(), and tbb::flow::interface11::opencl_factory< default_device_filter >::devices().

1083  {
1084  {
1085  tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1086  if (!my_devices.size())
1087  my_devices = DeviceFilter()( opencl_info::available_devices() );
1088  }
1089 
1090  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1091  cl_platform_id platform_id = my_devices.begin()->platform_id();
1092  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1093  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1094 
1095  std::vector<cl_device_id> cl_device_ids;
1096  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1097  cl_device_ids.push_back((*d).my_cl_device_id);
1098  }
1099 
1100  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1101  cl_int err;
1102  cl_context ctx = clCreateContext(context_properties,
1103  (cl_uint)cl_device_ids.size(),
1104  cl_device_ids.data(),
1105  NULL, NULL, &err);
1106  enforce_cl_retcode(err, "Failed to create context");
1107  my_cl_context = ctx;
1108 
1109  size_t device_counter = 0;
1110  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1111  (*d).my_device_id = device_counter++;
1112  cl_int err2;
1113  cl_command_queue cq;
1114 #if CL_VERSION_2_0
1115  if ((*d).major_version() >= 2) {
1116  if ((*d).out_of_order_exec_mode_on_host_present()) {
1117  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1118  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1119  } else {
1120  cl_queue_properties props[] = { 0 };
1121  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1122  }
1123  } else
1124 #endif
1125  {
1126  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1127  // Suppress "declared deprecated" warning for the next line.
1128 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1129 #pragma GCC diagnostic push
1130 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1131 #endif
1132 #if _MSC_VER || __INTEL_COMPILER
1133 #pragma warning( push )
1134 #if __INTEL_COMPILER
1135 #pragma warning (disable: 1478)
1136 #else
1137 #pragma warning (disable: 4996)
1138 #endif
1139 #endif
1140  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1141 #if _MSC_VER || __INTEL_COMPILER
1142 #pragma warning( pop )
1143 #endif
1144 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1145 #pragma GCC diagnostic pop
1146 #endif
1147  }
1148  enforce_cl_retcode(err2, "Failed to create command queue");
1149  (*d).my_cl_command_queue = cq;
1150  }
1151  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
const opencl_device_list & available_devices()
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
Here is the caller graph for this function:

◆ is_same_context()

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::is_same_context ( opencl_device::device_id_type  d1,
opencl_device::device_id_type  d2 
)
inlineprivate

Definition at line 1067 of file flow_graph_opencl_node.h.

1067  {
1069  // Currently, the factory supports only one context, so if both devices are not the host, it means they are in the same context.
1070  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1071  return true;
1072  return d1 == d2;
1073  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ operator=()

template<typename DeviceFilter>
opencl_factory& tbb::flow::interface11::opencl_factory< DeviceFilter >::operator= ( const opencl_factory< DeviceFilter > &  )
private

◆ process_arg_list() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 927 of file flow_graph_opencl_node.h.

927  {
928  process_one_arg( kernel, events, num_events, place, t );
929  process_arg_list( kernel, events, num_events, place, args... );
930  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

◆ process_arg_list() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  ,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &  
)
inlineprivate

Definition at line 933 of file flow_graph_opencl_node.h.

933 {}

◆ process_one_arg() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &  place,
const T &  t 
)
inlineprivate

Definition at line 909 of file flow_graph_opencl_node.h.

909  {
910  auto p = get_native_object(t);
911  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
912  }
void enforce_cl_retcode(cl_int err, std::string msg)
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
void const char const char int ITT_FORMAT __itt_group_sync p

◆ process_one_arg() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename F >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 915 of file flow_graph_opencl_node.h.

915  {
916  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
917 
918  const cl_event * const e = msg.get_event();
919  if (e != NULL) {
920  events[num_events++] = *e;
921  }
922 
923  process_one_arg( kernel, events, num_events, place, msg.data(false) );
924  }
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ send_data() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_data ( opencl_device  device,
T &  t,
Rest &...  args 
)
inline

Definition at line 969 of file flow_graph_opencl_node.h.

969  {
970  send_if_memory_object( device, t );
971  send_data( device, args... );
972  }
void send_data(opencl_device device, T &t, Rest &... args)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)

◆ send_data() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_data ( opencl_device  )
inline

Definition at line 974 of file flow_graph_opencl_node.h.

974 {}

◆ send_kernel()

template<typename DeviceFilter>
template<typename ... Args>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_kernel ( opencl_device  device,
const kernel_type &  kernel,
const range_type &  work_size,
Args &...  args 
)
inline

Definition at line 953 of file flow_graph_opencl_node.h.

953  {
954  std::array<cl_event, sizeof...(Args)> events;
955  int num_events = 0;
956  int place = 0;
957  process_arg_list( kernel, events, num_events, place, args... );
958 
959  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
960 
961  update_arg_list(e, args...);
962 
963  // Release our own reference to cl_event
964  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
965  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void enforce_cl_retcode(cl_int err, std::string msg)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void update_arg_list(cl_event e, T &t, Rest &... args)

◆ send_kernel_impl()

template<typename DeviceFilter>
cl_event tbb::flow::interface11::opencl_factory< DeviceFilter >::send_kernel_impl ( opencl_device  device,
const cl_kernel &  kernel,
const range_type &  work_size,
cl_uint  num_events,
cl_event *  event_list 
)
inlineprivate

Definition at line 978 of file flow_graph_opencl_node.h.

979  {
980  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
981  const typename range_type::nd_range_type& g_size = work_size.global_range();
982  const typename range_type::nd_range_type& l_size = work_size.local_range();
983  cl_uint s;
984  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
985  cl_event event;
986  enforce_cl_retcode(
987  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
988  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
989  "Failed to enqueue a kernel" );
990  return event;
991  }
std::array< range_index_type, 3 > nd_range_type
void const char const char int ITT_FORMAT __itt_group_sync s
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event

◆ update_arg_list() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  e,
T &  t,
Rest &...  args 
)
inlineprivate

Definition at line 944 of file flow_graph_opencl_node.h.

944  {
945  update_one_arg( e, t );
946  update_arg_list( e, args... );
947  }
void update_arg_list(cl_event e, T &t, Rest &... args)

◆ update_arg_list() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  )
inlineprivate

Definition at line 949 of file flow_graph_opencl_node.h.

949 {}

◆ update_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  ,
T &   
)
inlineprivate

Definition at line 936 of file flow_graph_opencl_node.h.

936 {}

◆ update_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  e,
opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 939 of file flow_graph_opencl_node.h.

939  {
940  msg.set_event( e );
941  }

Friends And Related Function Documentation

◆ opencl_buffer_impl

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_buffer_impl
friend

Definition at line 1162 of file flow_graph_opencl_node.h.

◆ opencl_memory

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_memory
friend

Definition at line 1164 of file flow_graph_opencl_node.h.

◆ opencl_program

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_program
friend

Definition at line 1160 of file flow_graph_opencl_node.h.

Member Data Documentation

◆ my_cl_context

template<typename DeviceFilter>
cl_context tbb::flow::interface11::opencl_factory< DeviceFilter >::my_cl_context
private

Definition at line 1155 of file flow_graph_opencl_node.h.

◆ my_devices

template<typename DeviceFilter>
opencl_device_list tbb::flow::interface11::opencl_factory< DeviceFilter >::my_devices
private

Definition at line 1154 of file flow_graph_opencl_node.h.

◆ my_devices_mutex

template<typename DeviceFilter>
tbb::spin_mutex tbb::flow::interface11::opencl_factory< DeviceFilter >::my_devices_mutex
private

Definition at line 1157 of file flow_graph_opencl_node.h.

◆ my_once_flag

template<typename DeviceFilter>
std::once_flag tbb::flow::interface11::opencl_factory< DeviceFilter >::my_once_flag
private

Definition at line 1153 of file flow_graph_opencl_node.h.


The documentation for this class was generated from the following file:

flow_graph_opencl_node.h

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.