ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
matrix_prod_row_col_col_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_MATRIX_PROD_ROW_COL_COL_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_MATRIX_PROD_ROW_COL_COL_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
/**
 * OpenCL C source of the single-precision kernel `prod_TT`.
 *
 * Per the kernel's own header comments it computes C = A^T * B^T with
 * A stored row-major, B stored column-major and C stored column-major.
 *
 * How the kernel works (all of this is visible in the source below):
 *  - block_size is read from get_local_size(0) and is used as the tile width
 *    in both dimensions.
 *  - For every tile along the shared dimension (block_num tiles, rounded up
 *    from A_row_size / block_size) each work-item copies one element of A
 *    and one element of B into the __local buffers bufA / bufB; elements
 *    outside the matrix extents are stored as 0.
 *  - After a local barrier each work-item accumulates the dot product of
 *    block_size consecutive floats of bufA and of bufB into Csub.
 *  - The result is written to C only for work-items inside
 *    (A_col_size, B_row_size).
 *
 * The dot product loops over block_size rather than being unrolled for one
 * fixed tile width, so the result does not depend on a particular
 * work-group size.
 *
 * NOTE(review): bufA / bufB are indexed up to block_size * block_size - 1;
 * presumably the host allocates at least that many floats for each and
 * launches square work-groups - confirm against the caller.
 * NOTE(review): the file is marked as generated ("do not edit manually");
 * the same change presumably has to be mirrored in the generator input.
 */
const char * const matrix_prod_row_col_col_align1_prod_TT =
"// file automatically generated - do not edit!\n"
"// matrix-matrix multiplication C = A^T * B^T\n"
"// matrix layouts: C...col_major, A...row_major, B...col_major\n"
"__kernel void prod_TT(\n"
" __global const float * A,\n"
" unsigned int A_row_start,\n"
" unsigned int A_col_start,\n"
" unsigned int A_row_size,\n"
" unsigned int A_col_size,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global const float * B, \n"
" unsigned int B_row_start,\n"
" unsigned int B_col_start,\n"
" unsigned int B_row_size,\n"
" unsigned int B_col_size,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols,\n"
" __global float * C,\n"
" unsigned int C_row_start,\n"
" unsigned int C_col_start,\n"
" unsigned int C_row_size,\n"
" unsigned int C_col_size,\n"
" unsigned int C_internal_rows,\n"
" unsigned int C_internal_cols,\n"
" __local float * bufA,\n"
" __local float * bufB) \n"
"{ \n"
" size_t block_size = get_local_size(0);\n"
" size_t row_block_id = get_group_id(0);\n"
" size_t col_block_id = get_group_id(1);\n"
" size_t row_thread_id = get_local_id(0);\n"
" size_t col_thread_id = get_local_id(1);\n"
// A is walked down its rows (step = block_size rows), B across its columns.
" size_t aBegin = (row_block_id * block_size + A_col_start) + A_row_start * A_internal_cols;\n"
" size_t aStep = block_size * A_internal_cols;\n"
" size_t bBegin = (col_block_id * block_size + B_row_start) + B_col_start * B_internal_rows;\n"
" size_t bStep = block_size * B_internal_rows;\n"
// Number of tiles along the shared dimension, rounded up.
" size_t block_num = A_row_size / block_size;\n"
" if (block_num * block_size != A_row_size)\n"
" ++block_num;\n"
" float Csub = 0;\n"
" size_t aOffset = row_thread_id + col_thread_id * A_internal_cols;\n"
" size_t bOffset = row_thread_id * B_internal_rows + col_thread_id;\n"
" size_t row_thread_id_times_block_size = row_thread_id * block_size;\n"
" for (size_t block = 0;\n"
" block < block_num;\n"
" ++block)\n"
" {\n"
// Stage one tile of A and B in local memory; out-of-range elements become 0.
" bufA[row_thread_id_times_block_size + col_thread_id] = (block * block_size + col_thread_id < A_row_size && get_global_id(0) < A_col_size) ? A[aBegin + aOffset] : 0;\n"
" bufB[col_thread_id * block_size + row_thread_id] = ( (block * block_size + row_thread_id < B_col_size) && get_global_id(1) < B_row_size ) ? B[bBegin + bOffset] : 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float * bufAptr = bufA + row_thread_id_times_block_size;\n"
"__local float * bufBptr = bufB + col_thread_id * block_size;\n"
// Dot product over the whole tile width, whatever the work-group size is.
" for (size_t k = 0; k < block_size; ++k)\n"
" Csub += bufAptr[k] * bufBptr[k];\n"
// Second barrier: nobody may overwrite the tiles while others still read them.
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" aBegin += aStep;\n"
" bBegin += bStep;\n"
" }\n"
" if (get_global_id(0) < A_col_size && get_global_id(1) < B_row_size)\n"
" C[get_global_id(0) + C_row_start + (get_global_id(1) + C_col_start) * C_internal_rows] = Csub;\n"
"}\n"
; //matrix_prod_row_col_col_align1_prod_TT
88
89
/**
 * OpenCL C source of the single-precision kernel `prod_AA`.
 *
 * Per the kernel's own header comments it computes C = A * B with
 * A stored row-major, B stored column-major and C stored column-major.
 *
 * How the kernel works (all of this is visible in the source below):
 *  - block_size is read from get_local_size(0) and is used as the tile width
 *    in both dimensions.
 *  - For every tile along the shared dimension (block_num tiles, rounded up
 *    from A_col_size / block_size) each work-item copies one element of A
 *    and one element of B into the __local buffers bufA / bufB; elements
 *    outside the matrix extents are stored as 0.
 *  - After a local barrier each work-item accumulates the dot product of
 *    block_size consecutive floats of bufA and of bufB into Csub.
 *  - The result is written to C only for work-items inside
 *    (A_row_size, B_col_size).
 *
 * The dot product loops over block_size rather than being unrolled for one
 * fixed tile width, so the result does not depend on a particular
 * work-group size.
 *
 * NOTE(review): bufA / bufB are indexed up to block_size * block_size - 1;
 * presumably the host allocates at least that many floats for each and
 * launches square work-groups - confirm against the caller.
 * NOTE(review): the file is marked as generated ("do not edit manually");
 * the same change presumably has to be mirrored in the generator input.
 */
const char * const matrix_prod_row_col_col_align1_prod_AA =
"// file automatically generated - do not edit!\n"
"// matrix-matrix multiplication C = A * B\n"
"// matrix layouts: C...col_major, A...row_major, B...col_major\n"
"__kernel void prod_AA(\n"
" __global const float * A,\n"
" unsigned int A_row_start,\n"
" unsigned int A_col_start,\n"
" unsigned int A_row_size,\n"
" unsigned int A_col_size,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global const float * B, \n"
" unsigned int B_row_start,\n"
" unsigned int B_col_start,\n"
" unsigned int B_row_size,\n"
" unsigned int B_col_size,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols,\n"
" __global float * C,\n"
" unsigned int C_row_start,\n"
" unsigned int C_col_start,\n"
" unsigned int C_row_size,\n"
" unsigned int C_col_size,\n"
" unsigned int C_internal_rows,\n"
" unsigned int C_internal_cols,\n"
" __local float * bufA,\n"
" __local float * bufB) \n"
"{ \n"
" size_t block_size = get_local_size(0);\n"
" size_t row_block_id = get_group_id(0);\n"
" size_t col_block_id = get_group_id(1);\n"
" size_t row_thread_id = get_local_id(0);\n"
" size_t col_thread_id = get_local_id(1);\n"
// A is walked along its columns, B down its rows; both advance by block_size elements.
" size_t aBegin = (row_block_id * block_size + A_row_start) * A_internal_cols + A_col_start;\n"
" size_t aStep = block_size;\n"
" size_t bBegin = (col_block_id * block_size + B_col_start) * B_internal_rows + B_row_start;\n"
" size_t bStep = block_size;\n"
// Number of tiles along the shared dimension, rounded up.
" size_t block_num = A_col_size / block_size;\n"
" if (block_num * block_size != A_col_size)\n"
" ++block_num;\n"
" float Csub = 0;\n"
" size_t aOffset = row_thread_id * A_internal_cols + col_thread_id;\n"
" size_t bOffset = row_thread_id + col_thread_id * B_internal_rows;\n"
" size_t row_thread_id_times_block_size = row_thread_id * block_size;\n"
" for (size_t block = 0;\n"
" block < block_num;\n"
" ++block)\n"
" {\n"
// Stage one tile of A and B in local memory; out-of-range elements become 0.
" bufA[row_thread_id_times_block_size + col_thread_id] = (block * block_size + col_thread_id < A_col_size && get_global_id(0) < A_row_size) ? A[aBegin + aOffset] : 0;\n"
" bufB[col_thread_id * block_size + row_thread_id] = ( (block * block_size + row_thread_id < B_row_size) && get_global_id(1) < B_col_size ) ? B[bBegin + bOffset] : 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float * bufAptr = bufA + row_thread_id_times_block_size;\n"
"__local float * bufBptr = bufB + col_thread_id * block_size;\n"
// Dot product over the whole tile width, whatever the work-group size is.
" for (size_t k = 0; k < block_size; ++k)\n"
" Csub += bufAptr[k] * bufBptr[k];\n"
// Second barrier: nobody may overwrite the tiles while others still read them.
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" aBegin += aStep;\n"
" bBegin += bStep;\n"
" }\n"
" if (get_global_id(0) < A_row_size && get_global_id(1) < B_col_size)\n"
" C[get_global_id(0) + C_row_start + (get_global_id(1) + C_col_start) * C_internal_rows] = Csub;\n"
"}\n"
; //matrix_prod_row_col_col_align1_prod_AA
167
168
/**
 * OpenCL C source of the single-precision kernel `prod_TA`.
 *
 * Per the kernel's own header comments it computes C = A^T * B with
 * A stored row-major, B stored column-major and C stored column-major.
 *
 * How the kernel works (all of this is visible in the source below):
 *  - block_size is read from get_local_size(0) and is used as the tile width
 *    in both dimensions.
 *  - For every tile along the shared dimension (block_num tiles, rounded up
 *    from A_row_size / block_size) each work-item copies one element of A
 *    and one element of B into the __local buffers bufA / bufB; elements
 *    outside the matrix extents are stored as 0.
 *  - After a local barrier each work-item accumulates the dot product of
 *    block_size consecutive floats of bufA and of bufB into Csub.
 *  - The result is written to C only for work-items inside
 *    (A_col_size, B_col_size).
 *
 * The dot product loops over block_size rather than being unrolled for one
 * fixed tile width, so the result does not depend on a particular
 * work-group size.
 *
 * NOTE(review): bufA / bufB are indexed up to block_size * block_size - 1;
 * presumably the host allocates at least that many floats for each and
 * launches square work-groups - confirm against the caller.
 * NOTE(review): the file is marked as generated ("do not edit manually");
 * the same change presumably has to be mirrored in the generator input.
 */
const char * const matrix_prod_row_col_col_align1_prod_TA =
"// file automatically generated - do not edit!\n"
"// matrix-matrix multiplication C = A^T * B\n"
"// matrix layouts: C...col_major, A...row_major, B...col_major\n"
"__kernel void prod_TA(\n"
" __global const float * A,\n"
" unsigned int A_row_start,\n"
" unsigned int A_col_start,\n"
" unsigned int A_row_size,\n"
" unsigned int A_col_size,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global const float * B, \n"
" unsigned int B_row_start,\n"
" unsigned int B_col_start,\n"
" unsigned int B_row_size,\n"
" unsigned int B_col_size,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols,\n"
" __global float * C,\n"
" unsigned int C_row_start,\n"
" unsigned int C_col_start,\n"
" unsigned int C_row_size,\n"
" unsigned int C_col_size,\n"
" unsigned int C_internal_rows,\n"
" unsigned int C_internal_cols,\n"
" __local float * bufA,\n"
" __local float * bufB) \n"
"{ \n"
" size_t block_size = get_local_size(0);\n"
" size_t row_block_id = get_group_id(0);\n"
" size_t col_block_id = get_group_id(1);\n"
" size_t row_thread_id = get_local_id(0);\n"
" size_t col_thread_id = get_local_id(1);\n"
// A is walked down its rows (step = block_size rows), B down its rows (step = block_size elements).
" size_t aBegin = (row_block_id * block_size + A_col_start) + A_row_start * A_internal_cols;\n"
" size_t aStep = block_size * A_internal_cols;\n"
" size_t bBegin = (col_block_id * block_size + B_col_start) * B_internal_rows + B_row_start;\n"
" size_t bStep = block_size;\n"
// Number of tiles along the shared dimension, rounded up.
" size_t block_num = A_row_size / block_size;\n"
" if (block_num * block_size != A_row_size)\n"
" ++block_num;\n"
" float Csub = 0;\n"
" size_t aOffset = row_thread_id + col_thread_id * A_internal_cols;\n"
" size_t bOffset = row_thread_id + col_thread_id * B_internal_rows;\n"
" size_t row_thread_id_times_block_size = row_thread_id * block_size;\n"
" for (size_t block = 0;\n"
" block < block_num;\n"
" ++block)\n"
" {\n"
// Stage one tile of A and B in local memory; out-of-range elements become 0.
" bufA[row_thread_id_times_block_size + col_thread_id] = (block * block_size + col_thread_id < A_row_size && get_global_id(0) < A_col_size) ? A[aBegin + aOffset] : 0;\n"
" bufB[col_thread_id * block_size + row_thread_id] = ( (block * block_size + row_thread_id < B_row_size) && get_global_id(1) < B_col_size ) ? B[bBegin + bOffset] : 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float * bufAptr = bufA + row_thread_id_times_block_size;\n"
"__local float * bufBptr = bufB + col_thread_id * block_size;\n"
// Dot product over the whole tile width, whatever the work-group size is.
" for (size_t k = 0; k < block_size; ++k)\n"
" Csub += bufAptr[k] * bufBptr[k];\n"
// Second barrier: nobody may overwrite the tiles while others still read them.
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" aBegin += aStep;\n"
" bBegin += bStep;\n"
" }\n"
" if (get_global_id(0) < A_col_size && get_global_id(1) < B_col_size)\n"
" C[get_global_id(0) + C_row_start + (get_global_id(1) + C_col_start) * C_internal_rows] = Csub;\n"
"}\n"
; //matrix_prod_row_col_col_align1_prod_TA
246
247
/**
 * OpenCL C source of the single-precision kernel `prod_AT`.
 *
 * Per the kernel's own header comments it computes C = A * B^T with
 * A stored row-major, B stored column-major and C stored column-major.
 *
 * How the kernel works (all of this is visible in the source below):
 *  - block_size is read from get_local_size(0) and is used as the tile width
 *    in both dimensions.
 *  - For every tile along the shared dimension (block_num tiles, rounded up
 *    from A_col_size / block_size) each work-item copies one element of A
 *    and one element of B into the __local buffers bufA / bufB; elements
 *    outside the matrix extents are stored as 0.
 *  - After a local barrier each work-item accumulates the dot product of
 *    block_size consecutive floats of bufA and of bufB into Csub.
 *  - The result is written to C only for work-items inside
 *    (A_row_size, B_row_size).
 *
 * The dot product loops over block_size rather than being unrolled for one
 * fixed tile width, so the result does not depend on a particular
 * work-group size.
 *
 * NOTE(review): bufA / bufB are indexed up to block_size * block_size - 1;
 * presumably the host allocates at least that many floats for each and
 * launches square work-groups - confirm against the caller.
 * NOTE(review): the file is marked as generated ("do not edit manually");
 * the same change presumably has to be mirrored in the generator input.
 */
const char * const matrix_prod_row_col_col_align1_prod_AT =
"// file automatically generated - do not edit!\n"
"// matrix-matrix multiplication C = A * B^T\n"
"// matrix layouts: C...col_major, A...row_major, B...col_major\n"
"__kernel void prod_AT(\n"
" __global const float * A,\n"
" unsigned int A_row_start,\n"
" unsigned int A_col_start,\n"
" unsigned int A_row_size,\n"
" unsigned int A_col_size,\n"
" unsigned int A_internal_rows,\n"
" unsigned int A_internal_cols,\n"
" __global const float * B, \n"
" unsigned int B_row_start,\n"
" unsigned int B_col_start,\n"
" unsigned int B_row_size,\n"
" unsigned int B_col_size,\n"
" unsigned int B_internal_rows,\n"
" unsigned int B_internal_cols,\n"
" __global float * C,\n"
" unsigned int C_row_start,\n"
" unsigned int C_col_start,\n"
" unsigned int C_row_size,\n"
" unsigned int C_col_size,\n"
" unsigned int C_internal_rows,\n"
" unsigned int C_internal_cols,\n"
" __local float * bufA,\n"
" __local float * bufB) \n"
"{ \n"
" size_t block_size = get_local_size(0);\n"
" size_t row_block_id = get_group_id(0);\n"
" size_t col_block_id = get_group_id(1);\n"
" size_t row_thread_id = get_local_id(0);\n"
" size_t col_thread_id = get_local_id(1);\n"
// A is walked along its columns (step = block_size elements), B across its columns (step = block_size columns).
" size_t aBegin = (row_block_id * block_size + A_row_start) * A_internal_cols + A_col_start;\n"
" size_t aStep = block_size;\n"
" size_t bBegin = (col_block_id * block_size + B_row_start) + B_col_start * B_internal_rows;\n"
" size_t bStep = block_size * B_internal_rows;\n"
// Number of tiles along the shared dimension, rounded up.
" size_t block_num = A_col_size / block_size;\n"
" if (block_num * block_size != A_col_size)\n"
" ++block_num;\n"
" float Csub = 0;\n"
" size_t aOffset = row_thread_id * A_internal_cols + col_thread_id;\n"
" size_t bOffset = row_thread_id * B_internal_rows + col_thread_id;\n"
" size_t row_thread_id_times_block_size = row_thread_id * block_size;\n"
" for (size_t block = 0;\n"
" block < block_num;\n"
" ++block)\n"
" {\n"
// Stage one tile of A and B in local memory; out-of-range elements become 0.
" bufA[row_thread_id_times_block_size + col_thread_id] = (block * block_size + col_thread_id < A_col_size && get_global_id(0) < A_row_size) ? A[aBegin + aOffset] : 0;\n"
" bufB[col_thread_id * block_size + row_thread_id] = ( (block * block_size + row_thread_id < B_col_size) && get_global_id(1) < B_row_size ) ? B[bBegin + bOffset] : 0;\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
"__local float * bufAptr = bufA + row_thread_id_times_block_size;\n"
"__local float * bufBptr = bufB + col_thread_id * block_size;\n"
// Dot product over the whole tile width, whatever the work-group size is.
" for (size_t k = 0; k < block_size; ++k)\n"
" Csub += bufAptr[k] * bufBptr[k];\n"
// Second barrier: nobody may overwrite the tiles while others still read them.
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" aBegin += aStep;\n"
" bBegin += bStep;\n"
" }\n"
" if (get_global_id(0) < A_row_size && get_global_id(1) < B_row_size)\n"
" C[get_global_id(0) + C_row_start + (get_global_id(1) + C_col_start) * C_internal_rows] = Csub;\n"
"}\n"
; //matrix_prod_row_col_col_align1_prod_AT
325
326
}
//namespace kernels
327
}
//namespace linalg
328
}
//namespace viennacl
329
#endif
Generated on Fri Jul 27 2012 22:02:50 for ViennaCL - The Vienna Computing Library by Doxygen 1.8.1.2