ViennaCL - The Vienna Computing Library  1.2.0
matrix_solve_col_col_source.h
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
2 #define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
3 //Automatically generated file from auxiliary-directory, do not edit manually!
4 namespace viennacl
5 {
6  namespace linalg
7  {
8  namespace kernels
9  {
// OpenCL kernel source 'upper_trans_solve': in-place backward substitution for A \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from A_rows-1 down to 0.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim + row * A_internal_rows] from the
// entries elim < row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
11 "// file automatically generated - do not edit!\n"
12 "// inplace solve A \\ B^T\n"
13 "// matrix layouts: A...col_major, B...col_major\n"
14 "__kernel void upper_trans_solve(\n"
15 " __global const float * A,\n"
16 " unsigned int A_rows,\n"
17 " unsigned int A_cols,\n"
18 " unsigned int A_internal_rows,\n"
19 " unsigned int A_internal_cols,\n"
20 " __global float * B, \n"
21 " unsigned int B_rows,\n"
22 " unsigned int B_cols,\n"
23 " unsigned int B_internal_rows,\n"
24 " unsigned int B_internal_cols)\n"
25 "{ \n"
26 " float temp; \n"
27 " for (int row = A_rows-1; row > -1; --row) \n"
28 " { \n"
29 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
30 " if (get_local_id(0) == 0) \n"
31 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
32 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
33 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
34 " //eliminate column of op(A) with index 'row' in parallel: \n"
35 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
36 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
37 " }\n"
38 "}\n"
39 ; //matrix_solve_col_col_align1_upper_trans_solve
40 
// OpenCL kernel source 'trans_lower_solve': in-place forward substitution for A^T \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from 0 to A_rows-1.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim * A_internal_rows + row] from the
// entries elim > row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
42 "// file automatically generated - do not edit!\n"
43 "// inplace solve A^T \\ B\n"
44 "// matrix layouts: A...col_major, B...col_major\n"
45 "__kernel void trans_lower_solve(\n"
46 " __global const float * A,\n"
47 " unsigned int A_rows,\n"
48 " unsigned int A_cols,\n"
49 " unsigned int A_internal_rows,\n"
50 " unsigned int A_internal_cols,\n"
51 " __global float * B, \n"
52 " unsigned int B_rows,\n"
53 " unsigned int B_cols,\n"
54 " unsigned int B_internal_rows,\n"
55 " unsigned int B_internal_cols)\n"
56 "{ \n"
57 " float temp; \n"
58 " for (int row = 0; row < A_rows; ++row) \n"
59 " { \n"
60 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
61 " if (get_local_id(0) == 0) \n"
62 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
63 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
64 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
65 " //eliminate column of op(A) with index 'row' in parallel: \n"
66 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
67 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
68 " }\n"
69 "}\n"
70 ; //matrix_solve_col_col_align1_trans_lower_solve
71 
// OpenCL kernel source 'unit_lower_trans_solve': in-place forward substitution for A \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from 0 to A_rows-1.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim + row * A_internal_rows] from the entries
// elim > row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
73 "// file automatically generated - do not edit!\n"
74 "// inplace solve A \\ B^T\n"
75 "// matrix layouts: A...col_major, B...col_major\n"
76 "__kernel void unit_lower_trans_solve(\n"
77 " __global const float * A,\n"
78 " unsigned int A_rows,\n"
79 " unsigned int A_cols,\n"
80 " unsigned int A_internal_rows,\n"
81 " unsigned int A_internal_cols,\n"
82 " __global float * B, \n"
83 " unsigned int B_rows,\n"
84 " unsigned int B_cols,\n"
85 " unsigned int B_internal_rows,\n"
86 " unsigned int B_internal_cols)\n"
87 "{ \n"
88 " float temp; \n"
89 " for (int row = 0; row < A_rows; ++row) \n"
90 " { \n"
91 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
92 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
93 " //eliminate column of op(A) with index 'row' in parallel: \n"
94 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
95 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
96 " }\n"
97 "}\n"
98 ; //matrix_solve_col_col_align1_unit_lower_trans_solve
99 
// OpenCL kernel source 'trans_unit_upper_trans_solve': in-place backward substitution for
// A^T \ B^T (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from A_rows-1 down to 0.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim * A_internal_rows + row] from the entries
// elim < row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
101 "// file automatically generated - do not edit!\n"
102 "// inplace solve A^T \\ B^T\n"
103 "// matrix layouts: A...col_major, B...col_major\n"
104 "__kernel void trans_unit_upper_trans_solve(\n"
105 " __global const float * A,\n"
106 " unsigned int A_rows,\n"
107 " unsigned int A_cols,\n"
108 " unsigned int A_internal_rows,\n"
109 " unsigned int A_internal_cols,\n"
110 " __global float * B, \n"
111 " unsigned int B_rows,\n"
112 " unsigned int B_cols,\n"
113 " unsigned int B_internal_rows,\n"
114 " unsigned int B_internal_cols)\n"
115 "{ \n"
116 " float temp; \n"
117 " for (int row = A_rows-1; row > -1; --row) \n"
118 " { \n"
119 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
120 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
121 " //eliminate column of op(A) with index 'row' in parallel: \n"
122 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
123 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
124 " }\n"
125 "}\n"
126 ; //matrix_solve_col_col_align1_trans_unit_upper_trans_solve
127 
// OpenCL kernel source 'unit_upper_trans_solve': in-place backward substitution for A \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from A_rows-1 down to 0.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim + row * A_internal_rows] from the entries
// elim < row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
129 "// file automatically generated - do not edit!\n"
130 "// inplace solve A \\ B^T\n"
131 "// matrix layouts: A...col_major, B...col_major\n"
132 "__kernel void unit_upper_trans_solve(\n"
133 " __global const float * A,\n"
134 " unsigned int A_rows,\n"
135 " unsigned int A_cols,\n"
136 " unsigned int A_internal_rows,\n"
137 " unsigned int A_internal_cols,\n"
138 " __global float * B, \n"
139 " unsigned int B_rows,\n"
140 " unsigned int B_cols,\n"
141 " unsigned int B_internal_rows,\n"
142 " unsigned int B_internal_cols)\n"
143 "{ \n"
144 " float temp; \n"
145 " for (int row = A_rows-1; row > -1; --row) \n"
146 " { \n"
147 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
148 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
149 " //eliminate column of op(A) with index 'row' in parallel: \n"
150 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
151 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
152 " }\n"
153 "}\n"
154 ; //matrix_solve_col_col_align1_unit_upper_trans_solve
155 
// OpenCL kernel source 'unit_lower_solve': in-place forward substitution for A \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from 0 to A_rows-1.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim + row * A_internal_rows] from the entries
// elim > row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
157 "// file automatically generated - do not edit!\n"
158 "// inplace solve A \\ B\n"
159 "// matrix layouts: A...col_major, B...col_major\n"
160 "__kernel void unit_lower_solve(\n"
161 " __global const float * A,\n"
162 " unsigned int A_rows,\n"
163 " unsigned int A_cols,\n"
164 " unsigned int A_internal_rows,\n"
165 " unsigned int A_internal_cols,\n"
166 " __global float * B, \n"
167 " unsigned int B_rows,\n"
168 " unsigned int B_cols,\n"
169 " unsigned int B_internal_rows,\n"
170 " unsigned int B_internal_cols)\n"
171 "{ \n"
172 " float temp; \n"
173 " for (int row = 0; row < A_rows; ++row) \n"
174 " { \n"
175 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
176 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
177 " //eliminate column of op(A) with index 'row' in parallel: \n"
178 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
179 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
180 " }\n"
181 "}\n"
182 ; //matrix_solve_col_col_align1_unit_lower_solve
183 
// OpenCL kernel source 'trans_lower_trans_solve': in-place forward substitution for A^T \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from 0 to A_rows-1.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim * A_internal_rows + row] from the
// entries elim > row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
185 "// file automatically generated - do not edit!\n"
186 "// inplace solve A^T \\ B^T\n"
187 "// matrix layouts: A...col_major, B...col_major\n"
188 "__kernel void trans_lower_trans_solve(\n"
189 " __global const float * A,\n"
190 " unsigned int A_rows,\n"
191 " unsigned int A_cols,\n"
192 " unsigned int A_internal_rows,\n"
193 " unsigned int A_internal_cols,\n"
194 " __global float * B, \n"
195 " unsigned int B_rows,\n"
196 " unsigned int B_cols,\n"
197 " unsigned int B_internal_rows,\n"
198 " unsigned int B_internal_cols)\n"
199 "{ \n"
200 " float temp; \n"
201 " for (int row = 0; row < A_rows; ++row) \n"
202 " { \n"
203 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
204 " if (get_local_id(0) == 0) \n"
205 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
206 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
207 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
208 " //eliminate column of op(A) with index 'row' in parallel: \n"
209 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
210 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
211 " }\n"
212 "}\n"
213 ; //matrix_solve_col_col_align1_trans_lower_trans_solve
214 
// OpenCL kernel source 'unit_upper_solve': in-place backward substitution for A \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from A_rows-1 down to 0.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim + row * A_internal_rows] from the entries
// elim < row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
216 "// file automatically generated - do not edit!\n"
217 "// inplace solve A \\ B\n"
218 "// matrix layouts: A...col_major, B...col_major\n"
219 "__kernel void unit_upper_solve(\n"
220 " __global const float * A,\n"
221 " unsigned int A_rows,\n"
222 " unsigned int A_cols,\n"
223 " unsigned int A_internal_rows,\n"
224 " unsigned int A_internal_cols,\n"
225 " __global float * B, \n"
226 " unsigned int B_rows,\n"
227 " unsigned int B_cols,\n"
228 " unsigned int B_internal_rows,\n"
229 " unsigned int B_internal_cols)\n"
230 "{ \n"
231 " float temp; \n"
232 " for (int row = A_rows-1; row > -1; --row) \n"
233 " { \n"
234 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
235 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
236 " //eliminate column of op(A) with index 'row' in parallel: \n"
237 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
238 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
239 " }\n"
240 "}\n"
241 ; //matrix_solve_col_col_align1_unit_upper_solve
242 
// OpenCL kernel source 'lower_solve': in-place forward substitution for A \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from 0 to A_rows-1.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim + row * A_internal_rows] from the
// entries elim > row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
244 "// file automatically generated - do not edit!\n"
245 "// inplace solve A \\ B\n"
246 "// matrix layouts: A...col_major, B...col_major\n"
247 "__kernel void lower_solve(\n"
248 " __global const float * A,\n"
249 " unsigned int A_rows,\n"
250 " unsigned int A_cols,\n"
251 " unsigned int A_internal_rows,\n"
252 " unsigned int A_internal_cols,\n"
253 " __global float * B, \n"
254 " unsigned int B_rows,\n"
255 " unsigned int B_cols,\n"
256 " unsigned int B_internal_rows,\n"
257 " unsigned int B_internal_cols)\n"
258 "{ \n"
259 " float temp; \n"
260 " for (int row = 0; row < A_rows; ++row) \n"
261 " { \n"
262 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
263 " if (get_local_id(0) == 0) \n"
264 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
265 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
266 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
267 " //eliminate column of op(A) with index 'row' in parallel: \n"
268 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
269 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
270 " }\n"
271 "}\n"
272 ; //matrix_solve_col_col_align1_lower_solve
273 
// OpenCL kernel source 'trans_unit_upper_solve': in-place backward substitution for A^T \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from A_rows-1 down to 0.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim * A_internal_rows + row] from the entries
// elim < row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
275 "// file automatically generated - do not edit!\n"
276 "// inplace solve A^T \\ B\n"
277 "// matrix layouts: A...col_major, B...col_major\n"
278 "__kernel void trans_unit_upper_solve(\n"
279 " __global const float * A,\n"
280 " unsigned int A_rows,\n"
281 " unsigned int A_cols,\n"
282 " unsigned int A_internal_rows,\n"
283 " unsigned int A_internal_cols,\n"
284 " __global float * B, \n"
285 " unsigned int B_rows,\n"
286 " unsigned int B_cols,\n"
287 " unsigned int B_internal_rows,\n"
288 " unsigned int B_internal_cols)\n"
289 "{ \n"
290 " float temp; \n"
291 " for (int row = A_rows-1; row > -1; --row) \n"
292 " { \n"
293 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
294 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
295 " //eliminate column of op(A) with index 'row' in parallel: \n"
296 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
297 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
298 " }\n"
299 "}\n"
300 ; //matrix_solve_col_col_align1_trans_unit_upper_solve
301 
// OpenCL kernel source 'upper_solve': in-place backward substitution for A \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from A_rows-1 down to 0.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim + row * A_internal_rows] from the
// entries elim < row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
303 "// file automatically generated - do not edit!\n"
304 "// inplace solve A \\ B\n"
305 "// matrix layouts: A...col_major, B...col_major\n"
306 "__kernel void upper_solve(\n"
307 " __global const float * A,\n"
308 " unsigned int A_rows,\n"
309 " unsigned int A_cols,\n"
310 " unsigned int A_internal_rows,\n"
311 " unsigned int A_internal_cols,\n"
312 " __global float * B, \n"
313 " unsigned int B_rows,\n"
314 " unsigned int B_cols,\n"
315 " unsigned int B_internal_rows,\n"
316 " unsigned int B_internal_cols)\n"
317 "{ \n"
318 " float temp; \n"
319 " for (int row = A_rows-1; row > -1; --row) \n"
320 " { \n"
321 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
322 " if (get_local_id(0) == 0) \n"
323 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
324 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
325 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
326 " //eliminate column of op(A) with index 'row' in parallel: \n"
327 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
328 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
329 " }\n"
330 "}\n"
331 ; //matrix_solve_col_col_align1_upper_solve
332 
// OpenCL kernel source 'trans_upper_solve': in-place backward substitution for A^T \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from A_rows-1 down to 0.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim * A_internal_rows + row] from the
// entries elim < row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
334 "// file automatically generated - do not edit!\n"
335 "// inplace solve A^T \\ B\n"
336 "// matrix layouts: A...col_major, B...col_major\n"
337 "__kernel void trans_upper_solve(\n"
338 " __global const float * A,\n"
339 " unsigned int A_rows,\n"
340 " unsigned int A_cols,\n"
341 " unsigned int A_internal_rows,\n"
342 " unsigned int A_internal_cols,\n"
343 " __global float * B, \n"
344 " unsigned int B_rows,\n"
345 " unsigned int B_cols,\n"
346 " unsigned int B_internal_rows,\n"
347 " unsigned int B_internal_cols)\n"
348 "{ \n"
349 " float temp; \n"
350 " for (int row = A_rows-1; row > -1; --row) \n"
351 " { \n"
352 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
353 " if (get_local_id(0) == 0) \n"
354 " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
355 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
356 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
357 " //eliminate column of op(A) with index 'row' in parallel: \n"
358 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
359 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
360 " }\n"
361 "}\n"
362 ; //matrix_solve_col_col_align1_trans_upper_solve
363 
// OpenCL kernel source 'trans_upper_trans_solve': in-place backward substitution for A^T \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from A_rows-1 down to 0.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim * A_internal_rows + row] from the
// entries elim < row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
365 "// file automatically generated - do not edit!\n"
366 "// inplace solve A^T \\ B^T\n"
367 "// matrix layouts: A...col_major, B...col_major\n"
368 "__kernel void trans_upper_trans_solve(\n"
369 " __global const float * A,\n"
370 " unsigned int A_rows,\n"
371 " unsigned int A_cols,\n"
372 " unsigned int A_internal_rows,\n"
373 " unsigned int A_internal_cols,\n"
374 " __global float * B, \n"
375 " unsigned int B_rows,\n"
376 " unsigned int B_cols,\n"
377 " unsigned int B_internal_rows,\n"
378 " unsigned int B_internal_cols)\n"
379 "{ \n"
380 " float temp; \n"
381 " for (int row = A_rows-1; row > -1; --row) \n"
382 " { \n"
383 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
384 " if (get_local_id(0) == 0) \n"
385 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
386 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
387 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
388 " //eliminate column of op(A) with index 'row' in parallel: \n"
389 " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
390 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
391 " }\n"
392 "}\n"
393 ; //matrix_solve_col_col_align1_trans_upper_trans_solve
394 
// OpenCL kernel source 'lower_trans_solve': in-place forward substitution for A \ B^T
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from 0 to A_rows-1.
// Per row: work-item 0 divides the entry by the diagonal A(row,row); after the barrier every
// work-item reads it into temp and subtracts temp * A[elim + row * A_internal_rows] from the
// entries elim > row, with the work-items striding over elim by get_local_size(0).
// The diagonal is addressed with the A_internal_rows stride, i.e. the same column-major
// addressing the elimination step uses; a row*A_internal_cols offset is only equivalent
// when A_internal_rows == A_internal_cols.
// NOTE(review): this file is marked as generated - mirror this change in the generator input.
396 "// file automatically generated - do not edit!\n"
397 "// inplace solve A \\ B^T\n"
398 "// matrix layouts: A...col_major, B...col_major\n"
399 "__kernel void lower_trans_solve(\n"
400 " __global const float * A,\n"
401 " unsigned int A_rows,\n"
402 " unsigned int A_cols,\n"
403 " unsigned int A_internal_rows,\n"
404 " unsigned int A_internal_cols,\n"
405 " __global float * B, \n"
406 " unsigned int B_rows,\n"
407 " unsigned int B_cols,\n"
408 " unsigned int B_internal_rows,\n"
409 " unsigned int B_internal_cols)\n"
410 "{ \n"
411 " float temp; \n"
412 " for (int row = 0; row < A_rows; ++row) \n"
413 " { \n"
414 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
415 " if (get_local_id(0) == 0) \n"
416 " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
417 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
418 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
419 " //eliminate column of op(A) with index 'row' in parallel: \n"
420 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
421 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
422 " }\n"
423 "}\n"
424 ; //matrix_solve_col_col_align1_lower_trans_solve
425 
// OpenCL kernel source 'trans_unit_lower_solve': in-place forward substitution for A^T \ B
// (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row + g * B_internal_rows]; rows run from 0 to A_rows-1.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim * A_internal_rows + row] from the entries
// elim > row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
427 "// file automatically generated - do not edit!\n"
428 "// inplace solve A^T \\ B\n"
429 "// matrix layouts: A...col_major, B...col_major\n"
430 "__kernel void trans_unit_lower_solve(\n"
431 " __global const float * A,\n"
432 " unsigned int A_rows,\n"
433 " unsigned int A_cols,\n"
434 " unsigned int A_internal_rows,\n"
435 " unsigned int A_internal_cols,\n"
436 " __global float * B, \n"
437 " unsigned int B_rows,\n"
438 " unsigned int B_cols,\n"
439 " unsigned int B_internal_rows,\n"
440 " unsigned int B_internal_cols)\n"
441 "{ \n"
442 " float temp; \n"
443 " for (int row = 0; row < A_rows; ++row) \n"
444 " { \n"
445 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
446 " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
447 " //eliminate column of op(A) with index 'row' in parallel: \n"
448 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
449 " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
450 " }\n"
451 "}\n"
452 ; //matrix_solve_col_col_align1_trans_unit_lower_solve
453 
// OpenCL kernel source 'trans_unit_lower_trans_solve': in-place forward substitution for
// A^T \ B^T (A and B column-major, per the kernel's own header comment).
// Work-group g operates on the entries B[row * B_internal_rows + g]; rows run from 0 to A_rows-1.
// No division is performed and A's diagonal is never read, i.e. it is treated as 1
// (the 'unit' in the kernel name). After the barrier every work-item reads the current
// entry into temp and subtracts temp * A[elim * A_internal_rows + row] from the entries
// elim > row, with the work-items striding over elim by get_local_size(0).
// A_cols, A_internal_cols, B_rows, B_cols and B_internal_cols are not used by the kernel body.
455 "// file automatically generated - do not edit!\n"
456 "// inplace solve A^T \\ B^T\n"
457 "// matrix layouts: A...col_major, B...col_major\n"
458 "__kernel void trans_unit_lower_trans_solve(\n"
459 " __global const float * A,\n"
460 " unsigned int A_rows,\n"
461 " unsigned int A_cols,\n"
462 " unsigned int A_internal_rows,\n"
463 " unsigned int A_internal_cols,\n"
464 " __global float * B, \n"
465 " unsigned int B_rows,\n"
466 " unsigned int B_cols,\n"
467 " unsigned int B_internal_rows,\n"
468 " unsigned int B_internal_cols)\n"
469 "{ \n"
470 " float temp; \n"
471 " for (int row = 0; row < A_rows; ++row) \n"
472 " { \n"
473 " barrier(CLK_GLOBAL_MEM_FENCE); \n"
474 " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
475 " //eliminate column of op(A) with index 'row' in parallel: \n"
476 " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
477 " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
478 " }\n"
479 "}\n"
480 ; //matrix_solve_col_col_align1_trans_unit_lower_trans_solve
481 
482  } //namespace kernels
483  } //namespace linalg
484 } //namespace viennacl
485 #endif