ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
matrix_solve_col_col_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_MATRIX_SOLVE_COL_COL_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
// OpenCL source of the `upper_trans_solve` kernel ("inplace solve A \ B^T"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. In every step work-item 0 of the
// group divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A,
// a barrier follows, and all work-items then subtract temp * A[elim + row * A_internal_rows]
// from the B entries with elim < row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_upper_trans_solve
40
41
// OpenCL source of the `trans_lower_solve` kernel ("inplace solve A^T \ B"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. In every step work-item 0 of the group
// divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A, a barrier
// follows, and all work-items then subtract temp * A[elim * A_internal_rows + row] from the
// B entries with elim > row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_trans_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_lower_solve
71
72
// OpenCL source of the `unit_lower_trans_solve` kernel ("inplace solve A \ B^T"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. Each step starts with a barrier, reads
// B[row * B_internal_rows + get_group_id(0)] into `temp`, and lets the work-items subtract
// temp * A[elim + row * A_internal_rows] from the B entries with elim > row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_unit_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_unit_lower_trans_solve
99
100
// OpenCL source of the `trans_unit_upper_trans_solve` kernel ("inplace solve A^T \ B^T";
// the embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. Each step starts with a barrier, reads
// B[row * B_internal_rows + get_group_id(0)] into `temp`, and lets the work-items subtract
// temp * A[elim * A_internal_rows + row] from the B entries with elim < row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_trans_unit_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_unit_upper_trans_solve
127
128
// OpenCL source of the `unit_upper_trans_solve` kernel ("inplace solve A \ B^T"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. Each step starts with a barrier, reads
// B[row * B_internal_rows + get_group_id(0)] into `temp`, and lets the work-items subtract
// temp * A[elim + row * A_internal_rows] from the B entries with elim < row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_unit_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_unit_upper_trans_solve
155
156
// OpenCL source of the `unit_lower_solve` kernel ("inplace solve A \ B"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. Each step starts with a barrier, reads
// B[row + get_group_id(0) * B_internal_rows] into `temp`, and lets the work-items subtract
// temp * A[elim + row * A_internal_rows] from the B entries with elim > row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_unit_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_unit_lower_solve
183
184
// OpenCL source of the `trans_lower_trans_solve` kernel ("inplace solve A^T \ B^T"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. In every step work-item 0 of the group
// divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A, a barrier
// follows, and all work-items then subtract temp * A[elim * A_internal_rows + row] from the
// B entries with elim > row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_trans_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_lower_trans_solve
214
215
// OpenCL source of the `unit_upper_solve` kernel ("inplace solve A \ B"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. Each step starts with a barrier, reads
// B[row + get_group_id(0) * B_internal_rows] into `temp`, and lets the work-items subtract
// temp * A[elim + row * A_internal_rows] from the B entries with elim < row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_unit_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void unit_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_unit_upper_solve
242
243
// OpenCL source of the `lower_solve` kernel ("inplace solve A \ B"; the embedded header
// states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. In every step work-item 0 of the group
// divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A, a barrier
// follows, and all work-items then subtract temp * A[elim + row * A_internal_rows] from the
// B entries with elim > row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_lower_solve
273
274
// OpenCL source of the `trans_unit_upper_solve` kernel ("inplace solve A^T \ B"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. Each step starts with a barrier, reads
// B[row + get_group_id(0) * B_internal_rows] into `temp`, and lets the work-items subtract
// temp * A[elim * A_internal_rows + row] from the B entries with elim < row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_trans_unit_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_unit_upper_solve
301
302
// OpenCL source of the `upper_solve` kernel ("inplace solve A \ B"; the embedded header
// states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. In every step work-item 0 of the
// group divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A,
// a barrier follows, and all work-items then subtract temp * A[elim + row * A_internal_rows]
// from the B entries with elim < row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_upper_solve
332
333
// OpenCL source of the `trans_upper_solve` kernel ("inplace solve A^T \ B"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. In every step work-item 0 of the
// group divides B[row + get_group_id(0) * B_internal_rows] by the diagonal entry of A,
// a barrier follows, and all work-items then subtract temp * A[elim * A_internal_rows + row]
// from the B entries with elim < row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_trans_upper_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_upper_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row + get_group_id(0) * B_internal_rows] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_upper_solve
363
364
// OpenCL source of the `trans_upper_trans_solve` kernel ("inplace solve A^T \ B^T"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from A_rows-1 down to 0. In every step work-item 0 of the
// group divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A,
// a barrier follows, and all work-items then subtract temp * A[elim * A_internal_rows + row]
// from the B entries with elim < row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_trans_upper_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_upper_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = A_rows-1; row > -1; --row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = get_local_id(0); elim < row; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_upper_trans_solve
394
395
// OpenCL source of the `lower_trans_solve` kernel ("inplace solve A \ B^T"; the embedded
// header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. In every step work-item 0 of the group
// divides B[row * B_internal_rows + get_group_id(0)] by the diagonal entry of A, a barrier
// follows, and all work-items then subtract temp * A[elim + row * A_internal_rows] from the
// B entries with elim > row.
//
// Fix: the diagonal entry used to be read as A[row + row*A_internal_cols], although the
// elimination step of this very kernel strides A by A_internal_rows. Both accesses now use
// A_internal_rows, so the diagonal is addressed correctly even if the two paddings differ.
// NOTE(review): this header is marked as automatically generated - the same change
// presumably has to be made in the kernel file it is generated from; confirm.
const char * const matrix_solve_col_col_align1_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " if (get_local_id(0) == 0) \n"
  " B[row * B_internal_rows + get_group_id(0)] /= A[row + row*A_internal_rows]; \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim + row * A_internal_rows];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_lower_trans_solve
425
426
// OpenCL source of the `trans_unit_lower_solve` kernel ("inplace solve A^T \ B"; the
// embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. Each step starts with a barrier, reads
// B[row + get_group_id(0) * B_internal_rows] into `temp`, and lets the work-items subtract
// temp * A[elim * A_internal_rows + row] from the B entries with elim > row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_trans_unit_lower_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_lower_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row + get_group_id(0) * B_internal_rows]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim + get_group_id(0) * B_internal_rows] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_unit_lower_solve
453
454
// OpenCL source of the `trans_unit_lower_trans_solve` kernel ("inplace solve A^T \ B^T";
// the embedded header states that A and B are both col_major).
//
// Kernel outline: `row` runs from 0 up to A_rows-1. Each step starts with a barrier, reads
// B[row * B_internal_rows + get_group_id(0)] into `temp`, and lets the work-items subtract
// temp * A[elim * A_internal_rows + row] from the B entries with elim > row.
// The kernel never divides by an entry of A.
const char * const matrix_solve_col_col_align1_trans_unit_lower_trans_solve =
  "// file automatically generated - do not edit!\n"
  "// inplace solve A^T \\ B^T\n"
  "// matrix layouts: A...col_major, B...col_major\n"
  "__kernel void trans_unit_lower_trans_solve(\n"
  " __global const float * A,\n"
  " unsigned int A_rows,\n"
  " unsigned int A_cols,\n"
  " unsigned int A_internal_rows,\n"
  " unsigned int A_internal_cols,\n"
  " __global float * B, \n"
  " unsigned int B_rows,\n"
  " unsigned int B_cols,\n"
  " unsigned int B_internal_rows,\n"
  " unsigned int B_internal_cols)\n"
  "{ \n"
  " float temp; \n"
  " for (int row = 0; row < A_rows; ++row) \n"
  " { \n"
  " barrier(CLK_GLOBAL_MEM_FENCE); \n"
  " temp = B[row * B_internal_rows + get_group_id(0)]; \n"
  " //eliminate column of op(A) with index 'row' in parallel: \n"
  " for (int elim = row + get_local_id(0) + 1; elim < A_rows; elim += get_local_size(0)) \n"
  " B[elim * B_internal_rows + get_group_id(0)] -= temp * A[elim * A_internal_rows + row];\n"
  " }\n"
  "}\n"
  ; // matrix_solve_col_col_align1_trans_unit_lower_trans_solve
481
482
}
//namespace kernels
483
}
//namespace linalg
484
}
//namespace viennacl
485
#endif
Generated on Fri Jul 27 2012 22:02:50 for ViennaCL - The Vienna Computing Library by Doxygen 1.8.1.2