ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
compressed_matrix_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_COMPRESSED_MATRIX_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_COMPRESSED_MATRIX_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
const
char
*
const
compressed_matrix_align8_vec_mul
=
11
"__kernel void vec_mul(\n"
12
" __global const unsigned int * row_indices,\n"
13
" __global const uint8 * column_indices, \n"
14
" __global const float8 * elements,\n"
15
" __global const float * vector, \n"
16
" __global float * result,\n"
17
" unsigned int size)\n"
18
"{ \n"
19
" float dot_prod;\n"
20
" unsigned int start, next_stop;\n"
21
" uint8 col_idx;\n"
22
" float8 tmp_vec;\n"
23
" float8 tmp_entries;\n"
24
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
25
" {\n"
26
" dot_prod = 0.0f;\n"
27
" start = row_indices[row] / 8;\n"
28
" next_stop = row_indices[row+1] / 8;\n"
29
" for (unsigned int i = start; i < next_stop; ++i)\n"
30
" {\n"
31
" col_idx = column_indices[i];\n"
32
" tmp_entries = elements[i];\n"
33
" tmp_vec.s0 = vector[col_idx.s0];\n"
34
" tmp_vec.s1 = vector[col_idx.s1];\n"
35
" tmp_vec.s2 = vector[col_idx.s2];\n"
36
" tmp_vec.s3 = vector[col_idx.s3];\n"
37
" tmp_vec.s4 = vector[col_idx.s4];\n"
38
" tmp_vec.s5 = vector[col_idx.s5];\n"
39
" tmp_vec.s6 = vector[col_idx.s6];\n"
40
" tmp_vec.s7 = vector[col_idx.s7];\n"
41
" dot_prod += dot(tmp_entries.lo, tmp_vec.lo);\n"
42
" dot_prod += dot(tmp_entries.hi, tmp_vec.hi);\n"
43
" }\n"
44
" result[row] = dot_prod;\n"
45
" }\n"
46
"}\n"
47
;
//compressed_matrix_align8_vec_mul
48
49
const
char
*
const
compressed_matrix_align1_bicgstab_kernel2
=
50
"void helper_bicgstab_kernel2_parallel_reduction( __local float * tmp_buffer )\n"
51
"{\n"
52
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
53
" {\n"
54
" barrier(CLK_LOCAL_MEM_FENCE);\n"
55
" if (get_local_id(0) < stride)\n"
56
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride];\n"
57
" }\n"
58
"}\n"
59
"//////// inner products:\n"
60
"float bicgstab_kernel2_inner_prod(\n"
61
" __global const float * vec1,\n"
62
" __global const float * vec2,\n"
63
" unsigned int size,\n"
64
" __local float * tmp_buffer)\n"
65
"{\n"
66
" float tmp = 0;\n"
67
" unsigned int i_end = ((size - 1) / get_local_size(0) + 1) * get_local_size(0);\n"
68
" for (unsigned int i = get_local_id(0); i < i_end; i += get_local_size(0))\n"
69
" {\n"
70
" if (i < size)\n"
71
" tmp += vec1[i] * vec2[i];\n"
72
" }\n"
73
" tmp_buffer[get_local_id(0)] = tmp;\n"
74
" \n"
75
" helper_bicgstab_kernel2_parallel_reduction(tmp_buffer);\n"
76
" barrier(CLK_LOCAL_MEM_FENCE);\n"
77
" return tmp_buffer[0];\n"
78
"}\n"
79
"__kernel void bicgstab_kernel2(\n"
80
" __global const float * tmp0,\n"
81
" __global const float * tmp1,\n"
82
" __global const float * r0star, \n"
83
" __global const float * s, \n"
84
" __global float * p, \n"
85
" __global float * result,\n"
86
" __global float * residual,\n"
87
" __global const float * alpha,\n"
88
" __global float * ip_rr0star,\n"
89
" __global float * error_estimate,\n"
90
" __local float * tmp_buffer,\n"
91
" unsigned int size) \n"
92
"{ \n"
93
" float omega_local = bicgstab_kernel2_inner_prod(tmp1, s, size, tmp_buffer) / bicgstab_kernel2_inner_prod(tmp1, tmp1, size, tmp_buffer);\n"
94
" float alpha_local = alpha[0];\n"
95
" \n"
96
" //result += alpha * p + omega * s;\n"
97
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
98
" result[i] += alpha_local * p[i] + omega_local * s[i];\n"
99
" //residual = s - omega * tmp1;\n"
100
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
101
" residual[i] = s[i] - omega_local * tmp1[i];\n"
102
" //new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);\n"
103
" float new_ip_rr0star = bicgstab_kernel2_inner_prod(residual, r0star, size, tmp_buffer);\n"
104
" float beta = (new_ip_rr0star / ip_rr0star[0]) * (alpha_local / omega_local);\n"
105
" \n"
106
" //p = residual + beta * (p - omega*tmp0);\n"
107
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
108
" p[i] = residual[i] + beta * (p[i] - omega_local * tmp0[i]);\n"
109
" //compute norm of residual:\n"
110
" float new_error_estimate = bicgstab_kernel2_inner_prod(residual, residual, size, tmp_buffer);\n"
111
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
112
" //update values:\n"
113
" if (get_global_id(0) == 0)\n"
114
" {\n"
115
" error_estimate[0] = new_error_estimate;\n"
116
" ip_rr0star[0] = new_ip_rr0star;\n"
117
" }\n"
118
"}\n"
119
;
//compressed_matrix_align1_bicgstab_kernel2
120
121
const
char
*
const
compressed_matrix_align1_lu_forward
=
122
" \n"
123
"// compute y in Ly = z for incomplete LU factorizations of a sparse matrix in compressed format\n"
124
"__kernel void lu_forward(\n"
125
" __global const unsigned int * row_indices,\n"
126
" __global const unsigned int * column_indices, \n"
127
" __global const float * elements,\n"
128
" __local int * buffer, \n"
129
" __local float * vec_entries, //a memory block from vector\n"
130
" __global float * vector,\n"
131
" unsigned int size) \n"
132
"{\n"
133
" int waiting_for; //block index that must be finished before the current thread can start\n"
134
" unsigned int waiting_for_index;\n"
135
" int block_offset;\n"
136
" unsigned int col;\n"
137
" unsigned int row;\n"
138
" unsigned int row_index_end;\n"
139
" \n"
140
" //backward substitution: one thread per row in blocks of get_global_size(0)\n"
141
" for (unsigned int block_num = 0; block_num <= size / get_global_size(0); ++block_num)\n"
142
" {\n"
143
" block_offset = block_num * get_global_size(0);\n"
144
" row = block_offset + get_global_id(0);\n"
145
" buffer[get_global_id(0)] = 0; //set flag to 'undone'\n"
146
" waiting_for = -1;\n"
147
" if (row < size)\n"
148
" {\n"
149
" vec_entries[get_global_id(0)] = vector[row];\n"
150
" waiting_for_index = row_indices[row];\n"
151
" row_index_end = row_indices[row+1];\n"
152
" }\n"
153
" \n"
154
" if (get_global_id(0) == 0)\n"
155
" buffer[get_global_size(0)] = 1;\n"
156
" //try to eliminate all lines in the block. \n"
157
" //in worst case scenarios, in each step only one line can be substituted, thus loop\n"
158
" for (unsigned int k = 0; k<get_global_size(0); ++k)\n"
159
" {\n"
160
" barrier(CLK_LOCAL_MEM_FENCE);\n"
161
" if (row < size) //valid index?\n"
162
" {\n"
163
" if (waiting_for >= 0)\n"
164
" {\n"
165
" if (buffer[waiting_for] == 1)\n"
166
" waiting_for = -1;\n"
167
" }\n"
168
" \n"
169
" if (waiting_for == -1) //substitution not yet done, check whether possible\n"
170
" {\n"
171
" //check whether reduction is possible:\n"
172
" for (unsigned int j = waiting_for_index; j < row_index_end; ++j)\n"
173
" {\n"
174
" col = column_indices[j];\n"
175
" if (col < block_offset) //index valid, but not from current block\n"
176
" vec_entries[get_global_id(0)] -= elements[j] * vector[col];\n"
177
" else if (col < row) //index is from current block\n"
178
" {\n"
179
" if (buffer[col - block_offset] == 0) //entry is not yet calculated\n"
180
" {\n"
181
" waiting_for = col - block_offset;\n"
182
" waiting_for_index = j;\n"
183
" break;\n"
184
" }\n"
185
" else //updated entry is available in shared memory:\n"
186
" vec_entries[get_global_id(0)] -= elements[j] * vec_entries[col - block_offset];\n"
187
" }\n"
188
" }\n"
189
" \n"
190
" if (waiting_for == -1) //this row is done\n"
191
" {\n"
192
" buffer[get_global_id(0)] = 1;\n"
193
" waiting_for = -2; //magic number: thread is finished\n"
194
" }\n"
195
" } \n"
196
" } //row < size\n"
197
" else\n"
198
" buffer[get_global_id(0)] = 1; //work done (because there is no work to be done at all...)\n"
199
" ///////// check whether all threads are done. If yes, exit loop /////////////\n"
200
" \n"
201
" if (buffer[get_global_id(0)] == 0)\n"
202
" buffer[get_global_size(0)] = 0;\n"
203
" barrier(CLK_LOCAL_MEM_FENCE);\n"
204
" \n"
205
" if (buffer[get_global_size(0)] > 0) //all threads break this loop simultaneously\n"
206
" break;\n"
207
" if (get_global_id(0) == 0)\n"
208
" buffer[get_global_size(0)] = 1;\n"
209
" } //for k\n"
210
" \n"
211
" //write to vector:\n"
212
" if (row < size)\n"
213
" vector[row] = vec_entries[get_global_id(0)];\n"
214
" \n"
215
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
216
" } //for block_num\n"
217
"}\n"
218
;
//compressed_matrix_align1_lu_forward
219
220
const
char
*
const
compressed_matrix_align1_jacobi
=
221
"__kernel void jacobi(\n"
222
" __global const unsigned int * row_indices,\n"
223
" __global const unsigned int * column_indices,\n"
224
" __global const float * elements,\n"
225
" float weight,\n"
226
" __global const float * old_result,\n"
227
" __global float * new_result,\n"
228
" __global const float * rhs,\n"
229
" unsigned int size)\n"
230
" {\n"
231
" float sum, diag=1;\n"
232
" int col;\n"
233
" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
234
" {\n"
235
" sum = 0;\n"
236
" for (unsigned int j = row_indices[i]; j<row_indices[i+1]; j++)\n"
237
" {\n"
238
" col = column_indices[j];\n"
239
" if (i == col)\n"
240
" diag = elements[j];\n"
241
" else \n"
242
" sum += elements[j] * old_result[col]; \n"
243
" } \n"
244
" new_result[i] = weight * (rhs[i]-sum) / diag + (1-weight) * old_result[i]; \n"
245
" } \n"
246
" } \n"
247
;
//compressed_matrix_align1_jacobi
248
249
const
char
*
const
compressed_matrix_align1_lu_backward
=
250
"// compute x in Ux = y for incomplete LU factorizations of a sparse matrix in compressed format\n"
251
"__kernel void lu_backward(\n"
252
" __global const unsigned int * row_indices,\n"
253
" __global const unsigned int * column_indices, \n"
254
" __global const float * elements,\n"
255
" __local int * buffer, \n"
256
" __local float * vec_entries, //a memory block from vector\n"
257
" __global float * vector,\n"
258
" unsigned int size) \n"
259
"{\n"
260
" int waiting_for; //block index that must be finished before the current thread can start\n"
261
" unsigned int waiting_for_index;\n"
262
" unsigned int block_offset;\n"
263
" unsigned int col;\n"
264
" unsigned int row;\n"
265
" unsigned int row_index_end;\n"
266
" float diagonal_entry = 42;\n"
267
" \n"
268
" //forward substitution: one thread per row in blocks of get_global_size(0)\n"
269
" for (int block_num = size / get_global_size(0); block_num > -1; --block_num)\n"
270
" {\n"
271
" block_offset = block_num * get_global_size(0);\n"
272
" row = block_offset + get_global_id(0);\n"
273
" buffer[get_global_id(0)] = 0; //set flag to 'undone'\n"
274
" waiting_for = -1;\n"
275
" \n"
276
" if (row < size)\n"
277
" {\n"
278
" vec_entries[get_global_id(0)] = vector[row];\n"
279
" waiting_for_index = row_indices[row];\n"
280
" row_index_end = row_indices[row+1];\n"
281
" diagonal_entry = column_indices[waiting_for_index];\n"
282
" }\n"
283
" \n"
284
" if (get_global_id(0) == 0)\n"
285
" buffer[get_global_size(0)] = 1;\n"
286
" //try to eliminate all lines in the block. \n"
287
" //in worst case scenarios, in each step only one line can be substituted, thus loop\n"
288
" for (unsigned int k = 0; k<get_global_size(0); ++k)\n"
289
" {\n"
290
" barrier(CLK_LOCAL_MEM_FENCE);\n"
291
" if (row < size) //valid index?\n"
292
" {\n"
293
" if (waiting_for >= 0)\n"
294
" {\n"
295
" if (buffer[waiting_for] == 1)\n"
296
" waiting_for = -1;\n"
297
" }\n"
298
" \n"
299
" if (waiting_for == -1) //substitution not yet done, check whether possible\n"
300
" {\n"
301
" //check whether reduction is possible:\n"
302
" for (unsigned int j = waiting_for_index; j < row_index_end; ++j)\n"
303
" {\n"
304
" col = column_indices[j];\n"
305
" barrier(CLK_LOCAL_MEM_FENCE);\n"
306
" if (col >= block_offset + get_global_size(0)) //index valid, but not from current block\n"
307
" vec_entries[get_global_id(0)] -= elements[j] * vector[col];\n"
308
" else if (col > row) //index is from current block\n"
309
" {\n"
310
" if (buffer[col - block_offset] == 0) //entry is not yet calculated\n"
311
" {\n"
312
" waiting_for = col - block_offset;\n"
313
" waiting_for_index = j;\n"
314
" break;\n"
315
" }\n"
316
" else //updated entry is available in shared memory:\n"
317
" vec_entries[get_global_id(0)] -= elements[j] * vec_entries[col - block_offset];\n"
318
" }\n"
319
" else if (col == row)\n"
320
" diagonal_entry = elements[j];\n"
321
" }\n"
322
" \n"
323
" if (waiting_for == -1) //this row is done\n"
324
" {\n"
325
" if (row == 0)\n"
326
" vec_entries[get_global_id(0)] /= elements[0];\n"
327
" else\n"
328
" vec_entries[get_global_id(0)] /= diagonal_entry;\n"
329
" buffer[get_global_id(0)] = 1;\n"
330
" waiting_for = -2; //magic number: thread is finished\n"
331
" }\n"
332
" } \n"
333
" } //row < size\n"
334
" else\n"
335
" buffer[get_global_id(0)] = 1; //work done (because there is no work to be done at all...)\n"
336
" \n"
337
" ///////// check whether all threads are done. If yes, exit loop /////////////\n"
338
" if (buffer[get_global_id(0)] == 0)\n"
339
" buffer[get_global_size(0)] = 0;\n"
340
" barrier(CLK_LOCAL_MEM_FENCE);\n"
341
" \n"
342
" if (buffer[get_global_size(0)] > 0) //all threads break the loop simultaneously\n"
343
" break;\n"
344
" if (get_global_id(0) == 0)\n"
345
" buffer[get_global_size(0)] = 1;\n"
346
" } //for k\n"
347
" if (row < size)\n"
348
" vector[row] = vec_entries[get_global_id(0)];\n"
349
" //vector[row] = diagonal_entry;\n"
350
" \n"
351
" //if (row == 0)\n"
352
" //vector[0] = diagonal_entry;\n"
353
" //vector[0] = elements[0];\n"
354
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
355
" } //for block_num\n"
356
"}\n"
357
;
//compressed_matrix_align1_lu_backward
358
359
const
char
*
const
compressed_matrix_align1_row_scaling_2
=
360
"__kernel void row_scaling_2(\n"
361
" __global const unsigned int * row_indices,\n"
362
" __global const unsigned int * column_indices, \n"
363
" __global const float * elements,\n"
364
" __global float * diag_M_inv,\n"
365
" unsigned int size) \n"
366
"{ \n"
367
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
368
" {\n"
369
" float dot_prod = 0.0f;\n"
370
" float temp = 0.0f;\n"
371
" unsigned int row_end = row_indices[row+1];\n"
372
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
373
" {\n"
374
" temp = elements[i];\n"
375
" dot_prod += temp * temp;\n"
376
" }\n"
377
" diag_M_inv[row] = 1.0f / sqrt(dot_prod);\n"
378
" }\n"
379
"}\n"
380
;
//compressed_matrix_align1_row_scaling_2
381
382
const
char
*
const
compressed_matrix_align1_row_scaling_1
=
383
"__kernel void row_scaling_1(\n"
384
" __global const unsigned int * row_indices,\n"
385
" __global const unsigned int * column_indices, \n"
386
" __global const float * elements,\n"
387
" __global float * diag_M_inv,\n"
388
" unsigned int size) \n"
389
"{ \n"
390
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
391
" {\n"
392
" float dot_prod = 0.0f;\n"
393
" unsigned int row_end = row_indices[row+1];\n"
394
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
395
" dot_prod += fabs(elements[i]);\n"
396
" diag_M_inv[row] = 1.0f / dot_prod;\n"
397
" }\n"
398
"}\n"
399
;
//compressed_matrix_align1_row_scaling_1
400
401
const
char
*
const
compressed_matrix_align1_vec_mul
=
402
"__kernel void vec_mul(\n"
403
" __global const unsigned int * row_indices,\n"
404
" __global const unsigned int * column_indices, \n"
405
" __global const float * elements,\n"
406
" __global const float * vector, \n"
407
" __global float * result,\n"
408
" unsigned int size) \n"
409
"{ \n"
410
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
411
" {\n"
412
" float dot_prod = 0.0f;\n"
413
" unsigned int row_end = row_indices[row+1];\n"
414
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
415
" dot_prod += elements[i] * vector[column_indices[i]];\n"
416
" result[row] = dot_prod;\n"
417
" }\n"
418
"}\n"
419
;
//compressed_matrix_align1_vec_mul
420
421
const
char
*
const
compressed_matrix_align1_bicgstab_kernel1
=
422
"void helper_bicgstab_kernel1_parallel_reduction( __local float * tmp_buffer )\n"
423
"{\n"
424
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2)\n"
425
" {\n"
426
" barrier(CLK_LOCAL_MEM_FENCE);\n"
427
" if (get_local_id(0) < stride)\n"
428
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride];\n"
429
" }\n"
430
"}\n"
431
"//////// inner products:\n"
432
"float bicgstab_kernel1_inner_prod(\n"
433
" __global const float * vec1,\n"
434
" __global const float * vec2,\n"
435
" unsigned int size,\n"
436
" __local float * tmp_buffer)\n"
437
"{\n"
438
" float tmp = 0;\n"
439
" unsigned int i_end = ((size - 1) / get_local_size(0) + 1) * get_local_size(0);\n"
440
" for (unsigned int i = get_local_id(0); i < i_end; i += get_local_size(0))\n"
441
" {\n"
442
" if (i < size)\n"
443
" tmp += vec1[i] * vec2[i];\n"
444
" }\n"
445
" tmp_buffer[get_local_id(0)] = tmp;\n"
446
" \n"
447
" helper_bicgstab_kernel1_parallel_reduction(tmp_buffer);\n"
448
" barrier(CLK_LOCAL_MEM_FENCE);\n"
449
" return tmp_buffer[0];\n"
450
"}\n"
451
"__kernel void bicgstab_kernel1(\n"
452
" __global const float * tmp0,\n"
453
" __global const float * r0star, \n"
454
" __global const float * residual,\n"
455
" __global float * s,\n"
456
" __global float * alpha,\n"
457
" __global const float * ip_rr0star,\n"
458
" __local float * tmp_buffer,\n"
459
" unsigned int size) \n"
460
"{ \n"
461
" float alpha_local = ip_rr0star[0] / bicgstab_kernel1_inner_prod(tmp0, r0star, size, tmp_buffer);\n"
462
" \n"
463
" for (unsigned int i = get_local_id(0); i < size; i += get_local_size(0))\n"
464
" s[i] = residual[i] - alpha_local * tmp0[i];\n"
465
" \n"
466
" if (get_global_id(0) == 0)\n"
467
" alpha[0] = alpha_local;\n"
468
"}\n"
469
;
//compressed_matrix_align1_bicgstab_kernel1
470
471
const
char
*
const
compressed_matrix_align1_jacobi_precond
=
472
"__kernel void jacobi_precond(\n"
473
" __global const unsigned int * row_indices,\n"
474
" __global const unsigned int * column_indices, \n"
475
" __global const float * elements,\n"
476
" __global float * diag_M_inv,\n"
477
" unsigned int size) \n"
478
"{ \n"
479
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
480
" {\n"
481
" float diag = 1.0f;\n"
482
" unsigned int row_end = row_indices[row+1];\n"
483
" for (unsigned int i = row_indices[row]; i < row_end; ++i)\n"
484
" {\n"
485
" if (row == column_indices[i])\n"
486
" {\n"
487
" diag = elements[i];\n"
488
" break;\n"
489
" }\n"
490
" }\n"
491
" diag_M_inv[row] = 1.0f / diag;\n"
492
" }\n"
493
"}\n"
494
;
//compressed_matrix_align1_jacobi_precond
495
496
const
char
*
const
compressed_matrix_align4_vec_mul
=
497
"__kernel void vec_mul(\n"
498
" __global const unsigned int * row_indices,\n"
499
" __global const uint4 * column_indices, \n"
500
" __global const float4 * elements,\n"
501
" __global const float * vector, \n"
502
" __global float * result,\n"
503
" unsigned int size)\n"
504
"{ \n"
505
" float dot_prod;\n"
506
" unsigned int start, next_stop;\n"
507
" uint4 col_idx;\n"
508
" float4 tmp_vec;\n"
509
" float4 tmp_entries;\n"
510
" for (unsigned int row = get_global_id(0); row < size; row += get_global_size(0))\n"
511
" {\n"
512
" dot_prod = 0.0f;\n"
513
" start = row_indices[row] / 4;\n"
514
" next_stop = row_indices[row+1] / 4;\n"
515
" for (unsigned int i = start; i < next_stop; ++i)\n"
516
" {\n"
517
" col_idx = column_indices[i];\n"
518
" tmp_entries = elements[i];\n"
519
" tmp_vec.x = vector[col_idx.x];\n"
520
" tmp_vec.y = vector[col_idx.y];\n"
521
" tmp_vec.z = vector[col_idx.z];\n"
522
" tmp_vec.w = vector[col_idx.w];\n"
523
" dot_prod += dot(tmp_entries, tmp_vec);\n"
524
" }\n"
525
" result[row] = dot_prod;\n"
526
" }\n"
527
"}\n"
528
;
//compressed_matrix_align4_vec_mul
529
530
}
//namespace kernels
531
}
//namespace linalg
532
}
//namespace viennacl
533
#endif
Generated on Fri Jul 27 2012 22:02:49 for ViennaCL - The Vienna Computing Library by
1.8.1.2