ViennaCL - The Vienna Computing Library
1.2.0
Main Page
Namespaces
Data Structures
Files
File List
Globals
viennacl
linalg
kernels
coordinate_matrix_source.h
Go to the documentation of this file.
1
#ifndef VIENNACL_LINALG_KERNELS_COORDINATE_MATRIX_SOURCE_HPP_
2
#define VIENNACL_LINALG_KERNELS_COORDINATE_MATRIX_SOURCE_HPP_
3
//Automatically generated file from auxiliary-directory, do not edit manually!
4
namespace
viennacl
5
{
6
namespace
linalg
7
{
8
namespace
kernels
9
{
10
const
char
*
const
coordinate_matrix_align1_vec_mul
=
11
"//segmented parallel reduction. At present restricted to up to 256 threads\n"
12
"void segmented_parallel_reduction(unsigned int row, \n"
13
" float val, \n"
14
" __local unsigned int * shared_rows, \n"
15
" __local float * inter_results) \n"
16
"{ \n"
17
" //barrier(CLK_LOCAL_MEM_FENCE); \n"
18
" shared_rows[get_local_id(0)] = row; \n"
19
" inter_results[get_local_id(0)] = val; \n"
20
" float left = 0;\n"
21
" \n"
22
" barrier(CLK_LOCAL_MEM_FENCE); \n"
23
" if( get_local_id(0) >= 1 && row == shared_rows[get_local_id(0) - 1] ) { left = inter_results[get_local_id(0) - 1]; } \n"
24
" barrier(CLK_LOCAL_MEM_FENCE); \n"
25
" inter_results[get_local_id(0)] += left; left = 0;\n"
26
" barrier(CLK_LOCAL_MEM_FENCE); \n"
27
" if( get_local_id(0) >= 2 && row == shared_rows[get_local_id(0) - 2] ) { left = inter_results[get_local_id(0) - 2]; } \n"
28
" barrier(CLK_LOCAL_MEM_FENCE); \n"
29
" inter_results[get_local_id(0)] += left; left = 0;\n"
30
" barrier(CLK_LOCAL_MEM_FENCE); \n"
31
" if( get_local_id(0) >= 4 && row == shared_rows[get_local_id(0) - 4] ) { left = inter_results[get_local_id(0) - 4]; } \n"
32
" barrier(CLK_LOCAL_MEM_FENCE); \n"
33
" inter_results[get_local_id(0)] += left; left = 0;\n"
34
" barrier(CLK_LOCAL_MEM_FENCE); \n"
35
" if( get_local_id(0) >= 8 && row == shared_rows[get_local_id(0) - 8] ) { left = inter_results[get_local_id(0) - 8]; } \n"
36
" barrier(CLK_LOCAL_MEM_FENCE); \n"
37
" inter_results[get_local_id(0)] += left; left = 0;\n"
38
" barrier(CLK_LOCAL_MEM_FENCE); \n"
39
" if( get_local_id(0) >= 16 && row == shared_rows[get_local_id(0) - 16] ) { left = inter_results[get_local_id(0) - 16]; } \n"
40
" barrier(CLK_LOCAL_MEM_FENCE); \n"
41
" inter_results[get_local_id(0)] += left; left = 0;\n"
42
" barrier(CLK_LOCAL_MEM_FENCE); \n"
43
" if( get_local_id(0) >= 32 && row == shared_rows[get_local_id(0) - 32] ) { left = inter_results[get_local_id(0) - 32]; } \n"
44
" barrier(CLK_LOCAL_MEM_FENCE); \n"
45
" inter_results[get_local_id(0)] += left; left = 0;\n"
46
" barrier(CLK_LOCAL_MEM_FENCE); \n"
47
" if( get_local_id(0) >= 64 && row == shared_rows[get_local_id(0) - 64] ) { left = inter_results[get_local_id(0) - 64]; } \n"
48
" barrier(CLK_LOCAL_MEM_FENCE); \n"
49
" inter_results[get_local_id(0)] += left; left = 0;\n"
50
" barrier(CLK_LOCAL_MEM_FENCE); \n"
51
" if( get_local_id(0) >= 128 && row == shared_rows[get_local_id(0) - 128] ) { left = inter_results[get_local_id(0) - 128]; } \n"
52
" barrier(CLK_LOCAL_MEM_FENCE); \n"
53
" inter_results[get_local_id(0)] += left; left = 0;\n"
54
" barrier(CLK_LOCAL_MEM_FENCE); \n"
55
" //if( get_local_id(0) >= 256 && row == shared_rows[get_local_id(0) - 256] ) { left = inter_results[get_local_id(0) - 256]; } \n"
56
" //barrier(CLK_LOCAL_MEM_FENCE); \n"
57
" //inter_results[get_local_id(0)] += left; left = 0;\n"
58
" //barrier(CLK_LOCAL_MEM_FENCE); \n"
59
"}\n"
60
"__kernel void vec_mul( \n"
61
" __global const uint2 * coords, //(row_index, column_index) \n"
62
" __global const float * elements, \n"
63
" __global const uint * group_boundaries,\n"
64
" __global const float * vector, \n"
65
" __global float * result, \n"
66
" __local unsigned int * shared_rows, \n"
67
" __local float * inter_results) \n"
68
"{ \n"
69
" uint2 tmp; \n"
70
" float val;\n"
71
" uint last_index = get_local_size(0) - 1;\n"
72
" uint group_start = group_boundaries[get_group_id(0)];\n"
73
" uint group_end = group_boundaries[get_group_id(0) + 1];\n"
74
" uint k_end = 1 + (group_end - group_start - 1) / get_local_size(0); // -1 in order to have correct behavior if group_end - group_start == j * get_local_size(0)\n"
75
" uint local_index = 0;\n"
76
" for (uint k = 0; k < k_end; ++k)\n"
77
" { \n"
78
" barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
79
" \n"
80
" local_index = group_start + k * get_local_size(0) + get_local_id(0); \n"
81
" \n"
82
" if (local_index < group_end)\n"
83
" {\n"
84
" tmp = coords[local_index]; \n"
85
" val = elements[local_index] * vector[tmp.y]; \n"
86
" }\n"
87
" else\n"
88
" {\n"
89
" tmp.x = 0;\n"
90
" tmp.y = 0;\n"
91
" val = 0;\n"
92
" }\n"
93
" barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
94
" //check for carry from previous loop run: \n"
95
" if (get_local_id(0) == 0 && k > 0)\n"
96
" { \n"
97
" if (tmp.x == shared_rows[last_index]) \n"
98
" val += inter_results[last_index]; \n"
99
" else \n"
100
" result[shared_rows[last_index]] += inter_results[last_index]; \n"
101
" } \n"
102
" barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
103
" segmented_parallel_reduction(tmp.x, val, shared_rows, inter_results); //all threads have to enter this function\n"
104
" barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
105
" if (get_local_id(0) != last_index &&\n"
106
" shared_rows[get_local_id(0)] != shared_rows[get_local_id(0) + 1] &&\n"
107
" inter_results[get_local_id(0)] != 0) \n"
108
" { \n"
109
" result[tmp.x] += inter_results[get_local_id(0)]; \n"
110
" }\n"
111
" \n"
112
" barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); \n"
113
" } //for k\n"
114
" \n"
115
" if (get_local_id(0) == last_index && inter_results[last_index] != 0) \n"
116
" result[tmp.x] += inter_results[last_index]; \n"
117
"}\n"
118
;
//coordinate_matrix_align1_vec_mul
119
120
}
//namespace kernels
121
}
//namespace linalg
122
}
//namespace viennacl
123
#endif
Generated on Fri Jul 27 2012 22:02:49 for ViennaCL - The Vienna Computing Library by
1.8.1.2