ViennaCL - The Vienna Computing Library  1.2.0
fft_source.h
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
2 #define VIENNACL_LINALG_KERNELS_FFT_SOURCE_HPP_
3 //Automatically generated file from auxiliary-directory, do not edit manually!
4 namespace viennacl
5 {
6  namespace linalg
7  {
8  namespace kernels
9  {
// OpenCL C source text of the `transpose` kernel (out-of-place transpose).
//
// Kernel arguments:
//   input   - source buffer of float2 elements, read at linear index i
//   output  - destination buffer of float2 elements
//   row_num - number of rows of the source matrix
//   col_num - number of columns of the source matrix
//
// The kernel decomposes each linear index i into row = i / col_num and
// col = i - row * col_num, i.e. it reads `input` row by row, and stores the
// element at output[col * row_num + row].
// Every work-item starts at get_global_id(0) and steps by get_global_size(0)
// until all row_num * col_num elements have been handled.
10 const char * const fft_align1_transpose =
11 "// simplistic matrix transpose function\n"
12 "__kernel void transpose(__global float2* input,\n"
13 " __global float2* output,\n"
14 " unsigned int row_num,\n"
15 " unsigned int col_num) {\n"
16 " unsigned int size = row_num * col_num;\n"
17 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
18 " unsigned int row = i / col_num;\n"
19 " unsigned int col = i - row*col_num;\n"
20 " unsigned int new_pos = col * row_num + row;\n"
21 " output[new_pos] = input[i];\n"
22 " }\n"
23 "}\n"
24 ; //fft_align1_transpose
25 
// OpenCL C source text of the `fft_div_vec_scalar` kernel.
//
// Kernel arguments:
//   input1 - buffer of float2 elements, modified in place
//   size   - number of float2 elements to process
//   factor - scalar divisor applied to both components of each element
//
// Each of the first `size` elements is divided by `factor`. The kernel does
// not check `factor` against zero. Work-items start at get_global_id(0) and
// step by get_global_size(0).
26 const char * const fft_align1_fft_div_vec_scalar =
27 "// divide a vector by a scalar (to be removed...)\n"
28 "__kernel void fft_div_vec_scalar(__global float2* input1, unsigned int size, float factor) {\n"
29 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
30 " input1[i] /= factor;\n"
31 " }\n"
32 "}\n"
33 ; //fft_align1_fft_div_vec_scalar
34 
// OpenCL C source text of the `transpose_inplace` kernel.
//
// Kernel arguments:
//   input   - buffer of float2 elements, transposed in place
//   row_num - number of rows
//   col_num - number of columns
//
// For each linear index i the kernel computes the transposed position
// new_pos = col * row_num + row (with row = i / col_num, col = i - row * col_num)
// and swaps input[i] with input[new_pos], but only when i < new_pos so that a
// pair is exchanged by exactly one loop iteration.
//
// NOTE(review): the mapping i -> new_pos is its own inverse only when
// row_num == col_num. For other shapes different iterations read and write
// the same elements, so the pairwise swap does not look like a valid
// transpose for non-square matrices - confirm that callers only pass square
// matrices.
//
// The two commented-out statements in the kernel text are leftover debug
// code; they are still part of the string.
35 const char * const fft_align1_transpose_inplace =
36 "// inplace-transpose of a matrix\n"
37 "__kernel void transpose_inplace(__global float2* input,\n"
38 " unsigned int row_num,\n"
39 " unsigned int col_num) {\n"
40 " unsigned int size = row_num * col_num;\n"
41 " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
42 " unsigned int row = i / col_num;\n"
43 " unsigned int col = i - row*col_num;\n"
44 " unsigned int new_pos = col * row_num + row;\n"
45 " //new_pos = col < row?0:1;\n"
46 " //input[i] = new_pos;\n"
47 " if(i < new_pos) {\n"
48 " float2 val = input[i];\n"
49 " input[i] = input[new_pos];\n"
50 " input[new_pos] = val;\n"
51 " }\n"
52 " }\n"
53 "}\n"
54 ; //fft_align1_transpose_inplace
55 
// OpenCL C source text of the `zero2` kernel.
//
// Kernel arguments:
//   input1 - first buffer of float2 elements
//   input2 - second buffer of float2 elements
//   size   - number of elements cleared in each buffer
//
// Sets the first `size` elements of both buffers to zero in a single kernel,
// which (per the kernel's own comment) avoids the overhead of two launches.
// Work-items start at get_global_id(0) and step by get_global_size(0).
56 const char * const fft_align1_zero2 =
57 "// Zero two complex vectors (to avoid kernel launch overhead)\n"
58 "__kernel void zero2(__global float2* input1,\n"
59 " __global float2* input2,\n"
60 " unsigned int size) {\n"
61 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
62 " input1[i] = 0;\n"
63 " input2[i] = 0;\n"
64 " }\n"
65 "}\n"
66 ; //fft_align1_zero2
67 
// OpenCL C source text of the `real_to_complex` kernel.
//
// Kernel arguments:
//   in   - buffer of float values
//   out  - buffer of float2 values
//   size - number of elements converted
//
// For every i < size the kernel writes a float2 whose x component is in[i]
// and whose y component is 0 (the temporary is zero-initialised before x is
// set). Work-items start at get_global_id(0) and step by get_global_size(0).
68 const char * const fft_align1_real_to_complex =
69 "// embedd a real-valued vector into a complex one\n"
70 "__kernel void real_to_complex(__global float* in,\n"
71 " __global float2* out,\n"
72 " unsigned int size) {\n"
73 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
74 " float2 val = 0;\n"
75 " val.x = in[i];\n"
76 " out[i] = val;\n"
77 " }\n"
78 "}\n"
79 ; //fft_align1_real_to_complex
80 
// OpenCL C source text of the `complex_to_real` kernel.
//
// Kernel arguments:
//   in   - buffer of float2 values
//   out  - buffer of float values
//   size - number of elements converted
//
// For every i < size the kernel copies the x component of in[i] to out[i];
// the y component is not read. Work-items start at get_global_id(0) and step
// by get_global_size(0).
81 const char * const fft_align1_complex_to_real =
82 "__kernel void complex_to_real(__global float2* in,\n"
83 " __global float* out,\n"
84 " unsigned int size) {\n"
85 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
86 " out[i] = in[i].x;\n"
87 " }\n"
88 "}\n"
89 ; //fft_align1_complex_to_real
90 
// OpenCL C source text of the `reverse_inplace` kernel.
//
// Kernel arguments:
//   vec  - buffer of float values, reversed in place
//   size - number of elements in the reversed range
//
// For every i < size / 2 (computed as size >> 1) the kernel exchanges vec[i]
// with vec[size - i - 1]. With an odd `size` the middle element is never
// touched. Work-items start at get_global_id(0) and step by
// get_global_size(0).
91 const char * const fft_align1_reverse_inplace =
92 "// reverses the entries in a vector\n"
93 "__kernel void reverse_inplace(__global float* vec, uint size) {\n"
94 " for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) {\n"
95 " float val1 = vec[i];\n"
96 " float val2 = vec[size - i - 1];\n"
97 " vec[i] = val2;\n"
98 " vec[size - i - 1] = val1;\n"
99 " }\n"
100 "}\n"
101 ; //fft_align1_reverse_inplace
102 
// OpenCL C source text of the `bluestein_pre` kernel (pre-processing phase of
// the Bluestein algorithm).
//
// Kernel arguments:
//   input    - buffer of float2 values (x = real part, y = imaginary part)
//   A        - output buffer; receives input[i] multiplied by the chirp a_i
//   B        - output buffer; receives the conjugate chirp b_i
//   size     - number of input elements
//   ext_size - length used to mirror b_i to the index ext_size - i
//
// For every i < size the kernel forms angle = pi * ((i * i) mod (2 * size)) / size
// and, via sincos(-angle, ...), the two unit-length values
//   a_i = (cos(angle), -sin(angle))
//   b_i = (cos(angle),  sin(angle))
// It then writes A[i] = input[i] * a_i (complex product), B[i] = b_i and, for
// i != 0, also B[ext_size - i] = b_i. Other entries of A and B are not
// written by this kernel.
//
// Fix relative to the previous text: `i * i` used to be evaluated in 32-bit
// unsigned arithmetic and wraps as soon as i >= 65536. The wrapped value is
// congruent to i * i modulo 2 * size only when 2 * size divides 2^32, so
// other sizes above 65536 received wrong chirp angles. The product is now
// formed in 64-bit `ulong` before the reduction.
// `size << 1` itself still has to fit into 32 bits.
//
// The header is marked as generated from the auxiliary directory, so the same
// change has to be applied to the kernel source there or it will be lost when
// the header is regenerated.
const char * const fft_align1_bluestein_pre =
"// Preprocessing phase of Bluestein algorithm\n"
"__kernel void bluestein_pre(__global float2* input,\n"
" __global float2* A,\n"
" __global float2* B,\n"
" unsigned int size,\n"
" unsigned int ext_size\n"
" ) {\n"
" unsigned int glb_id = get_global_id(0);\n"
" unsigned int glb_sz = get_global_size(0);\n"
" unsigned int double_size = size << 1;\n"
" float sn_a, cs_a;\n"
" const float NUM_PI = 3.14159265358979323846;\n"
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
// 64-bit product: i * i no longer wraps before the modulo is taken.
" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size);\n"
" float angle = (float)rm / size * NUM_PI;\n"
" sn_a = sincos(-angle, &cs_a);\n"
" float2 a_i = (float2)(cs_a, sn_a);\n"
" float2 b_i = (float2)(cs_a, -sn_a);\n"
" A[i] = (float2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x);\n"
" B[i] = b_i;\n"
" // very bad instruction, to be fixed\n"
" if(i) \n"
" B[ext_size - i] = b_i;\n"
" }\n"
"}\n"
; //fft_align1_bluestein_pre
130 
// OpenCL C source text of the `fft_mult_vec` kernel.
//
// Kernel arguments:
//   input1 - first operand, buffer of float2 values (read only)
//   input2 - second operand, buffer of float2 values (read only)
//   output - result buffer of float2 values
//   size   - number of elements multiplied
//
// Treating x as the real and y as the imaginary part, the kernel stores the
// complex product input1[i] * input2[i] in output[i] for every i < size:
//   real = in1.x * in2.x - in1.y * in2.y
//   imag = in1.x * in2.y + in1.y * in2.x
// Both operands are copied into locals before output[i] is written.
// Work-items start at get_global_id(0) and step by get_global_size(0).
131 const char * const fft_align1_fft_mult_vec =
132 "// elementwise product of two complex vectors\n"
133 "__kernel void fft_mult_vec(__global const float2* input1,\n"
134 " __global const float2* input2,\n"
135 " __global float2* output,\n"
136 " unsigned int size) {\n"
137 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) {\n"
138 " float2 in1 = input1[i];\n"
139 " float2 in2 = input2[i];\n"
140 " output[i] = (float2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x);\n"
141 " }\n"
142 "}\n"
143 ; //fft_align1_fft_mult_vec
144 
// OpenCL C source text of the `bluestein_post` kernel (post-processing phase
// of the Bluestein algorithm).
//
// Kernel arguments:
//   Z    - input buffer of float2 values (x = real part, y = imaginary part)
//   out  - output buffer of float2 values
//   size - number of elements processed
//
// For every i < size the kernel forms angle = -pi * ((i * i) mod (2 * size)) / size,
// builds b_i = (cos(angle), sin(angle)) with sincos and writes the complex
// product out[i] = Z[i] * b_i.
//
// Fix relative to the previous text: `i * i` used to be evaluated in 32-bit
// unsigned arithmetic and wraps as soon as i >= 65536. The wrapped value is
// congruent to i * i modulo 2 * size only when 2 * size divides 2^32, so
// other sizes above 65536 received wrong angles. The product is now formed
// in 64-bit `ulong` before the reduction.
// `size << 1` itself still has to fit into 32 bits.
//
// The header is marked as generated from the auxiliary directory, so the same
// change has to be applied to the kernel source there or it will be lost when
// the header is regenerated.
const char * const fft_align1_bluestein_post =
"// Postprocessing phase of Bluestein algorithm\n"
"__kernel void bluestein_post(__global float2* Z,\n"
" __global float2* out,\n"
" unsigned int size) \n"
"{\n"
" unsigned int glb_id = get_global_id(0);\n"
" unsigned int glb_sz = get_global_size(0);\n"
" unsigned int double_size = size << 1;\n"
" float sn_a, cs_a;\n"
" const float NUM_PI = 3.14159265358979323846;\n"
" for(unsigned int i = glb_id; i < size; i += glb_sz) {\n"
// 64-bit product: i * i no longer wraps before the modulo is taken.
" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size);\n"
" float angle = (float)rm / size * (-NUM_PI);\n"
" sn_a = sincos(angle, &cs_a);\n"
" float2 b_i = (float2)(cs_a, sn_a);\n"
" out[i] = (float2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x);\n"
" }\n"
"}\n"
; //fft_align1_bluestein_post
165 
// OpenCL C source text of the `vandermonde_prod` kernel.
//
// Kernel arguments:
//   vander - buffer of float values; only vander[i] is read for row i, i.e.
//            the matrix is represented by one generating value per row
//   vector - buffer of float values, the vector being multiplied
//   result - buffer of float values receiving the product
//   size   - number of rows and length of `vector`
//
// For every i < size the kernel accumulates
//   result[i] = sum over j in [0, size) of vander[i]^j * vector[j]
// by keeping a running power `pwr` that starts at 1 and is multiplied by
// vander[i] after each term. The inner loop is sequential per row, so one
// row costs `size` multiply-adds. Work-items start at get_global_id(0) and
// step by get_global_size(0).
166 const char * const fft_align1_vandermonde_prod =
167 "// computes the matrix vector product with a Vandermonde matrix\n"
168 "__kernel void vandermonde_prod(__global float* vander,\n"
169 " __global float* vector,\n"
170 " __global float* result,\n"
171 " uint size) {\n"
172 " for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) {\n"
173 " float mul = vander[i];\n"
174 " float pwr = 1;\n"
175 " float val = 0;\n"
176 " for(uint j = 0; j < size; j++) {\n"
177 " val = val + pwr * vector[j];\n"
178 " pwr *= mul;\n"
179 " }\n"
180 " \n"
181 " result[i] = val;\n"
182 " }\n"
183 "}\n"
184 ; //fft_align1_vandermonde_prod
185 
186  } //namespace kernels
187  } //namespace linalg
188 } //namespace viennacl
189 #endif