Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32i_s32f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
64 #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
65 #define INCLUDED_volk_32i_s32f_convert_32f_u_H
66 
67 #include <inttypes.h>
68 #include <stdio.h>
69 
70 #ifdef LV_HAVE_AVX512F
71 #include <immintrin.h>
72 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 16 elements per iteration with AVX-512F unaligned loads/stores.
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_u_avx512f(float* outputVector, const int32_t* inputVector,
                                    const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < sixteenthPoints; number++) {
        /* Load 16 integers; the intrinsic takes a const void pointer, so no
         * cast to a stricter-aligned vector pointer type is required. */
        const __m512i inputVal = _mm512_loadu_si512(inputPtr);

        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) elements. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
105 #endif /* LV_HAVE_AVX512F */
106 
107 
108 #ifdef LV_HAVE_AVX2
109 #include <immintrin.h>
110 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 8 elements per iteration with 256-bit unaligned loads/stores.
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_u_avx2(float* outputVector, const int32_t* inputVector,
                                 const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < eighthPoints; number++) {
        /* Load the 8 values */
        const __m256i inputVal = _mm256_loadu_si256((const __m256i*)inputPtr);

        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
143 #endif /* LV_HAVE_AVX2 */
144 
145 
146 #ifdef LV_HAVE_SSE2
147 #include <emmintrin.h>
148 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 4 elements per iteration with SSE2 unaligned loads/stores.
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const int32_t* inputVector,
                                 const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        /* Load the 4 values */
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
181 #endif /* LV_HAVE_SSE2 */
182 
183 
184 #ifdef LV_HAVE_GENERIC
185 
/*
 * Portable scalar implementation: converts each 32-bit signed integer to
 * float and multiplies it by 1/scalar.
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read.
 * scalar:       divisor; its reciprocal is computed once up front.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_generic(float* outputVector, const int32_t* inputVector,
                                  const float scalar, unsigned int num_points)
{
    /* One division here replaces a division per element in the loop. */
    const float reciprocal = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        outputVector[idx] = ((float)inputVector[idx]) * reciprocal;
    }
}
199 #endif /* LV_HAVE_GENERIC */
200 
201 #endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
202 
203 
204 
205 #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
206 #define INCLUDED_volk_32i_s32f_convert_32f_a_H
207 
208 #include <inttypes.h>
209 #include <stdio.h>
210 
211 #ifdef LV_HAVE_AVX512F
212 #include <immintrin.h>
213 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 16 elements per iteration with AVX-512F aligned loads/stores.
 *
 * The aligned load/store intrinsics are used, so both buffers must meet the
 * alignment those intrinsics require (64 bytes for 512-bit accesses).
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_a_avx512f(float* outputVector, const int32_t* inputVector,
                                    const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < sixteenthPoints; number++) {
        /* Load 16 integers from aligned memory. */
        const __m512i inputVal = _mm512_load_si512(inputPtr);

        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) elements. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
246 #endif /* LV_HAVE_AVX512F */
247 
248 #ifdef LV_HAVE_AVX2
249 #include <immintrin.h>
250 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 8 elements per iteration with 256-bit aligned loads/stores.
 *
 * The aligned load/store intrinsics are used, so both buffers must meet the
 * alignment those intrinsics require (32 bytes for 256-bit accesses).
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_a_avx2(float* outputVector, const int32_t* inputVector,
                                 const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < eighthPoints; number++) {
        /* Load the 8 values */
        const __m256i inputVal = _mm256_load_si256((const __m256i*)inputPtr);

        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
283 #endif /* LV_HAVE_AVX2 */
284 
285 
286 #ifdef LV_HAVE_SSE2
287 #include <emmintrin.h>
288 
/*
 * Converts 32-bit signed integers to floats and multiplies each result by
 * 1/scalar, 4 elements per iteration with SSE2 aligned loads/stores.
 *
 * The aligned load/store intrinsics are used, so both buffers must meet the
 * alignment those intrinsics require (16 bytes for 128-bit accesses).
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read, never modified.
 * scalar:       divisor; its reciprocal is computed once and reused.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const int32_t* inputVector,
                                 const float scalar, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    /* Stay const-qualified: the kernel only ever reads from the input. */
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        /* Load the 4 values */
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);

        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
321 #endif /* LV_HAVE_SSE2 */
322 
323 
324 #ifdef LV_HAVE_GENERIC
325 
/*
 * Portable scalar implementation (aligned-dispatch entry point): converts
 * each 32-bit signed integer to float and multiplies it by 1/scalar.
 *
 * outputVector: destination; elements [0, num_points) are written.
 * inputVector:  source; elements [0, num_points) are read.
 * scalar:       divisor; its reciprocal is computed once up front.
 * num_points:   number of elements to convert.
 */
static inline void
volk_32i_s32f_convert_32f_a_generic(float* outputVector, const int32_t* inputVector,
                                    const float scalar, unsigned int num_points)
{
    /* One division here replaces a division per element in the loop. */
    const float reciprocal = 1.0 / scalar;
    const int32_t* src = inputVector;
    float* dst = outputVector;
    unsigned int remaining = num_points;

    while (remaining > 0) {
        const float converted = (float)(*src);
        *dst = converted * reciprocal;
        ++src;
        ++dst;
        --remaining;
    }
}
339 #endif /* LV_HAVE_GENERIC */
340 
341 
342 
343 
344 #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:187
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:290
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:150
static void volk_32i_s32f_convert_32f_a_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:327