#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
#define INCLUDED_volk_32i_s32f_convert_32f_u_H

#include <inttypes.h>

/*
 * Unaligned kernels: convert 32-bit signed integers to 32-bit floats and
 * scale every result by 1/scalar.
 *
 * All kernels in this section share the same contract:
 *   outputVector - destination for num_points floats
 *   inputVector  - source of num_points int32_t values
 *   scalar       - divisor; each output is (float)input * (1 / scalar).
 *                  No check is made for scalar == 0.
 *   num_points   - number of elements to convert
 *
 * The SIMD variants use unaligned load/store intrinsics, so the buffers
 * need no particular alignment.  Each one converts as many full vectors
 * as fit and finishes the remainder with a scalar tail loop.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief AVX-512F variant: 16 elements per iteration, unaligned access.
 */
static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < sixteenthPoints; number++) {
        const __m512i inputVal = _mm512_loadu_si512((const __m512i*)inputPtr);
        const __m512 ret = _mm512_mul_ps(_mm512_cvtepi32_ps(inputVal), invScalar);

        _mm512_storeu_ps(outputVectorPtr, ret);

        /* Both cursors must move together; otherwise every chunk would
         * re-convert the first 16 inputs. */
        inputPtr += 16;
        outputVectorPtr += 16;
    }

    /* Scalar tail for the last (num_points % 16) elements. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_AVX512F */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 variant: 8 elements per iteration, unaligned access.
 */
static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < eighthPoints; number++) {
        const __m256i inputVal = _mm256_loadu_si256((const __m256i*)inputPtr);
        const __m256 ret = _mm256_mul_ps(_mm256_cvtepi32_ps(inputVal), invScalar);

        _mm256_storeu_ps(outputVectorPtr, ret);

        /* Advance source and destination in lock-step. */
        inputPtr += 8;
        outputVectorPtr += 8;
    }

    /* Scalar tail for the last (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief SSE2 variant: 4 elements per iteration, unaligned access.
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);
        const __m128 ret = _mm_mul_ps(_mm_cvtepi32_ps(inputVal), invScalar);

        _mm_storeu_ps(outputVectorPtr, ret);

        /* Advance source and destination in lock-step. */
        inputPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail for the last (num_points % 4) elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable scalar variant: one element per iteration.
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    const float iScalar = 1.0 / scalar;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
#define INCLUDED_volk_32i_s32f_convert_32f_a_H

#include <inttypes.h>

/*
 * Aligned kernels: convert 32-bit signed integers to 32-bit floats and
 * scale every result by 1/scalar.
 *
 * All kernels in this section share the same contract:
 *   outputVector - destination for num_points floats
 *   inputVector  - source of num_points int32_t values
 *   scalar       - divisor; each output is (float)input * (1 / scalar).
 *                  No check is made for scalar == 0.
 *   num_points   - number of elements to convert
 *
 * The SIMD variants use the aligned load/store intrinsics, so both
 * buffers must be aligned to the vector width (64 bytes for AVX-512F,
 * 32 bytes for AVX2, 16 bytes for SSE2).  Each one converts as many full
 * vectors as fit and finishes the remainder with a scalar tail loop.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief AVX-512F variant: 16 elements per iteration, 64-byte aligned access.
 */
static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < sixteenthPoints; number++) {
        const __m512i inputVal = _mm512_load_si512((const __m512i*)inputPtr);
        const __m512 ret = _mm512_mul_ps(_mm512_cvtepi32_ps(inputVal), invScalar);

        _mm512_store_ps(outputVectorPtr, ret);

        /* Both cursors must move together; otherwise every chunk would
         * re-convert the first 16 inputs. */
        inputPtr += 16;
        outputVectorPtr += 16;
    }

    /* Scalar tail for the last (num_points % 16) elements. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_AVX512F */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 variant: 8 elements per iteration, 32-byte aligned access.
 */
static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < eighthPoints; number++) {
        const __m256i inputVal = _mm256_load_si256((const __m256i*)inputPtr);
        const __m256 ret = _mm256_mul_ps(_mm256_cvtepi32_ps(inputVal), invScalar);

        _mm256_store_ps(outputVectorPtr, ret);

        /* Advance source and destination in lock-step. */
        inputPtr += 8;
        outputVectorPtr += 8;
    }

    /* Scalar tail for the last (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief SSE2 variant: 4 elements per iteration, 16-byte aligned access.
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);
        const __m128 ret = _mm_mul_ps(_mm_cvtepi32_ps(inputVal), invScalar);

        _mm_store_ps(outputVectorPtr, ret);

        /* Advance source and destination in lock-step. */
        inputPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail for the last (num_points % 4) elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable scalar variant: one element per iteration.
 */
static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const float iScalar = 1.0 / scalar;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:187
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:290
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:150
static void volk_32i_s32f_convert_32f_a_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:327