70 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H 71 #define INCLUDED_volk_32f_s32f_convert_32i_u_H 77 #include <immintrin.h> 81 const float scalar,
unsigned int num_points)
83 unsigned int number = 0;
85 const unsigned int eighthPoints = num_points / 8;
87 const float* inputVectorPtr = (
const float*)inputVector;
88 int32_t* outputVectorPtr = outputVector;
90 float min_val = -2147483647;
91 float max_val = 2147483647;
94 __m256 vScalar = _mm256_set1_ps(scalar);
97 __m256 vmin_val = _mm256_set1_ps(min_val);
98 __m256 vmax_val = _mm256_set1_ps(max_val);
100 for(;number < eighthPoints; number++){
101 inputVal1 = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
103 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
104 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
106 _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
107 outputVectorPtr += 8;
110 number = eighthPoints * 8;
111 for(; number < num_points; number++){
112 r = inputVector[number] * scalar;
117 outputVector[number] = (int32_t)
rintf(r);
124 #include <emmintrin.h> 128 const float scalar,
unsigned int num_points)
130 unsigned int number = 0;
132 const unsigned int quarterPoints = num_points / 4;
134 const float* inputVectorPtr = (
const float*)inputVector;
135 int32_t* outputVectorPtr = outputVector;
137 float min_val = -2147483647;
138 float max_val = 2147483647;
141 __m128 vScalar = _mm_set_ps1(scalar);
143 __m128i intInputVal1;
144 __m128 vmin_val = _mm_set_ps1(min_val);
145 __m128 vmax_val = _mm_set_ps1(max_val);
147 for(;number < quarterPoints; number++){
148 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
150 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
151 intInputVal1 = _mm_cvtps_epi32(inputVal1);
153 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
154 outputVectorPtr += 4;
157 number = quarterPoints * 4;
158 for(; number < num_points; number++){
159 r = inputVector[number] * scalar;
164 outputVector[number] = (int32_t)
rintf(r);
172 #include <xmmintrin.h> 176 const float scalar,
unsigned int num_points)
178 unsigned int number = 0;
180 const unsigned int quarterPoints = num_points / 4;
182 const float* inputVectorPtr = (
const float*)inputVector;
183 int32_t* outputVectorPtr = outputVector;
185 float min_val = -2147483647;
186 float max_val = 2147483647;
189 __m128 vScalar = _mm_set_ps1(scalar);
191 __m128 vmin_val = _mm_set_ps1(min_val);
192 __m128 vmax_val = _mm_set_ps1(max_val);
196 for(;number < quarterPoints; number++){
197 ret = _mm_loadu_ps(inputVectorPtr);
200 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
202 _mm_store_ps(outputFloatBuffer, ret);
203 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
204 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
205 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
206 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
209 number = quarterPoints * 4;
210 for(; number < num_points; number++){
211 r = inputVector[number] * scalar;
216 outputVector[number] = (int32_t)
rintf(r);
223 #ifdef LV_HAVE_GENERIC 227 const float scalar,
unsigned int num_points)
229 int32_t* outputVectorPtr = outputVector;
230 const float* inputVectorPtr = inputVector;
231 unsigned int number = 0;
232 float min_val = -2147483647;
233 float max_val = 2147483647;
236 for(number = 0; number < num_points; number++){
237 r = *inputVectorPtr++ * scalar;
242 *outputVectorPtr++ = (int32_t)
rintf(r);
251 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H 252 #define INCLUDED_volk_32f_s32f_convert_32i_a_H 255 #include <inttypes.h> 259 #include <immintrin.h> 263 const float scalar,
unsigned int num_points)
265 unsigned int number = 0;
267 const unsigned int eighthPoints = num_points / 8;
269 const float* inputVectorPtr = (
const float*)inputVector;
270 int32_t* outputVectorPtr = outputVector;
272 float min_val = -2147483647;
273 float max_val = 2147483647;
276 __m256 vScalar = _mm256_set1_ps(scalar);
278 __m256i intInputVal1;
279 __m256 vmin_val = _mm256_set1_ps(min_val);
280 __m256 vmax_val = _mm256_set1_ps(max_val);
282 for(;number < eighthPoints; number++){
283 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
285 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
286 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
288 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
289 outputVectorPtr += 8;
292 number = eighthPoints * 8;
293 for(; number < num_points; number++){
294 r = inputVector[number] * scalar;
299 outputVector[number] = (int32_t)
rintf(r);
307 #include <emmintrin.h> 311 const float scalar,
unsigned int num_points)
313 unsigned int number = 0;
315 const unsigned int quarterPoints = num_points / 4;
317 const float* inputVectorPtr = (
const float*)inputVector;
318 int32_t* outputVectorPtr = outputVector;
320 float min_val = -2147483647;
321 float max_val = 2147483647;
324 __m128 vScalar = _mm_set_ps1(scalar);
326 __m128i intInputVal1;
327 __m128 vmin_val = _mm_set_ps1(min_val);
328 __m128 vmax_val = _mm_set_ps1(max_val);
330 for(;number < quarterPoints; number++){
331 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
333 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
334 intInputVal1 = _mm_cvtps_epi32(inputVal1);
336 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
337 outputVectorPtr += 4;
340 number = quarterPoints * 4;
341 for(; number < num_points; number++){
342 r = inputVector[number] * scalar;
347 outputVector[number] = (int32_t)
rintf(r);
355 #include <xmmintrin.h> 359 const float scalar,
unsigned int num_points)
361 unsigned int number = 0;
363 const unsigned int quarterPoints = num_points / 4;
365 const float* inputVectorPtr = (
const float*)inputVector;
366 int32_t* outputVectorPtr = outputVector;
368 float min_val = -2147483647;
369 float max_val = 2147483647;
372 __m128 vScalar = _mm_set_ps1(scalar);
374 __m128 vmin_val = _mm_set_ps1(min_val);
375 __m128 vmax_val = _mm_set_ps1(max_val);
379 for(;number < quarterPoints; number++){
380 ret = _mm_load_ps(inputVectorPtr);
383 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
385 _mm_store_ps(outputFloatBuffer, ret);
386 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[0]);
387 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[1]);
388 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[2]);
389 *outputVectorPtr++ = (int32_t)
rintf(outputFloatBuffer[3]);
392 number = quarterPoints * 4;
393 for(; number < num_points; number++){
394 r = inputVector[number] * scalar;
399 outputVector[number] = (int32_t)
rintf(r);
406 #ifdef LV_HAVE_GENERIC 410 const float scalar,
unsigned int num_points)
412 int32_t* outputVectorPtr = outputVector;
413 const float* inputVectorPtr = inputVector;
414 unsigned int number = 0;
415 float min_val = -2147483647;
416 float max_val = 2147483647;
419 for(number = 0; number < num_points; number++){
420 r = *inputVectorPtr++ * scalar;
425 *outputVectorPtr++ = (int32_t)
rintf(r);
static void volk_32f_s32f_convert_32i_u_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:80
static float rintf(float x)
Definition: config.h:31
static void volk_32f_s32f_convert_32i_u_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:127
static void volk_32f_s32f_convert_32i_a_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:310
static void volk_32f_s32f_convert_32i_a_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:262
static void volk_32f_s32f_convert_32i_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:226
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
static void volk_32f_s32f_convert_32i_u_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:175
static void volk_32f_s32f_convert_32i_a_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:409
static void volk_32f_s32f_convert_32i_a_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:358