71 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H 72 #define INCLUDED_volk_32fc_magnitude_32f_u_H 79 #include <immintrin.h> 84 unsigned int num_points)
86 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
89 const float* complexVectorPtr = (
float*) complexVector;
90 float* magnitudeVectorPtr = magnitudeVector;
92 __m256 cplxValue1, cplxValue2, result;
94 for(; number < eighthPoints; number++){
95 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
98 _mm256_storeu_ps(magnitudeVectorPtr, result);
100 complexVectorPtr += 16;
101 magnitudeVectorPtr += 8;
104 number = eighthPoints * 8;
105 for(; number < num_points; number++){
106 float val1Real = *complexVectorPtr++;
107 float val1Imag = *complexVectorPtr++;
108 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
114 #include <pmmintrin.h> 119 unsigned int num_points)
121 unsigned int number = 0;
122 const unsigned int quarterPoints = num_points / 4;
124 const float* complexVectorPtr = (
float*) complexVector;
125 float* magnitudeVectorPtr = magnitudeVector;
127 __m128 cplxValue1, cplxValue2, result;
128 for(; number < quarterPoints; number++){
129 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
130 complexVectorPtr += 4;
132 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
133 complexVectorPtr += 4;
137 _mm_storeu_ps(magnitudeVectorPtr, result);
138 magnitudeVectorPtr += 4;
141 number = quarterPoints * 4;
142 for(; number < num_points; number++){
143 float val1Real = *complexVectorPtr++;
144 float val1Imag = *complexVectorPtr++;
145 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
152 #include <xmmintrin.h> 157 unsigned int num_points)
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
162 const float* complexVectorPtr = (
float*) complexVector;
163 float* magnitudeVectorPtr = magnitudeVector;
165 __m128 cplxValue1, cplxValue2, result;
167 for(; number < quarterPoints; number++){
168 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169 complexVectorPtr += 4;
171 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172 complexVectorPtr += 4;
175 _mm_storeu_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
179 number = quarterPoints * 4;
180 for(; number < num_points; number++){
181 float val1Real = *complexVectorPtr++;
182 float val1Imag = *complexVectorPtr++;
183 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC

/*
 * Magnitude of each complex sample -- portable scalar kernel.
 *
 * magnitudeVector  output buffer; num_points floats are written
 * complexVector    input buffer; read as num_points interleaved
 *                  (real, imag) float pairs
 * num_points       number of complex samples to process
 *
 * Each output is sqrtf(real*real + imag*imag) of the matching input sample.
 */
static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
                                                   const lv_32fc_t* complexVector,
                                                   unsigned int num_points)
{
  /* Keep the const qualifier when viewing the complex input as floats. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* magnitudeVectorPtr = magnitudeVector;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    const float real = *complexVectorPtr++;
    const float imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
  }
}

#endif /* LV_HAVE_GENERIC */
208 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H 209 #define INCLUDED_volk_32fc_magnitude_32f_a_H 211 #include <inttypes.h> 216 #include <immintrin.h> 221 unsigned int num_points)
223 unsigned int number = 0;
224 const unsigned int eighthPoints = num_points / 8;
226 const float* complexVectorPtr = (
float*) complexVector;
227 float* magnitudeVectorPtr = magnitudeVector;
229 __m256 cplxValue1, cplxValue2, result;
230 for(; number < eighthPoints; number++){
231 cplxValue1 = _mm256_load_ps(complexVectorPtr);
232 complexVectorPtr += 8;
234 cplxValue2 = _mm256_load_ps(complexVectorPtr);
235 complexVectorPtr += 8;
238 _mm256_store_ps(magnitudeVectorPtr, result);
239 magnitudeVectorPtr += 8;
242 number = eighthPoints * 8;
243 for(; number < num_points; number++){
244 float val1Real = *complexVectorPtr++;
245 float val1Imag = *complexVectorPtr++;
246 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
252 #include <pmmintrin.h> 257 unsigned int num_points)
259 unsigned int number = 0;
260 const unsigned int quarterPoints = num_points / 4;
262 const float* complexVectorPtr = (
float*) complexVector;
263 float* magnitudeVectorPtr = magnitudeVector;
265 __m128 cplxValue1, cplxValue2, result;
266 for(; number < quarterPoints; number++){
267 cplxValue1 = _mm_load_ps(complexVectorPtr);
268 complexVectorPtr += 4;
270 cplxValue2 = _mm_load_ps(complexVectorPtr);
271 complexVectorPtr += 4;
274 _mm_store_ps(magnitudeVectorPtr, result);
275 magnitudeVectorPtr += 4;
278 number = quarterPoints * 4;
279 for(; number < num_points; number++){
280 float val1Real = *complexVectorPtr++;
281 float val1Imag = *complexVectorPtr++;
282 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
288 #include <xmmintrin.h> 293 unsigned int num_points)
295 unsigned int number = 0;
296 const unsigned int quarterPoints = num_points / 4;
298 const float* complexVectorPtr = (
float*) complexVector;
299 float* magnitudeVectorPtr = magnitudeVector;
301 __m128 cplxValue1, cplxValue2, result;
302 for(; number < quarterPoints; number++){
303 cplxValue1 = _mm_load_ps(complexVectorPtr);
304 complexVectorPtr += 4;
306 cplxValue2 = _mm_load_ps(complexVectorPtr);
307 complexVectorPtr += 4;
310 _mm_store_ps(magnitudeVectorPtr, result);
311 magnitudeVectorPtr += 4;
314 number = quarterPoints * 4;
315 for(; number < num_points; number++){
316 float val1Real = *complexVectorPtr++;
317 float val1Imag = *complexVectorPtr++;
318 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC

/*
 * Magnitude of each complex sample -- portable scalar kernel
 * (entry point of the "_a" kernel family).
 *
 * magnitudeVector  output buffer; num_points floats are written
 * complexVector    input buffer; read as num_points interleaved
 *                  (real, imag) float pairs
 * num_points       number of complex samples to process
 *
 * Each output is sqrtf(real*real + imag*imag) of the matching input sample.
 * The scalar code itself places no alignment requirement on the buffers.
 */
static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
                                                     const lv_32fc_t* complexVector,
                                                     unsigned int num_points)
{
  /* Keep the const qualifier when viewing the complex input as floats. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* magnitudeVectorPtr = magnitudeVector;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    const float real = *complexVectorPtr++;
    const float imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
  }
}

#endif /* LV_HAVE_GENERIC */
343 #include <arm_neon.h> 347 unsigned int num_points)
350 unsigned int quarter_points = num_points / 4;
351 const float* complexVectorPtr = (
float*)complexVector;
352 float* magnitudeVectorPtr = magnitudeVector;
354 float32x4x2_t complex_vec;
355 float32x4_t magnitude_vec;
356 for(number = 0; number < quarter_points; number++){
357 complex_vec = vld2q_f32(complexVectorPtr);
358 complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
359 magnitude_vec = vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
360 magnitude_vec = vrsqrteq_f32(magnitude_vec);
361 magnitude_vec = vrecpeq_f32( magnitude_vec );
362 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
364 complexVectorPtr += 8;
365 magnitudeVectorPtr += 4;
368 for(number = quarter_points*4; number < num_points; number++){
369 const float real = *complexVectorPtr++;
370 const float imag = *complexVectorPtr++;
371 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
394 unsigned int num_points)
397 unsigned int quarter_points = num_points / 4;
398 const float* complexVectorPtr = (
float*)complexVector;
399 float* magnitudeVectorPtr = magnitudeVector;
401 const float threshold = 0.4142135;
403 float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
404 a_high = vdupq_n_f32( 0.84 );
405 b_high = vdupq_n_f32( 0.561);
406 a_low = vdupq_n_f32( 0.99 );
407 b_low = vdupq_n_f32( 0.197);
409 uint32x4_t comp0, comp1;
411 float32x4x2_t complex_vec;
412 float32x4_t min_vec, max_vec, magnitude_vec;
413 float32x4_t real_abs, imag_abs;
414 for(number = 0; number < quarter_points; number++){
415 complex_vec = vld2q_f32(complexVectorPtr);
417 real_abs = vabsq_f32(complex_vec.val[0]);
418 imag_abs = vabsq_f32(complex_vec.val[1]);
420 min_vec = vminq_f32(real_abs, imag_abs);
421 max_vec = vmaxq_f32(real_abs, imag_abs);
424 comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
425 comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
428 a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
429 vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
430 b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
431 vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
434 min_vec = vmulq_f32(min_vec, b_vec);
435 max_vec = vmulq_f32(max_vec, a_vec);
437 magnitude_vec = vaddq_f32(min_vec, max_vec);
438 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
440 complexVectorPtr += 8;
441 magnitudeVectorPtr += 4;
444 for(number = quarter_points*4; number < num_points; number++){
445 const float real = *complexVectorPtr++;
446 const float imag = *complexVectorPtr++;
447 *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
456 volk_32fc_magnitude_32f_a_orc_impl(
float* magnitudeVector,
const lv_32fc_t* complexVector,
457 unsigned int num_points);
460 volk_32fc_magnitude_32f_u_orc(
float* magnitudeVector,
const lv_32fc_t* complexVector,
461 unsigned int num_points)
463 volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:83
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:292
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:192
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:45
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:220
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:67
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:156
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:393
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:327
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:256
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:118
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:346
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:60