Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_32f_u_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <math.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
81 
82 static inline void
83 volk_32fc_magnitude_32f_u_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
84  unsigned int num_points)
85 {
86  unsigned int number = 0;
87  const unsigned int eighthPoints = num_points / 8;
88 
89  const float* complexVectorPtr = (float*) complexVector;
90  float* magnitudeVectorPtr = magnitudeVector;
91 
92  __m256 cplxValue1, cplxValue2, result;
93 
94  for(; number < eighthPoints; number++){
95  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96  cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
98  _mm256_storeu_ps(magnitudeVectorPtr, result);
99 
100  complexVectorPtr += 16;
101  magnitudeVectorPtr += 8;
102  }
103 
104  number = eighthPoints * 8;
105  for(; number < num_points; number++){
106  float val1Real = *complexVectorPtr++;
107  float val1Imag = *complexVectorPtr++;
108  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
109  }
110 }
111 #endif /* LV_HAVE_AVX */
112 
113 #ifdef LV_HAVE_SSE3
114 #include <pmmintrin.h>
116 
117 static inline void
118 volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
119  unsigned int num_points)
120 {
121  unsigned int number = 0;
122  const unsigned int quarterPoints = num_points / 4;
123 
124  const float* complexVectorPtr = (float*) complexVector;
125  float* magnitudeVectorPtr = magnitudeVector;
126 
127  __m128 cplxValue1, cplxValue2, result;
128  for(; number < quarterPoints; number++){
129  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
130  complexVectorPtr += 4;
131 
132  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
133  complexVectorPtr += 4;
134 
135  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
136 
137  _mm_storeu_ps(magnitudeVectorPtr, result);
138  magnitudeVectorPtr += 4;
139  }
140 
141  number = quarterPoints * 4;
142  for(; number < num_points; number++){
143  float val1Real = *complexVectorPtr++;
144  float val1Imag = *complexVectorPtr++;
145  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
146  }
147 }
148 #endif /* LV_HAVE_SSE3 */
149 
150 
151 #ifdef LV_HAVE_SSE
152 #include <xmmintrin.h>
154 
155 static inline void
156 volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
157  unsigned int num_points)
158 {
159  unsigned int number = 0;
160  const unsigned int quarterPoints = num_points / 4;
161 
162  const float* complexVectorPtr = (float*) complexVector;
163  float* magnitudeVectorPtr = magnitudeVector;
164 
165  __m128 cplxValue1, cplxValue2, result;
166 
167  for(; number < quarterPoints; number++){
168  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169  complexVectorPtr += 4;
170 
171  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172  complexVectorPtr += 4;
173 
174  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
175  _mm_storeu_ps(magnitudeVectorPtr, result);
176  magnitudeVectorPtr += 4;
177  }
178 
179  number = quarterPoints * 4;
180  for(; number < num_points; number++){
181  float val1Real = *complexVectorPtr++;
182  float val1Imag = *complexVectorPtr++;
183  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
184  }
185 }
186 #endif /* LV_HAVE_SSE */
187 
188 
189 #ifdef LV_HAVE_GENERIC
190 
191 static inline void
192 volk_32fc_magnitude_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
193 {
194  const float* complexVectorPtr = (float*)complexVector;
195  float* magnitudeVectorPtr = magnitudeVector;
196  unsigned int number = 0;
197  for(number = 0; number < num_points; number++){
198  const float real = *complexVectorPtr++;
199  const float imag = *complexVectorPtr++;
200  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
201  }
202 }
203 #endif /* LV_HAVE_GENERIC */
204 
205 
206 
207 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
208 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
209 #define INCLUDED_volk_32fc_magnitude_32f_a_H
210 
211 #include <inttypes.h>
212 #include <stdio.h>
213 #include <math.h>
214 
215 #ifdef LV_HAVE_AVX
216 #include <immintrin.h>
218 
219 static inline void
220 volk_32fc_magnitude_32f_a_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
221  unsigned int num_points)
222 {
223  unsigned int number = 0;
224  const unsigned int eighthPoints = num_points / 8;
225 
226  const float* complexVectorPtr = (float*) complexVector;
227  float* magnitudeVectorPtr = magnitudeVector;
228 
229  __m256 cplxValue1, cplxValue2, result;
230  for(; number < eighthPoints; number++){
231  cplxValue1 = _mm256_load_ps(complexVectorPtr);
232  complexVectorPtr += 8;
233 
234  cplxValue2 = _mm256_load_ps(complexVectorPtr);
235  complexVectorPtr += 8;
236 
237  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
238  _mm256_store_ps(magnitudeVectorPtr, result);
239  magnitudeVectorPtr += 8;
240  }
241 
242  number = eighthPoints * 8;
243  for(; number < num_points; number++){
244  float val1Real = *complexVectorPtr++;
245  float val1Imag = *complexVectorPtr++;
246  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
247  }
248 }
249 #endif /* LV_HAVE_AVX */
250 
251 #ifdef LV_HAVE_SSE3
252 #include <pmmintrin.h>
254 
255 static inline void
256 volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
257  unsigned int num_points)
258 {
259  unsigned int number = 0;
260  const unsigned int quarterPoints = num_points / 4;
261 
262  const float* complexVectorPtr = (float*) complexVector;
263  float* magnitudeVectorPtr = magnitudeVector;
264 
265  __m128 cplxValue1, cplxValue2, result;
266  for(; number < quarterPoints; number++){
267  cplxValue1 = _mm_load_ps(complexVectorPtr);
268  complexVectorPtr += 4;
269 
270  cplxValue2 = _mm_load_ps(complexVectorPtr);
271  complexVectorPtr += 4;
272 
273  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
274  _mm_store_ps(magnitudeVectorPtr, result);
275  magnitudeVectorPtr += 4;
276  }
277 
278  number = quarterPoints * 4;
279  for(; number < num_points; number++){
280  float val1Real = *complexVectorPtr++;
281  float val1Imag = *complexVectorPtr++;
282  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
283  }
284 }
285 #endif /* LV_HAVE_SSE3 */
286 
287 #ifdef LV_HAVE_SSE
288 #include <xmmintrin.h>
290 
291 static inline void
292 volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
293  unsigned int num_points)
294 {
295  unsigned int number = 0;
296  const unsigned int quarterPoints = num_points / 4;
297 
298  const float* complexVectorPtr = (float*) complexVector;
299  float* magnitudeVectorPtr = magnitudeVector;
300 
301  __m128 cplxValue1, cplxValue2, result;
302  for(; number < quarterPoints; number++){
303  cplxValue1 = _mm_load_ps(complexVectorPtr);
304  complexVectorPtr += 4;
305 
306  cplxValue2 = _mm_load_ps(complexVectorPtr);
307  complexVectorPtr += 4;
308 
309  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
310  _mm_store_ps(magnitudeVectorPtr, result);
311  magnitudeVectorPtr += 4;
312  }
313 
314  number = quarterPoints * 4;
315  for(; number < num_points; number++){
316  float val1Real = *complexVectorPtr++;
317  float val1Imag = *complexVectorPtr++;
318  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
319  }
320 }
321 #endif /* LV_HAVE_SSE */
322 
323 
324 #ifdef LV_HAVE_GENERIC
325 
326 static inline void
327 volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
328  unsigned int num_points)
329 {
330  const float* complexVectorPtr = (float*)complexVector;
331  float* magnitudeVectorPtr = magnitudeVector;
332  unsigned int number = 0;
333  for(number = 0; number < num_points; number++){
334  const float real = *complexVectorPtr++;
335  const float imag = *complexVectorPtr++;
336  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
337  }
338 }
339 #endif /* LV_HAVE_GENERIC */
340 
341 
342 #ifdef LV_HAVE_NEON
343 #include <arm_neon.h>
344 
/*
 * Magnitude of each complex sample, NEON variant.
 *
 * magnitudeVector: output, one float written per input sample.
 * complexVector:   input, read as interleaved (real, imag) float pairs.
 * num_points:      number of complex samples to process.
 *
 * Four samples are handled per vector iteration. The vector path does not
 * call a true square root: it takes the reciprocal-square-root estimate of
 * re^2 + im^2 and then the reciprocal estimate of that, so those outputs are
 * approximations. The scalar tail (num_points % 4 samples) uses sqrtf().
 */
static inline void
volk_32fc_magnitude_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector,
                             unsigned int num_points)
{
  const float* in = (const float*)complexVector;
  float* out = magnitudeVector;
  const unsigned int blocks = num_points / 4;
  unsigned int i;

  for(i = 0; i < blocks; i++, in += 8, out += 4){
    /* De-interleaving load: val[0] holds the reals, val[1] the imaginaries. */
    const float32x4x2_t iq = vld2q_f32(in);
    const float32x4_t re_sq = vmulq_f32(iq.val[0], iq.val[0]);
    /* re^2 + im * im via multiply-accumulate. */
    const float32x4_t power = vmlaq_f32(re_sq, iq.val[1], iq.val[1]);
    /* sqrt(x) approximated as 1 / (1 / sqrt(x)), both steps being estimates. */
    const float32x4_t inv_root = vrsqrteq_f32(power);
    vst1q_f32(out, vrecpeq_f32(inv_root));
  }

  /* Scalar clean-up for the samples that did not fill a whole vector. */
  for(i = blocks * 4; i < num_points; i++, in += 2){
    const float re = in[0];
    const float im = in[1];
    *out++ = sqrtf((re*re) + (im*im));
  }
}
374 #endif /* LV_HAVE_NEON */
375 
376 
377 #ifdef LV_HAVE_NEON
378 
/*
 * Approximate magnitude of each complex sample, NEON variant without any
 * square root in the vector path.
 *
 * magnitudeVector: output, one float written per input sample.
 * complexVector:   input, read as interleaved (real, imag) float pairs.
 * num_points:      number of complex samples to process.
 *
 * With mx = max(|re|, |im|) and mn = min(|re|, |im|), each vector lane
 * produces a * mx + b * mn, where (a, b) is
 *   (0.84, 0.561) when mn >  0.4142135 * mx, and
 *   (0.99, 0.197) when mn <= 0.4142135 * mx.
 * Four samples are handled per vector iteration; the scalar tail
 * (num_points % 4 samples) uses the exact sqrtf() formula instead.
 */
static inline void
volk_32fc_magnitude_32f_neon_fancy_sweet(float* magnitudeVector, const lv_32fc_t* complexVector,
                                         unsigned int num_points)
{
  const float* in = (const float*)complexVector;
  float* out = magnitudeVector;
  const unsigned int blocks = num_points / 4;
  unsigned int i;

  const float threshold = 0.4142135;

  /* Coefficient pairs, broadcast to all four lanes. */
  const float32x4_t a_high = vdupq_n_f32( 0.84 );
  const float32x4_t b_high = vdupq_n_f32( 0.561);
  const float32x4_t a_low = vdupq_n_f32( 0.99 );
  const float32x4_t b_low = vdupq_n_f32( 0.197);

  for(i = 0; i < blocks; i++, in += 8, out += 4){
    /* De-interleaving load: val[0] holds the reals, val[1] the imaginaries. */
    const float32x4x2_t iq = vld2q_f32(in);

    const float32x4_t re_abs = vabsq_f32(iq.val[0]);
    const float32x4_t im_abs = vabsq_f32(iq.val[1]);

    const float32x4_t smaller = vminq_f32(re_abs, im_abs);
    const float32x4_t larger = vmaxq_f32(re_abs, im_abs);

    /* Branch-free selection: the compares give per-lane all-ones/all-zeros
     * masks, exactly one of which is set for ordered inputs. */
    const float32x4_t cutoff = vmulq_n_f32(larger, threshold);
    const uint32x4_t pick_high = vcgtq_f32(smaller, cutoff);
    const uint32x4_t pick_low = vcleq_f32(smaller, cutoff);

    /* AND each coefficient's bit pattern with its mask and add the two; the
     * masked-out term is all zero bits, so the sum is the chosen coefficient. */
    const float32x4_t a_vec =
      (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)pick_high, (int32x4_t)a_high),
                             vandq_s32((int32x4_t)pick_low, (int32x4_t)a_low));
    const float32x4_t b_vec =
      (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)pick_high, (int32x4_t)b_high),
                             vandq_s32((int32x4_t)pick_low, (int32x4_t)b_low));

    /* Weighted sum b * min + a * max. */
    vst1q_f32(out, vaddq_f32(vmulq_f32(smaller, b_vec), vmulq_f32(larger, a_vec)));
  }

  /* Scalar clean-up for the samples that did not fill a whole vector. */
  for(i = blocks * 4; i < num_points; i++, in += 2){
    const float re = in[0];
    const float im = in[1];
    *out++ = sqrtf((re*re) + (im*im));
  }
}
450 #endif /* LV_HAVE_NEON */
451 
452 
453 #ifdef LV_HAVE_ORC
454 
455 extern void
456 volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector,
457  unsigned int num_points);
458 
459 static inline void
460 volk_32fc_magnitude_32f_u_orc(float* magnitudeVector, const lv_32fc_t* complexVector,
461  unsigned int num_points)
462 {
463  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
464 }
465 #endif /* LV_HAVE_ORC */
466 
467 
468 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:83
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:292
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:192
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:45
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:220
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:67
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:156
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:393
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:327
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:256
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:118
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:346
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:60