Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_32f_u_H
73 
74 #include <inttypes.h>
75 #include <math.h>
76 #include <stdio.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
81 
82 static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
83  const lv_32fc_t* complexVector,
84  unsigned int num_points)
85 {
86  unsigned int number = 0;
87  const unsigned int eighthPoints = num_points / 8;
88 
89  const float* complexVectorPtr = (float*)complexVector;
90  float* magnitudeVectorPtr = magnitudeVector;
91 
92  __m256 cplxValue1, cplxValue2, result;
93 
94  for (; number < eighthPoints; number++) {
95  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96  cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
98  _mm256_storeu_ps(magnitudeVectorPtr, result);
99 
100  complexVectorPtr += 16;
101  magnitudeVectorPtr += 8;
102  }
103 
104  number = eighthPoints * 8;
105  for (; number < num_points; number++) {
106  float val1Real = *complexVectorPtr++;
107  float val1Imag = *complexVectorPtr++;
108  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
109  }
110 }
111 #endif /* LV_HAVE_AVX */
112 
113 #ifdef LV_HAVE_SSE3
114 #include <pmmintrin.h>
116 
117 static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
118  const lv_32fc_t* complexVector,
119  unsigned int num_points)
120 {
121  unsigned int number = 0;
122  const unsigned int quarterPoints = num_points / 4;
123 
124  const float* complexVectorPtr = (float*)complexVector;
125  float* magnitudeVectorPtr = magnitudeVector;
126 
127  __m128 cplxValue1, cplxValue2, result;
128  for (; number < quarterPoints; number++) {
129  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
130  complexVectorPtr += 4;
131 
132  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
133  complexVectorPtr += 4;
134 
135  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
136 
137  _mm_storeu_ps(magnitudeVectorPtr, result);
138  magnitudeVectorPtr += 4;
139  }
140 
141  number = quarterPoints * 4;
142  for (; number < num_points; number++) {
143  float val1Real = *complexVectorPtr++;
144  float val1Imag = *complexVectorPtr++;
145  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
146  }
147 }
148 #endif /* LV_HAVE_SSE3 */
149 
150 
151 #ifdef LV_HAVE_SSE
153 #include <xmmintrin.h>
154 
155 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
156  const lv_32fc_t* complexVector,
157  unsigned int num_points)
158 {
159  unsigned int number = 0;
160  const unsigned int quarterPoints = num_points / 4;
161 
162  const float* complexVectorPtr = (float*)complexVector;
163  float* magnitudeVectorPtr = magnitudeVector;
164 
165  __m128 cplxValue1, cplxValue2, result;
166 
167  for (; number < quarterPoints; number++) {
168  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169  complexVectorPtr += 4;
170 
171  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172  complexVectorPtr += 4;
173 
174  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
175  _mm_storeu_ps(magnitudeVectorPtr, result);
176  magnitudeVectorPtr += 4;
177  }
178 
179  number = quarterPoints * 4;
180  for (; number < num_points; number++) {
181  float val1Real = *complexVectorPtr++;
182  float val1Imag = *complexVectorPtr++;
183  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
184  }
185 }
186 #endif /* LV_HAVE_SSE */
187 
188 
189 #ifdef LV_HAVE_GENERIC
190 
191 static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
192  const lv_32fc_t* complexVector,
193  unsigned int num_points)
194 {
195  const float* complexVectorPtr = (float*)complexVector;
196  float* magnitudeVectorPtr = magnitudeVector;
197  unsigned int number = 0;
198  for (number = 0; number < num_points; number++) {
199  const float real = *complexVectorPtr++;
200  const float imag = *complexVectorPtr++;
201  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
202  }
203 }
204 #endif /* LV_HAVE_GENERIC */
205 
206 
207 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
208 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
209 #define INCLUDED_volk_32fc_magnitude_32f_a_H
210 
211 #include <inttypes.h>
212 #include <math.h>
213 #include <stdio.h>
214 
215 #ifdef LV_HAVE_AVX
216 #include <immintrin.h>
218 
219 static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
220  const lv_32fc_t* complexVector,
221  unsigned int num_points)
222 {
223  unsigned int number = 0;
224  const unsigned int eighthPoints = num_points / 8;
225 
226  const float* complexVectorPtr = (float*)complexVector;
227  float* magnitudeVectorPtr = magnitudeVector;
228 
229  __m256 cplxValue1, cplxValue2, result;
230  for (; number < eighthPoints; number++) {
231  cplxValue1 = _mm256_load_ps(complexVectorPtr);
232  complexVectorPtr += 8;
233 
234  cplxValue2 = _mm256_load_ps(complexVectorPtr);
235  complexVectorPtr += 8;
236 
237  result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
238  _mm256_store_ps(magnitudeVectorPtr, result);
239  magnitudeVectorPtr += 8;
240  }
241 
242  number = eighthPoints * 8;
243  for (; number < num_points; number++) {
244  float val1Real = *complexVectorPtr++;
245  float val1Imag = *complexVectorPtr++;
246  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
247  }
248 }
249 #endif /* LV_HAVE_AVX */
250 
251 #ifdef LV_HAVE_SSE3
252 #include <pmmintrin.h>
254 
255 static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
256  const lv_32fc_t* complexVector,
257  unsigned int num_points)
258 {
259  unsigned int number = 0;
260  const unsigned int quarterPoints = num_points / 4;
261 
262  const float* complexVectorPtr = (float*)complexVector;
263  float* magnitudeVectorPtr = magnitudeVector;
264 
265  __m128 cplxValue1, cplxValue2, result;
266  for (; number < quarterPoints; number++) {
267  cplxValue1 = _mm_load_ps(complexVectorPtr);
268  complexVectorPtr += 4;
269 
270  cplxValue2 = _mm_load_ps(complexVectorPtr);
271  complexVectorPtr += 4;
272 
273  result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
274  _mm_store_ps(magnitudeVectorPtr, result);
275  magnitudeVectorPtr += 4;
276  }
277 
278  number = quarterPoints * 4;
279  for (; number < num_points; number++) {
280  float val1Real = *complexVectorPtr++;
281  float val1Imag = *complexVectorPtr++;
282  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
283  }
284 }
285 #endif /* LV_HAVE_SSE3 */
286 
287 #ifdef LV_HAVE_SSE
289 #include <xmmintrin.h>
290 
291 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
292  const lv_32fc_t* complexVector,
293  unsigned int num_points)
294 {
295  unsigned int number = 0;
296  const unsigned int quarterPoints = num_points / 4;
297 
298  const float* complexVectorPtr = (float*)complexVector;
299  float* magnitudeVectorPtr = magnitudeVector;
300 
301  __m128 cplxValue1, cplxValue2, result;
302  for (; number < quarterPoints; number++) {
303  cplxValue1 = _mm_load_ps(complexVectorPtr);
304  complexVectorPtr += 4;
305 
306  cplxValue2 = _mm_load_ps(complexVectorPtr);
307  complexVectorPtr += 4;
308 
309  result = _mm_magnitude_ps(cplxValue1, cplxValue2);
310  _mm_store_ps(magnitudeVectorPtr, result);
311  magnitudeVectorPtr += 4;
312  }
313 
314  number = quarterPoints * 4;
315  for (; number < num_points; number++) {
316  float val1Real = *complexVectorPtr++;
317  float val1Imag = *complexVectorPtr++;
318  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
319  }
320 }
321 #endif /* LV_HAVE_SSE */
322 
323 
324 #ifdef LV_HAVE_GENERIC
325 
326 static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
327  const lv_32fc_t* complexVector,
328  unsigned int num_points)
329 {
330  const float* complexVectorPtr = (float*)complexVector;
331  float* magnitudeVectorPtr = magnitudeVector;
332  unsigned int number = 0;
333  for (number = 0; number < num_points; number++) {
334  const float real = *complexVectorPtr++;
335  const float imag = *complexVectorPtr++;
336  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
337  }
338 }
339 #endif /* LV_HAVE_GENERIC */
340 
341 
342 #ifdef LV_HAVE_NEON
343 #include <arm_neon.h>
344 
345 static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
346  const lv_32fc_t* complexVector,
347  unsigned int num_points)
348 {
349  unsigned int number;
350  unsigned int quarter_points = num_points / 4;
351  const float* complexVectorPtr = (float*)complexVector;
352  float* magnitudeVectorPtr = magnitudeVector;
353 
354  float32x4x2_t complex_vec;
355  float32x4_t magnitude_vec;
356  for (number = 0; number < quarter_points; number++) {
357  complex_vec = vld2q_f32(complexVectorPtr);
358  complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
359  magnitude_vec =
360  vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
361  magnitude_vec = vrsqrteq_f32(magnitude_vec);
362  magnitude_vec = vrecpeq_f32(magnitude_vec); // no plain ol' sqrt
363  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
364 
365  complexVectorPtr += 8;
366  magnitudeVectorPtr += 4;
367  }
368 
369  for (number = quarter_points * 4; number < num_points; number++) {
370  const float real = *complexVectorPtr++;
371  const float imag = *complexVectorPtr++;
372  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
373  }
374 }
375 #endif /* LV_HAVE_NEON */
376 
377 
378 #ifdef LV_HAVE_NEON
379 
396  float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
397 {
398  unsigned int number;
399  unsigned int quarter_points = num_points / 4;
400  const float* complexVectorPtr = (float*)complexVector;
401  float* magnitudeVectorPtr = magnitudeVector;
402 
403  const float threshold = 0.4142135;
404 
405  float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
406  a_high = vdupq_n_f32(0.84);
407  b_high = vdupq_n_f32(0.561);
408  a_low = vdupq_n_f32(0.99);
409  b_low = vdupq_n_f32(0.197);
410 
411  uint32x4_t comp0, comp1;
412 
413  float32x4x2_t complex_vec;
414  float32x4_t min_vec, max_vec, magnitude_vec;
415  float32x4_t real_abs, imag_abs;
416  for (number = 0; number < quarter_points; number++) {
417  complex_vec = vld2q_f32(complexVectorPtr);
418 
419  real_abs = vabsq_f32(complex_vec.val[0]);
420  imag_abs = vabsq_f32(complex_vec.val[1]);
421 
422  min_vec = vminq_f32(real_abs, imag_abs);
423  max_vec = vmaxq_f32(real_abs, imag_abs);
424 
425  // effective branch to choose coefficient pair.
426  comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
427  comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
428 
429  // and 0s or 1s with coefficients from previous effective branch
430  a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
431  vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
432  b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
433  vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
434 
435  // coefficients chosen, do the weighted sum
436  min_vec = vmulq_f32(min_vec, b_vec);
437  max_vec = vmulq_f32(max_vec, a_vec);
438 
439  magnitude_vec = vaddq_f32(min_vec, max_vec);
440  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
441 
442  complexVectorPtr += 8;
443  magnitudeVectorPtr += 4;
444  }
445 
446  for (number = quarter_points * 4; number < num_points; number++) {
447  const float real = *complexVectorPtr++;
448  const float imag = *complexVectorPtr++;
449  *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
450  }
451 }
452 #endif /* LV_HAVE_NEON */
453 
454 
455 #ifdef LV_HAVE_ORC
456 
457 extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector,
458  const lv_32fc_t* complexVector,
459  unsigned int num_points);
460 
461 static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector,
462  const lv_32fc_t* complexVector,
463  unsigned int num_points)
464 {
465  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
466 }
467 #endif /* LV_HAVE_ORC */
468 
469 
470 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
volk_32fc_magnitude_32f_a_avx
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:219
volk_32fc_magnitude_32f_u_sse
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:155
volk_sse3_intrinsics.h
_mm_magnitude_ps
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:44
volk_32fc_magnitude_32f_u_avx
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:82
volk_32fc_magnitude_32f_neon
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:345
_mm256_magnitude_ps
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:75
volk_sse_intrinsics.h
volk_32fc_magnitude_32f_a_generic
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:326
volk_32fc_magnitude_32f_u_sse3
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:117
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70
volk_32fc_magnitude_32f_a_sse
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:291
volk_32fc_magnitude_32f_generic
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:191
volk_avx_intrinsics.h
volk_32fc_magnitude_32f_a_sse3
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:255
volk_32fc_magnitude_32f_neon_fancy_sweet
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:395
_mm_magnitude_ps_sse3
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:58