Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_squared_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <math.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
81 
82 static inline void
83 volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
84  unsigned int num_points)
85 {
86  unsigned int number = 0;
87  const unsigned int eighthPoints = num_points / 8;
88 
89  const float* complexVectorPtr = (float*) complexVector;
90  float* magnitudeVectorPtr = magnitudeVector;
91 
92  __m256 cplxValue1, cplxValue2, result;
93 
94  for(; number < eighthPoints; number++){
95  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96  cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97  result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
98  _mm256_storeu_ps(magnitudeVectorPtr, result);
99 
100  complexVectorPtr += 16;
101  magnitudeVectorPtr += 8;
102  }
103 
104  number = eighthPoints * 8;
105  for(; number < num_points; number++){
106  float val1Real = *complexVectorPtr++;
107  float val1Imag = *complexVectorPtr++;
108  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
109  }
110 }
111 #endif /* LV_HAVE_AVX */
112 
113 
114 #ifdef LV_HAVE_SSE3
115 #include <pmmintrin.h>
117 
118 static inline void
119 volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
120  unsigned int num_points)
121 {
122  unsigned int number = 0;
123  const unsigned int quarterPoints = num_points / 4;
124 
125  const float* complexVectorPtr = (float*) complexVector;
126  float* magnitudeVectorPtr = magnitudeVector;
127 
128  __m128 cplxValue1, cplxValue2, result;
129  for(; number < quarterPoints; number++){
130  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
131  complexVectorPtr += 4;
132 
133  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
134  complexVectorPtr += 4;
135 
136  result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
137  _mm_storeu_ps(magnitudeVectorPtr, result);
138  magnitudeVectorPtr += 4;
139  }
140 
141  number = quarterPoints * 4;
142  for(; number < num_points; number++){
143  float val1Real = *complexVectorPtr++;
144  float val1Imag = *complexVectorPtr++;
145  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
146  }
147 }
148 #endif /* LV_HAVE_SSE3 */
149 
150 
151 #ifdef LV_HAVE_SSE
152 #include <xmmintrin.h>
154 
155 static inline void
156 volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
157  unsigned int num_points)
158 {
159  unsigned int number = 0;
160  const unsigned int quarterPoints = num_points / 4;
161 
162  const float* complexVectorPtr = (float*) complexVector;
163  float* magnitudeVectorPtr = magnitudeVector;
164 
165  __m128 cplxValue1, cplxValue2, result;
166 
167  for(; number < quarterPoints; number++){
168  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169  complexVectorPtr += 4;
170 
171  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172  complexVectorPtr += 4;
173 
174  result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
175  _mm_storeu_ps(magnitudeVectorPtr, result);
176  magnitudeVectorPtr += 4;
177  }
178 
179  number = quarterPoints * 4;
180  for(; number < num_points; number++){
181  float val1Real = *complexVectorPtr++;
182  float val1Imag = *complexVectorPtr++;
183  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
184  }
185 }
186 #endif /* LV_HAVE_SSE */
187 
188 
189 #ifdef LV_HAVE_GENERIC
190 
191 static inline void
192 volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
193  unsigned int num_points)
194 {
195  const float* complexVectorPtr = (float*)complexVector;
196  float* magnitudeVectorPtr = magnitudeVector;
197  unsigned int number = 0;
198  for(number = 0; number < num_points; number++){
199  const float real = *complexVectorPtr++;
200  const float imag = *complexVectorPtr++;
201  *magnitudeVectorPtr++ = (real*real) + (imag*imag);
202  }
203 }
204 #endif /* LV_HAVE_GENERIC */
205 
206 
207 
208 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
209 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
210 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
211 
212 #include <inttypes.h>
213 #include <stdio.h>
214 #include <math.h>
215 
216 #ifdef LV_HAVE_AVX
217 #include <immintrin.h>
219 
220 static inline void
221 volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
222  unsigned int num_points)
223 {
224  unsigned int number = 0;
225  const unsigned int eighthPoints = num_points / 8;
226 
227  const float* complexVectorPtr = (float*) complexVector;
228  float* magnitudeVectorPtr = magnitudeVector;
229 
230  __m256 cplxValue1, cplxValue2, result;
231  for(; number < eighthPoints; number++){
232  cplxValue1 = _mm256_load_ps(complexVectorPtr);
233  complexVectorPtr += 8;
234 
235  cplxValue2 = _mm256_load_ps(complexVectorPtr);
236  complexVectorPtr += 8;
237 
238  result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
239  _mm256_store_ps(magnitudeVectorPtr, result);
240  magnitudeVectorPtr += 8;
241  }
242 
243  number = eighthPoints * 8;
244  for(; number < num_points; number++){
245  float val1Real = *complexVectorPtr++;
246  float val1Imag = *complexVectorPtr++;
247  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
248  }
249 }
250 #endif /* LV_HAVE_AVX */
251 
252 
253 #ifdef LV_HAVE_SSE3
254 #include <pmmintrin.h>
256 
257 static inline void
258 volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
259  unsigned int num_points)
260 {
261  unsigned int number = 0;
262  const unsigned int quarterPoints = num_points / 4;
263 
264  const float* complexVectorPtr = (float*) complexVector;
265  float* magnitudeVectorPtr = magnitudeVector;
266 
267  __m128 cplxValue1, cplxValue2, result;
268  for(; number < quarterPoints; number++){
269  cplxValue1 = _mm_load_ps(complexVectorPtr);
270  complexVectorPtr += 4;
271 
272  cplxValue2 = _mm_load_ps(complexVectorPtr);
273  complexVectorPtr += 4;
274 
275  result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
276  _mm_store_ps(magnitudeVectorPtr, result);
277  magnitudeVectorPtr += 4;
278  }
279 
280  number = quarterPoints * 4;
281  for(; number < num_points; number++){
282  float val1Real = *complexVectorPtr++;
283  float val1Imag = *complexVectorPtr++;
284  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
285  }
286 }
287 #endif /* LV_HAVE_SSE3 */
288 
289 
290 #ifdef LV_HAVE_SSE
291 #include <xmmintrin.h>
293 
294 static inline void
295 volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
296  unsigned int num_points)
297 {
298  unsigned int number = 0;
299  const unsigned int quarterPoints = num_points / 4;
300 
301  const float* complexVectorPtr = (float*)complexVector;
302  float* magnitudeVectorPtr = magnitudeVector;
303 
304  __m128 cplxValue1, cplxValue2, result;
305  for(;number < quarterPoints; number++){
306  cplxValue1 = _mm_load_ps(complexVectorPtr);
307  complexVectorPtr += 4;
308 
309  cplxValue2 = _mm_load_ps(complexVectorPtr);
310  complexVectorPtr += 4;
311 
312  result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
313  _mm_store_ps(magnitudeVectorPtr, result);
314  magnitudeVectorPtr += 4;
315  }
316 
317  number = quarterPoints * 4;
318  for(; number < num_points; number++){
319  float val1Real = *complexVectorPtr++;
320  float val1Imag = *complexVectorPtr++;
321  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
322  }
323 }
324 #endif /* LV_HAVE_SSE */
325 
326 
327 #ifdef LV_HAVE_NEON
328 #include <arm_neon.h>
329 
330 static inline void
331 volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector,
332  unsigned int num_points)
333 {
334  unsigned int number = 0;
335  const unsigned int quarterPoints = num_points / 4;
336 
337  const float* complexVectorPtr = (float*)complexVector;
338  float* magnitudeVectorPtr = magnitudeVector;
339 
340  float32x4x2_t cmplx_val;
341  float32x4_t result;
342  for(;number < quarterPoints; number++){
343  cmplx_val = vld2q_f32(complexVectorPtr);
344  complexVectorPtr += 8;
345 
346  cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
347  cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
348 
349  result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
350 
351  vst1q_f32(magnitudeVectorPtr, result);
352  magnitudeVectorPtr += 4;
353  }
354 
355  number = quarterPoints * 4;
356  for(; number < num_points; number++){
357  float val1Real = *complexVectorPtr++;
358  float val1Imag = *complexVectorPtr++;
359  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
360  }
361 }
362 #endif /* LV_HAVE_NEON */
363 
364 
365 #ifdef LV_HAVE_GENERIC
366 
367 static inline void
368 volk_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
369  unsigned int num_points)
370 {
371  const float* complexVectorPtr = (float*)complexVector;
372  float* magnitudeVectorPtr = magnitudeVector;
373  unsigned int number = 0;
374  for(number = 0; number < num_points; number++){
375  const float real = *complexVectorPtr++;
376  const float imag = *complexVectorPtr++;
377  *magnitudeVectorPtr++ = (real*real) + (imag*imag);
378  }
379 }
380 #endif /* LV_HAVE_GENERIC */
381 
382 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
static void volk_32fc_magnitude_squared_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:156
static void volk_32fc_magnitude_squared_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:258
static void volk_32fc_magnitude_squared_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:192
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:53
static void volk_32fc_magnitude_squared_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:83
static void volk_32fc_magnitude_squared_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:331
static void volk_32fc_magnitude_squared_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:221
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:33
float complex lv_32fc_t
Definition: volk_complex.h:61
static __m256 _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:57
static void volk_32fc_magnitude_squared_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:295
static void volk_32fc_magnitude_squared_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:119
static void volk_32fc_magnitude_squared_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:368