71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H 72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H 79 #include <immintrin.h> 84 unsigned int num_points)
86 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
89 const float* complexVectorPtr = (
float*) complexVector;
90 float* magnitudeVectorPtr = magnitudeVector;
92 __m256 cplxValue1, cplxValue2, result;
94 for(; number < eighthPoints; number++){
95 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
98 _mm256_storeu_ps(magnitudeVectorPtr, result);
100 complexVectorPtr += 16;
101 magnitudeVectorPtr += 8;
104 number = eighthPoints * 8;
105 for(; number < num_points; number++){
106 float val1Real = *complexVectorPtr++;
107 float val1Imag = *complexVectorPtr++;
108 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
115 #include <pmmintrin.h> 120 unsigned int num_points)
122 unsigned int number = 0;
123 const unsigned int quarterPoints = num_points / 4;
125 const float* complexVectorPtr = (
float*) complexVector;
126 float* magnitudeVectorPtr = magnitudeVector;
128 __m128 cplxValue1, cplxValue2, result;
129 for(; number < quarterPoints; number++){
130 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
131 complexVectorPtr += 4;
133 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
134 complexVectorPtr += 4;
137 _mm_storeu_ps(magnitudeVectorPtr, result);
138 magnitudeVectorPtr += 4;
141 number = quarterPoints * 4;
142 for(; number < num_points; number++){
143 float val1Real = *complexVectorPtr++;
144 float val1Imag = *complexVectorPtr++;
145 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
152 #include <xmmintrin.h> 157 unsigned int num_points)
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
162 const float* complexVectorPtr = (
float*) complexVector;
163 float* magnitudeVectorPtr = magnitudeVector;
165 __m128 cplxValue1, cplxValue2, result;
167 for(; number < quarterPoints; number++){
168 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169 complexVectorPtr += 4;
171 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172 complexVectorPtr += 4;
175 _mm_storeu_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
179 number = quarterPoints * 4;
180 for(; number < num_points; number++){
181 float val1Real = *complexVectorPtr++;
182 float val1Imag = *complexVectorPtr++;
183 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
189 #ifdef LV_HAVE_GENERIC 193 unsigned int num_points)
195 const float* complexVectorPtr = (
float*)complexVector;
196 float* magnitudeVectorPtr = magnitudeVector;
197 unsigned int number = 0;
198 for(number = 0; number < num_points; number++){
199 const float real = *complexVectorPtr++;
200 const float imag = *complexVectorPtr++;
201 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
209 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H 210 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H 212 #include <inttypes.h> 217 #include <immintrin.h> 222 unsigned int num_points)
224 unsigned int number = 0;
225 const unsigned int eighthPoints = num_points / 8;
227 const float* complexVectorPtr = (
float*) complexVector;
228 float* magnitudeVectorPtr = magnitudeVector;
230 __m256 cplxValue1, cplxValue2, result;
231 for(; number < eighthPoints; number++){
232 cplxValue1 = _mm256_load_ps(complexVectorPtr);
233 complexVectorPtr += 8;
235 cplxValue2 = _mm256_load_ps(complexVectorPtr);
236 complexVectorPtr += 8;
239 _mm256_store_ps(magnitudeVectorPtr, result);
240 magnitudeVectorPtr += 8;
243 number = eighthPoints * 8;
244 for(; number < num_points; number++){
245 float val1Real = *complexVectorPtr++;
246 float val1Imag = *complexVectorPtr++;
247 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
254 #include <pmmintrin.h> 259 unsigned int num_points)
261 unsigned int number = 0;
262 const unsigned int quarterPoints = num_points / 4;
264 const float* complexVectorPtr = (
float*) complexVector;
265 float* magnitudeVectorPtr = magnitudeVector;
267 __m128 cplxValue1, cplxValue2, result;
268 for(; number < quarterPoints; number++){
269 cplxValue1 = _mm_load_ps(complexVectorPtr);
270 complexVectorPtr += 4;
272 cplxValue2 = _mm_load_ps(complexVectorPtr);
273 complexVectorPtr += 4;
276 _mm_store_ps(magnitudeVectorPtr, result);
277 magnitudeVectorPtr += 4;
280 number = quarterPoints * 4;
281 for(; number < num_points; number++){
282 float val1Real = *complexVectorPtr++;
283 float val1Imag = *complexVectorPtr++;
284 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
291 #include <xmmintrin.h> 296 unsigned int num_points)
298 unsigned int number = 0;
299 const unsigned int quarterPoints = num_points / 4;
301 const float* complexVectorPtr = (
float*)complexVector;
302 float* magnitudeVectorPtr = magnitudeVector;
304 __m128 cplxValue1, cplxValue2, result;
305 for(;number < quarterPoints; number++){
306 cplxValue1 = _mm_load_ps(complexVectorPtr);
307 complexVectorPtr += 4;
309 cplxValue2 = _mm_load_ps(complexVectorPtr);
310 complexVectorPtr += 4;
313 _mm_store_ps(magnitudeVectorPtr, result);
314 magnitudeVectorPtr += 4;
317 number = quarterPoints * 4;
318 for(; number < num_points; number++){
319 float val1Real = *complexVectorPtr++;
320 float val1Imag = *complexVectorPtr++;
321 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
328 #include <arm_neon.h> 332 unsigned int num_points)
334 unsigned int number = 0;
335 const unsigned int quarterPoints = num_points / 4;
337 const float* complexVectorPtr = (
float*)complexVector;
338 float* magnitudeVectorPtr = magnitudeVector;
340 float32x4x2_t cmplx_val;
342 for(;number < quarterPoints; number++){
343 cmplx_val = vld2q_f32(complexVectorPtr);
344 complexVectorPtr += 8;
346 cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
347 cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);
349 result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);
351 vst1q_f32(magnitudeVectorPtr, result);
352 magnitudeVectorPtr += 4;
355 number = quarterPoints * 4;
356 for(; number < num_points; number++){
357 float val1Real = *complexVectorPtr++;
358 float val1Imag = *complexVectorPtr++;
359 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
365 #ifdef LV_HAVE_GENERIC 369 unsigned int num_points)
371 const float* complexVectorPtr = (
float*)complexVector;
372 float* magnitudeVectorPtr = magnitudeVector;
373 unsigned int number = 0;
374 for(number = 0; number < num_points; number++){
375 const float real = *complexVectorPtr++;
376 const float imag = *complexVectorPtr++;
377 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
static void volk_32fc_magnitude_squared_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:156
static void volk_32fc_magnitude_squared_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:258
static void volk_32fc_magnitude_squared_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:192
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:53
static void volk_32fc_magnitude_squared_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:83
static void volk_32fc_magnitude_squared_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:331
static void volk_32fc_magnitude_squared_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:221
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:33
float complex lv_32fc_t
Definition: volk_complex.h:61
static __m256 _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:57
static void volk_32fc_magnitude_squared_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:295
static void volk_32fc_magnitude_squared_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:119
static void volk_32fc_magnitude_squared_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:368