55 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H 56 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H 64 #include <immintrin.h> 67 volk_16ic_s32f_magnitude_32f_a_avx2(
float* magnitudeVector,
const lv_16sc_t* complexVector,
68 const float scalar,
unsigned int num_points)
70 unsigned int number = 0;
71 const unsigned int eighthPoints = num_points / 8;
73 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
74 float* magnitudeVectorPtr = magnitudeVector;
76 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
78 __m256 cplxValue1, cplxValue2, result;
80 __m128i short1, short2;
81 __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
83 for(;number < eighthPoints; number++){
85 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
86 complexVectorPtr += 16;
87 short1 = _mm256_extracti128_si256(int1,0);
88 short2 = _mm256_extracti128_si256(int1,1);
90 int1 = _mm256_cvtepi16_epi32(short1);
91 int2 = _mm256_cvtepi16_epi32(short2);
92 cplxValue1 = _mm256_cvtepi32_ps(int1);
93 cplxValue2 = _mm256_cvtepi32_ps(int2);
95 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
96 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
98 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
99 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
101 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
102 result = _mm256_permutevar8x32_ps(result, idx);
104 result = _mm256_sqrt_ps(result);
106 _mm256_store_ps(magnitudeVectorPtr, result);
108 magnitudeVectorPtr += 8;
111 number = eighthPoints * 8;
112 magnitudeVectorPtr = &magnitudeVector[number];
113 complexVectorPtr = (
const int16_t*)&complexVector[number];
114 for(; number < num_points; number++){
115 float val1Real = (float)(*complexVectorPtr++) / scalar;
116 float val1Imag = (float)(*complexVectorPtr++) / scalar;
117 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
124 #include <pmmintrin.h> 128 const float scalar,
unsigned int num_points)
130 unsigned int number = 0;
131 const unsigned int quarterPoints = num_points / 4;
133 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
134 float* magnitudeVectorPtr = magnitudeVector;
136 __m128 invScalar = _mm_set_ps1(1.0/scalar);
138 __m128 cplxValue1, cplxValue2, result;
142 for(;number < quarterPoints; number++){
144 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
145 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
146 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
147 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
149 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
150 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
151 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
152 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
154 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
155 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
157 complexVectorPtr += 8;
159 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
160 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
162 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
163 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
165 result = _mm_hadd_ps(cplxValue1, cplxValue2);
167 result = _mm_sqrt_ps(result);
169 _mm_store_ps(magnitudeVectorPtr, result);
171 magnitudeVectorPtr += 4;
174 number = quarterPoints * 4;
175 magnitudeVectorPtr = &magnitudeVector[number];
176 complexVectorPtr = (
const int16_t*)&complexVector[number];
177 for(; number < num_points; number++){
178 float val1Real = (float)(*complexVectorPtr++) / scalar;
179 float val1Imag = (float)(*complexVectorPtr++) / scalar;
180 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
186 #include <xmmintrin.h> 190 const float scalar,
unsigned int num_points)
192 unsigned int number = 0;
193 const unsigned int quarterPoints = num_points / 4;
195 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
196 float* magnitudeVectorPtr = magnitudeVector;
198 const float iScalar = 1.0 / scalar;
199 __m128 invScalar = _mm_set_ps1(iScalar);
201 __m128 cplxValue1, cplxValue2, result, re, im;
205 for(;number < quarterPoints; number++){
206 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
207 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
208 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
209 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
211 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
212 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
213 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
214 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
216 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
217 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
219 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
220 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
222 complexVectorPtr += 8;
224 cplxValue1 = _mm_mul_ps(re, invScalar);
225 cplxValue2 = _mm_mul_ps(im, invScalar);
227 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
228 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
230 result = _mm_add_ps(cplxValue1, cplxValue2);
232 result = _mm_sqrt_ps(result);
234 _mm_store_ps(magnitudeVectorPtr, result);
236 magnitudeVectorPtr += 4;
239 number = quarterPoints * 4;
240 magnitudeVectorPtr = &magnitudeVector[number];
241 complexVectorPtr = (
const int16_t*)&complexVector[number];
242 for(; number < num_points; number++){
243 float val1Real = (float)(*complexVectorPtr++) * iScalar;
244 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
245 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar kernel: magnitude of each 16-bit complex sample after
 * scaling both components by 1 / scalar.
 *
 * magnitudeVector  output, one float per complex sample.
 * complexVector    input, read as interleaved int16 pairs (re, im).
 * scalar           divisor; its reciprocal is computed once and reused.
 * num_points       number of complex samples to process.
 */
static inline void
volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
                                     const lv_16sc_t* complexVector,
                                     const float scalar,
                                     unsigned int num_points)
{
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;
    unsigned int number = 0;
    const float invScalar = 1.0 / scalar;

    for (number = 0; number < num_points; number++) {
        /* Each sample contributes two consecutive int16 values. */
        float real = ((float)(*complexVectorPtr++)) * invScalar;
        float imag = ((float)(*complexVectorPtr++)) * invScalar;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}

#endif /* LV_HAVE_GENERIC */
270 #ifdef LV_HAVE_ORC_DISABLED 273 volk_16ic_s32f_magnitude_32f_a_orc_impl(
float* magnitudeVector,
const lv_16sc_t* complexVector,
274 const float scalar,
unsigned int num_points);
277 volk_16ic_s32f_magnitude_32f_u_orc(
float* magnitudeVector,
const lv_16sc_t* complexVector,
278 const float scalar,
unsigned int num_points)
280 volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
287 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H 288 #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H 291 #include <inttypes.h> 296 #include <immintrin.h> 299 volk_16ic_s32f_magnitude_32f_u_avx2(
float* magnitudeVector,
const lv_16sc_t* complexVector,
300 const float scalar,
unsigned int num_points)
302 unsigned int number = 0;
303 const unsigned int eighthPoints = num_points / 8;
305 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
306 float* magnitudeVectorPtr = magnitudeVector;
308 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
310 __m256 cplxValue1, cplxValue2, result;
312 __m128i short1, short2;
313 __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
315 for(;number < eighthPoints; number++){
317 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
318 complexVectorPtr += 16;
319 short1 = _mm256_extracti128_si256(int1,0);
320 short2 = _mm256_extracti128_si256(int1,1);
322 int1 = _mm256_cvtepi16_epi32(short1);
323 int2 = _mm256_cvtepi16_epi32(short2);
324 cplxValue1 = _mm256_cvtepi32_ps(int1);
325 cplxValue2 = _mm256_cvtepi32_ps(int2);
327 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
328 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
330 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
331 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
333 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
334 result = _mm256_permutevar8x32_ps(result, idx);
336 result = _mm256_sqrt_ps(result);
338 _mm256_storeu_ps(magnitudeVectorPtr, result);
340 magnitudeVectorPtr += 8;
343 number = eighthPoints * 8;
344 magnitudeVectorPtr = &magnitudeVector[number];
345 complexVectorPtr = (
const int16_t*)&complexVector[number];
346 for(; number < num_points; number++){
347 float val1Real = (float)(*complexVectorPtr++) / scalar;
348 float val1Imag = (float)(*complexVectorPtr++) / scalar;
349 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
/*
 * Cross-references emitted by the documentation generator:
 *
 *   short complex lv_16sc_t
 *       Definition: volk_complex.h:58
 *
 *   static void volk_16ic_s32f_magnitude_32f_a_sse(float *magnitudeVector,
 *       const lv_16sc_t *complexVector, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_16ic_s32f_magnitude_32f.h:189
 *
 *   static void volk_16ic_s32f_magnitude_32f_a_sse3(float *magnitudeVector,
 *       const lv_16sc_t *complexVector, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_16ic_s32f_magnitude_32f.h:127
 *
 *   #define __VOLK_ATTR_ALIGNED(x)
 *       Definition: volk_common.h:33
 *
 *   static void volk_16ic_s32f_magnitude_32f_generic(float *magnitudeVector,
 *       const lv_16sc_t *complexVector, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_16ic_s32f_magnitude_32f.h:255
 */