54 #ifndef INCLUDED_volk_16ic_magnitude_16i_a_H 55 #define INCLUDED_volk_16ic_magnitude_16i_a_H 63 #include <immintrin.h> 66 volk_16ic_magnitude_16i_a_avx2(int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
68 unsigned int number = 0;
69 const unsigned int eighthPoints = num_points / 8;
71 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
72 int16_t* magnitudeVectorPtr = magnitudeVector;
74 __m256 vScalar = _mm256_set1_ps(32768.0);
75 __m256 invScalar = _mm256_set1_ps(1.0/32768.0);
77 __m128i short1, short2;
78 __m256 cplxValue1, cplxValue2, result;
79 __m256i idx = _mm256_set_epi32(0,0,0,0,5,1,4,0);
81 for(;number < eighthPoints; number++){
83 int1 = _mm256_load_si256((__m256i*)complexVectorPtr);
84 complexVectorPtr += 16;
85 short1 = _mm256_extracti128_si256(int1,0);
86 short2 = _mm256_extracti128_si256(int1,1);
88 int1 = _mm256_cvtepi16_epi32(short1);
89 int2 = _mm256_cvtepi16_epi32(short2);
90 cplxValue1 = _mm256_cvtepi32_ps(int1);
91 cplxValue2 = _mm256_cvtepi32_ps(int2);
93 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
94 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
96 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
97 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
99 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
101 result = _mm256_sqrt_ps(result);
103 result = _mm256_mul_ps(result, vScalar);
105 int1 = _mm256_cvtps_epi32(result);
106 int1 = _mm256_packs_epi32(int1, int1);
107 int1 = _mm256_permutevar8x32_epi32(int1, idx);
108 short1 = _mm256_extracti128_si256(int1, 0);
109 _mm_store_si128((__m128i*)magnitudeVectorPtr,short1);
110 magnitudeVectorPtr += 8;
113 number = eighthPoints * 8;
114 magnitudeVectorPtr = &magnitudeVector[number];
115 complexVectorPtr = (
const int16_t*)&complexVector[number];
116 for(; number < num_points; number++){
117 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
118 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
119 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
120 *magnitudeVectorPtr++ = (int16_t)
rintf(val1Result);
126 #include <pmmintrin.h> 131 unsigned int number = 0;
132 const unsigned int quarterPoints = num_points / 4;
134 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
135 int16_t* magnitudeVectorPtr = magnitudeVector;
137 __m128 vScalar = _mm_set_ps1(32768.0);
138 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
140 __m128 cplxValue1, cplxValue2, result;
145 for(;number < quarterPoints; number++){
147 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
148 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
149 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
150 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
152 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
153 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
154 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
155 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
157 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
158 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
160 complexVectorPtr += 8;
162 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
163 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
165 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
166 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
168 result = _mm_hadd_ps(cplxValue1, cplxValue2);
170 result = _mm_sqrt_ps(result);
172 result = _mm_mul_ps(result, vScalar);
174 _mm_store_ps(outputFloatBuffer, result);
175 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[0]);
176 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[1]);
177 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[2]);
178 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[3]);
181 number = quarterPoints * 4;
182 magnitudeVectorPtr = &magnitudeVector[number];
183 complexVectorPtr = (
const int16_t*)&complexVector[number];
184 for(; number < num_points; number++){
185 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
186 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
187 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
188 *magnitudeVectorPtr++ = (int16_t)
rintf(val1Result);
194 #include <xmmintrin.h> 199 unsigned int number = 0;
200 const unsigned int quarterPoints = num_points / 4;
202 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
203 int16_t* magnitudeVectorPtr = magnitudeVector;
205 __m128 vScalar = _mm_set_ps1(32768.0);
206 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
208 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
213 for(;number < quarterPoints; number++){
215 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
216 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
217 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
218 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
220 cplxValue1 = _mm_load_ps(inputFloatBuffer);
221 complexVectorPtr += 4;
223 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
224 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
225 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
226 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
228 cplxValue2 = _mm_load_ps(inputFloatBuffer);
229 complexVectorPtr += 4;
231 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
232 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
235 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
237 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
239 iValue = _mm_mul_ps(iValue, iValue);
240 qValue = _mm_mul_ps(qValue, qValue);
242 result = _mm_add_ps(iValue, qValue);
244 result = _mm_sqrt_ps(result);
246 result = _mm_mul_ps(result, vScalar);
248 _mm_store_ps(outputFloatBuffer, result);
249 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[0]);
250 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[1]);
251 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[2]);
252 *magnitudeVectorPtr++ = (int16_t)
rintf(outputFloatBuffer[3]);
255 number = quarterPoints * 4;
256 magnitudeVectorPtr = &magnitudeVector[number];
257 complexVectorPtr = (
const int16_t*)&complexVector[number];
258 for(; number < num_points; number++){
259 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
260 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
261 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
262 *magnitudeVectorPtr++ = (int16_t)
rintf(val1Result);
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Magnitude of 16-bit complex samples, portable C.
 *
 * Each sample is scaled by 1/32768, its magnitude sqrt(re^2 + im^2) is taken
 * in single precision, and the result is scaled back by 32768 and rounded to
 * int16_t. Results that exceed 32767 saturate to 32767.
 *
 * \param magnitudeVector output, num_points int16_t values
 * \param complexVector input, num_points interleaved (real, imag) int16_t pairs
 * \param num_points number of complex samples to process
 */
static inline void
volk_16ic_magnitude_16i_generic(int16_t* magnitudeVector,
                                const lv_16sc_t* complexVector,
                                unsigned int num_points)
{
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;
  unsigned int number = 0;
  const float scalar = 32768.0f;

  for (number = 0; number < num_points; number++) {
    const float real = ((float)(*complexVectorPtr++)) / scalar;
    const float imag = ((float)(*complexVectorPtr++)) / scalar;
    const float mag = sqrtf((real * real) + (imag * imag)) * scalar;
    /* Saturate: converting an out-of-range float to int16_t is undefined
     * behavior, and the magnitude can reach about 46341. */
    *magnitudeVectorPtr++ = (mag > 32767.0f) ? INT16_MAX : (int16_t)rintf(mag);
  }
}

#endif /* LV_HAVE_GENERIC */
284 #ifdef LV_HAVE_ORC_DISABLED 286 volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
float scalar,
unsigned int num_points);
289 volk_16ic_magnitude_16i_u_orc(int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
291 volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
299 #ifndef INCLUDED_volk_16ic_magnitude_16i_u_H 300 #define INCLUDED_volk_16ic_magnitude_16i_u_H 303 #include <inttypes.h> 308 #include <immintrin.h> 311 volk_16ic_magnitude_16i_u_avx2(int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
313 unsigned int number = 0;
314 const unsigned int eighthPoints = num_points / 8;
316 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
317 int16_t* magnitudeVectorPtr = magnitudeVector;
319 __m256 vScalar = _mm256_set1_ps(32768.0);
320 __m256 invScalar = _mm256_set1_ps(1.0/32768.0);
322 __m128i short1, short2;
323 __m256 cplxValue1, cplxValue2, result;
324 __m256i idx = _mm256_set_epi32(0,0,0,0,5,1,4,0);
326 for(;number < eighthPoints; number++){
328 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
329 complexVectorPtr += 16;
330 short1 = _mm256_extracti128_si256(int1,0);
331 short2 = _mm256_extracti128_si256(int1,1);
333 int1 = _mm256_cvtepi16_epi32(short1);
334 int2 = _mm256_cvtepi16_epi32(short2);
335 cplxValue1 = _mm256_cvtepi32_ps(int1);
336 cplxValue2 = _mm256_cvtepi32_ps(int2);
338 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
339 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
341 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
342 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
344 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
346 result = _mm256_sqrt_ps(result);
348 result = _mm256_mul_ps(result, vScalar);
350 int1 = _mm256_cvtps_epi32(result);
351 int1 = _mm256_packs_epi32(int1, int1);
352 int1 = _mm256_permutevar8x32_epi32(int1, idx);
353 short1 = _mm256_extracti128_si256(int1, 0);
354 _mm_storeu_si128((__m128i*)magnitudeVectorPtr,short1);
355 magnitudeVectorPtr += 8;
358 number = eighthPoints * 8;
359 magnitudeVectorPtr = &magnitudeVector[number];
360 complexVectorPtr = (
const int16_t*)&complexVector[number];
361 for(; number < num_points; number++){
362 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
363 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
364 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
365 *magnitudeVectorPtr++ = (int16_t)
rintf(val1Result);
short complex lv_16sc_t
Definition: volk_complex.h:58
static float rintf(float x)
Definition: config.h:31
static void volk_16ic_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_magnitude_16i.h:197
static void volk_16ic_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_magnitude_16i.h:129
static void volk_16ic_magnitude_16i_generic(int16_t *magnitudeVector, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_magnitude_16i.h:270
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33