73 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H 74 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H 82 #include <immintrin.h> 85 volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
86 const float scalar,
unsigned int num_points)
88 unsigned int number = 0;
89 const unsigned int eighthPoints = num_points / 8;
91 const float* complexVectorPtr = (
const float*)complexVector;
92 int16_t* magnitudeVectorPtr = magnitudeVector;
94 __m256 vScalar = _mm256_set1_ps(scalar);
95 __m256i idx = _mm256_set_epi32(0,0,0,0,5,1,4,0);
96 __m256 cplxValue1, cplxValue2, result;
100 for(;number < eighthPoints; number++){
101 cplxValue1 = _mm256_load_ps(complexVectorPtr);
102 complexVectorPtr += 8;
104 cplxValue2 = _mm256_load_ps(complexVectorPtr);
105 complexVectorPtr += 8;
107 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
108 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
110 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
112 result = _mm256_sqrt_ps(result);
114 result = _mm256_mul_ps(result, vScalar);
116 resultInt = _mm256_cvtps_epi32(result);
117 resultInt = _mm256_packs_epi32(resultInt, resultInt);
118 resultInt = _mm256_permutevar8x32_epi32(resultInt, idx);
119 resultShort = _mm256_extracti128_si256(resultInt,0);
120 _mm_store_si128((__m128i*)magnitudeVectorPtr,resultShort);
121 magnitudeVectorPtr += 8;
124 number = eighthPoints * 8;
125 magnitudeVectorPtr = &magnitudeVector[number];
126 for(; number < num_points; number++){
127 float val1Real = *complexVectorPtr++;
128 float val1Imag = *complexVectorPtr++;
129 *magnitudeVectorPtr++ = (int16_t)
rintf(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
#ifdef LV_HAVE_SSE3
#include <volk/volk_common.h> /* __VOLK_ATTR_ALIGNED */
#include <pmmintrin.h>

/*!
 * \brief Scaled magnitude of complex floats, written as 16-bit integers
 *        (SSE3, aligned input).
 *
 * For every point computes sqrt(re*re + im*im) * scalar and stores the
 * value rounded with rintf() as int16_t.
 *
 * \param magnitudeVector Output buffer, num_points int16_t values.
 * \param complexVector   Input buffer, num_points interleaved (re, im) float
 *                        pairs. Read with aligned 128-bit loads.
 * \param scalar          Factor applied to each magnitude before rounding.
 * \param num_points      Number of complex points to process.
 */
static inline void
volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
                                    const lv_32fc_t* complexVector,
                                    const float scalar,
                                    unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;

  __m128 vScalar = _mm_set_ps1(scalar);

  __m128 cplxValue1, cplxValue2, result;

  /* Staging buffer for the aligned store below; must be 16-byte aligned. */
  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

  for (; number < quarterPoints; number++) {
    /* Load 4 complex points (8 floats) as two 128-bit vectors. */
    cplxValue1 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    cplxValue2 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    /* Square every real and imaginary component. */
    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);

    /* Adjacent-pair sums give re^2 + im^2 for points 0..3 in order. */
    result = _mm_hadd_ps(cplxValue1, cplxValue2);

    result = _mm_sqrt_ps(result);

    result = _mm_mul_ps(result, vScalar);

    /* Round and narrow in scalar code. */
    _mm_store_ps(floatBuffer, result);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
  }

  /* Scalar tail for the remaining num_points % 4 points. */
  number = quarterPoints * 4;
  magnitudeVectorPtr = &magnitudeVector[number];
  for (; number < num_points; number++) {
    float val1Real = *complexVectorPtr++;
    float val1Imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ =
        (int16_t)rintf(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
  }
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE
#include <volk/volk_common.h> /* __VOLK_ATTR_ALIGNED */
#include <xmmintrin.h>

/*!
 * \brief Scaled magnitude of complex floats, written as 16-bit integers
 *        (SSE, aligned input).
 *
 * For every point computes sqrt(re*re + im*im) * scalar and stores the
 * value rounded with rintf() as int16_t.
 *
 * \param magnitudeVector Output buffer, num_points int16_t values.
 * \param complexVector   Input buffer, num_points interleaved (re, im) float
 *                        pairs. Read with aligned 128-bit loads.
 * \param scalar          Factor applied to each magnitude before rounding.
 * \param num_points      Number of complex points to process.
 */
static inline void
volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
                                   const lv_32fc_t* complexVector,
                                   const float scalar,
                                   unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;

  __m128 vScalar = _mm_set_ps1(scalar);

  __m128 cplxValue1, cplxValue2, iValue, qValue, result;

  /* Staging buffer for the aligned store below; must be 16-byte aligned. */
  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

  for (; number < quarterPoints; number++) {
    /* Load 4 complex points (8 floats) as two 128-bit vectors. */
    cplxValue1 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    cplxValue2 = _mm_load_ps(complexVectorPtr);
    complexVectorPtr += 4;

    /* De-interleave: even elements are the real parts ... */
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    /* ... odd elements are the imaginary parts. */
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

    iValue = _mm_mul_ps(iValue, iValue);
    qValue = _mm_mul_ps(qValue, qValue);

    /* re^2 + im^2 for points 0..3. */
    result = _mm_add_ps(iValue, qValue);

    result = _mm_sqrt_ps(result);

    result = _mm_mul_ps(result, vScalar);

    /* Round and narrow in scalar code. */
    _mm_store_ps(floatBuffer, result);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
  }

  /* Scalar tail for the remaining num_points % 4 points. */
  number = quarterPoints * 4;
  magnitudeVectorPtr = &magnitudeVector[number];
  for (; number < num_points; number++) {
    float val1Real = *complexVectorPtr++;
    float val1Imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ =
        (int16_t)rintf(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
  }
}
#endif /* LV_HAVE_SSE */
244 #ifdef LV_HAVE_GENERIC 248 const float scalar,
unsigned int num_points)
250 const float* complexVectorPtr = (
float*)complexVector;
251 int16_t* magnitudeVectorPtr = magnitudeVector;
252 unsigned int number = 0;
253 for(number = 0; number < num_points; number++){
254 const float real = *complexVectorPtr++;
255 const float imag = *complexVectorPtr++;
256 *magnitudeVectorPtr++ = (int16_t)
rintf(sqrtf((real*real) + (imag*imag)) * scalar);
#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H

#include <inttypes.h>
#include <math.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Scaled magnitude of complex floats, written as 16-bit integers
 *        (AVX2, unaligned buffers).
 *
 * For every point computes sqrt(re*re + im*im) * scalar and stores the
 * rounded result as int16_t. Uses unaligned loads and stores, so neither
 * buffer needs any particular alignment.
 *
 * \param magnitudeVector Output buffer, num_points int16_t values.
 * \param complexVector   Input buffer, num_points interleaved (re, im) float
 *                        pairs.
 * \param scalar          Factor applied to each magnitude before rounding.
 * \param num_points      Number of complex points to process.
 *
 * NOTE(review): the vector loop narrows with a saturating pack, while the
 * scalar tail uses a plain cast to int16_t; results for magnitudes outside
 * the int16_t range may therefore differ between the two paths.
 */
static inline void
volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
                                    const lv_32fc_t* complexVector,
                                    const float scalar,
                                    unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  const float* complexVectorPtr = (const float*)complexVector;
  int16_t* magnitudeVectorPtr = magnitudeVector;

  __m256 vScalar = _mm256_set1_ps(scalar);
  /* _mm256_set_epi32 lists elements high to low, so output dwords 0..3
   * pick source dwords 0, 4, 1, 5. This undoes the per-lane interleaving
   * left behind by hadd + packs (see below). */
  __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
  __m256 cplxValue1, cplxValue2, result;
  __m256i resultInt;
  __m128i resultShort;

  for (; number < eighthPoints; number++) {
    /* Load 8 complex points (16 floats) as two 256-bit vectors. */
    cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    complexVectorPtr += 8;

    cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
    complexVectorPtr += 8;

    /* Square every real and imaginary component. */
    cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
    cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);

    /* re^2 + im^2 per point. hadd works within each 128-bit lane, so the
     * point order is now 0,1,4,5 | 2,3,6,7. */
    result = _mm256_hadd_ps(cplxValue1, cplxValue2);

    result = _mm256_sqrt_ps(result);

    result = _mm256_mul_ps(result, vScalar);

    /* float -> int32 -> int16 (saturating pack, also per lane). */
    resultInt = _mm256_cvtps_epi32(result);
    resultInt = _mm256_packs_epi32(resultInt, resultInt);
    /* Gather the four dwords holding points 0..7 into the low 128 bits,
     * restoring the natural order. */
    resultInt = _mm256_permutevar8x32_epi32(resultInt, idx);
    resultShort = _mm256_extracti128_si256(resultInt, 0);
    _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
    magnitudeVectorPtr += 8;
  }

  /* Scalar tail for the remaining num_points % 8 points. complexVectorPtr
   * already points just past the vectorised input. */
  number = eighthPoints * 8;
  magnitudeVectorPtr = &magnitudeVector[number];
  for (; number < num_points; number++) {
    float val1Real = *complexVectorPtr++;
    float val1Imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ =
        (int16_t)rintf(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
  }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
static float rintf(float x)
Definition: config.h:31
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:138
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:247
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:191