/*
 * Aligned AVX2 variant. Splits interleaved 16-bit I/Q samples from
 * complexVector into the float buffers iBuffer and qBuffer, scaling every
 * value by 1/scalar. Eight complex points are handled per vector iteration;
 * the remaining num_points % 8 points go through a scalar loop.
 *
 * The aligned load/store intrinsics used here require 32-byte aligned
 * pointers.
 *
 * NOTE(review): this listing has source line numbers fused into the text and
 * several lines are not visible (braces, the declarations of `number` and
 * `cplxValue128`, and any advance of iBufferPtr/qBufferPtr inside the vector
 * loop). Confirm against the original header before relying on it.
 */
56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H 57 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H 64 #include <immintrin.h> 67 void volk_16ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
float* qBuffer,
const lv_16sc_t* complexVector,
68 const float scalar,
unsigned int num_points)
70 float* iBufferPtr = iBuffer;
71 float* qBufferPtr = qBuffer;
/* Number of full 8-point groups the vector loop will process. */
74 const uint64_t eighthPoints = num_points / 8;
75 __m256 cplxValue1, cplxValue2, iValue, qValue;
76 __m256i cplxValueA, cplxValueB;
/* Reciprocal is computed once so the vector loop multiplies instead of divides. */
79 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
/* View the complex samples as a flat int16 stream: I0, Q0, I1, Q1, ... */
80 int16_t* complexVectorPtr = (int16_t*)complexVector;
/*
 * _mm256_shuffle_ps works per 128-bit lane, so the shuffled result is ordered
 * 0,1,4,5 | 2,3,6,7. This index vector restores sample order 0..7.
 */
81 __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
83 for(;number < eighthPoints; number++){
/* Aligned load of 16 int16 values, i.e. 8 complex points. */
85 cplxValueA = _mm256_load_si256((__m256i*) complexVectorPtr);
86 complexVectorPtr += 16;
/* Low 128 bits: sign-extend 8 x int16 to int32, then convert to float. */
89 cplxValue128 = _mm256_extracti128_si256(cplxValueA, 0);
90 cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
91 cplxValue1 = _mm256_cvtepi32_ps(cplxValueB);
/* High 128 bits: same widening and conversion. */
92 cplxValue128 = _mm256_extracti128_si256(cplxValueA, 1);
93 cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
94 cplxValue2 = _mm256_cvtepi32_ps(cplxValueB);
/* Apply the 1/scalar scaling to all 16 floats. */
96 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
97 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
/* Even elements (0 and 2 of each source lane) are the I components. */
100 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
101 iValue = _mm256_permutevar8x32_ps(iValue,idx);
/* Odd elements (1 and 3 of each source lane) are the Q components. */
103 qValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
104 qValue = _mm256_permutevar8x32_ps(qValue,idx);
/* Aligned stores of 8 I values and 8 Q values. */
106 _mm256_store_ps(iBufferPtr, iValue);
107 _mm256_store_ps(qBufferPtr, qValue);
/* Scalar tail: restart at the first point the vector loop did not cover. */
113 number = eighthPoints * 8;
114 complexVectorPtr = (int16_t*)&complexVector[number];
115 for(; number < num_points; number++){
/* The tail divides by scalar directly rather than multiplying by the reciprocal. */
116 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
117 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
/*
 * Aligned SSE variant: converts interleaved 16-bit I/Q samples to float,
 * scales them by 1/scalar and writes I and Q to separate buffers, four
 * complex points per vector iteration with a scalar loop for the remainder.
 *
 * NOTE(review): the line carrying the function name is not visible in this
 * listing; presumably this is volk_16ic_s32f_deinterleave_32f_x2_a_sse, which
 * the cross-reference at the end of the listing places at line 126. The
 * declarations of `number` and `floatBuffer`, the braces, and any advance of
 * iBufferPtr/qBufferPtr inside the vector loop are also not visible. Confirm
 * against the original header.
 */
123 #include <xmmintrin.h> 127 const float scalar,
unsigned int num_points)
129 float* iBufferPtr = iBuffer;
130 float* qBufferPtr = qBuffer;
/* Number of full 4-point groups the vector loop will process. */
133 const uint64_t quarterPoints = num_points / 4;
134 __m128 cplxValue1, cplxValue2, iValue, qValue;
/* Reciprocal is computed once so the vector loop multiplies instead of divides. */
136 __m128 invScalar = _mm_set_ps1(1.0/scalar);
/* View the complex samples as a flat int16 stream: I0, Q0, I1, Q1, ... */
137 int16_t* complexVectorPtr = (int16_t*)complexVector;
141 for(;number < quarterPoints; number++){
/*
 * The int16 -> float conversion is done with scalar casts into a staging
 * buffer, one element at a time, before the data is loaded into registers.
 */
143 floatBuffer[0] = (float)(complexVectorPtr[0]);
144 floatBuffer[1] = (float)(complexVectorPtr[1]);
145 floatBuffer[2] = (float)(complexVectorPtr[2]);
146 floatBuffer[3] = (float)(complexVectorPtr[3]);
148 floatBuffer[4] = (float)(complexVectorPtr[4]);
149 floatBuffer[5] = (float)(complexVectorPtr[5]);
150 floatBuffer[6] = (float)(complexVectorPtr[6]);
151 floatBuffer[7] = (float)(complexVectorPtr[7]);
/* _mm_load_ps requires 16-byte alignment of floatBuffer (declaration not visible here). */
153 cplxValue1 = _mm_load_ps(&floatBuffer[0]);
154 cplxValue2 = _mm_load_ps(&floatBuffer[4]);
/* 8 int16 values consumed = 4 complex points. */
156 complexVectorPtr += 8;
/* Apply the 1/scalar scaling to all 8 floats. */
158 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
159 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
/* Elements 0 and 2 of each register are the I components. */
162 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
/* Elements 1 and 3 of each register are the Q components. */
164 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
/* Aligned stores of 4 I values and 4 Q values. */
166 _mm_store_ps(iBufferPtr, iValue);
167 _mm_store_ps(qBufferPtr, qValue);
/* Scalar tail: restart at the first point the vector loop did not cover. */
173 number = quarterPoints * 4;
174 complexVectorPtr = (int16_t*)&complexVector[number];
175 for(; number < num_points; number++){
/* The tail divides by scalar directly rather than multiplying by the reciprocal. */
176 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
177 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
/*
 * Portable scalar variant, compiled when LV_HAVE_GENERIC is defined. For each
 * of the num_points complex samples it converts the 16-bit I and Q values to
 * float, divides each by scalar, and appends them to iBuffer and qBuffer.
 *
 * NOTE(review): the line carrying the function name is not visible in this
 * listing; presumably this is volk_16ic_s32f_deinterleave_32f_x2_generic,
 * which the cross-reference at the end of the listing places at line 185.
 * The declaration of `number` and the braces are also not visible.
 */
182 #ifdef LV_HAVE_GENERIC 186 const float scalar,
unsigned int num_points)
/* View the complex samples as a flat int16 stream: I0, Q0, I1, Q1, ... */
188 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
189 float* iBufferPtr = iBuffer;
190 float* qBufferPtr = qBuffer;
192 for(number = 0; number < num_points; number++){
/* Two consecutive int16 reads per point: first I, then Q. */
193 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
194 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
/*
 * NEON variant: de-interleaves 16-bit I/Q samples into separate float buffers
 * scaled by 1/scalar, four complex points per vector iteration with a scalar
 * loop for the remainder.
 *
 * NOTE(review): the line carrying the function name is not visible in this
 * listing; presumably this is volk_16ic_s32f_deinterleave_32f_x2_neon, which
 * the cross-reference at the end of the listing places at line 202. The
 * declaration of `number`, the braces, and any advance of
 * iBufferPtr/qBufferPtr inside the vector loop are also not visible. Confirm
 * against the original header.
 */
200 #include <arm_neon.h> 203 const float scalar,
unsigned int num_points)
/* View the complex samples as a flat int16 stream: I0, Q0, I1, Q1, ... */
205 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
206 float* iBufferPtr = iBuffer;
207 float* qBufferPtr = qBuffer;
/* NOTE(review): despite its name this holds num_points / 4 (groups of four points). */
208 unsigned int eighth_points = num_points / 4;
/* Reciprocal is computed once and broadcast to all four lanes. */
210 float iScalar = 1.f/scalar;
211 float32x4_t invScalar;
212 invScalar = vld1q_dup_f32(&iScalar);
214 int16x4x2_t complexInput_s16;
215 int32x4x2_t complexInput_s32;
216 float32x4x2_t complexFloat;
218 for(number = 0; number < eighth_points; number++){
/* vld2 de-interleaves on load: val[0] gets the four I values, val[1] the four Q values. */
219 complexInput_s16 = vld2_s16(complexVectorPtr);
/* Widen int16 -> int32. */
220 complexInput_s32.val[0] = vmovl_s16(complexInput_s16.val[0]);
221 complexInput_s32.val[1] = vmovl_s16(complexInput_s16.val[1]);
/* Convert int32 -> float. */
222 complexFloat.val[0] = vcvtq_f32_s32(complexInput_s32.val[0]);
223 complexFloat.val[1] = vcvtq_f32_s32(complexInput_s32.val[1]);
/* Apply the 1/scalar scaling. */
224 complexFloat.val[0] = vmulq_f32(complexFloat.val[0], invScalar);
225 complexFloat.val[1] = vmulq_f32(complexFloat.val[1], invScalar);
226 vst1q_f32(iBufferPtr, complexFloat.val[0]);
227 vst1q_f32(qBufferPtr, complexFloat.val[1]);
/* 8 int16 values consumed = 4 complex points. */
228 complexVectorPtr += 8;
/* Scalar tail for the points left over after the last full group of four. */
233 for(number = eighth_points*4; number < num_points; number++){
/* The tail divides by scalar directly rather than multiplying by the reciprocal. */
234 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
235 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
/*
 * Prototype only: the definition of this routine is not part of this listing.
 * It takes the same five arguments as the other variants (I output, Q output,
 * complex input, scalar, point count).
 *
 * NOTE(review): the return type and any storage-class specifier are not
 * visible on this line of the listing; confirm against the original header.
 */
242 volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(
float* iBuffer,
float* qBuffer,
const lv_16sc_t* complexVector,
243 const float scalar,
unsigned int num_points);
/*
 * Thin wrapper: forwards all five arguments unchanged to
 * volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl and does no work of its own.
 *
 * NOTE(review): this entry point is named `_u_orc` but calls the `_a_`
 * implementation; confirm that the implementation places no alignment
 * requirement on its buffers. The return type and braces are not visible in
 * this listing.
 */
246 volk_16ic_s32f_deinterleave_32f_x2_u_orc(
float* iBuffer,
float* qBuffer,
const lv_16sc_t* complexVector,
247 const float scalar,
unsigned int num_points)
249 volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
/*
 * Unaligned AVX2 variant. Splits interleaved 16-bit I/Q samples from
 * complexVector into the float buffers iBuffer and qBuffer, scaling every
 * value by 1/scalar. Eight complex points are handled per vector iteration;
 * the remaining num_points % 8 points go through a scalar loop.
 *
 * Uses the unaligned load/store intrinsics, so the buffers need no particular
 * alignment.
 *
 * NOTE(review): this listing has source line numbers fused into the text and
 * several lines are not visible (braces, the declaration of `number`, and any
 * advance of iBufferPtr/qBufferPtr inside the vector loop). Confirm against
 * the original header before relying on it.
 */
257 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_u_H 258 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_u_H 261 #include <inttypes.h> 265 #include <immintrin.h> 268 void volk_16ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
float* qBuffer,
const lv_16sc_t* complexVector,
269 const float scalar,
unsigned int num_points)
271 float* iBufferPtr = iBuffer;
272 float* qBufferPtr = qBuffer;
/* Number of full 8-point groups the vector loop will process. */
275 const uint64_t eighthPoints = num_points / 8;
276 __m256 cplxValue1, cplxValue2, iValue, qValue;
277 __m256i cplxValueA, cplxValueB;
278 __m128i cplxValue128;
/* Reciprocal is computed once so the vector loop multiplies instead of divides. */
280 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
/* View the complex samples as a flat int16 stream: I0, Q0, I1, Q1, ... */
281 int16_t* complexVectorPtr = (int16_t*)complexVector;
/*
 * _mm256_shuffle_ps works per 128-bit lane, so the shuffled result is ordered
 * 0,1,4,5 | 2,3,6,7. This index vector restores sample order 0..7.
 */
282 __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
284 for(;number < eighthPoints; number++){
/* Unaligned load of 16 int16 values, i.e. 8 complex points. */
286 cplxValueA = _mm256_loadu_si256((__m256i*) complexVectorPtr);
287 complexVectorPtr += 16;
/* Low 128 bits: sign-extend 8 x int16 to int32, then convert to float. */
290 cplxValue128 = _mm256_extracti128_si256(cplxValueA, 0);
291 cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
292 cplxValue1 = _mm256_cvtepi32_ps(cplxValueB);
/* High 128 bits: same widening and conversion. */
293 cplxValue128 = _mm256_extracti128_si256(cplxValueA, 1);
294 cplxValueB = _mm256_cvtepi16_epi32(cplxValue128);
295 cplxValue2 = _mm256_cvtepi32_ps(cplxValueB);
/* Apply the 1/scalar scaling to all 16 floats. */
297 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
298 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
/* Even elements (0 and 2 of each source lane) are the I components. */
301 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
302 iValue = _mm256_permutevar8x32_ps(iValue,idx);
/* Odd elements (1 and 3 of each source lane) are the Q components. */
304 qValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
305 qValue = _mm256_permutevar8x32_ps(qValue,idx);
/* Unaligned stores of 8 I values and 8 Q values. */
307 _mm256_storeu_ps(iBufferPtr, iValue);
308 _mm256_storeu_ps(qBufferPtr, qValue);
/* Scalar tail: restart at the first point the vector loop did not cover. */
314 number = eighthPoints * 8;
315 complexVectorPtr = (int16_t*)&complexVector[number];
316 for(; number < num_points; number++){
/* The tail divides by scalar directly rather than multiplying by the reciprocal. */
317 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
318 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
static void volk_16ic_s32f_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:202
static void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:126
short complex lv_16sc_t
Definition: volk_complex.h:58
static void volk_16ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_32f_x2.h:185
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33