73 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H 74 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H 82 #include <immintrin.h> 85 volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
const lv_32fc_t* complexVector,
86 const float scalar,
unsigned int num_points)
88 unsigned int number = 0;
89 const unsigned int eighthPoints = num_points / 8;
91 const float* complexVectorPtr = (
float*)complexVector;
92 int16_t* iBufferPtr = iBuffer;
94 __m256 vScalar = _mm256_set1_ps(scalar);
96 __m256 cplxValue1, cplxValue2, iValue;
100 __m256i idx = _mm256_set_epi32(3,3,3,3,5,1,4,0);
102 for(;number < eighthPoints; number++){
103 cplxValue1 = _mm256_load_ps(complexVectorPtr);
104 complexVectorPtr += 8;
106 cplxValue2 = _mm256_load_ps(complexVectorPtr);
107 complexVectorPtr += 8;
110 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
112 iValue = _mm256_mul_ps(iValue, vScalar);
114 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
115 a = _mm256_cvtps_epi32(iValue);
116 a = _mm256_packs_epi32(a,a);
117 a = _mm256_permutevar8x32_epi32(a,idx);
118 b = _mm256_extracti128_si256(a,0);
120 _mm_store_si128((__m128i*)iBufferPtr,b);
125 number = eighthPoints * 8;
126 iBufferPtr = &iBuffer[number];
127 for(; number < num_points; number++){
128 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
137 #include <xmmintrin.h> 141 const float scalar,
unsigned int num_points)
143 unsigned int number = 0;
144 const unsigned int quarterPoints = num_points / 4;
146 const float* complexVectorPtr = (
float*)complexVector;
147 int16_t* iBufferPtr = iBuffer;
149 __m128 vScalar = _mm_set_ps1(scalar);
151 __m128 cplxValue1, cplxValue2, iValue;
155 for(;number < quarterPoints; number++){
156 cplxValue1 = _mm_load_ps(complexVectorPtr);
157 complexVectorPtr += 4;
159 cplxValue2 = _mm_load_ps(complexVectorPtr);
160 complexVectorPtr += 4;
163 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
165 iValue = _mm_mul_ps(iValue, vScalar);
167 _mm_store_ps(floatBuffer, iValue);
168 *iBufferPtr++ = (int16_t)(floatBuffer[0]);
169 *iBufferPtr++ = (int16_t)(floatBuffer[1]);
170 *iBufferPtr++ = (int16_t)(floatBuffer[2]);
171 *iBufferPtr++ = (int16_t)(floatBuffer[3]);
174 number = quarterPoints * 4;
175 iBufferPtr = &iBuffer[number];
176 for(; number < num_points; number++){
177 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
185 #ifdef LV_HAVE_GENERIC 189 const float scalar,
unsigned int num_points)
191 const float* complexVectorPtr = (
float*)complexVector;
192 int16_t* iBufferPtr = iBuffer;
193 unsigned int number = 0;
194 for(number = 0; number < num_points; number++){
195 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
204 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H 205 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H 208 #include <inttypes.h> 212 #include <immintrin.h> 215 volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
const lv_32fc_t* complexVector,
216 const float scalar,
unsigned int num_points)
218 unsigned int number = 0;
219 const unsigned int eighthPoints = num_points / 8;
221 const float* complexVectorPtr = (
float*)complexVector;
222 int16_t* iBufferPtr = iBuffer;
224 __m256 vScalar = _mm256_set1_ps(scalar);
226 __m256 cplxValue1, cplxValue2, iValue;
230 __m256i idx = _mm256_set_epi32(3,3,3,3,5,1,4,0);
232 for(;number < eighthPoints; number++){
233 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
234 complexVectorPtr += 8;
236 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
237 complexVectorPtr += 8;
240 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
242 iValue = _mm256_mul_ps(iValue, vScalar);
244 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
245 a = _mm256_cvtps_epi32(iValue);
246 a = _mm256_packs_epi32(a,a);
247 a = _mm256_permutevar8x32_epi32(a,idx);
248 b = _mm256_extracti128_si256(a,0);
250 _mm_storeu_si128((__m128i*)iBufferPtr,b);
255 number = eighthPoints * 8;
256 iBufferPtr = &iBuffer[number];
257 for(; number < num_points; number++){
258 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:140
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:188
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
float complex lv_32fc_t
Definition: volk_complex.h:61