#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H

#include <inttypes.h>

/*
 * Kernels in this section extract the real (in-phase) component of each
 * interleaved 16-bit complex sample, convert it to float and multiply it by
 * 1/scalar.
 *
 * The project definitions lv_16sc_t (volk_complex.h) and
 * __VOLK_ATTR_ALIGNED (volk_common.h) must already be in scope.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel: real part of 16-bit complex samples, scaled to float.
 *
 * Uses aligned 256-bit loads and stores, so both buffers must satisfy the
 * 32-byte alignment required by _mm256_load_si256 / _mm256_store_ps.
 *
 * \param iBuffer       Output: num_points floats, real[i] / scalar.
 * \param complexVector Input: num_points interleaved (real, imag) int16 pairs.
 * \param scalar        Divisor applied to every output (as a multiply by 1/scalar).
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
                                            const lv_16sc_t* complexVector,
                                            const float scalar,
                                            unsigned int num_points)
{
    float* iBufferPtr = iBuffer;

    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    __m256 iFloatValue;
    __m256i complexVal, iIntVal;
    __m128i complexVal128;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    /* Per 128-bit lane: gather the four 16-bit real parts (bytes 0-1, 4-5,
     * 8-9, 12-13) into the low 8 bytes; 0x80 zeroes the high 8 bytes. */
    const __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             13, 12, 9, 8, 5, 4, 1, 0,
                                             0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             13, 12, 9, 8, 5, 4, 1, 0);

    for (; number < eighthPoints; number++) {
        /* 32 bytes = 8 complex samples. */
        complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
        /* 0xd8 reorders the 64-bit quarters to (0, 2, 1, 3) so the real parts
         * of both lanes become contiguous in the low 128 bits. */
        complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
        complexVal128 = _mm256_extracti128_si256(complexVal, 0);

        /* Sign-extend 8 x int16 -> 8 x int32, then convert to float. */
        iIntVal = _mm256_cvtepi16_epi32(complexVal128);
        iFloatValue = _mm256_cvtepi32_ps(iIntVal);

        iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);

        _mm256_store_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    const int16_t* sixteenTComplexVectorPtr = (const int16_t*)&complexVector[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
        sixteenTComplexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
 * \brief SSE4.1 kernel: real part of 16-bit complex samples, scaled to float.
 *
 * Uses aligned 128-bit loads and stores, so both buffers must satisfy the
 * 16-byte alignment required by _mm_load_si128 / _mm_store_ps.
 *
 * \param iBuffer       Output: num_points floats, real[i] / scalar.
 * \param complexVector Input: num_points interleaved (real, imag) int16 pairs.
 * \param scalar        Divisor applied to every output (as a multiply by 1/scalar).
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
                                              const lv_16sc_t* complexVector,
                                              const float scalar,
                                              unsigned int num_points)
{
    float* iBufferPtr = iBuffer;

    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    __m128 iFloatValue;
    __m128i complexVal, iIntVal;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    /* Gather the four 16-bit real parts into the low 8 bytes; 0x80 zeroes
     * the high 8 bytes. */
    const __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80,
                                          0x80, 0x80, 0x80, 0x80,
                                          13, 12, 9, 8, 5, 4, 1, 0);

    for (; number < quarterPoints; number++) {
        /* 16 bytes = 4 complex samples. */
        complexVal = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 16;

        complexVal = _mm_shuffle_epi8(complexVal, moveMask);

        /* Sign-extend 4 x int16 -> 4 x int32, then convert to float. */
        iIntVal = _mm_cvtepi16_epi32(complexVal);
        iFloatValue = _mm_cvtepi32_ps(iIntVal);

        iFloatValue = _mm_mul_ps(iFloatValue, invScalar);

        _mm_store_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    number = quarterPoints * 4;
    const int16_t* sixteenTComplexVectorPtr = (const int16_t*)&complexVector[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
        sixteenTComplexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief SSE kernel: real part of 16-bit complex samples, scaled to float.
 *
 * The int16 -> float conversion is done in scalar code through a small
 * aligned staging buffer; only the multiply and the store are vectorised.
 * iBuffer must satisfy the 16-byte alignment required by _mm_store_ps.
 *
 * \param iBuffer       Output: num_points floats, real[i] / scalar.
 * \param complexVector Input: num_points interleaved (real, imag) int16 pairs.
 * \param scalar        Divisor applied to every output (as a multiply by 1/scalar).
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
                                           const lv_16sc_t* complexVector,
                                           const float scalar,
                                           unsigned int num_points)
{
    float* iBufferPtr = iBuffer;

    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;
    __m128 iValue;

    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;

    /* _mm_load_ps needs a 16-byte aligned source. */
    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

    for (; number < quarterPoints; number++) {
        /* Stride of 2 int16 values: take the real part, skip the imaginary. */
        floatBuffer[0] = (float)(*complexVectorPtr);
        complexVectorPtr += 2;
        floatBuffer[1] = (float)(*complexVectorPtr);
        complexVectorPtr += 2;
        floatBuffer[2] = (float)(*complexVectorPtr);
        complexVectorPtr += 2;
        floatBuffer[3] = (float)(*complexVectorPtr);
        complexVectorPtr += 2;

        iValue = _mm_load_ps(floatBuffer);

        iValue = _mm_mul_ps(iValue, invScalar);

        _mm_store_ps(iBufferPtr, iValue);
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    number = quarterPoints * 4;
    complexVectorPtr = (const int16_t*)&complexVector[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable kernel: real part of 16-bit complex samples, scaled to float.
 *
 * \param iBuffer       Output: num_points floats, real[i] / scalar.
 * \param complexVector Input: num_points interleaved (real, imag) int16 pairs.
 * \param scalar        Divisor applied to every output (as a multiply by 1/scalar).
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_generic(float* iBuffer,
                                             const lv_16sc_t* complexVector,
                                             const float scalar,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    float* iBufferPtr = iBuffer;
    const float invScalar = 1.0 / scalar;
    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel (unaligned): real part of 16-bit complex samples, scaled
 *        to float.
 *
 * Uses _mm256_loadu_si256 / _mm256_storeu_ps, so neither buffer needs any
 * particular alignment.
 *
 * \param iBuffer       Output: num_points floats, real[i] / scalar.
 * \param complexVector Input: num_points interleaved (real, imag) int16 pairs.
 * \param scalar        Divisor applied to every output (as a multiply by 1/scalar).
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
                                            const lv_16sc_t* complexVector,
                                            const float scalar,
                                            unsigned int num_points)
{
    float* iBufferPtr = iBuffer;

    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    __m256 iFloatValue;
    __m256i complexVal, iIntVal;
    __m128i complexVal128;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    /* Per 128-bit lane: gather the four 16-bit real parts (bytes 0-1, 4-5,
     * 8-9, 12-13) into the low 8 bytes; 0x80 zeroes the high 8 bytes. */
    const __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             13, 12, 9, 8, 5, 4, 1, 0,
                                             0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             13, 12, 9, 8, 5, 4, 1, 0);

    for (; number < eighthPoints; number++) {
        /* 32 bytes = 8 complex samples. */
        complexVal = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
        /* 0xd8 reorders the 64-bit quarters to (0, 2, 1, 3) so the real parts
         * of both lanes become contiguous in the low 128 bits. */
        complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
        complexVal128 = _mm256_extracti128_si256(complexVal, 0);

        /* Sign-extend 8 x int16 -> 8 x int32, then convert to float. */
        iIntVal = _mm256_cvtepi16_epi32(complexVal128);
        iFloatValue = _mm256_cvtepi32_ps(iIntVal);

        iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);

        _mm256_storeu_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    const int16_t* sixteenTComplexVectorPtr = (const int16_t*)&complexVector[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
        sixteenTComplexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:160
short complex lv_16sc_t
Definition: volk_complex.h:58
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:202