55 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H 56 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H 63 #include <immintrin.h> 66 volk_8ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
const lv_8sc_t* complexVector,
67 const float scalar,
unsigned int num_points)
69 float* iBufferPtr = iBuffer;
71 unsigned int number = 0;
72 const unsigned int sixteenthPoints = num_points / 16;
75 const float iScalar= 1.0 / scalar;
76 __m256 invScalar = _mm256_set1_ps(iScalar);
77 __m256i complexVal, iIntVal;
78 int8_t* complexVectorPtr = (int8_t*)complexVector;
80 __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
81 14, 12, 10, 8, 6, 4, 2, 0,
82 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
83 14, 12, 10, 8, 6, 4, 2, 0);
84 for(;number < sixteenthPoints; number++){
85 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
86 complexVectorPtr += 32;
87 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
89 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
90 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
91 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
92 _mm256_store_ps(iBufferPtr, iFloatValue);
95 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
96 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
97 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
98 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
99 _mm256_store_ps(iBufferPtr, iFloatValue);
103 number = sixteenthPoints * 16;
104 for(; number < num_points; number++){
105 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
 * \brief Extracts the real part of each 8-bit complex sample, converts it to
 *        float and multiplies it by 1/scalar (aligned SSE4.1 version).
 *
 * The input is read as interleaved signed bytes (real, imaginary, real, ...).
 * The vector loop uses aligned 128-bit loads and stores, so both buffers must
 * be 16-byte aligned.
 *
 * \param iBuffer       Output buffer; receives num_points floats.
 * \param complexVector Input buffer of num_points interleaved 8-bit complex samples.
 * \param scalar        Divisor applied to every output value.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
                                             const lv_8sc_t* complexVector,
                                             const float scalar,
                                             unsigned int num_points)
{
    float* iBufferPtr = iBuffer;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    __m128i complexVal, iIntVal;
    __m128 iFloatValue;

    /* Gather the eight even-indexed (real) bytes into the low 64 bits and
     * zero the high 64 bits (0x80 selects zero). */
    const __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80,
                                          0x80, 0x80, 0x80, 0x80,
                                          14, 12, 10, 8, 6, 4, 2, 0);

    /* Each iteration consumes 8 complex samples (16 bytes) and emits 8 floats. */
    for (; number < eighthPoints; number++) {
        complexVal = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 16;
        complexVal = _mm_shuffle_epi8(complexVal, moveMask);

        /* Real parts 0..3 are the four lowest bytes. */
        iIntVal = _mm_cvtepi8_epi32(complexVal);
        iFloatValue = _mm_cvtepi32_ps(iIntVal);
        iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
        _mm_store_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 4;

        /* Shift right by four bytes to expose real parts 4..7. */
        complexVal = _mm_srli_si128(complexVal, 4);
        iIntVal = _mm_cvtepi8_epi32(complexVal);
        iFloatValue = _mm_cvtepi32_ps(iIntVal);
        iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
        _mm_store_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 4;
    }

    /* Scalar tail: step two bytes per sample so the imaginary byte is skipped. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (float)(*complexVectorPtr) * iScalar;
        complexVectorPtr += 2;
    }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Extracts the real part of each 8-bit complex sample, converts it to
 *        float and multiplies it by 1/scalar (aligned SSE version).
 *
 * The input is read as interleaved signed bytes (real, imaginary, real, ...).
 * The byte-to-float conversion is done in scalar code; only the scaling and
 * the aligned 128-bit store use SSE, so iBuffer must be 16-byte aligned.
 *
 * \param iBuffer       Output buffer; receives num_points floats.
 * \param complexVector Input buffer of num_points interleaved 8-bit complex samples.
 * \param scalar        Divisor applied to every output value.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
                                          const lv_8sc_t* complexVector,
                                          const float scalar,
                                          unsigned int num_points)
{
    float* iBufferPtr = iBuffer;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    __m128 iValue;

    /* Scratch area for four converted samples; _mm_load_ps needs it aligned. */
    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

    /* Each iteration consumes 4 complex samples (8 bytes) and emits 4 floats. */
    for (; number < quarterPoints; number++) {
        unsigned int lane;
        for (lane = 0; lane < 4; lane++) {
            floatBuffer[lane] = (float)(*complexVectorPtr);
            complexVectorPtr += 2; /* skip the imaginary byte */
        }

        iValue = _mm_load_ps(floatBuffer);
        iValue = _mm_mul_ps(iValue, invScalar);
        _mm_store_ps(iBufferPtr, iValue);
        iBufferPtr += 4;
    }

    /* Scalar tail: step two bytes per sample so the imaginary byte is skipped. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (float)(*complexVectorPtr) * iScalar;
        complexVectorPtr += 2;
    }
}
#endif /* LV_HAVE_SSE */
211 #ifdef LV_HAVE_GENERIC 215 const float scalar,
unsigned int num_points)
217 unsigned int number = 0;
218 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
219 float* iBufferPtr = iBuffer;
220 const float invScalar = 1.0 / scalar;
221 for(number = 0; number < num_points; number++){
222 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Extracts the real part of each 8-bit complex sample, converts it to
 *        float and multiplies it by 1/scalar (unaligned AVX2 version).
 *
 * The input is read as interleaved signed bytes (real, imaginary, real, ...).
 * Unaligned 256-bit loads and stores are used, so neither buffer needs any
 * particular alignment.
 *
 * \param iBuffer       Output buffer; receives num_points floats.
 * \param complexVector Input buffer of num_points interleaved 8-bit complex samples.
 * \param scalar        Divisor applied to every output value.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
                                           const lv_8sc_t* complexVector,
                                           const float scalar,
                                           unsigned int num_points)
{
    float* iBufferPtr = iBuffer;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;

    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    __m256i complexVal, iIntVal;
    __m128i hcomplexVal;
    __m256 iFloatValue;

    /* Per 128-bit lane: gather the eight even-indexed (real) bytes into the
     * low 64 bits and zero the high 64 bits (0x80 selects zero). */
    const __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             14, 12, 10, 8, 6, 4, 2, 0,
                                             0x80, 0x80, 0x80, 0x80,
                                             0x80, 0x80, 0x80, 0x80,
                                             14, 12, 10, 8, 6, 4, 2, 0);

    /* Each iteration consumes 16 complex samples (32 bytes) and emits 16 floats. */
    for (; number < sixteenthPoints; number++) {
        complexVal = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;
        complexVal = _mm256_shuffle_epi8(complexVal, moveMask);

        /* Low lane holds real parts 0..7. */
        hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
        iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
        iFloatValue = _mm256_cvtepi32_ps(iIntVal);
        iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
        _mm256_storeu_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 8;

        /* High lane holds real parts 8..15. */
        hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
        iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
        iFloatValue = _mm256_cvtepi32_ps(iIntVal);
        iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
        _mm256_storeu_ps(iBufferPtr, iFloatValue);
        iBufferPtr += 8;
    }

    /* Scalar tail: step two bytes per sample so the imaginary byte is skipped. */
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (float)(*complexVectorPtr) * iScalar;
        complexVectorPtr += 2;
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
/*
 * Cross-references carried over from the generated documentation:
 *
 * static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_8ic_s32f_deinterleave_real_32f.h:171
 *
 * static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_8ic_s32f_deinterleave_real_32f.h:214
 *
 * #define __VOLK_ATTR_ALIGNED(x)
 *     Definition: volk_common.h:33
 *
 * char complex lv_8sc_t
 *     Provide typedefs and operators for all complex types in C and C++.
 *     Definition: volk_complex.h:57
 */