56 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H 57 #define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H 65 #include <smmintrin.h> 68 volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
float* qBuffer,
const lv_8sc_t* complexVector,
69 const float scalar,
unsigned int num_points)
71 float* iBufferPtr = iBuffer;
72 float* qBufferPtr = qBuffer;
74 unsigned int number = 0;
75 const unsigned int eighthPoints = num_points / 8;
76 __m128 iFloatValue, qFloatValue;
78 const float iScalar= 1.0 / scalar;
79 __m128 invScalar = _mm_set_ps1(iScalar);
80 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
81 int8_t* complexVectorPtr = (int8_t*)complexVector;
83 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
84 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
86 for(;number < eighthPoints; number++){
87 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
88 iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
89 qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
91 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
92 iFloatValue = _mm_cvtepi32_ps(iIntVal);
93 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
94 _mm_store_ps(iBufferPtr, iFloatValue);
97 iComplexVal = _mm_srli_si128(iComplexVal, 4);
99 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
100 iFloatValue = _mm_cvtepi32_ps(iIntVal);
101 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
102 _mm_store_ps(iBufferPtr, iFloatValue);
105 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
106 qFloatValue = _mm_cvtepi32_ps(qIntVal);
107 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
108 _mm_store_ps(qBufferPtr, qFloatValue);
111 qComplexVal = _mm_srli_si128(qComplexVal, 4);
113 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
114 qFloatValue = _mm_cvtepi32_ps(qIntVal);
115 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
116 _mm_store_ps(qBufferPtr, qFloatValue);
121 number = eighthPoints * 8;
122 for(; number < num_points; number++){
123 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
124 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Deinterleaves 8-bit complex samples into separate float I and Q
 *        arrays, multiplying each value by 1/scalar (SSE, aligned outputs).
 *
 * Input bytes are converted to float one at a time through a small aligned
 * staging buffer; the outputs are written with _mm_store_ps and therefore
 * must be 16-byte aligned.
 *
 * \param iBuffer        output: num_points in-phase values
 * \param qBuffer        output: num_points quadrature values
 * \param complexVector  input: num_points interleaved (I, Q) byte pairs
 * \param scalar         divisor applied to every output value
 * \param num_points     number of complex samples to convert
 */
static inline void
volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer,
                                        const lv_8sc_t* complexVector,
                                        const float scalar, unsigned int num_points)
{
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;
  __m128 cplxValue1, cplxValue2, iValue, qValue;

  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;

  /* Staging area read back with _mm_load_ps, hence the 16-byte alignment. */
  __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];

  /* Each iteration consumes 8 bytes = 4 complex samples. */
  for (; number < quarterPoints; number++) {
    floatBuffer[0] = (float)(complexVectorPtr[0]);
    floatBuffer[1] = (float)(complexVectorPtr[1]);
    floatBuffer[2] = (float)(complexVectorPtr[2]);
    floatBuffer[3] = (float)(complexVectorPtr[3]);

    floatBuffer[4] = (float)(complexVectorPtr[4]);
    floatBuffer[5] = (float)(complexVectorPtr[5]);
    floatBuffer[6] = (float)(complexVectorPtr[6]);
    floatBuffer[7] = (float)(complexVectorPtr[7]);

    cplxValue1 = _mm_load_ps(&floatBuffer[0]);
    cplxValue2 = _mm_load_ps(&floatBuffer[4]);

    complexVectorPtr += 8;

    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);

    /* Even lanes of both registers are I, odd lanes are Q. */
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

    _mm_store_ps(iBufferPtr, iValue);
    _mm_store_ps(qBufferPtr, qValue);

    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail for the remaining (num_points % 4) samples. It multiplies by
   * the same reciprocal as the vector loop so that a sample's value does not
   * depend on whether it falls in the vector body or the tail. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
  }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Deinterleaves 8-bit complex samples into separate float I and Q
 *        arrays, multiplying each value by 1/scalar (AVX2, aligned).
 *
 * The vector loop uses aligned loads and stores (_mm256_load_si256 /
 * _mm256_store_ps), so all three buffers must be 32-byte aligned.
 *
 * \param iBuffer        output: num_points in-phase values
 * \param qBuffer        output: num_points quadrature values
 * \param complexVector  input: num_points interleaved (I, Q) byte pairs
 * \param scalar         divisor applied to every output value
 * \param num_points     number of complex samples to convert
 */
static inline void
volk_8ic_s32f_deinterleave_32f_x2_a_avx2(float* iBuffer, float* qBuffer,
                                         const lv_8sc_t* complexVector,
                                         const float scalar, unsigned int num_points)
{
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;
  __m256 iFloatValue, qFloatValue;

  const float iScalar = 1.0 / scalar;
  __m256 invScalar = _mm256_set1_ps(iScalar);
  __m256i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;

  /* Per 128-bit lane: gather the even (I) or odd (Q) bytes into the low
   * 8 bytes and zero the high 8 bytes (index 0x80 yields a zero byte). */
  __m256i iMoveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                      14, 12, 10, 8, 6, 4, 2, 0,
                                      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                      14, 12, 10, 8, 6, 4, 2, 0);
  __m256i qMoveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                      15, 13, 11, 9, 7, 5, 3, 1,
                                      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                      15, 13, 11, 9, 7, 5, 3, 1);

  /* Each iteration consumes 32 bytes = 16 complex samples. */
  for (; number < sixteenthPoints; number++) {
    complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;
    iComplexVal = _mm256_shuffle_epi8(complexVal, iMoveMask);
    qComplexVal = _mm256_shuffle_epi8(complexVal, qMoveMask);

    /* I samples 0..7 sit in the low 8 bytes of the lower lane. */
    iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
    iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    _mm256_store_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 8;

    /* 0xC6 picks source qwords (2, 1, 0, 3): swaps qword 0 and qword 2 so
     * that I samples 8..15 move into the low 8 bytes. */
    iComplexVal = _mm256_permute4x64_epi64(iComplexVal, 0xC6);
    iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
    iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    _mm256_store_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 8;

    /* Q samples 0..7. */
    qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
    qFloatValue = _mm256_cvtepi32_ps(qIntVal);
    qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
    _mm256_store_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 8;

    /* Q samples 8..15, same qword swap as for I. */
    qComplexVal = _mm256_permute4x64_epi64(qComplexVal, 0xC6);
    qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
    qFloatValue = _mm256_cvtepi32_ps(qIntVal);
    qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
    _mm256_store_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 8;
  }

  /* Scalar tail for the remaining (num_points % 16) samples. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
  }
}
#endif /* LV_HAVE_AVX2 */
262 #ifdef LV_HAVE_GENERIC 267 const float scalar,
unsigned int num_points)
269 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
270 float* iBufferPtr = iBuffer;
271 float* qBufferPtr = qBuffer;
273 const float invScalar = 1.0 / scalar;
274 for(number = 0; number < num_points; number++){
275 *iBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
276 *qBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Deinterleaves 8-bit complex samples into separate float I and Q
 *        arrays, multiplying each value by 1/scalar (AVX2, unaligned).
 *
 * Uses unaligned loads and stores (_mm256_loadu_si256 / _mm256_storeu_ps),
 * so the buffers carry no alignment requirement.
 *
 * \param iBuffer        output: num_points in-phase values
 * \param qBuffer        output: num_points quadrature values
 * \param complexVector  input: num_points interleaved (I, Q) byte pairs
 * \param scalar         divisor applied to every output value
 * \param num_points     number of complex samples to convert
 */
static inline void
volk_8ic_s32f_deinterleave_32f_x2_u_avx2(float* iBuffer, float* qBuffer,
                                         const lv_8sc_t* complexVector,
                                         const float scalar, unsigned int num_points)
{
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;
  __m256 iFloatValue, qFloatValue;

  const float iScalar = 1.0 / scalar;
  __m256 invScalar = _mm256_set1_ps(iScalar);
  __m256i complexVal, iIntVal, qIntVal;
  __m128i iComplexVal, qComplexVal;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;

  /* Per 128-bit lane: even (I) bytes to the low 8 bytes, odd (Q) bytes to
   * the high 8 bytes. */
  __m256i MoveMask = _mm256_set_epi8(15, 13, 11, 9, 7, 5, 3, 1,
                                     14, 12, 10, 8, 6, 4, 2, 0,
                                     15, 13, 11, 9, 7, 5, 3, 1,
                                     14, 12, 10, 8, 6, 4, 2, 0);

  /* Each iteration consumes 32 bytes = 16 complex samples. */
  for (; number < sixteenthPoints; number++) {
    complexVal = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;
    complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
    /* 0xd8 picks source qwords (0, 2, 1, 3): the lower lane now holds all
     * 16 I bytes and the upper lane all 16 Q bytes. */
    complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
    iComplexVal = _mm256_extractf128_si256(complexVal, 0);
    qComplexVal = _mm256_extractf128_si256(complexVal, 1);

    /* I samples 0..7. */
    iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
    iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    _mm256_storeu_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 8;

    /* Q samples 0..7. */
    qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
    qFloatValue = _mm256_cvtepi32_ps(qIntVal);
    qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
    _mm256_storeu_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 8;

    /* Shift each lane down by 8 bytes to expose samples 8..15. */
    complexVal = _mm256_srli_si256(complexVal, 8);
    iComplexVal = _mm256_extractf128_si256(complexVal, 0);
    qComplexVal = _mm256_extractf128_si256(complexVal, 1);

    /* I samples 8..15. */
    iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
    iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    _mm256_storeu_ps(iBufferPtr, iFloatValue);
    iBufferPtr += 8;

    /* Q samples 8..15. */
    qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
    qFloatValue = _mm256_cvtepi32_ps(qIntVal);
    qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
    _mm256_storeu_ps(qBufferPtr, qFloatValue);
    qBufferPtr += 8;
  }

  /* Scalar tail for the remaining (num_points % 16) samples. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
  }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H */
/*
 * Cross-references:
 *
 * static void volk_8ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *   Definition: volk_8ic_s32f_deinterleave_32f_x2.h:265
 *
 * static void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *   Definition: volk_8ic_s32f_deinterleave_32f_x2.h:135
 *
 * #define __VOLK_ATTR_ALIGNED(x)
 *   Definition: volk_common.h:33
 *
 * typedef char complex lv_8sc_t
 *   Provides typedefs and operators for all complex types in C and C++.
 *   Definition: volk_complex.h:57
 */