#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H

#include <inttypes.h>

/*
 * Aligned kernels.
 *
 * Every kernel in this section reads num_points interleaved 8-bit (I, Q)
 * pairs from complexVector and writes the I samples to iBuffer and the Q
 * samples to qBuffer as 16-bit values.  Each sample is sign-extended and
 * multiplied by 256, i.e. moved into the upper byte of the 16-bit result.
 *
 * NOTE(review): this section was rebuilt from an extracted listing in which
 * brace-only lines, the "static inline void" headers, some architecture
 * guards and the per-iteration output-pointer advances were missing.  They
 * are restored here; confirm against the upstream header.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Deinterleave 8-bit complex samples into two 16-bit buffers (AVX2, aligned).
 *
 * Uses aligned 256-bit loads and stores, so complexVector, iBuffer and
 * qBuffer must be 32-byte aligned.
 *
 * \param iBuffer        output: in-phase samples, scaled by 256
 * \param qBuffer        output: quadrature samples, scaled by 256
 * \param complexVector  input: interleaved 8-bit I/Q pairs
 * \param num_points     number of complex samples to process
 */
static inline void
volk_8ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer, int16_t* qBuffer,
                                    const lv_8sc_t* complexVector,
                                    unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  /* Per 128-bit lane: even bytes (I) go to the low 8 bytes, odd bytes (Q) to the high 8. */
  __m256i MoveMask = _mm256_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
                                     15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
  __m256i complexVal, iOutputVal, qOutputVal;
  __m128i iOutputVal0, qOutputVal0;

  unsigned int sixteenthPoints = num_points / 16;

  for (number = 0; number < sixteenthPoints; number++) {
    complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;

    complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
    /* 0xd8 reorders the 64-bit quarters 0,2,1,3: all I bytes low, all Q bytes high. */
    complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);

    iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
    qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);

    /* Sign-extend to 16 bits, then shift left by 8 (same as * 256). */
    iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
    iOutputVal = _mm256_slli_epi16(iOutputVal, 8);

    qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
    qOutputVal = _mm256_slli_epi16(qOutputVal, 8);

    _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
    _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);

    /* 16 samples were written to each output per iteration. */
    iBufferPtr += 16;
    qBufferPtr += 16;
  }

  /* Scalar tail for the remaining num_points % 16 samples. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
 * \brief Deinterleave 8-bit complex samples into two 16-bit buffers (SSE4.1, aligned).
 *
 * Uses aligned 128-bit loads and stores, so complexVector, iBuffer and
 * qBuffer must be 16-byte aligned.
 *
 * \param iBuffer        output: in-phase samples, scaled by 256
 * \param qBuffer        output: quadrature samples, scaled by 256
 * \param complexVector  input: interleaved 8-bit I/Q pairs
 * \param num_points     number of complex samples to process
 */
static inline void
volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer,
                                      const lv_8sc_t* complexVector,
                                      unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  /* 0x80 selects zero; the low 8 bytes gather the even (I) or odd (Q) input bytes. */
  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                   14, 12, 10, 8, 6, 4, 2, 0);
  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                   15, 13, 11, 9, 7, 5, 3, 1);
  __m128i complexVal, iOutputVal, qOutputVal;

  unsigned int eighthPoints = num_points / 8;

  for (number = 0; number < eighthPoints; number++) {
    complexVal = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;

    iOutputVal = _mm_shuffle_epi8(complexVal, iMoveMask);
    qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);

    /* Sign-extend to 16 bits, then shift left by 8 (same as * 256). */
    iOutputVal = _mm_cvtepi8_epi16(iOutputVal);
    iOutputVal = _mm_slli_epi16(iOutputVal, 8);

    qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
    qOutputVal = _mm_slli_epi16(qOutputVal, 8);

    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
    _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);

    /* 8 samples were written to each output per iteration. */
    iBufferPtr += 8;
    qBufferPtr += 8;
  }

  /* Scalar tail for the remaining num_points % 8 samples. */
  number = eighthPoints * 8;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Deinterleave 8-bit complex samples into two 16-bit buffers (AVX, aligned).
 *
 * Loads and stores 256 bits at a time but does the integer shuffle, widen
 * and shift on the two 128-bit halves separately.  complexVector, iBuffer
 * and qBuffer must be 32-byte aligned.
 *
 * \param iBuffer        output: in-phase samples, scaled by 256
 * \param qBuffer        output: quadrature samples, scaled by 256
 * \param complexVector  input: interleaved 8-bit I/Q pairs
 * \param num_points     number of complex samples to process
 */
static inline void
volk_8ic_deinterleave_16i_x2_a_avx(int16_t* iBuffer, int16_t* qBuffer,
                                   const lv_8sc_t* complexVector,
                                   unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  /* 0x80 selects zero; the low 8 bytes gather the even (I) or odd (Q) input bytes. */
  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                   14, 12, 10, 8, 6, 4, 2, 0);
  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                   15, 13, 11, 9, 7, 5, 3, 1);
  __m256i complexVal, iOutputVal, qOutputVal;
  __m128i complexVal1, complexVal0;
  __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;

  unsigned int sixteenthPoints = num_points / 16;

  for (number = 0; number < sixteenthPoints; number++) {
    complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;

    /* Split into 128-bit halves for the 128-bit integer operations below. */
    complexVal1 = _mm256_extractf128_si256(complexVal, 1);
    complexVal0 = _mm256_extractf128_si256(complexVal, 0);

    iOutputVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask);
    iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
    qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
    qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);

    /* Sign-extend to 16 bits, then shift left by 8 (same as * 256). */
    iOutputVal1 = _mm_cvtepi8_epi16(iOutputVal1);
    iOutputVal1 = _mm_slli_epi16(iOutputVal1, 8);
    iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
    iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);

    qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
    qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
    qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
    qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);

    /* Reassemble the halves into 256-bit vectors for the stores. */
    __m256i dummy = _mm256_setzero_si256();
    iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
    iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
    qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
    qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);

    _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
    _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);

    /* 16 samples were written to each output per iteration. */
    iBufferPtr += 16;
    qBufferPtr += 16;
  }

  /* Scalar tail for the remaining num_points % 16 samples. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Deinterleave 8-bit complex samples into two 16-bit buffers (portable C).
 *
 * \param iBuffer        output: in-phase samples, scaled by 256
 * \param qBuffer        output: quadrature samples, scaled by 256
 * \param complexVector  input: interleaved 8-bit I/Q pairs
 * \param num_points     number of complex samples to process
 */
static inline void
volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer, int16_t* qBuffer,
                                     const lv_8sc_t* complexVector,
                                     unsigned int num_points)
{
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
    *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */
#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
#define INCLUDED_volk_8ic_deinterleave_16i_x2_u_H

#include <inttypes.h>

/*
 * NOTE(review): this section was rebuilt from an extracted listing in which
 * brace-only lines, the "static inline void" header, the architecture guard
 * and the per-iteration output-pointer advance were missing.  They are
 * restored here; confirm against the upstream header.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Deinterleave 8-bit complex samples into two 16-bit buffers (AVX2, unaligned).
 *
 * Reads num_points interleaved 8-bit (I, Q) pairs and writes the I and Q
 * samples to separate 16-bit buffers, each sign-extended and multiplied by
 * 256.  Unaligned load/store intrinsics are used, so the buffers carry no
 * alignment requirement.
 *
 * \param iBuffer        output: in-phase samples, scaled by 256
 * \param qBuffer        output: quadrature samples, scaled by 256
 * \param complexVector  input: interleaved 8-bit I/Q pairs
 * \param num_points     number of complex samples to process
 */
static inline void
volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer, int16_t* qBuffer,
                                    const lv_8sc_t* complexVector,
                                    unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  /* Per 128-bit lane: even bytes (I) go to the low 8 bytes, odd bytes (Q) to the high 8. */
  __m256i MoveMask = _mm256_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
                                     15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
  __m256i complexVal, iOutputVal, qOutputVal;
  __m128i iOutputVal0, qOutputVal0;

  unsigned int sixteenthPoints = num_points / 16;

  for (number = 0; number < sixteenthPoints; number++) {
    complexVal = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;

    complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
    /* 0xd8 reorders the 64-bit quarters 0,2,1,3: all I bytes low, all Q bytes high. */
    complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);

    iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
    qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);

    /* Sign-extend to 16 bits, then shift left by 8 (same as * 256). */
    iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
    iOutputVal = _mm256_slli_epi16(iOutputVal, 8);

    qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
    qOutputVal = _mm256_slli_epi16(qOutputVal, 8);

    _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
    _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);

    /* 16 samples were written to each output per iteration. */
    iBufferPtr += 16;
    qBufferPtr += 16;
  }

  /* Scalar tail for the remaining num_points % 16 samples. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
  }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_u_H */
/*
 * Cross-references carried over from the generated source listing:
 *
 *   static void volk_8ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer,
 *                                                    const lv_8sc_t *complexVector,
 *                                                    unsigned int num_points)
 *     Definition: volk_8ic_deinterleave_16i_x2.h:219
 *
 *   static void volk_8ic_deinterleave_16i_x2_a_avx(int16_t *iBuffer, int16_t *qBuffer,
 *                                                  const lv_8sc_t *complexVector,
 *                                                  unsigned int num_points)
 *     Definition: volk_8ic_deinterleave_16i_x2.h:156
 *
 *   char complex lv_8sc_t
 *     Provide typedefs and operators for all complex types in C and C++.
 *     Definition: volk_complex.h:57
 */