54 #ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H 55 #define INCLUDED_volk_16ic_deinterleave_real_8i_a_H 62 #include <immintrin.h> 65 volk_16ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
67 unsigned int number = 0;
68 const int8_t* complexVectorPtr = (int8_t*)complexVector;
69 int8_t* iBufferPtr = iBuffer;
70 __m256i iMoveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
71 __m256i iMoveMask2 = _mm256_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
72 __m256i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
74 unsigned int thirtysecondPoints = num_points / 32;
76 for(number = 0; number < thirtysecondPoints; number++){
77 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
78 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
80 complexVal3 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
81 complexVal4 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
83 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
84 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
86 complexVal1 = _mm256_or_si256(complexVal1, complexVal2);
87 complexVal1 = _mm256_permute4x64_epi64(complexVal1, 0xd8);
89 complexVal3 = _mm256_shuffle_epi8(complexVal3, iMoveMask1);
90 complexVal4 = _mm256_shuffle_epi8(complexVal4, iMoveMask2);
92 complexVal3 = _mm256_or_si256(complexVal3, complexVal4);
93 complexVal3 = _mm256_permute4x64_epi64(complexVal3, 0xd8);
95 complexVal1 = _mm256_srai_epi16(complexVal1, 8);
96 complexVal3 = _mm256_srai_epi16(complexVal3, 8);
98 iOutputVal = _mm256_packs_epi16(complexVal1, complexVal3);
99 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
101 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
106 number = thirtysecondPoints * 32;
107 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
108 for(; number < num_points; number++){
109 *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
110 int16ComplexVectorPtr++;
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>

/*
 * Extracts the real (first) 16-bit component of every complex sample and
 * stores its upper 8 bits (arithmetic shift right by 8) as an int8_t.
 *
 * iBuffer        output, one int8_t per complex sample
 * complexVector  input, interleaved 16-bit real/imaginary pairs
 * num_points     number of complex samples to process
 *
 * Uses aligned 128-bit loads and stores, so both buffers must be 16-byte
 * aligned. Samples that do not fill a whole 16-sample group are handled by
 * the scalar loop at the end.
 */
static inline void
volk_16ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer,
                                       const lv_16sc_t* complexVector,
                                       unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;

  /* Gather the four real int16 values into the low (mask 1) or high
   * (mask 2) 8 bytes and zero the other half (0x80). */
  const __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
  const __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
  __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;

  const unsigned int sixteenthPoints = num_points / 16;

  for (number = 0; number < sixteenthPoints; number++) {
    /* 4 x 16 bytes = 16 complex samples per iteration. */
    complexVal1 = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;
    complexVal2 = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;

    complexVal3 = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;
    complexVal4 = _mm_load_si128((const __m128i*)complexVectorPtr);
    complexVectorPtr += 16;

    complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
    complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);

    /* Eight real values, in sample order. */
    complexVal1 = _mm_or_si128(complexVal1, complexVal2);

    complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
    complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);

    complexVal3 = _mm_or_si128(complexVal3, complexVal4);

    /* Keep the upper byte of each real value, sign-extended. */
    complexVal1 = _mm_srai_epi16(complexVal1, 8);
    complexVal3 = _mm_srai_epi16(complexVal3, 8);

    /* After the shift every value fits in int8_t, so the saturating pack
     * only narrows. */
    iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);

    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);

    /* Advance past the 16 outputs just written. */
    iBufferPtr += 16;
  }

  /* Scalar tail for the remaining (num_points % 16) samples. */
  number = sixteenthPoints * 16;
  const int16_t* int16ComplexVectorPtr = (const int16_t*)complexVectorPtr;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
    int16ComplexVectorPtr++; /* skip the imaginary component */
  }
}
#endif /* LV_HAVE_SSSE3 */
#ifdef LV_HAVE_GENERIC

/*
 * Portable reference implementation: for each complex sample, takes the
 * real (first) 16-bit component and stores its upper 8 bits (shift right
 * by 8) as an int8_t.
 *
 * iBuffer        output, one int8_t per complex sample
 * complexVector  input, interleaved 16-bit real/imaginary pairs
 * num_points     number of complex samples to process
 */
static inline void
volk_16ic_deinterleave_real_8i_generic(int8_t* iBuffer,
                                       const lv_16sc_t* complexVector,
                                       unsigned int num_points)
{
  unsigned int number = 0;
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;
  for (number = 0; number < num_points; number++) {
    *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8));
    complexVectorPtr++; /* skip the imaginary component */
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * Extracts the real (first) 16-bit component of every complex sample and
 * stores its upper 8 bits (shift right by 8) as an int8_t.
 *
 * iBuffer        output, one int8_t per complex sample
 * complexVector  input, interleaved 16-bit real/imaginary pairs
 * num_points     number of complex samples to process
 *
 * Eight samples are processed per vector iteration; the remainder is
 * handled by the scalar loop at the end.
 */
static inline void
volk_16ic_deinterleave_real_8i_neon(int8_t* iBuffer,
                                    const lv_16sc_t* complexVector,
                                    unsigned int num_points)
{
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;
  unsigned int eighth_points = num_points / 8;
  unsigned int number;

  int16x8x2_t complexInput;
  int8x8_t realOutput;

  for (number = 0; number < eighth_points; number++) {
    /* De-interleaving load: val[0] holds 8 reals, val[1] 8 imaginaries. */
    complexInput = vld2q_s16(complexVectorPtr);
    /* Shift right by 8 and narrow each lane to 8 bits. */
    realOutput = vshrn_n_s16(complexInput.val[0], 8);
    vst1_s8(iBufferPtr, realOutput);
    complexVectorPtr += 16; /* 8 complex samples = 16 int16 values */
    iBufferPtr += 8;
  }

  /* Scalar tail for the remaining (num_points % 8) samples. */
  for (number = eighth_points * 8; number < num_points; number++) {
    *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8));
    complexVectorPtr++; /* skip the imaginary component */
  }
}
#endif /* LV_HAVE_NEON */
214 volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points);
217 volk_16ic_deinterleave_real_8i_u_orc(int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
219 volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points);
#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_u_H
#define INCLUDED_volk_16ic_deinterleave_real_8i_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*
 * Extracts the real (first) 16-bit component of every complex sample and
 * stores its upper 8 bits (arithmetic shift right by 8) as an int8_t.
 *
 * iBuffer        output, one int8_t per complex sample
 * complexVector  input, interleaved 16-bit real/imaginary pairs
 * num_points     number of complex samples to process
 *
 * Uses unaligned 256-bit loads and stores, so the buffers need no
 * particular alignment. Samples that do not fill a whole 32-sample group
 * are handled by the scalar loop at the end.
 */
static inline void
volk_16ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
                                      const lv_16sc_t* complexVector,
                                      unsigned int num_points)
{
  unsigned int number = 0;
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int8_t* iBufferPtr = iBuffer;

  /* Per 128-bit lane: gather the four real int16 values into the low
   * (mask 1) or high (mask 2) 8 bytes and zero the other half (0x80). */
  const __m256i iMoveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
  const __m256i iMoveMask2 = _mm256_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
  __m256i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;

  const unsigned int thirtysecondPoints = num_points / 32;

  for (number = 0; number < thirtysecondPoints; number++) {
    /* 4 x 32 bytes = 32 complex samples per iteration. */
    complexVal1 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;
    complexVal2 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;

    complexVal3 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;
    complexVal4 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
    complexVectorPtr += 32;

    complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
    complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);

    /* Merge the two half-filled registers; 0xd8 reorders the 64-bit
     * quarters to 0,2,1,3 so the reals end up in sample order. */
    complexVal1 = _mm256_or_si256(complexVal1, complexVal2);
    complexVal1 = _mm256_permute4x64_epi64(complexVal1, 0xd8);

    complexVal3 = _mm256_shuffle_epi8(complexVal3, iMoveMask1);
    complexVal4 = _mm256_shuffle_epi8(complexVal4, iMoveMask2);

    complexVal3 = _mm256_or_si256(complexVal3, complexVal4);
    complexVal3 = _mm256_permute4x64_epi64(complexVal3, 0xd8);

    /* Keep the upper byte of each real value, sign-extended. */
    complexVal1 = _mm256_srai_epi16(complexVal1, 8);
    complexVal3 = _mm256_srai_epi16(complexVal3, 8);

    /* After the shift every value fits in int8_t, so the saturating pack
     * only narrows. The pack works per lane, hence the second reorder. */
    iOutputVal = _mm256_packs_epi16(complexVal1, complexVal3);
    iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);

    _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);

    /* Advance past the 32 outputs just written. */
    iBufferPtr += 32;
  }

  /* Scalar tail for the remaining (num_points % 32) samples. */
  number = thirtysecondPoints * 32;
  const int16_t* int16ComplexVectorPtr = (const int16_t*)complexVectorPtr;
  for (; number < num_points; number++) {
    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
    int16ComplexVectorPtr++; /* skip the imaginary component */
  }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_u_H */
/*
 * Cross-references carried over from the generated source listing:
 *
 *   short complex lv_16sc_t
 *     Definition: volk_complex.h:58
 *
 *   static void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
 *     Definition: volk_16ic_deinterleave_real_8i.h:120
 *
 *   static void volk_16ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
 *     Definition: volk_16ic_deinterleave_real_8i.h:187
 *
 *   static void volk_16ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
 *     Definition: volk_16ic_deinterleave_real_8i.h:171
 */