54 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H 55 #define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H 60 #include <immintrin.h> 63 volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer, int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
65 unsigned int number = 0;
66 const int8_t* complexVectorPtr = (int8_t*)complexVector;
67 int16_t* iBufferPtr = iBuffer;
68 int16_t* qBufferPtr = qBuffer;
70 __m256i MoveMask = _mm256_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0, 15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0);
72 __m256i iMove2, iMove1;
73 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
75 unsigned int sixteenthPoints = num_points / 16;
77 for(number = 0; number < sixteenthPoints; number++){
78 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
79 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
81 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
82 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
84 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1,0x08),_mm256_permute4x64_epi64(iMove2,0x80),0x30);
85 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1,0x0d),_mm256_permute4x64_epi64(iMove2,0xd0),0x30);
87 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
88 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
94 number = sixteenthPoints * 16;
95 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
96 for(; number < num_points; number++){
97 *iBufferPtr++ = *int16ComplexVectorPtr++;
98 *qBufferPtr++ = *int16ComplexVectorPtr++;
/* NOTE(review): guard macro name assumed by analogy with the LV_HAVE_GENERIC
 * guard used for the generic kernel in this file -- confirm against the build. */
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>

/*
 * Split an interleaved stream of 16-bit pairs into two separate streams
 * (SSSE3, aligned loads/stores).
 *
 * iBuffer       - receives the first int16 of every pair (num_points values)
 * qBuffer       - receives the second int16 of every pair (num_points values)
 * complexVector - num_points interleaved pairs
 * num_points    - number of pairs to process
 *
 * Aligned 128-bit load/store intrinsics are used, so all three buffers must
 * satisfy their 16-byte alignment requirement.
 */
static inline void
volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer, int16_t* qBuffer,
                                      const lv_16sc_t* complexVector,
                                      unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    int16_t* qBufferPtr = qBuffer;

    /* 0x80 selector bytes make pshufb write zero, so the two shuffled halves
     * can simply be OR-ed together. Mask1 fills the low 8 bytes from the first
     * input register, Mask2 fills the high 8 bytes from the second. */
    const __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                            13, 12, 9, 8, 5, 4, 1, 0);
    const __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0,
                                            0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);

    const __m128i qMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
                                            15, 14, 11, 10, 7, 6, 3, 2);
    const __m128i qMoveMask2 = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2,
                                            0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);

    __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;

    const unsigned int eighthPoints = num_points / 8;

    for (number = 0; number < eighthPoints; number++) {
        /* 2 x 16 bytes = 8 interleaved pairs per iteration. */
        complexVal1 = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 16;
        complexVal2 = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 16;

        iOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, iMoveMask1),
                                  _mm_shuffle_epi8(complexVal2, iMoveMask2));
        qOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, qMoveMask1),
                                  _mm_shuffle_epi8(complexVal2, qMoveMask2));

        _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
        _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);

        /* Advance past the 8 values just written to each output. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 pairs. */
    number = eighthPoints * 8;
    const int16_t* int16ComplexVectorPtr = (const int16_t*)complexVectorPtr;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *int16ComplexVectorPtr++;
        *qBufferPtr++ = *int16ComplexVectorPtr++;
    }
}
#endif /* LV_HAVE_SSSE3 */
/* NOTE(review): guard macro name assumed by analogy with the LV_HAVE_GENERIC
 * guard used for the generic kernel in this file -- confirm against the build. */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*
 * Split an interleaved stream of 16-bit pairs into two separate streams
 * (SSE2, aligned loads/stores).
 *
 * iBuffer       - receives the first int16 of every pair (num_points values)
 * qBuffer       - receives the second int16 of every pair (num_points values)
 * complexVector - num_points interleaved pairs
 * num_points    - number of pairs to process
 *
 * Aligned 128-bit load/store intrinsics are used, so all three buffers must
 * satisfy their 16-byte alignment requirement.
 */
static inline void
volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, int16_t* qBuffer,
                                     const lv_16sc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    int16_t* qBufferPtr = qBuffer;
    __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
        qComplexVal2, iOutputVal, qOutputVal;
    /* Select the low / high 64 bits of a register. */
    const __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
    const __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);

    const unsigned int eighthPoints = num_points / 8;

    for (number = 0; number < eighthPoints; number++) {
        /* 2 x 8 int16 = 8 interleaved pairs per iteration. */
        complexVal1 = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 8;
        complexVal2 = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 8;

        /* (3,1,2,0) on each 4-word half groups words as [w0 w2 w1 w3], i.e.
         * first components ahead of second components. The 32-bit shuffle
         * then moves the first-component pairs to the low 64 bits. */
        iComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
        iComplexVal1 = _mm_shufflehi_epi16(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
        iComplexVal1 = _mm_shuffle_epi32(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));

        /* Same grouping for the second register, but the 32-bit shuffle
         * (2,0,3,1) places the first-component pairs in the high 64 bits. */
        iComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
        iComplexVal2 = _mm_shufflehi_epi16(iComplexVal2, _MM_SHUFFLE(3, 1, 2, 0));
        iComplexVal2 = _mm_shuffle_epi32(iComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));

        iOutputVal = _mm_or_si128(_mm_and_si128(iComplexVal1, lowMask),
                                  _mm_and_si128(iComplexVal2, highMask));

        _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);

        /* (2,0,3,1) on each half groups words as [w1 w3 w0 w2], i.e. second
         * components first; the 32-bit shuffles then work as above. */
        qComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(2, 0, 3, 1));
        qComplexVal1 = _mm_shufflehi_epi16(qComplexVal1, _MM_SHUFFLE(2, 0, 3, 1));
        qComplexVal1 = _mm_shuffle_epi32(qComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));

        qComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
        qComplexVal2 = _mm_shufflehi_epi16(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
        qComplexVal2 = _mm_shuffle_epi32(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));

        qOutputVal = _mm_or_si128(_mm_and_si128(qComplexVal1, lowMask),
                                  _mm_and_si128(qComplexVal2, highMask));

        _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);

        /* Advance past the 8 values just written to each output. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 pairs. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar kernel: split an interleaved stream of 16-bit pairs into
 * two separate streams.
 *
 * iBuffer       - receives the first int16 of every pair (num_points values)
 * qBuffer       - receives the second int16 of every pair (num_points values)
 * complexVector - num_points interleaved pairs
 * num_points    - number of pairs to process
 */
static inline void
volk_16ic_deinterleave_16i_x2_generic(int16_t* iBuffer, int16_t* qBuffer,
                                      const lv_16sc_t* complexVector,
                                      unsigned int num_points)
{
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    int16_t* qBufferPtr = qBuffer;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
230 volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer, int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points);
232 volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer, int16_t* qBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
234 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
#define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H

#include <inttypes.h>

/* NOTE(review): guard macro name assumed by analogy with the LV_HAVE_GENERIC
 * guard used for the generic kernel in this file -- confirm against the build. */
#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*
 * Split an interleaved stream of 16-bit pairs into two separate streams
 * (AVX2, unaligned loads/stores).
 *
 * iBuffer       - receives the first int16 of every pair (num_points values)
 * qBuffer       - receives the second int16 of every pair (num_points values)
 * complexVector - num_points interleaved pairs
 * num_points    - number of pairs to process
 *
 * Uses the loadu/storeu intrinsics, so the buffers need no special alignment.
 */
static inline void
volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer, int16_t* qBuffer,
                                     const lv_16sc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    int16_t* qBufferPtr = qBuffer;

    /* Per 128-bit lane: first components -> low 8 bytes, second -> high 8 bytes. */
    const __m256i MoveMask =
        _mm256_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0,
                        15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0);

    __m256i iMove2, iMove1;
    __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;

    const unsigned int sixteenthPoints = num_points / 16;

    for (number = 0; number < sixteenthPoints; number++) {
        /* 2 x 32 bytes = 16 interleaved pairs per iteration. */
        complexVal1 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;
        complexVal2 = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
        iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);

        /* Gather 64-bit words 0 and 2 (first components) of each input into
         * one 128-bit half, then take the low half from input 1 and the high
         * half from input 2. Words 1 and 3 give the second components. */
        iOutputVal = _mm256_permute2x128_si256(
            _mm256_permute4x64_epi64(iMove1, 0x08),
            _mm256_permute4x64_epi64(iMove2, 0x80), 0x30);
        qOutputVal = _mm256_permute2x128_si256(
            _mm256_permute4x64_epi64(iMove1, 0x0d),
            _mm256_permute4x64_epi64(iMove2, 0xd0), 0x30);

        _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
        _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);

        /* Advance past the 16 values just written to each output. */
        iBufferPtr += 16;
        qBufferPtr += 16;
    }

    /* Scalar tail for the remaining num_points % 16 pairs. */
    number = sixteenthPoints * 16;
    const int16_t* int16ComplexVectorPtr = (const int16_t*)complexVectorPtr;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *int16ComplexVectorPtr++;
        *qBufferPtr++ = *int16ComplexVectorPtr++;
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_u_H */
static void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:151
short complex lv_16sc_t
Definition: volk_complex.h:58
static void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:107
static void volk_16ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:214