53 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H 54 #define INCLUDED_volk_16ic_deinterleave_real_16i_a_H 61 #include <immintrin.h> 64 volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
66 unsigned int number = 0;
67 const int16_t* complexVectorPtr = (int16_t*)complexVector;
68 int16_t* iBufferPtr = iBuffer;
70 __m256i iMoveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
71 __m256i iMoveMask2 = _mm256_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
73 __m256i complexVal1, complexVal2, iOutputVal;
75 unsigned int sixteenthPoints = num_points / 16;
77 for(number = 0; number < sixteenthPoints; number++){
78 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 16;
79 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 16;
81 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
82 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
84 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
85 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
87 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
92 number = sixteenthPoints * 16;
93 for(; number < num_points; number++){
94 *iBufferPtr++ = *complexVectorPtr++;
101 #include <tmmintrin.h> 106 unsigned int number = 0;
107 const int16_t* complexVectorPtr = (int16_t*)complexVector;
108 int16_t* iBufferPtr = iBuffer;
110 __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
111 __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
113 __m128i complexVal1, complexVal2, iOutputVal;
115 unsigned int eighthPoints = num_points / 8;
117 for(number = 0; number < eighthPoints; number++){
118 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
119 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
121 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
122 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
124 iOutputVal = _mm_or_si128(complexVal1, complexVal2);
126 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
131 number = eighthPoints * 8;
132 for(; number < num_points; number++){
133 *iBufferPtr++ = *complexVectorPtr++;
141 #include <emmintrin.h> 146 unsigned int number = 0;
147 const int16_t* complexVectorPtr = (int16_t*)complexVector;
148 int16_t* iBufferPtr = iBuffer;
149 __m128i complexVal1, complexVal2, iOutputVal;
150 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
151 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
153 unsigned int eighthPoints = num_points / 8;
155 for(number = 0; number < eighthPoints; number++){
156 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
157 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 8;
159 complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
161 complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
163 complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3,1,2,0));
165 complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
167 complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
169 complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2,0,3,1));
171 iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask), _mm_and_si128(complexVal2, highMask));
173 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
178 number = eighthPoints * 8;
179 for(; number < num_points; number++){
180 *iBufferPtr++ = *complexVectorPtr++;
186 #ifdef LV_HAVE_GENERIC 191 unsigned int number = 0;
192 const int16_t* complexVectorPtr = (int16_t*)complexVector;
193 int16_t* iBufferPtr = iBuffer;
194 for(number = 0; number < num_points; number++){
195 *iBufferPtr++ = *complexVectorPtr++;
205 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H 206 #define INCLUDED_volk_16ic_deinterleave_real_16i_u_H 208 #include <inttypes.h> 213 #include <immintrin.h> 216 volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
218 unsigned int number = 0;
219 const int16_t* complexVectorPtr = (int16_t*)complexVector;
220 int16_t* iBufferPtr = iBuffer;
222 __m256i iMoveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
223 __m256i iMoveMask2 = _mm256_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
225 __m256i complexVal1, complexVal2, iOutputVal;
227 unsigned int sixteenthPoints = num_points / 16;
229 for(number = 0; number < sixteenthPoints; number++){
230 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr); complexVectorPtr += 16;
231 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr); complexVectorPtr += 16;
233 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
234 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
236 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
237 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
239 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
244 number = sixteenthPoints * 16;
245 for(; number < num_points; number++){
246 *iBufferPtr++ = *complexVectorPtr++;
short complex lv_16sc_t
Definition: volk_complex.h:58
static void volk_16ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:189
static void volk_16ic_deinterleave_real_16i_a_sse2(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:144
static void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_real_16i.h:104