#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H

#include <inttypes.h>

/*
 * Aligned kernels: extract the real part of an interleaved 8-bit complex
 * vector and widen it to 16 bits.
 *
 * The input is read as a stream of int8_t bytes in which the even-indexed
 * bytes are the real parts (the shuffle masks below select bytes
 * 0, 2, 4, ..., 14).  Every output sample is the sign-extended real byte
 * multiplied by 128 (a left shift by 7 in the SIMD paths).
 *
 * Common parameters:
 *   iBuffer        destination, receives num_points int16_t samples
 *   complexVector  source, num_points complex samples (2 bytes each)
 *   num_points     number of complex samples to convert
 *
 * The SIMD kernels in this section use aligned load/store intrinsics, so
 * both buffers must satisfy the alignment of the vector width in use.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*
 * AVX2 kernel: 16 complex samples per iteration.
 * Both buffers must be 32-byte aligned (_mm256_load_si256 / _mm256_store_si256).
 */
static inline void
volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
                                      const lv_8sc_t* complexVector,
                                      unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    /* Within each 128-bit lane: pack the eight even (real) bytes into the
     * low 64 bits; a selector of 0x80 zeroes the corresponding byte. */
    const __m256i moveMask = _mm256_set_epi8(
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0,
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0);
    __m256i complexVal, outputVal;
    __m128i outputVal0;

    const unsigned int sixteenthPoints = num_points / 16;

    for (number = 0; number < sixteenthPoints; number++) {
        complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
        /* 0xd8 orders the 64-bit quarters as 0,2,1,3 so that the real bytes
         * of both lanes end up contiguous in the low 128 bits. */
        complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);

        outputVal0 = _mm256_extractf128_si256(complexVal, 0);

        /* Sign-extend 16 x int8 to 16 x int16, then scale by 128. */
        outputVal = _mm256_cvtepi8_epi16(outputVal0);
        outputVal = _mm256_slli_epi16(outputVal, 7);

        _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
        iBufferPtr += 16;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(((int16_t)*complexVectorPtr++) * 128);
        complexVectorPtr++; /* skip the imaginary byte */
    }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*
 * SSE4.1 kernel: 8 complex samples per iteration.
 * Both buffers must be 16-byte aligned (_mm_load_si128 / _mm_store_si128).
 */
static inline void
volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
                                        const lv_8sc_t* complexVector,
                                        unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    /* Pack the eight even (real) bytes into the low 64 bits; 0x80 zeroes
     * the remaining bytes. */
    const __m128i moveMask = _mm_set_epi8(
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0);
    __m128i complexVal, outputVal;

    const unsigned int eighthPoints = num_points / 8;

    for (number = 0; number < eighthPoints; number++) {
        complexVal = _mm_load_si128((const __m128i*)complexVectorPtr);
        complexVectorPtr += 16;

        complexVal = _mm_shuffle_epi8(complexVal, moveMask);

        /* Sign-extend the low 8 x int8 to 8 x int16, then scale by 128. */
        outputVal = _mm_cvtepi8_epi16(complexVal);
        outputVal = _mm_slli_epi16(outputVal, 7);

        _mm_store_si128((__m128i*)iBufferPtr, outputVal);
        iBufferPtr += 8;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(((int16_t)*complexVectorPtr++) * 128);
        complexVectorPtr++; /* skip the imaginary byte */
    }
}
#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * AVX kernel: 16 complex samples per iteration.
 * The data is loaded and stored as one 256-bit vector, but the shuffle,
 * sign-extension and shift are carried out on the two 128-bit halves.
 * Both buffers must be 32-byte aligned (_mm256_load_si256 / _mm256_store_si256).
 */
static inline void
volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer,
                                     const lv_8sc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    /* Pack the eight even (real) bytes into the low 64 bits; 0x80 zeroes
     * the remaining bytes. */
    const __m128i moveMask = _mm_set_epi8(
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0);
    __m256i complexVal, outputVal;
    __m128i complexVal1, complexVal0, outputVal1, outputVal0;

    const unsigned int sixteenthPoints = num_points / 16;

    for (number = 0; number < sixteenthPoints; number++) {
        complexVal = _mm256_load_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        complexVal1 = _mm256_extractf128_si256(complexVal, 1);
        complexVal0 = _mm256_extractf128_si256(complexVal, 0);

        outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
        outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);

        /* Sign-extend each half to 8 x int16, then scale by 128. */
        outputVal1 = _mm_cvtepi8_epi16(outputVal1);
        outputVal1 = _mm_slli_epi16(outputVal1, 7);
        outputVal0 = _mm_cvtepi8_epi16(outputVal0);
        outputVal0 = _mm_slli_epi16(outputVal0, 7);

        /* Reassemble the two halves (low = samples 0..7, high = 8..15). */
        outputVal = _mm256_insertf128_si256(_mm256_setzero_si256(), outputVal0, 0);
        outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
        _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
        iBufferPtr += 16;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(((int16_t)*complexVectorPtr++) * 128);
        complexVectorPtr++; /* skip the imaginary byte */
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar kernel; no alignment requirement.
 */
static inline void
volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer,
                                       const lv_8sc_t* complexVector,
                                       unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(((int16_t)(*complexVectorPtr++)) * 128);
        complexVectorPtr++; /* skip the imaginary byte */
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
#define INCLUDED_volk_8ic_deinterleave_real_16i_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*
 * Unaligned AVX2 kernel: extract the real part of an interleaved 8-bit
 * complex vector and widen it to 16 bits, 16 complex samples per iteration.
 *
 * The input is read as a stream of int8_t bytes in which the even-indexed
 * bytes are the real parts (the shuffle mask selects bytes 0, 2, ..., 14 of
 * each 128-bit lane).  Every output sample is the sign-extended real byte
 * multiplied by 128 (a left shift by 7 in the SIMD path).
 *
 *   iBuffer        destination, receives num_points int16_t samples
 *   complexVector  source, num_points complex samples (2 bytes each)
 *   num_points     number of complex samples to convert
 *
 * Uses _mm256_loadu_si256 / _mm256_storeu_si256, so neither buffer needs
 * any particular alignment.
 */
static inline void
volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
                                      const lv_8sc_t* complexVector,
                                      unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    /* Within each 128-bit lane: pack the eight even (real) bytes into the
     * low 64 bits; a selector of 0x80 zeroes the corresponding byte. */
    const __m256i moveMask = _mm256_set_epi8(
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0,
        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
        14, 12, 10, 8, 6, 4, 2, 0);
    __m256i complexVal, outputVal;
    __m128i outputVal0;

    const unsigned int sixteenthPoints = num_points / 16;

    for (number = 0; number < sixteenthPoints; number++) {
        complexVal = _mm256_loadu_si256((const __m256i*)complexVectorPtr);
        complexVectorPtr += 32;

        complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
        /* 0xd8 orders the 64-bit quarters as 0,2,1,3 so that the real bytes
         * of both lanes end up contiguous in the low 128 bits. */
        complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);

        outputVal0 = _mm256_extractf128_si256(complexVal, 0);

        /* Sign-extend 16 x int8 to 16 x int16, then scale by 128. */
        outputVal = _mm256_cvtepi8_epi16(outputVal0);
        outputVal = _mm256_slli_epi16(outputVal, 7);

        _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
        iBufferPtr += 16;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(((int16_t)*complexVectorPtr++) * 128);
        complexVectorPtr++; /* skip the imaginary byte */
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
/*
 * Cross-reference entries (these appear to be index residue from the
 * generated source listing, not code):
 *
 *   static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
 *     Definition: volk_8ic_deinterleave_real_16i.h:140
 *
 *   static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
 *     Definition: volk_8ic_deinterleave_real_16i.h:186
 *
 *   char complex lv_8sc_t
 *     Provide typedefs and operators for all complex types in C and C++.
 *     Definition: volk_complex.h:57
 */