/*
 * Include guard for the aligned kernels, followed by the aligned AVX2 kernel.
 *
 * Each vector iteration reads 64 input bytes and writes the 32 even-indexed
 * bytes to the output; a scalar loop handles the remaining num_points % 32
 * outputs.
 *
 * iBuffer       - output buffer written through iBufferPtr
 * complexVector - input, accessed as a flat int8_t byte stream
 * num_points    - number of output bytes to produce
 *
 * The aligned load/store intrinsics used here require 32-byte-aligned
 * addresses.
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * feature guards); confirm details against the complete header.
 */
53 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H 54 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H 60 #include <immintrin.h> 63 volk_8ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
const lv_8sc_t* complexVector,
64 unsigned int num_points)
66 unsigned int number = 0;
67 const int8_t* complexVectorPtr = (int8_t*)complexVector;
68 int8_t* iBufferPtr = iBuffer;
/* Per 128-bit lane: gather even bytes into the low 8 bytes, zero the high 8 (0x80 selects zero). */
69 __m256i moveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* Per 128-bit lane: gather even bytes into the high 8 bytes, zero the low 8. */
70 __m256i moveMask2 = _mm256_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
71 __m256i complexVal1, complexVal2, outputVal;
/* Number of full 32-output vector iterations. */
73 unsigned int thirtysecondPoints = num_points / 32;
75 for(number = 0; number < thirtysecondPoints; number++){
/* Two aligned 32-byte loads: 64 input bytes per iteration. */
77 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
78 complexVectorPtr += 32;
79 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
80 complexVectorPtr += 32;
/* The masks place the two results in disjoint halves of each lane, so OR merges them. */
82 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
83 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
84 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the 64-bit quarters as 0,2,1,3, restoring input order across the two lanes. */
85 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
87 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): no advance of iBufferPtr is visible after this store; without one each
 * iteration would overwrite the same 32 bytes. Confirm against the complete source. */
/* Scalar tail for the outputs not covered by the vector loop. */
91 number = thirtysecondPoints * 32;
92 for(; number < num_points; number++){
/* NOTE(review): as shown, the input advances one byte per output; if the input is
 * interleaved (real, imag) pairs a second increment would be needed. Verify it is
 * not simply missing from this listing. */
93 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Aligned SSSE3 kernel.
 *
 * Each vector iteration reads 32 input bytes and writes the 16 even-indexed
 * bytes to the output; a scalar loop handles the remaining num_points % 16
 * outputs. The aligned load/store intrinsics used here require
 * 16-byte-aligned addresses.
 *
 * NOTE(review): the function name/signature line and the braces are not
 * visible in this listing; confirm details against the complete header.
 */
101 #include <tmmintrin.h> 105 unsigned int num_points)
107 unsigned int number = 0;
108 const int8_t* complexVectorPtr = (int8_t*)complexVector;
109 int8_t* iBufferPtr = iBuffer;
/* Even bytes gathered into the low 8 bytes; the high 8 are zeroed (0x80 selects zero). */
110 __m128i moveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* Even bytes gathered into the high 8 bytes; the low 8 are zeroed. */
111 __m128i moveMask2 = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
112 __m128i complexVal1, complexVal2, outputVal;
/* Number of full 16-output vector iterations. */
114 unsigned int sixteenthPoints = num_points / 16;
116 for(number = 0; number < sixteenthPoints; number++){
/* Two aligned 16-byte loads: 32 input bytes per iteration. */
117 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
118 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
120 complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
121 complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
/* The shuffled halves occupy disjoint bytes, so OR concatenates them in input order. */
123 outputVal = _mm_or_si128(complexVal1, complexVal2);
125 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
/* NOTE(review): no advance of iBufferPtr is visible after this store; confirm against
 * the complete source. */
/* Scalar tail for the outputs not covered by the vector loop. */
129 number = sixteenthPoints * 16;
130 for(; number < num_points; number++){
/* NOTE(review): as shown, the input advances one byte per output; check whether a
 * second increment (skipping the odd byte) is missing from this listing. */
131 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Aligned AVX kernel.
 *
 * Uses 256-bit aligned loads and stores, but does the byte shuffles on
 * 128-bit halves with SSSE3-style masks. Each vector iteration reads 64 input
 * bytes and writes the 32 even-indexed bytes; a scalar loop handles the
 * remaining num_points % 32 outputs.
 *
 * NOTE(review): the function name/signature line and the braces are not
 * visible in this listing; confirm details against the complete header.
 */
139 #include <immintrin.h> 143 unsigned int num_points)
145 unsigned int number = 0;
146 const int8_t* complexVectorPtr = (int8_t*)complexVector;
147 int8_t* iBufferPtr = iBuffer;
/* moveMaskL: even bytes into the low 8 bytes, high 8 zeroed (0x80 selects zero). */
148 __m128i moveMaskL = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* moveMaskH: even bytes into the high 8 bytes, low 8 zeroed. */
149 __m128i moveMaskH = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
150 __m256i complexVal1, complexVal2, outputVal;
151 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1, outputVal2;
/* Number of full 32-output vector iterations. */
153 unsigned int thirtysecondPoints = num_points / 32;
155 for(number = 0; number < thirtysecondPoints; number++){
/* Two aligned 32-byte loads: 64 input bytes per iteration. */
157 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
158 complexVectorPtr += 32;
159 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
160 complexVectorPtr += 32;
/* Split each 256-bit vector into its high (index 1) and low (index 0) 128-bit halves. */
162 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
163 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
164 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
165 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
/* First vector: low-half evens land in bytes 0-7, high-half evens in bytes 8-15. */
167 complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
168 complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
169 outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
/* Second vector: same treatment. */
172 complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
173 complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
174 outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
/* Reassemble a 256-bit result: outputVal1 in the low half, outputVal2 in the high half. */
176 __m256i dummy = _mm256_setzero_si256();
177 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
178 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
181 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): no advance of iBufferPtr is visible after this store; confirm against
 * the complete source. */
/* Scalar tail for the outputs not covered by the vector loop. */
185 number = thirtysecondPoints * 32;
186 for(; number < num_points; number++){
/* NOTE(review): as shown, the input advances one byte per output; check whether a
 * second increment (skipping the odd byte) is missing from this listing. */
187 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Portable scalar kernel, compiled when LV_HAVE_GENERIC is defined.
 *
 * Walks the input as int8_t bytes and writes num_points bytes to iBuffer.
 *
 * NOTE(review): the function name/signature line and the braces are not
 * visible in this listing; confirm details against the complete header.
 */
194 #ifdef LV_HAVE_GENERIC 198 unsigned int num_points)
200 unsigned int number = 0;
201 const int8_t* complexVectorPtr = (int8_t*)complexVector;
202 int8_t* iBufferPtr = iBuffer;
203 for(number = 0; number < num_points; number++){
/* NOTE(review): as shown, this copies consecutive bytes. If the input is interleaved
 * (real, imag) pairs, a second complexVectorPtr increment is needed to skip the
 * imaginary byte; verify it is not simply missing from this listing. */
204 *iBufferPtr++ = *complexVectorPtr++;
/*
 * ARM NEON kernel.
 *
 * vld2q_s8 reads 32 bytes and de-interleaves them into two 16-byte vectors;
 * val[0] holds the even-indexed bytes, which are stored to the output. A
 * scalar loop handles the remaining num_points % 16 outputs.
 *
 * NOTE(review): the signature, the declaration of `number` and the braces are
 * not visible in this listing; confirm details against the complete header.
 */
212 #include <arm_neon.h> 218 unsigned int sixteenth_points = num_points / 16;
220 int8x16x2_t input_vector;
221 for(number=0; number < sixteenth_points; ++number) {
222 input_vector = vld2q_s8((int8_t*) complexVector );
223 vst1q_s8(iBuffer, input_vector.val[0]);
/* NOTE(review): no advance of iBuffer or complexVector is visible inside this loop;
 * without one every iteration would process the same data. Confirm against the
 * complete source. */
/* Scalar tail starts from wherever iBuffer and complexVector point after the vector loop. */
228 const int8_t* complexVectorPtr = (int8_t*)complexVector;
229 int8_t* iBufferPtr = iBuffer;
230 for(number = sixteenth_points*16; number < num_points; number++){
/* NOTE(review): as shown, the input advances one byte per output; check whether a
 * second increment (skipping the odd byte) is missing from this listing. */
231 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Include guard for the unaligned kernels, followed by the unaligned AVX2
 * kernel.
 *
 * The visible data flow matches the aligned AVX2 kernel, but it uses
 * _mm256_loadu_si256 / _mm256_storeu_si256, which carry no alignment
 * requirement. Each vector iteration reads 64 input bytes and writes the 32
 * even-indexed bytes; a scalar loop handles the remaining num_points % 32
 * outputs.
 *
 * iBuffer       - output buffer written through iBufferPtr
 * complexVector - input, accessed as a flat int8_t byte stream
 * num_points    - number of output bytes to produce
 *
 * NOTE(review): this listing omits some source lines (return type, braces,
 * feature guards); confirm details against the complete header.
 */
240 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H 241 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H 243 #include <inttypes.h> 247 #include <immintrin.h> 250 volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
const lv_8sc_t* complexVector,
251 unsigned int num_points)
253 unsigned int number = 0;
254 const int8_t* complexVectorPtr = (int8_t*)complexVector;
255 int8_t* iBufferPtr = iBuffer;
/* Per 128-bit lane: gather even bytes into the low 8 bytes, zero the high 8 (0x80 selects zero). */
256 __m256i moveMask1 = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* Per 128-bit lane: gather even bytes into the high 8 bytes, zero the low 8. */
257 __m256i moveMask2 = _mm256_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
258 __m256i complexVal1, complexVal2, outputVal;
/* Number of full 32-output vector iterations. */
260 unsigned int thirtysecondPoints = num_points / 32;
262 for(number = 0; number < thirtysecondPoints; number++){
/* Two unaligned 32-byte loads: 64 input bytes per iteration. */
264 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
265 complexVectorPtr += 32;
266 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
267 complexVectorPtr += 32;
/* The masks place the two results in disjoint halves of each lane, so OR merges them. */
269 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
270 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
271 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the 64-bit quarters as 0,2,1,3, restoring input order across the two lanes. */
272 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
274 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): no advance of iBufferPtr is visible after this store; without one each
 * iteration would overwrite the same 32 bytes. Confirm against the complete source. */
/* Scalar tail for the outputs not covered by the vector loop. */
278 number = thirtysecondPoints * 32;
279 for(; number < num_points; number++){
/* NOTE(review): as shown, the input advances one byte per output; if the input is
 * interleaved (real, imag) pairs a second increment would be needed. Verify it is
 * not simply missing from this listing. */
280 *iBufferPtr++ = *complexVectorPtr++;
static void volk_8ic_deinterleave_real_8i_a_avx(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:142
static void volk_8ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:197
static void volk_8ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:215
static void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:104
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57