#ifndef INCLUDED_volk_16i_convert_8i_u_H
#define INCLUDED_volk_16i_convert_8i_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/**
 * Converts 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of each sample (shift right by 8). AVX2 kernel using
 * unaligned loads and stores, so the buffers need no particular alignment.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int thirtysecondPoints = num_points / 32;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m256i inputVal1;
    __m256i inputVal2;
    __m256i ret;

    for (; number < thirtysecondPoints; number++) {
        /* Two loads of 16 samples each -> 32 samples per iteration. */
        inputVal1 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        inputVal2 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        /* Arithmetic shift keeps the sign; results fit in int8 range, so the
         * saturating pack below never actually clamps. */
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        /* The 256-bit pack works per 128-bit lane, leaving the 64-bit groups
         * ordered [1.lo, 2.lo, 1.hi, 2.hi]; the permute (0, 2, 1, 3) restores
         * sample order. */
        ret = _mm256_packs_epi16(inputVal1, inputVal2);
        ret = _mm256_permute4x64_epi64(ret, _MM_SHUFFLE(3, 1, 2, 0));

        _mm256_storeu_si256((__m256i*)outputVectorPtr, ret);

        outputVectorPtr += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
#endif /* LV_HAVE_AVX2 */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Converts 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of each sample (shift right by 8). SSE2 kernel using
 * unaligned loads and stores.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m128i inputVal1;
    __m128i inputVal2;
    __m128i ret;

    for (; number < sixteenthPoints; number++) {
        /* Two loads of 8 samples each -> 16 samples per iteration. */
        inputVal1 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        inputVal2 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        /* Shifted values already fit in int8, so the pack does not clamp. */
        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_storeu_si128((__m128i*)outputVectorPtr, ret);

        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples. */
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC
/**
 * Portable reference kernel: writes the upper byte of every 16-bit input
 * sample to the output.
 *
 * Note: right-shifting a negative value is implementation-defined in C;
 * mainstream compilers shift arithmetically, which matches the SIMD kernels.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_generic(int8_t* outputVector,
                                               const int16_t* inputVector,
                                               unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_16i_convert_8i_u_H */
#ifndef INCLUDED_volk_16i_convert_8i_a_H
#define INCLUDED_volk_16i_convert_8i_a_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/**
 * Converts 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of each sample (shift right by 8). AVX2 kernel using aligned
 * loads and stores: both buffers must be 32-byte aligned.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int thirtysecondPoints = num_points / 32;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m256i inputVal1;
    __m256i inputVal2;
    __m256i ret;

    for (; number < thirtysecondPoints; number++) {
        /* Two loads of 16 samples each -> 32 samples per iteration. */
        inputVal1 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        inputVal2 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        /* Arithmetic shift keeps the sign; results fit in int8 range, so the
         * saturating pack below never actually clamps. */
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        /* The 256-bit pack works per 128-bit lane, leaving the 64-bit groups
         * ordered [1.lo, 2.lo, 1.hi, 2.hi]; the permute (0, 2, 1, 3) restores
         * sample order. */
        ret = _mm256_packs_epi16(inputVal1, inputVal2);
        ret = _mm256_permute4x64_epi64(ret, _MM_SHUFFLE(3, 1, 2, 0));

        _mm256_store_si256((__m256i*)outputVectorPtr, ret);

        outputVectorPtr += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
#endif /* LV_HAVE_AVX2 */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Converts 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of each sample (shift right by 8). SSE2 kernel using aligned
 * loads and stores: both buffers must be 16-byte aligned.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m128i inputVal1;
    __m128i inputVal2;
    __m128i ret;

    for (; number < sixteenthPoints; number++) {
        /* Two loads of 8 samples each -> 16 samples per iteration. */
        inputVal1 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        inputVal2 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        /* Shifted values already fit in int8, so the pack does not clamp. */
        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_store_si128((__m128i*)outputVectorPtr, ret);

        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples. */
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/**
 * Converts 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of each sample (shift right by 8). NEON kernel processing
 * 16 samples per iteration.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
                                            const int16_t* inputVector,
                                            unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number = 0;
    unsigned int sixteenth_points = num_points / 16;

    int16x8_t inputVal0;
    int16x8_t inputVal1;
    int8x8_t outputVal0;
    int8x8_t outputVal1;
    int8x16_t outputVal;

    for (number = 0; number < sixteenth_points; number++) {
        /* Load 2 x 8 samples. */
        inputVal0 = vld1q_s16(inputVectorPtr);
        inputVal1 = vld1q_s16(inputVectorPtr + 8);

        /* Narrowing shift: shift right by 8 and keep the low 8 bits. */
        outputVal0 = vshrn_n_s16(inputVal0, 8);
        outputVal1 = vshrn_n_s16(inputVal1, 8);

        outputVal = vcombine_s8(outputVal0, outputVal1);
        vst1q_s8(outputVectorPtr, outputVal);
        inputVectorPtr += 16;
        outputVectorPtr += 16;
    }

    /* Scalar tail; the pointers already sit just past the vectorized part. */
    for (number = sixteenth_points * 16; number < num_points; number++) {
        *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
    }
}
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC
/**
 * Portable reference kernel: writes the upper byte of every 16-bit input
 * sample to the output.
 *
 * Note: right-shifting a negative value is implementation-defined in C;
 * mainstream compilers shift arithmetically, which matches the SIMD kernels.
 *
 * @param outputVector destination, at least num_points bytes
 * @param inputVector  source, at least num_points 16-bit samples
 * @param num_points   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector,
                                                 const int16_t* inputVector,
                                                 unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_16i_convert_8i_a_H */
/*
 * Cross-reference index carried over from the generated documentation listing
 * (signature, followed by where the kernel is defined):
 *
 *   static void volk_16i_convert_8i_u_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
 *     Definition: volk_16i_convert_8i.h:103
 *   static void volk_16i_convert_8i_neon(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
 *     Definition: volk_16i_convert_8i.h:246
 *   static void volk_16i_convert_8i_a_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
 *     Definition: volk_16i_convert_8i.h:283
 *   static void volk_16i_convert_8i_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
 *     Definition: volk_16i_convert_8i.h:141
 *   static void volk_16i_convert_8i_a_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
 *     Definition: volk_16i_convert_8i.h:207
 */