/* Unaligned-variant header guard, AVX2 include, and the start of the AVX2 kernel.
 * The kernel widens each int8 input sample to int16 and scales it by 256.
 * NOTE(review): this listing omits some original lines (return type, braces,
 * the declarations of inputVal/ret, and any pointer advances) -- confirm
 * against the full header before relying on the details below. */
53 #ifndef INCLUDED_volk_8i_convert_16i_u_H 54 #define INCLUDED_volk_8i_convert_16i_u_H 60 #include <immintrin.h> 63 volk_8i_convert_16i_u_avx2(int16_t* outputVector,
const int8_t* inputVector,
64 unsigned int num_points)
66 unsigned int number = 0;
/* Number of complete 16-sample groups handled by the vector loop. */
67 const unsigned int sixteenthPoints = num_points / 16;
/* Input is read 128 bits (16 x int8) at a time, output written 256 bits (16 x int16) at a time. */
69 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
70 __m256i* outputVectorPtr = (__m256i*)outputVector;
74 for(;number < sixteenthPoints; number++){
/* Unaligned 128-bit load. */
75 inputVal = _mm_loadu_si128(inputVectorPtr);
/* Sign-extend 16 int8 lanes to 16 int16 lanes. */
76 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift each 16-bit lane left by 8 bits, matching the *256 in the scalar tail. */
77 ret = _mm256_slli_epi16(ret, 8);
/* Unaligned 256-bit store.
 * NOTE(review): the advance of inputVectorPtr/outputVectorPtr is not visible here -- confirm. */
78 _mm256_storeu_si256(outputVectorPtr, ret);
/* Scalar tail: handle the num_points % 16 samples left after the vector loop. */
84 number = sixteenthPoints * 16;
85 for(; number < num_points; number++){
86 outputVector[number] = (int16_t)(inputVector[number])*256;
/* SSE4.1 include and the start of the unaligned SSE4.1 kernel.
 * Widens each int8 input sample to int16 and scales it by 256, 16 samples per
 * loop iteration done as two 8-sample halves.
 * NOTE(review): this listing omits some original lines (return type, braces,
 * declarations of inputVal/ret, pointer advances) -- confirm against the full header. */
93 #include <smmintrin.h> 96 volk_8i_convert_16i_u_sse4_1(int16_t* outputVector,
const int8_t* inputVector,
97 unsigned int num_points)
99 unsigned int number = 0;
/* Number of complete 16-sample groups handled by the vector loop. */
100 const unsigned int sixteenthPoints = num_points / 16;
102 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
103 __m128i* outputVectorPtr = (__m128i*)outputVector;
107 for(;number < sixteenthPoints; number++){
/* Unaligned 128-bit load of 16 int8 samples. */
108 inputVal = _mm_loadu_si128(inputVectorPtr);
/* First half: sign-extend the low 8 int8 lanes to int16, then shift left by 8 bits. */
109 ret = _mm_cvtepi8_epi16(inputVal);
110 ret = _mm_slli_epi16(ret, 8);
111 _mm_storeu_si128(outputVectorPtr, ret);
/* Second half: shift the register right by 8 bytes so the upper 8 samples
 * land in the low lanes, then convert and store as above.
 * NOTE(review): the outputVectorPtr advance between the two stores is not
 * visible in this excerpt -- confirm. */
115 inputVal = _mm_srli_si128(inputVal, 8);
116 ret = _mm_cvtepi8_epi16(inputVal);
117 ret = _mm_slli_epi16(ret, 8);
118 _mm_storeu_si128(outputVectorPtr, ret);
/* Scalar tail: handle the num_points % 16 samples left after the vector loop. */
125 number = sixteenthPoints * 16;
126 for(; number < num_points; number++){
127 outputVector[number] = (int16_t)(inputVector[number])*256;
/* Generic (scalar) kernel, compiled only when LV_HAVE_GENERIC is defined.
 * NOTE(review): the function name and the first parameters are not visible in
 * this excerpt; the body below uses outputVector, inputVector and num_points. */
133 #ifdef LV_HAVE_GENERIC 137 unsigned int num_points)
/* Local cursors so the loop can walk both buffers with post-increment. */
139 int16_t* outputVectorPtr = outputVector;
140 const int8_t* inputVectorPtr = inputVector;
141 unsigned int number = 0;
143 for(number = 0; number < num_points; number++){
/* Widen one int8 sample to int16, multiply by 256, and advance both cursors. */
144 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/* Aligned-variant header guard, includes, and the start of the aligned AVX2 kernel.
 * Same conversion as the scalar tail below (int8 widened to int16, times 256),
 * but using aligned load/store intrinsics, so inputVector must be 16-byte
 * aligned and outputVector 32-byte aligned.
 * NOTE(review): this listing omits some original lines (return type, braces,
 * declarations of inputVal/ret, pointer advances) -- confirm against the full header. */
154 #ifndef INCLUDED_volk_8i_convert_16i_a_H 155 #define INCLUDED_volk_8i_convert_16i_a_H 157 #include <inttypes.h> 161 #include <immintrin.h> 164 volk_8i_convert_16i_a_avx2(int16_t* outputVector,
const int8_t* inputVector,
165 unsigned int num_points)
167 unsigned int number = 0;
/* Number of complete 16-sample groups handled by the vector loop. */
168 const unsigned int sixteenthPoints = num_points / 16;
170 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
171 __m256i* outputVectorPtr = (__m256i*)outputVector;
175 for(;number < sixteenthPoints; number++){
/* Aligned 128-bit load of 16 int8 samples. */
176 inputVal = _mm_load_si128(inputVectorPtr);
/* Sign-extend 16 int8 lanes to 16 int16 lanes. */
177 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift each 16-bit lane left by 8 bits, matching the *256 in the scalar tail. */
178 ret = _mm256_slli_epi16(ret, 8);
/* Aligned 256-bit store.
 * NOTE(review): the advance of inputVectorPtr/outputVectorPtr is not visible here -- confirm. */
179 _mm256_store_si256(outputVectorPtr, ret);
/* Scalar tail: handle the num_points % 16 samples left after the vector loop. */
185 number = sixteenthPoints * 16;
186 for(; number < num_points; number++){
187 outputVector[number] = (int16_t)(inputVector[number])*256;
/* Aligned SSE4.1 kernel, compiled only when LV_HAVE_SSE4_1 is defined.
 * Widens each int8 input sample to int16 and scales it by 256, 16 samples per
 * loop iteration done as two 8-sample halves. Uses aligned load/store
 * intrinsics, so both buffers must be 16-byte aligned.
 * NOTE(review): this listing omits some original lines (return type, braces,
 * declarations of inputVal/ret, pointer advances) -- confirm against the full header. */
193 #ifdef LV_HAVE_SSE4_1 194 #include <smmintrin.h> 197 volk_8i_convert_16i_a_sse4_1(int16_t* outputVector,
const int8_t* inputVector,
198 unsigned int num_points)
200 unsigned int number = 0;
/* Number of complete 16-sample groups handled by the vector loop. */
201 const unsigned int sixteenthPoints = num_points / 16;
203 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
204 __m128i* outputVectorPtr = (__m128i*)outputVector;
208 for(;number < sixteenthPoints; number++){
/* Aligned 128-bit load of 16 int8 samples. */
209 inputVal = _mm_load_si128(inputVectorPtr);
/* First half: sign-extend the low 8 int8 lanes to int16, then shift left by 8 bits. */
210 ret = _mm_cvtepi8_epi16(inputVal);
211 ret = _mm_slli_epi16(ret, 8);
212 _mm_store_si128(outputVectorPtr, ret);
/* Second half: shift the register right by 8 bytes so the upper 8 samples
 * land in the low lanes, then convert and store as above.
 * NOTE(review): the outputVectorPtr advance between the two stores is not
 * visible in this excerpt -- confirm. */
216 inputVal = _mm_srli_si128(inputVal, 8);
217 ret = _mm_cvtepi8_epi16(inputVal);
218 ret = _mm_slli_epi16(ret, 8);
219 _mm_store_si128(outputVectorPtr, ret);
/* Scalar tail: handle the num_points % 16 samples left after the vector loop. */
226 number = sixteenthPoints * 16;
227 for(; number < num_points; number++){
228 outputVector[number] = (int16_t)(inputVector[number])*256;
/* Generic (scalar) kernel in the aligned section, compiled only when
 * LV_HAVE_GENERIC is defined.
 * NOTE(review): the function name and the first parameters are not visible in
 * this excerpt; the body below uses outputVector, inputVector and num_points. */
234 #ifdef LV_HAVE_GENERIC 238 unsigned int num_points)
/* Local cursors so the loop can walk both buffers with post-increment. */
240 int16_t* outputVectorPtr = outputVector;
241 const int8_t* inputVectorPtr = inputVector;
242 unsigned int number = 0;
244 for(number = 0; number < num_points; number++){
/* Widen one int8 sample to int16, multiply by 256, and advance both cursors. */
245 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/* NEON kernel: widens int8 samples to int16 and scales them by 256, eight
 * samples per vector iteration, with a scalar loop for the remainder.
 * NOTE(review): the signature, the declarations of number/input_vec, and any
 * inputVectorPtr advance inside the vector loop are not visible in this
 * excerpt -- confirm against the full header. */
252 #include <arm_neon.h> 257 int16_t* outputVectorPtr = outputVector;
258 const int8_t* inputVectorPtr = inputVector;
/* Number of complete 8-sample groups handled by the vector loop. */
260 const unsigned int eighth_points = num_points / 8;
263 int16x8_t converted_vec;
268 for(number = 0; number < eighth_points; ++number) {
/* Load 8 int8 samples and widen them to 8 int16 lanes. */
269 input_vec = vld1_s8(inputVectorPtr);
270 converted_vec = vmovl_s8(input_vec);
/* Shift each lane left by 8 bits, matching the *256 in the scalar tail. */
272 converted_vec = vshlq_n_s16(converted_vec, 8);
273 vst1q_s16( outputVectorPtr, converted_vec);
/* Advance the output cursor past the 8 values just stored. */
276 outputVectorPtr += 8;
/* Scalar tail: handle the num_points % 8 samples left after the vector loop. */
279 for(number = eighth_points * 8; number < num_points; number++){
280 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/* Declaration only of the ORC implementation; its definition is not in this excerpt. */
288 volk_8i_convert_16i_a_orc_impl(int16_t* outputVector,
const int8_t* inputVector,
289 unsigned int num_points);
/* Thin wrapper: forwards its three arguments unchanged to the ORC implementation.
 * NOTE(review): a "_u_" entry point delegates to an "_a_"-named impl -- confirm
 * the ORC implementation has no alignment requirement. */
292 volk_8i_convert_16i_u_orc(int16_t* outputVector,
const int8_t* inputVector,
293 unsigned int num_points)
295 volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
static void volk_8i_convert_16i_a_generic(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:237
static void volk_8i_convert_16i_neon(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:255
static void volk_8i_convert_16i_generic(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition: volk_8i_convert_16i.h:136