/*
 * Unaligned ("_u") kernels converting an array of doubles to floats.
 * Each kernel is compiled only when its LV_HAVE_* feature macro is defined.
 */
#ifndef INCLUDED_volk_64f_convert_32f_u_H
#define INCLUDED_volk_64f_convert_32f_u_H

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/**
 * Convert doubles to floats with AVX-512F, using unaligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    /* 16 elements per iteration: two 8-double loads, two 8-float stores. */
    for (; number < oneSixteenthPoints; number++) {
        inputVal1 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;

        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        /* Advance after each store so the second store does not overwrite
         * the first and successive iterations walk the output buffer. */
        _mm256_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 16) elements. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * Convert doubles to floats with AVX, using unaligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    /* 8 elements per iteration: two 4-double loads, two 4-float stores. */
    for (; number < oneEightPoints; number++) {
        inputVal1 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;

        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Convert doubles to floats with SSE2, using unaligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* 4 elements per iteration: two 2-double loads, one 4-float store. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;

        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Each conversion yields two floats in the low half of its register;
         * merge the two low halves into one 4-float vector. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/**
 * Portable scalar conversion of doubles to floats.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_64f_convert_32f_u_H */
/*
 * Aligned ("_a") kernels converting an array of doubles to floats.
 * The SIMD kernels use aligned load/store intrinsics, so both buffers must
 * satisfy the alignment those intrinsics require.
 * Each kernel is compiled only when its LV_HAVE_* feature macro is defined.
 */
#ifndef INCLUDED_volk_64f_convert_32f_a_H
#define INCLUDED_volk_64f_convert_32f_a_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/**
 * Convert doubles to floats with AVX-512F, using aligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    /* 16 elements per iteration: two 8-double loads, two 8-float stores. */
    for (; number < oneSixteenthPoints; number++) {
        inputVal1 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;

        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 16) elements. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * Convert doubles to floats with AVX, using aligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    /* 8 elements per iteration: two 4-double loads, two 4-float stores. */
    for (; number < oneEightPoints; number++) {
        inputVal1 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;

        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Convert doubles to floats with SSE2, using aligned loads and stores.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    /* 4 elements per iteration: two 2-double loads, one 4-float store. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;

        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Each conversion yields two floats in the low half of its register;
         * merge the two low halves into one 4-float vector. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_store_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/**
 * Portable scalar conversion of doubles to floats.
 *
 * @param outputVector destination buffer; receives num_points floats
 * @param inputVector  source buffer holding num_points doubles
 * @param num_points   number of elements to convert
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_64f_convert_32f_a_H */
/*
 * Cross-reference index left over from the Doxygen source listing
 * (definition sites in volk_64f_convert_32f.h):
 *
 *   static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:303
 *   static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:176
 *   static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:235
 *   static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:143
 *   static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:270
 *   static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *     Definition: volk_64f_convert_32f.h:108
 */