Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_64f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
64 #ifndef INCLUDED_volk_64f_convert_32f_u_H
65 #define INCLUDED_volk_64f_convert_32f_u_H
66 
67 #include <inttypes.h>
68 #include <stdio.h>
69 
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using AVX-512F with unaligned memory accesses.
 *
 * outputVector: destination for num_points floats.
 * inputVector:  source of num_points doubles.
 * num_points:   number of values to convert.
 *
 * Sixteen points are handled per vector iteration: two 8-double loads are
 * each narrowed to 8 floats and written with unaligned 256-bit stores.
 * The remaining (num_points % 16) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 16;
  const unsigned int vectorizedPoints = blockCount * 16;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Both loads are issued before either store, as in a plain sequential pass. */
    const __m512d lowerDoubles = _mm512_loadu_pd(src);
    const __m512d upperDoubles = _mm512_loadu_pd(src + 8);

    _mm256_storeu_ps(dst, _mm512_cvtpd_ps(lowerDoubles));
    _mm256_storeu_ps(dst + 8, _mm512_cvtpd_ps(upperDoubles));

    src += 16;
    dst += 16;
  }

  /* Scalar tail for the points that do not fill a whole 16-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
#endif /* LV_HAVE_AVX512F */
103 
104 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using AVX with unaligned memory accesses.
 *
 * outputVector: destination for num_points floats.
 * inputVector:  source of num_points doubles.
 * num_points:   number of values to convert.
 *
 * Eight points are handled per vector iteration: two 4-double loads are
 * each narrowed to 4 floats and written with unaligned 128-bit stores.
 * The remaining (num_points % 8) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 8;
  const unsigned int vectorizedPoints = blockCount * 8;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Both loads are issued before either store, as in a plain sequential pass. */
    const __m256d lowerDoubles = _mm256_loadu_pd(src);
    const __m256d upperDoubles = _mm256_loadu_pd(src + 4);

    _mm_storeu_ps(dst, _mm256_cvtpd_ps(lowerDoubles));
    _mm_storeu_ps(dst + 4, _mm256_cvtpd_ps(upperDoubles));

    src += 8;
    dst += 8;
  }

  /* Scalar tail for the points that do not fill a whole 8-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
#endif /* LV_HAVE_AVX */
138 
139 
140 #ifdef LV_HAVE_SSE2
141 #include <emmintrin.h>
142 
/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using SSE2 with unaligned memory accesses.
 *
 * outputVector: destination for num_points floats.
 * inputVector:  source of num_points doubles.
 * num_points:   number of values to convert.
 *
 * Four points are handled per vector iteration: two 2-double loads are each
 * narrowed to a pair of floats, the pairs are merged into one 4-float
 * register and written with a single unaligned store. The remaining
 * (num_points % 4) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 4;
  const unsigned int vectorizedPoints = blockCount * 4;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Each conversion fills only the low two float lanes of its result. */
    const __m128 lowerFloats = _mm_cvtpd_ps(_mm_loadu_pd(src));
    const __m128 upperFloats = _mm_cvtpd_ps(_mm_loadu_pd(src + 2));

    /* Pack the second pair into the high two lanes next to the first pair. */
    _mm_storeu_ps(dst, _mm_movelh_ps(lowerFloats, upperFloats));

    src += 4;
    dst += 4;
  }

  /* Scalar tail for the points that do not fill a whole 4-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
171 #endif /* LV_HAVE_SSE2 */
172 
173 
174 #ifdef LV_HAVE_GENERIC
175 
/*
 * Portable scalar conversion of num_points doubles from inputVector into
 * floats in outputVector.
 *
 * outputVector: destination for num_points floats.
 * inputVector:  source of num_points doubles.
 * num_points:   number of values to convert; 0 leaves outputVector untouched.
 */
static inline void volk_64f_convert_32f_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
185 #endif /* LV_HAVE_GENERIC */
186 
187 
188 
189 
190 #endif /* INCLUDED_volk_64f_convert_32f_u_H */
191 #ifndef INCLUDED_volk_64f_convert_32f_a_H
192 #define INCLUDED_volk_64f_convert_32f_a_H
193 
194 #include <inttypes.h>
195 #include <stdio.h>
196 
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using AVX-512F with aligned memory accesses.
 *
 * outputVector: destination for num_points floats; written with aligned
 *               256-bit stores, so it must satisfy that intrinsic's
 *               32-byte alignment requirement.
 * inputVector:  source of num_points doubles; read with aligned 512-bit
 *               loads, so it must satisfy that intrinsic's 64-byte
 *               alignment requirement.
 * num_points:   number of values to convert.
 *
 * Sixteen points are handled per vector iteration; the remaining
 * (num_points % 16) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 16;
  const unsigned int vectorizedPoints = blockCount * 16;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Both loads are issued before either store, as in a plain sequential pass. */
    const __m512d lowerDoubles = _mm512_load_pd(src);
    const __m512d upperDoubles = _mm512_load_pd(src + 8);

    _mm256_store_ps(dst, _mm512_cvtpd_ps(lowerDoubles));
    _mm256_store_ps(dst + 8, _mm512_cvtpd_ps(upperDoubles));

    src += 16;
    dst += 16;
  }

  /* Scalar tail for the points that do not fill a whole 16-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
#endif /* LV_HAVE_AVX512F */
230 
231 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using AVX with aligned memory accesses.
 *
 * outputVector: destination for num_points floats; written with aligned
 *               128-bit stores, so it must satisfy that intrinsic's
 *               16-byte alignment requirement.
 * inputVector:  source of num_points doubles; read with aligned 256-bit
 *               loads, so it must satisfy that intrinsic's 32-byte
 *               alignment requirement.
 * num_points:   number of values to convert.
 *
 * Eight points are handled per vector iteration; the remaining
 * (num_points % 8) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 8;
  const unsigned int vectorizedPoints = blockCount * 8;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Both loads are issued before either store, as in a plain sequential pass. */
    const __m256d lowerDoubles = _mm256_load_pd(src);
    const __m256d upperDoubles = _mm256_load_pd(src + 4);

    _mm_store_ps(dst, _mm256_cvtpd_ps(lowerDoubles));
    _mm_store_ps(dst + 4, _mm256_cvtpd_ps(upperDoubles));

    src += 8;
    dst += 8;
  }

  /* Scalar tail for the points that do not fill a whole 8-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
#endif /* LV_HAVE_AVX */
265 
266 
267 #ifdef LV_HAVE_SSE2
268 #include <emmintrin.h>
269 
/*
 * Narrow num_points doubles from inputVector into floats in outputVector
 * using SSE2 with aligned memory accesses.
 *
 * outputVector: destination for num_points floats; written with aligned
 *               128-bit stores, so it must satisfy that intrinsic's
 *               16-byte alignment requirement.
 * inputVector:  source of num_points doubles; read with aligned 128-bit
 *               loads, so it must satisfy that intrinsic's 16-byte
 *               alignment requirement.
 * num_points:   number of values to convert.
 *
 * Four points are handled per vector iteration: two 2-double loads are each
 * narrowed to a pair of floats, the pairs are merged into one 4-float
 * register and written with a single store. The remaining (num_points % 4)
 * points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const double* inputVector, unsigned int num_points){
  const unsigned int blockCount = num_points / 4;
  const unsigned int vectorizedPoints = blockCount * 4;
  const double* src = inputVector;
  float* dst = outputVector;
  unsigned int block;
  unsigned int idx;

  for(block = 0; block < blockCount; block++){
    /* Each conversion fills only the low two float lanes of its result. */
    const __m128 lowerFloats = _mm_cvtpd_ps(_mm_load_pd(src));
    const __m128 upperFloats = _mm_cvtpd_ps(_mm_load_pd(src + 2));

    /* Pack the second pair into the high two lanes next to the first pair. */
    _mm_store_ps(dst, _mm_movelh_ps(lowerFloats, upperFloats));

    src += 4;
    dst += 4;
  }

  /* Scalar tail for the points that do not fill a whole 4-wide block. */
  for(idx = vectorizedPoints; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
298 #endif /* LV_HAVE_SSE2 */
299 
300 
301 #ifdef LV_HAVE_GENERIC
302 
/*
 * Portable scalar conversion of num_points doubles from inputVector into
 * floats in outputVector. The body uses only plain element accesses, so it
 * places no alignment requirement of its own on either buffer.
 *
 * outputVector: destination for num_points floats.
 * inputVector:  source of num_points doubles.
 * num_points:   number of values to convert; 0 leaves outputVector untouched.
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector, const double* inputVector, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = (float)inputVector[idx];
  }
}
312 #endif /* LV_HAVE_GENERIC */
313 
314 
315 
316 
317 #endif /* INCLUDED_volk_64f_convert_32f_a_H */
static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:303
static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:176
static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:235
static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:143
static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:270
static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:108