Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_convert_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
66 #ifndef INCLUDED_volk_32f_convert_64f_u_H
67 #define INCLUDED_volk_32f_convert_64f_u_H
68 
69 #include <inttypes.h>
70 #include <stdio.h>
71 
72 #ifdef LV_HAVE_AVX
73 #include <immintrin.h>
74 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector (AVX, unaligned variant).
 *
 * Memory is accessed with the unaligned load/store intrinsics, so this
 * variant places no alignment requirement on either buffer.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_u_avx(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;

  /* Each pass widens four floats (one 128-bit load) into four doubles
   * (one 256-bit store). */
  for(; number < quarterPoints; number++){
    const __m128 inputVal = _mm_loadu_ps(inputVectorPtr);
    const __m256d ret = _mm256_cvtps_pd(inputVal);

    _mm256_storeu_pd(outputVectorPtr, ret);

    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
99 
100 #endif /* LV_HAVE_AVX */
101 
102 #ifdef LV_HAVE_SSE2
103 #include <emmintrin.h>
104 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector (SSE2, unaligned variant).
 *
 * Memory is accessed with the unaligned load/store intrinsics, so this
 * variant places no alignment requirement on either buffer.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  /* Four floats are loaded per pass, but a 128-bit register only holds two
   * doubles, so the conversion is done in two halves. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Low half: _mm_cvtps_pd converts the two lowest floats. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* High half: shift the two upper floats down, then convert them. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
136 #endif /* LV_HAVE_SSE2 */
137 
138 
139 #ifdef LV_HAVE_GENERIC
140 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector using plain C (no SIMD intrinsics).
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_generic(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
150 #endif /* LV_HAVE_GENERIC */
151 
152 
153 
154 
155 #endif /* INCLUDED_volk_32f_convert_64f_u_H */
156 
157 
158 #ifndef INCLUDED_volk_32f_convert_64f_a_H
159 #define INCLUDED_volk_32f_convert_64f_a_H
160 
161 #include <inttypes.h>
162 #include <stdio.h>
163 
164 #ifdef LV_HAVE_AVX
165 #include <immintrin.h>
166 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector (AVX, aligned variant).
 *
 * Memory is accessed with the aligned intrinsics _mm_load_ps and
 * _mm256_store_pd, so inputVector must be 16-byte aligned and outputVector
 * must be 32-byte aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_avx(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;

  /* Each pass widens four floats (one 128-bit load) into four doubles
   * (one 256-bit store). */
  for(; number < quarterPoints; number++){
    const __m128 inputVal = _mm_load_ps(inputVectorPtr);
    const __m256d ret = _mm256_cvtps_pd(inputVal);

    _mm256_store_pd(outputVectorPtr, ret);

    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
191 #endif /* LV_HAVE_AVX */
192 
193 #ifdef LV_HAVE_SSE2
194 #include <emmintrin.h>
195 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector (SSE2, aligned variant).
 *
 * Memory is accessed with the aligned intrinsics _mm_load_ps and
 * _mm_store_pd, so both inputVector and outputVector must be 16-byte
 * aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  /* Four floats are loaded per pass, but a 128-bit register only holds two
   * doubles, so the conversion is done in two halves. */
  for(; number < quarterPoints; number++){
    inputVal = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Low half: _mm_cvtps_pd converts the two lowest floats. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* High half: shift the two upper floats down, then convert them. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail: the remaining num_points % 4 elements. */
  for(number = quarterPoints * 4; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
227 #endif /* LV_HAVE_SSE2 */
228 
229 
230 #ifdef LV_HAVE_GENERIC
231 
/*!
 * \brief Converts num_points floats from inputVector into doubles in
 *        outputVector using plain C (no SIMD intrinsics).
 *
 * The body performs only scalar element accesses; it is the "_a" counterpart
 * of volk_32f_convert_64f_generic and computes the same result.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector, const float* inputVector, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    outputVector[number] = (double)(inputVector[number]);
  }
}
241 #endif /* LV_HAVE_GENERIC */
242 
243 
244 
245 
246 #endif /* INCLUDED_volk_32f_convert_64f_a_H */
static void volk_32f_convert_64f_a_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:167
static void volk_32f_convert_64f_u_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:75
static void volk_32f_convert_64f_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:141
static void volk_32f_convert_64f_a_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:196
static void volk_32f_convert_64f_a_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:232
static void volk_32f_convert_64f_u_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:105