Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_64f_x2_min_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_64f_x2_min_64f_a_H
72 #define INCLUDED_volk_64f_x2_min_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX512F
78 #include <immintrin.h>
79 
/*!
 * \brief Element-wise minimum of two double vectors (AVX-512F, aligned access).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, written with _mm512_store_pd.
 * \param aVector    First input buffer, read with _mm512_load_pd.
 * \param bVector    Second input buffer, read with _mm512_load_pd.
 * \param num_points Number of doubles to process from each buffer.
 *
 * \note The aligned load/store intrinsics used here expect all three
 *       buffers to be 64-byte aligned; that is the caller's responsibility.
 */
static inline void
volk_64f_x2_min_64f_a_avx512f(double* cVector, const double* aVector,
                              const double* bVector, unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: eight doubles per iteration. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_load_pd(aPtr);
        const __m512d bVal = _mm512_load_pd(bPtr);

        _mm512_store_pd(cPtr, _mm512_min_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining (num_points % 8) elements. The pointers
     * already sit just past the last full vector. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
113 #endif /* LV_HAVE_AVX512F */
114 
115 
116 #ifdef LV_HAVE_AVX
117 #include <immintrin.h>
118 
/*!
 * \brief Element-wise minimum of two double vectors (AVX, aligned access).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, written with _mm256_store_pd.
 * \param aVector    First input buffer, read with _mm256_load_pd.
 * \param bVector    Second input buffer, read with _mm256_load_pd.
 * \param num_points Number of doubles to process from each buffer.
 *
 * \note The aligned load/store intrinsics used here expect all three
 *       buffers to be 32-byte aligned; that is the caller's responsibility.
 */
static inline void
volk_64f_x2_min_64f_a_avx(double* cVector, const double* aVector,
                          const double* bVector, unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: four doubles per iteration. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_load_pd(aPtr);
        const __m256d bVal = _mm256_load_pd(bPtr);

        _mm256_store_pd(cPtr, _mm256_min_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining (num_points % 4) elements. The pointers
     * already sit just past the last full vector. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
152 #endif /* LV_HAVE_AVX */
153 
154 
155 #ifdef LV_HAVE_SSE2
156 #include <emmintrin.h>
157 
/*!
 * \brief Element-wise minimum of two double vectors (SSE2, aligned access).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, written with _mm_store_pd.
 * \param aVector    First input buffer, read with _mm_load_pd.
 * \param bVector    Second input buffer, read with _mm_load_pd.
 * \param num_points Number of doubles to process from each buffer.
 *
 * \note The aligned load/store intrinsics used here expect all three
 *       buffers to be 16-byte aligned; that is the caller's responsibility.
 */
static inline void
volk_64f_x2_min_64f_a_sse2(double* cVector, const double* aVector,
                           const double* bVector, unsigned int num_points)
{
    const unsigned int halfPoints = num_points / 2;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: two doubles per iteration. */
    for (number = 0; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);

        _mm_store_pd(cPtr, _mm_min_pd(aVal, bVal));

        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail: at most one element is left when num_points is odd. */
    for (number = halfPoints * 2; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
191 #endif /* LV_HAVE_SSE2 */
192 
193 
194 #ifdef LV_HAVE_GENERIC
195 
/*!
 * \brief Element-wise minimum of two double vectors (portable scalar version).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points). Because the selection uses a plain `<`
 * comparison, the element from bVector is stored whenever the comparison is
 * false (equal values, or either operand being NaN).
 *
 * \param cVector    Output buffer; receives num_points doubles.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of doubles to process; 0 leaves cVector untouched.
 */
static inline void
volk_64f_x2_min_64f_generic(double* cVector, const double* aVector,
                            const double* bVector, unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
211 #endif /* LV_HAVE_GENERIC */
212 
213 
214 #endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
215 
216 #ifndef INCLUDED_volk_64f_x2_min_64f_u_H
217 #define INCLUDED_volk_64f_x2_min_64f_u_H
218 
219 #include <inttypes.h>
220 #include <stdio.h>
221 
222 #ifdef LV_HAVE_AVX512F
223 #include <immintrin.h>
224 
/*!
 * \brief Element-wise minimum of two double vectors (AVX-512F, unaligned access).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points). Uses the unaligned load/store intrinsics
 * (_mm512_loadu_pd / _mm512_storeu_pd).
 *
 * \param cVector    Output buffer; receives num_points doubles.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of doubles to process from each buffer.
 */
static inline void
volk_64f_x2_min_64f_u_avx512f(double* cVector, const double* aVector,
                              const double* bVector, unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: eight doubles per iteration. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_loadu_pd(aPtr);
        const __m512d bVal = _mm512_loadu_pd(bPtr);

        _mm512_storeu_pd(cPtr, _mm512_min_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining (num_points % 8) elements. The pointers
     * already sit just past the last full vector. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
258 #endif /* LV_HAVE_AVX512F */
259 
260 
261 #ifdef LV_HAVE_AVX
262 #include <immintrin.h>
263 
/*!
 * \brief Element-wise minimum of two double vectors (AVX, unaligned access).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points). Uses the unaligned load/store intrinsics
 * (_mm256_loadu_pd / _mm256_storeu_pd).
 *
 * \param cVector    Output buffer; receives num_points doubles.
 * \param aVector    First input buffer; num_points doubles are read.
 * \param bVector    Second input buffer; num_points doubles are read.
 * \param num_points Number of doubles to process from each buffer.
 */
static inline void
volk_64f_x2_min_64f_u_avx(double* cVector, const double* aVector,
                          const double* bVector, unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector body: four doubles per iteration. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_loadu_pd(aPtr);
        const __m256d bVal = _mm256_loadu_pd(bPtr);

        _mm256_storeu_pd(cPtr, _mm256_min_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining (num_points % 4) elements. The pointers
     * already sit just past the last full vector. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
297 #endif /* LV_HAVE_AVX */
298 
299 
300 #endif /* INCLUDED_volk_64f_x2_min_64f_u_H */
static void volk_64f_x2_min_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:265
static void volk_64f_x2_min_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:159
static void volk_64f_x2_min_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:120
static void volk_64f_x2_min_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:197