Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_64f_x2_max_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_64f_x2_max_64f_a_H
72 #define INCLUDED_volk_64f_x2_max_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX512F
78 #include <immintrin.h>
79 
80 static inline void
81 volk_64f_x2_max_64f_a_avx512f(double* cVector, const double* aVector,
82  const double* bVector, unsigned int num_points)
83 {
84  unsigned int number = 0;
85  const unsigned int eigthPoints = num_points / 8;
86 
87  double* cPtr = cVector;
88  const double* aPtr = aVector;
89  const double* bPtr= bVector;
90 
91  __m512d aVal, bVal, cVal;
92  for(;number < eigthPoints; number++){
93 
94  aVal = _mm512_load_pd(aPtr);
95  bVal = _mm512_load_pd(bPtr);
96 
97  cVal = _mm512_max_pd(aVal, bVal);
98 
99  _mm512_store_pd(cPtr,cVal); // Store the results back into the C container
100 
101  aPtr += 8;
102  bPtr += 8;
103  cPtr += 8;
104  }
105 
106  number = eigthPoints * 8;
107  for(;number < num_points; number++){
108  const double a = *aPtr++;
109  const double b = *bPtr++;
110  *cPtr++ = ( a > b ? a : b);
111  }
112 }
113 #endif /* LV_HAVE_AVX512F */
114 
115 
116 #ifdef LV_HAVE_AVX
117 #include <immintrin.h>
118 
119 static inline void
120 volk_64f_x2_max_64f_a_avx(double* cVector, const double* aVector,
121  const double* bVector, unsigned int num_points)
122 {
123  unsigned int number = 0;
124  const unsigned int quarterPoints = num_points / 4;
125 
126  double* cPtr = cVector;
127  const double* aPtr = aVector;
128  const double* bPtr= bVector;
129 
130  __m256d aVal, bVal, cVal;
131  for(;number < quarterPoints; number++){
132 
133  aVal = _mm256_load_pd(aPtr);
134  bVal = _mm256_load_pd(bPtr);
135 
136  cVal = _mm256_max_pd(aVal, bVal);
137 
138  _mm256_store_pd(cPtr,cVal); // Store the results back into the C container
139 
140  aPtr += 4;
141  bPtr += 4;
142  cPtr += 4;
143  }
144 
145  number = quarterPoints * 4;
146  for(;number < num_points; number++){
147  const double a = *aPtr++;
148  const double b = *bPtr++;
149  *cPtr++ = ( a > b ? a : b);
150  }
151 }
152 #endif /* LV_HAVE_AVX */
153 
154 
155 #ifdef LV_HAVE_SSE2
156 #include <emmintrin.h>
157 
158 static inline void
159 volk_64f_x2_max_64f_a_sse2(double* cVector, const double* aVector,
160  const double* bVector, unsigned int num_points)
161 {
162  unsigned int number = 0;
163  const unsigned int halfPoints = num_points / 2;
164 
165  double* cPtr = cVector;
166  const double* aPtr = aVector;
167  const double* bPtr= bVector;
168 
169  __m128d aVal, bVal, cVal;
170  for(;number < halfPoints; number++){
171 
172  aVal = _mm_load_pd(aPtr);
173  bVal = _mm_load_pd(bPtr);
174 
175  cVal = _mm_max_pd(aVal, bVal);
176 
177  _mm_store_pd(cPtr,cVal); // Store the results back into the C container
178 
179  aPtr += 2;
180  bPtr += 2;
181  cPtr += 2;
182  }
183 
184  number = halfPoints * 2;
185  for(;number < num_points; number++){
186  const double a = *aPtr++;
187  const double b = *bPtr++;
188  *cPtr++ = ( a > b ? a : b);
189  }
190 }
191 #endif /* LV_HAVE_SSE2 */
192 
193 
194 #ifdef LV_HAVE_GENERIC
195 
/*!
 * \brief Element-wise maximum of two double vectors, portable C variant.
 *
 * For each index i in [0, num_points) stores aVector[i] when
 * aVector[i] > bVector[i], and bVector[i] otherwise.
 *
 * \param cVector    Output buffer, receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process.
 */
static inline void
volk_64f_x2_max_64f_generic(double* cVector, const double* aVector,
                            const double* bVector, unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        /* Read both inputs before writing so in-place use stays well defined. */
        const double lhs = aVector[i];
        const double rhs = bVector[i];

        if (lhs > rhs) {
            cVector[i] = lhs;
        } else {
            cVector[i] = rhs;
        }
    }
}
211 #endif /* LV_HAVE_GENERIC */
212 
213 
214 #endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
215 
216 
217 #ifndef INCLUDED_volk_64f_x2_max_64f_u_H
218 #define INCLUDED_volk_64f_x2_max_64f_u_H
219 
220 #include <inttypes.h>
221 #include <stdio.h>
222 
223 #ifdef LV_HAVE_AVX512F
224 #include <immintrin.h>
225 
226 static inline void
227 volk_64f_x2_max_64f_u_avx512f(double* cVector, const double* aVector,
228  const double* bVector, unsigned int num_points)
229 {
230  unsigned int number = 0;
231  const unsigned int eigthPoints = num_points / 8;
232 
233  double* cPtr = cVector;
234  const double* aPtr = aVector;
235  const double* bPtr= bVector;
236 
237  __m512d aVal, bVal, cVal;
238  for(;number < eigthPoints; number++){
239 
240  aVal = _mm512_loadu_pd(aPtr);
241  bVal = _mm512_loadu_pd(bPtr);
242 
243  cVal = _mm512_max_pd(aVal, bVal);
244 
245  _mm512_storeu_pd(cPtr,cVal); // Store the results back into the C container
246 
247  aPtr += 8;
248  bPtr += 8;
249  cPtr += 8;
250  }
251 
252  number = eigthPoints * 8;
253  for(;number < num_points; number++){
254  const double a = *aPtr++;
255  const double b = *bPtr++;
256  *cPtr++ = ( a > b ? a : b);
257  }
258 }
259 #endif /* LV_HAVE_AVX512F */
260 
261 
262 #ifdef LV_HAVE_AVX
263 #include <immintrin.h>
264 
265 static inline void
266 volk_64f_x2_max_64f_u_avx(double* cVector, const double* aVector,
267  const double* bVector, unsigned int num_points)
268 {
269  unsigned int number = 0;
270  const unsigned int quarterPoints = num_points / 4;
271 
272  double* cPtr = cVector;
273  const double* aPtr = aVector;
274  const double* bPtr= bVector;
275 
276  __m256d aVal, bVal, cVal;
277  for(;number < quarterPoints; number++){
278 
279  aVal = _mm256_loadu_pd(aPtr);
280  bVal = _mm256_loadu_pd(bPtr);
281 
282  cVal = _mm256_max_pd(aVal, bVal);
283 
284  _mm256_storeu_pd(cPtr,cVal); // Store the results back into the C container
285 
286  aPtr += 4;
287  bPtr += 4;
288  cPtr += 4;
289  }
290 
291  number = quarterPoints * 4;
292  for(;number < num_points; number++){
293  const double a = *aPtr++;
294  const double b = *bPtr++;
295  *cPtr++ = ( a > b ? a : b);
296  }
297 }
298 #endif /* LV_HAVE_AVX */
299 
300 
301 #endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:266
static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:159
static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:120
static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:197