#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
#define INCLUDED_volk_64f_x2_max_64f_a_H

/*
 * Aligned kernels for the element-wise maximum of two double vectors.
 *
 * Every kernel below stores, for each i in [0, num_points),
 *     cVector[i] = (aVector[i] > bVector[i]) ? aVector[i] : bVector[i]
 * The SIMD variants process whole lane groups first and then finish the
 * remaining elements with the same scalar expression.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, aligned).
 *
 * Uses _mm512_load_pd/_mm512_store_pd, i.e. the aligned load/store forms, so
 * the three buffers must satisfy the alignment those intrinsics require.
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Eight doubles per 512-bit register. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_load_pd(aPtr);
        const __m512d bVal = _mm512_load_pd(bPtr);

        _mm512_store_pd(cPtr, _mm512_max_pd(aVal, bVal));

        /* Step to the next lane group; the scalar tail relies on these
         * pointers ending up just past the last vectorised element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements left over. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */


/* Guard name follows the LV_HAVE_<ARCH> convention used by the other kernels
 * in this header. */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX, aligned).
 *
 * Uses _mm256_load_pd/_mm256_store_pd, i.e. the aligned load/store forms, so
 * the three buffers must satisfy the alignment those intrinsics require.
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Four doubles per 256-bit register. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_load_pd(aPtr);
        const __m256d bVal = _mm256_load_pd(bPtr);

        _mm256_store_pd(cPtr, _mm256_max_pd(aVal, bVal));

        /* Step to the next lane group (see scalar tail below). */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements left over. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */


/* Guard name follows the LV_HAVE_<ARCH> convention used by the other kernels
 * in this header. */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (SSE2, aligned).
 *
 * Uses _mm_load_pd/_mm_store_pd, i.e. the aligned load/store forms, so the
 * three buffers must satisfy the alignment those intrinsics require.
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    const unsigned int halfPoints = num_points / 2;
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Two doubles per 128-bit register. */
    for (number = 0; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);

        _mm_store_pd(cPtr, _mm_max_pd(aVal, bVal));

        /* Step to the next lane group (see scalar tail below). */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail: at most one element left over. */
    for (number = halfPoints * 2; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/*!
 * \brief Element-wise maximum of two double vectors (portable scalar C).
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
#ifndef INCLUDED_volk_64f_x2_max_64f_u_H
#define INCLUDED_volk_64f_x2_max_64f_u_H

#include <inttypes.h>

/*
 * Unaligned kernels for the element-wise maximum of two double vectors.
 *
 * Every kernel below stores, for each i in [0, num_points),
 *     cVector[i] = (aVector[i] > bVector[i]) ? aVector[i] : bVector[i]
 * using the unaligned load/store intrinsics, then finishes the remaining
 * elements with the same scalar expression.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, unaligned).
 *
 * Uses _mm512_loadu_pd/_mm512_storeu_pd, the unaligned load/store forms.
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Eight doubles per 512-bit register. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_loadu_pd(aPtr);
        const __m512d bVal = _mm512_loadu_pd(bPtr);

        _mm512_storeu_pd(cPtr, _mm512_max_pd(aVal, bVal));

        /* Step to the next lane group; the scalar tail relies on these
         * pointers ending up just past the last vectorised element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements left over. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */


/* Guard name follows the LV_HAVE_<ARCH> convention used by the other kernels
 * in this header. */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX, unaligned).
 *
 * Uses _mm256_loadu_pd/_mm256_storeu_pd, the unaligned load/store forms.
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of doubles to process from each input.
 */
static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Four doubles per 256-bit register. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_loadu_pd(aPtr);
        const __m256d bVal = _mm256_loadu_pd(bPtr);

        _mm256_storeu_pd(cPtr, _mm256_max_pd(aVal, bVal));

        /* Step to the next lane group (see scalar tail below). */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements left over. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
/*
 * Cross-reference index (from the generated documentation listing):
 *
 *   static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector,
 *                                         const double *bVector, unsigned int num_points)
 *     Definition: volk_64f_x2_max_64f.h:266
 *
 *   static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector,
 *                                          const double *bVector, unsigned int num_points)
 *     Definition: volk_64f_x2_max_64f.h:159
 *
 *   static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector,
 *                                         const double *bVector, unsigned int num_points)
 *     Definition: volk_64f_x2_max_64f.h:120
 *
 *   static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector,
 *                                           const double *bVector, unsigned int num_points)
 *     Definition: volk_64f_x2_max_64f.h:197
 */