#ifndef INCLUDED_volk_32f_x2_max_32f_a_H
#define INCLUDED_volk_32f_x2_max_32f_a_H

/*
 * Every kernel in this section computes the element-wise maximum
 *
 *     cVector[i] = (aVector[i] > bVector[i]) ? aVector[i] : bVector[i]
 *
 * for i in [0, num_points). The SIMD variants handle as many full vectors as
 * fit and finish the remaining elements with the scalar expression above.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * AVX-512F kernel using aligned 512-bit loads/stores.
 *
 * cVector    output buffer, num_points floats
 * aVector    first input buffer, num_points floats
 * bVector    second input buffer, num_points floats
 * num_points number of elements to process
 *
 * _mm512_load_ps/_mm512_store_ps require 64-byte aligned addresses, so all
 * three buffers must be aligned accordingly.
 */
static inline void volk_32f_x2_max_32f_a_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_load_ps(aPtr);
        const __m512 bVal = _mm512_load_ps(bPtr);
        const __m512 cVal = _mm512_max_ps(aVal, bVal);

        _mm512_store_ps(cPtr, cVal);

        /* Step to the next 16 lanes; the scalar tail below relies on the
         * pointers ending up just past the last full vector. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    for (number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*
 * SSE kernel using aligned 128-bit loads/stores (four floats per iteration).
 *
 * _mm_load_ps/_mm_store_ps require 16-byte aligned addresses, so all three
 * buffers must be aligned accordingly.
 */
static inline void volk_32f_x2_max_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);
        const __m128 cVal = _mm_max_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Advance past the four lanes just written. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    for (number = quarterPoints * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * AVX kernel using aligned 256-bit loads/stores (eight floats per iteration).
 *
 * _mm256_load_ps/_mm256_store_ps require 32-byte aligned addresses, so all
 * three buffers must be aligned accordingly.
 */
static inline void volk_32f_x2_max_32f_a_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_load_ps(aPtr);
        const __m256 bVal = _mm256_load_ps(bPtr);
        const __m256 cVal = _mm256_max_ps(aVal, bVal);

        _mm256_store_ps(cPtr, cVal);

        /* Advance past the eight lanes just written. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    for (number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * NEON kernel processing four floats per iteration with
 * vld1q_f32/vmaxq_f32/vst1q_f32, followed by a scalar tail.
 */
static inline void volk_32f_x2_max_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < quarter_points; number++) {
        const float32x4_t a_vec = vld1q_f32(aPtr);
        const float32x4_t b_vec = vld1q_f32(bPtr);
        const float32x4_t c_vec = vmaxq_f32(a_vec, b_vec);

        vst1q_f32(cPtr, c_vec);

        /* Advance past the four lanes just written. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    for (number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar kernel: one comparison per element, no alignment
 * requirements beyond those of float.
 */
static inline void volk_32f_x2_max_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_ORC

/* Implemented outside this header; only declared here. */
extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*
 * ORC entry point: forwards all arguments unchanged to
 * volk_32f_x2_max_32f_a_orc_impl().
 */
static inline void volk_32f_x2_max_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_max_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */

#endif /* INCLUDED_volk_32f_x2_max_32f_a_H */
#ifndef INCLUDED_volk_32f_x2_max_32f_u_H
#define INCLUDED_volk_32f_x2_max_32f_u_H

#include <inttypes.h>

/*
 * Unaligned variants of the element-wise maximum kernel:
 *
 *     cVector[i] = (aVector[i] > bVector[i]) ? aVector[i] : bVector[i]
 *
 * for i in [0, num_points). They use the loadu/storeu intrinsics, which do
 * not impose the vector-width alignment that the aligned load/store forms do.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * AVX-512F kernel, sixteen floats per iteration, unaligned loads/stores.
 *
 * cVector    output buffer, num_points floats
 * aVector    first input buffer, num_points floats
 * bVector    second input buffer, num_points floats
 * num_points number of elements to process
 */
static inline void volk_32f_x2_max_32f_u_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_loadu_ps(aPtr);
        const __m512 bVal = _mm512_loadu_ps(bPtr);
        const __m512 cVal = _mm512_max_ps(aVal, bVal);

        _mm512_storeu_ps(cPtr, cVal);

        /* Step to the next 16 lanes; the scalar tail below relies on the
         * pointers ending up just past the last full vector. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    for (number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * AVX kernel, eight floats per iteration, unaligned loads/stores.
 * Parameters as for volk_32f_x2_max_32f_u_avx512f().
 */
static inline void volk_32f_x2_max_32f_u_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_loadu_ps(aPtr);
        const __m256 bVal = _mm256_loadu_ps(bPtr);
        const __m256 cVal = _mm256_max_ps(aVal, bVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Advance past the eight lanes just written. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    for (number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_32f_x2_max_32f_u_H */
/*
 * Cross-reference index carried over from the generated documentation
 * listing (signature, then the location of the definition):
 *
 *   static void volk_32f_x2_max_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_max_32f.h:304
 *   static void volk_32f_x2_max_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_max_32f.h:118
 *   static void volk_32f_x2_max_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_max_32f.h:224
 *   static void volk_32f_x2_max_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_max_32f.h:192
 *   static void volk_32f_x2_max_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *     Definition: volk_32f_x2_max_32f.h:155
 */