71 #ifndef INCLUDED_volk_32f_x2_min_32f_a_H 72 #define INCLUDED_volk_32f_x2_min_32f_a_H 78 #include <xmmintrin.h> 82 const float* bVector,
unsigned int num_points)
84 unsigned int number = 0;
85 const unsigned int quarterPoints = num_points / 4;
87 float* cPtr = cVector;
88 const float* aPtr = aVector;
89 const float* bPtr= bVector;
91 __m128 aVal, bVal, cVal;
92 for(;number < quarterPoints; number++){
93 aVal = _mm_load_ps(aPtr);
94 bVal = _mm_load_ps(bPtr);
96 cVal = _mm_min_ps(aVal, bVal);
98 _mm_store_ps(cPtr,cVal);
105 number = quarterPoints * 4;
106 for(;number < num_points; number++){
107 const float a = *aPtr++;
108 const float b = *bPtr++;
109 *cPtr++ = ( a < b ? a : b);
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Element-wise minimum of two float vectors (ARM NEON).
 *
 * Stores the smaller of aVector[i] and bVector[i] into cVector[i] for every
 * i in [0, num_points). Four elements are handled per vector iteration and
 * the remaining num_points % 4 elements by a scalar loop.
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_neon(float* cVector, const float* aVector,
                         const float* bVector, unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;
  unsigned int quarter_points = num_points / 4;

  float32x4_t a_vec, b_vec, c_vec;
  for (number = 0; number < quarter_points; number++) {
    a_vec = vld1q_f32(aPtr);
    b_vec = vld1q_f32(bPtr);

    c_vec = vminq_f32(a_vec, b_vec);

    vst1q_f32(cPtr, c_vec);

    /* Step to the next group of four floats so the scalar tail below
     * resumes at element quarter_points * 4. */
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail for the leftover elements. */
  for (number = quarter_points * 4; number < num_points; number++) {
    const float a = *aPtr++;
    const float b = *bPtr++;
    *cPtr++ = (a < b ? a : b);
  }
}
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Element-wise minimum of two float vectors (portable scalar version).
 *
 * Stores (aVector[i] < bVector[i] ? aVector[i] : bVector[i]) into cVector[i]
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_generic(float* cVector, const float* aVector,
                            const float* bVector, unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;

  for (number = 0; number < num_points; number++) {
    const float a = *aPtr++;
    const float b = *bPtr++;
    *cPtr++ = (a < b ? a : b);
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC

/*!
 * \brief Minimum kernel implemented outside this header; only declared here.
 *
 * Takes the same argument list as the other variants in this file.
 */
extern void
volk_32f_x2_min_32f_a_orc_impl(float* cVector, const float* aVector,
                               const float* bVector, unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards all arguments unchanged to
 *        volk_32f_x2_min_32f_a_orc_impl().
 *
 * \param cVector    Output buffer.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of elements to process.
 */
static inline void
volk_32f_x2_min_32f_u_orc(float* cVector, const float* aVector,
                          const float* bVector, unsigned int num_points)
{
  volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Element-wise minimum of two float vectors (AVX, aligned loads/stores).
 *
 * Stores the smaller of aVector[i] and bVector[i] into cVector[i] for every
 * i in [0, num_points).
 *
 * \param cVector    Output buffer, at least num_points floats.
 * \param aVector    First input buffer, at least num_points floats.
 * \param bVector    Second input buffer, at least num_points floats.
 * \param num_points Number of elements to process.
 *
 * \note Uses _mm256_load_ps / _mm256_store_ps, which require 32-byte aligned
 *       addresses; all three buffers must satisfy that.
 */
static inline void
volk_32f_x2_min_32f_a_avx(float* cVector, const float* aVector,
                          const float* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m256 aVal, bVal, cVal;
  for (; number < eighthPoints; number++) {
    aVal = _mm256_load_ps(aPtr);
    bVal = _mm256_load_ps(bPtr);

    cVal = _mm256_min_ps(aVal, bVal);

    _mm256_store_ps(cPtr, cVal);

    /* Step to the next group of eight floats; without this every iteration
     * would reprocess the first group and the tail would start at offset 0. */
    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail for the num_points % 8 leftover elements. */
  number = eighthPoints * 8;
  for (; number < num_points; number++) {
    const float a = *aPtr++;
    const float b = *bPtr++;
    *cPtr++ = (a < b ? a : b);
  }
}
#endif /* LV_HAVE_AVX */
221 #ifdef LV_HAVE_AVX512F 222 #include <immintrin.h> 225 volk_32f_x2_min_32f_a_avx512f(
float* cVector,
const float* aVector,
226 const float* bVector,
unsigned int num_points)
228 unsigned int number = 0;
229 const unsigned int sixteenthPoints = num_points / 16;
231 float* cPtr = cVector;
232 const float* aPtr = aVector;
233 const float* bPtr= bVector;
235 __m512 aVal, bVal, cVal;
236 for(;number < sixteenthPoints; number++){
237 aVal = _mm512_load_ps(aPtr);
238 bVal = _mm512_load_ps(bPtr);
240 cVal = _mm512_min_ps(aVal, bVal);
242 _mm512_store_ps(cPtr,cVal);
249 number = sixteenthPoints * 16;
250 for(;number < num_points; number++){
251 const float a = *aPtr++;
252 const float b = *bPtr++;
253 *cPtr++ = ( a < b ? a : b);
261 #ifndef INCLUDED_volk_32f_x2_min_32f_u_H 262 #define INCLUDED_volk_32f_x2_min_32f_u_H 264 #include <inttypes.h> 267 #ifdef LV_HAVE_AVX512F 268 #include <immintrin.h> 271 volk_32f_x2_min_32f_u_avx512f(
float* cVector,
const float* aVector,
272 const float* bVector,
unsigned int num_points)
274 unsigned int number = 0;
275 const unsigned int sixteenthPoints = num_points / 16;
277 float* cPtr = cVector;
278 const float* aPtr = aVector;
279 const float* bPtr= bVector;
281 __m512 aVal, bVal, cVal;
282 for(;number < sixteenthPoints; number++){
283 aVal = _mm512_loadu_ps(aPtr);
284 bVal = _mm512_loadu_ps(bPtr);
286 cVal = _mm512_min_ps(aVal, bVal);
288 _mm512_storeu_ps(cPtr,cVal);
295 number = sixteenthPoints * 16;
296 for(;number < num_points; number++){
297 const float a = *aPtr++;
298 const float b = *bPtr++;
299 *cPtr++ = ( a < b ? a : b);
305 #include <immintrin.h> 309 const float* bVector,
unsigned int num_points)
311 unsigned int number = 0;
312 const unsigned int eighthPoints = num_points / 8;
314 float* cPtr = cVector;
315 const float* aPtr = aVector;
316 const float* bPtr= bVector;
318 __m256 aVal, bVal, cVal;
319 for(;number < eighthPoints; number++){
320 aVal = _mm256_loadu_ps(aPtr);
321 bVal = _mm256_loadu_ps(bPtr);
323 cVal = _mm256_min_ps(aVal, bVal);
325 _mm256_storeu_ps(cPtr,cVal);
332 number = eighthPoints * 8;
333 for(;number < num_points; number++){
334 const float a = *aPtr++;
335 const float b = *bPtr++;
336 *cPtr++ = ( a < b ? a : b);
/*
 * Cross-reference index carried over from the generated source listing
 * (signature, followed by the location of its definition):
 *
 * static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *   Definition: volk_32f_x2_min_32f.h:308
 * static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *   Definition: volk_32f_x2_min_32f.h:119
 * static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *   Definition: volk_32f_x2_min_32f.h:188
 * static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *   Definition: volk_32f_x2_min_32f.h:153
 * static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *   Definition: volk_32f_x2_min_32f.h:81
 */