/*
 * Unaligned SSE kernel (named volk_32f_x2_multiply_32f_u_sse by the
 * cross-reference index at the end of this listing): element-wise product
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * NOTE(review): this listing carries embedded source line numbers and skips
 * some of them, so the start of the signature and the braces are not visible
 * here - confirm against the original header.
 */
71 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H 72 #define INCLUDED_volk_32f_x2_multiply_32f_u_H 78 #include <xmmintrin.h> 82 const float* bVector,
unsigned int num_points)
84 unsigned int number = 0;
/* Count of whole 4-float groups handled by the vector loop. */
85 const unsigned int quarterPoints = num_points / 4;
87 float* cPtr = cVector;
88 const float* aPtr = aVector;
89 const float* bPtr= bVector;
91 __m128 aVal, bVal, cVal;
92 for(;number < quarterPoints; number++){
/* loadu/storeu variants are used, so no alignment is required of the pointers. */
94 aVal = _mm_loadu_ps(aPtr);
95 bVal = _mm_loadu_ps(bPtr);
97 cVal = _mm_mul_ps(aVal, bVal);
99 _mm_storeu_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 99 to 106, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 4 leftover elements. */
106 number = quarterPoints * 4;
107 for(;number < num_points; number++){
108 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Unaligned AVX-512F kernel, compiled only when LV_HAVE_AVX512F is defined:
 * element-wise product cVector[i] = aVector[i] * bVector[i] for num_points
 * floats.
 * NOTE(review): the return type / storage class, the braces and the matching
 * #endif are not visible in this listing - confirm against the original header.
 */
113 #ifdef LV_HAVE_AVX512F 114 #include <immintrin.h> 117 volk_32f_x2_multiply_32f_u_avx512f(
float* cVector,
const float* aVector,
118 const float* bVector,
unsigned int num_points)
120 unsigned int number = 0;
/* Count of whole 16-float groups handled by the vector loop. */
121 const unsigned int sixteenthPoints = num_points / 16;
123 float* cPtr = cVector;
124 const float* aPtr = aVector;
125 const float* bPtr= bVector;
127 __m512 aVal, bVal, cVal;
128 for(;number < sixteenthPoints; number++){
/* loadu/storeu variants are used, so no alignment is required of the pointers. */
130 aVal = _mm512_loadu_ps(aPtr);
131 bVal = _mm512_loadu_ps(bPtr);
133 cVal = _mm512_mul_ps(aVal, bVal);
135 _mm512_storeu_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 135 to 142, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 16 leftover elements. */
142 number = sixteenthPoints * 16;
143 for(;number < num_points; number++){
144 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Unaligned AVX kernel (named volk_32f_x2_multiply_32f_u_avx by the
 * cross-reference index at the end of this listing): element-wise product
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * NOTE(review): the start of the signature, any feature guard and the braces
 * are not visible in this listing - confirm against the original header.
 */
150 #include <immintrin.h> 154 const float* bVector,
unsigned int num_points)
156 unsigned int number = 0;
/* Count of whole 8-float groups handled by the vector loop. */
157 const unsigned int eighthPoints = num_points / 8;
159 float* cPtr = cVector;
160 const float* aPtr = aVector;
161 const float* bPtr= bVector;
163 __m256 aVal, bVal, cVal;
164 for(;number < eighthPoints; number++){
/* loadu/storeu variants are used, so no alignment is required of the pointers. */
166 aVal = _mm256_loadu_ps(aPtr);
167 bVal = _mm256_loadu_ps(bPtr);
169 cVal = _mm256_mul_ps(aVal, bVal);
171 _mm256_storeu_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 171 to 178, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 8 leftover elements. */
178 number = eighthPoints * 8;
179 for(;number < num_points; number++){
180 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Portable scalar kernel (named volk_32f_x2_multiply_32f_generic by the
 * cross-reference index at the end of this listing), compiled only when
 * LV_HAVE_GENERIC is defined: cVector[i] = aVector[i] * bVector[i] for
 * num_points floats.
 * NOTE(review): the start of the signature, the braces and the matching
 * #endif are not visible in this listing - confirm against the original header.
 */
186 #ifdef LV_HAVE_GENERIC 190 const float* bVector,
unsigned int num_points)
192 float* cPtr = cVector;
193 const float* aPtr = aVector;
194 const float* bPtr= bVector;
195 unsigned int number = 0;
/* One multiply per element; all three pointers advance together. */
197 for(number = 0; number < num_points; number++){
198 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Aligned SSE kernel (named volk_32f_x2_multiply_32f_a_sse by the
 * cross-reference index at the end of this listing): element-wise product
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * NOTE(review): the start of the signature and the braces are not visible in
 * this listing - confirm against the original header.
 */
207 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H 208 #define INCLUDED_volk_32f_x2_multiply_32f_a_H 210 #include <inttypes.h> 214 #include <xmmintrin.h> 218 const float* bVector,
unsigned int num_points)
220 unsigned int number = 0;
/* Count of whole 4-float groups handled by the vector loop. */
221 const unsigned int quarterPoints = num_points / 4;
223 float* cPtr = cVector;
224 const float* aPtr = aVector;
225 const float* bPtr= bVector;
227 __m128 aVal, bVal, cVal;
228 for(;number < quarterPoints; number++){
/* _mm_load_ps/_mm_store_ps require 16-byte aligned addresses. */
230 aVal = _mm_load_ps(aPtr);
231 bVal = _mm_load_ps(bPtr);
233 cVal = _mm_mul_ps(aVal, bVal);
235 _mm_store_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 235 to 242, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 4 leftover elements. */
242 number = quarterPoints * 4;
243 for(;number < num_points; number++){
244 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Aligned AVX-512F kernel, compiled only when LV_HAVE_AVX512F is defined:
 * element-wise product cVector[i] = aVector[i] * bVector[i] for num_points
 * floats.
 * NOTE(review): the return type / storage class, the braces and the matching
 * #endif are not visible in this listing - confirm against the original header.
 */
249 #ifdef LV_HAVE_AVX512F 250 #include <immintrin.h> 253 volk_32f_x2_multiply_32f_a_avx512f(
float* cVector,
const float* aVector,
254 const float* bVector,
unsigned int num_points)
256 unsigned int number = 0;
/* Count of whole 16-float groups handled by the vector loop. */
257 const unsigned int sixteenthPoints = num_points / 16;
259 float* cPtr = cVector;
260 const float* aPtr = aVector;
261 const float* bPtr= bVector;
263 __m512 aVal, bVal, cVal;
264 for(;number < sixteenthPoints; number++){
/* _mm512_load_ps/_mm512_store_ps require 64-byte aligned addresses. */
266 aVal = _mm512_load_ps(aPtr);
267 bVal = _mm512_load_ps(bPtr);
269 cVal = _mm512_mul_ps(aVal, bVal);
271 _mm512_store_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 271 to 278, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 16 leftover elements. */
278 number = sixteenthPoints * 16;
279 for(;number < num_points; number++){
280 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Aligned AVX kernel (named volk_32f_x2_multiply_32f_a_avx by the
 * cross-reference index at the end of this listing): element-wise product
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * NOTE(review): the start of the signature, any feature guard and the braces
 * are not visible in this listing - confirm against the original header.
 */
287 #include <immintrin.h> 291 const float* bVector,
unsigned int num_points)
293 unsigned int number = 0;
/* Count of whole 8-float groups handled by the vector loop. */
294 const unsigned int eighthPoints = num_points / 8;
296 float* cPtr = cVector;
297 const float* aPtr = aVector;
298 const float* bPtr= bVector;
300 __m256 aVal, bVal, cVal;
301 for(;number < eighthPoints; number++){
/* _mm256_load_ps/_mm256_store_ps require 32-byte aligned addresses. */
303 aVal = _mm256_load_ps(aPtr);
304 bVal = _mm256_load_ps(bPtr);
306 cVal = _mm256_mul_ps(aVal, bVal);
308 _mm256_store_ps(cPtr,cVal);
/*
 * NOTE(review): no advance of aPtr/bPtr/cPtr is visible in this loop; the
 * embedded numbers jump from 308 to 315, so those statements were presumably
 * dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 8 leftover elements. */
315 number = eighthPoints * 8;
316 for(;number < num_points; number++){
317 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * NEON kernel (named volk_32f_x2_multiply_32f_neon by the cross-reference
 * index at the end of this listing): element-wise product
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * Unlike the x86 kernels in this listing it works directly on the parameter
 * pointers instead of local copies.
 * NOTE(review): the start of the signature, the declaration of `number`, any
 * feature guard and the braces are not visible in this listing - confirm
 * against the original header.
 */
324 #include <arm_neon.h> 328 const float* bVector,
unsigned int num_points)
/* Count of whole 4-float groups handled by the vector loop. */
330 const unsigned int quarter_points = num_points / 4;
332 float32x4_t avec, bvec, cvec;
333 for(number=0; number < quarter_points; ++number) {
334 avec = vld1q_f32(aVector);
335 bvec = vld1q_f32(bVector);
336 cvec = vmulq_f32(avec, bvec);
337 vst1q_f32(cVector, cvec);
/*
 * NOTE(review): no advance of aVector/bVector/cVector is visible in this
 * loop; the embedded numbers jump from 337 to 342, so those statements were
 * presumably dropped by the extraction - confirm.
 */
/* Scalar loop for the num_points % 4 leftover elements. */
342 for(number=quarter_points*4; number < num_points; ++number) {
343 *cVector++ = *aVector++ * *bVector++;
/*
 * Portable scalar kernel of the aligned section (named
 * volk_32f_x2_multiply_32f_a_generic by the cross-reference index at the end
 * of this listing), compiled only when LV_HAVE_GENERIC is defined:
 * cVector[i] = aVector[i] * bVector[i] for num_points floats.
 * NOTE(review): the start of the signature, the braces and the matching
 * #endif are not visible in this listing - confirm against the original header.
 */
349 #ifdef LV_HAVE_GENERIC 353 const float* bVector,
unsigned int num_points)
355 float* cPtr = cVector;
356 const float* aPtr = aVector;
357 const float* bPtr= bVector;
358 unsigned int number = 0;
/* One multiply per element; all three pointers advance together. */
360 for(number = 0; number < num_points; number++){
361 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Prototype only: no body for volk_32f_x2_multiply_32f_a_orc_impl appears in
 * this listing. It takes the same four parameters as the other kernels.
 * NOTE(review): the return type / storage class line and any feature guard
 * are not visible here - confirm against the original header.
 */
369 volk_32f_x2_multiply_32f_a_orc_impl(
float* cVector,
const float* aVector,
370 const float* bVector,
unsigned int num_points);
/*
 * ORC entry point: passes cVector, aVector, bVector and num_points unchanged
 * to volk_32f_x2_multiply_32f_a_orc_impl and does nothing else.
 * NOTE(review): this "u_" function calls the "a_" implementation; whether
 * that implementation requires aligned buffers cannot be told from this
 * listing - confirm. The return type, braces and any feature guard are also
 * not visible here.
 */
373 volk_32f_x2_multiply_32f_u_orc(
float* cVector,
const float* aVector,
374 const float* bVector,
unsigned int num_points)
376 volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
static void volk_32f_x2_multiply_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:153
static void volk_32f_x2_multiply_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:217
static void volk_32f_x2_multiply_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:327
static void volk_32f_x2_multiply_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:189
static void volk_32f_x2_multiply_32f_u_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:81
static void volk_32f_x2_multiply_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:290
static void volk_32f_x2_multiply_32f_a_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:352