73 #ifndef INCLUDED_volk_32fc_x2_add_32fc_u_H 74 #define INCLUDED_volk_32fc_x2_add_32fc_u_H 77 #include <immintrin.h> 81 const lv_32fc_t* bVector,
unsigned int num_points)
83 unsigned int number = 0;
84 const unsigned int quarterPoints = num_points / 4;
90 __m256 aVal, bVal, cVal;
91 for(;number < quarterPoints; number++){
93 aVal = _mm256_loadu_ps((
float *) aPtr);
94 bVal = _mm256_loadu_ps((
float *) bPtr);
96 cVal = _mm256_add_ps(aVal, bVal);
98 _mm256_storeu_ps((
float *) cPtr,cVal);
105 number = quarterPoints * 4;
106 for(;number < num_points; number++){
107 *cPtr++ = (*aPtr++) + (*bPtr++);
114 #include <immintrin.h> 118 const lv_32fc_t* bVector,
unsigned int num_points)
120 unsigned int number = 0;
121 const unsigned int quarterPoints = num_points / 4;
127 __m256 aVal, bVal, cVal;
128 for(;number < quarterPoints; number++){
130 aVal = _mm256_load_ps((
float*) aPtr);
131 bVal = _mm256_load_ps((
float*) bPtr);
133 cVal = _mm256_add_ps(aVal, bVal);
135 _mm256_store_ps((
float*) cPtr,cVal);
142 number = quarterPoints * 4;
143 for(;number < num_points; number++){
144 *cPtr++ = (*aPtr++) + (*bPtr++);
151 #include <xmmintrin.h> 155 const lv_32fc_t* bVector,
unsigned int num_points)
157 unsigned int number = 0;
158 const unsigned int halfPoints = num_points / 2;
164 __m128 aVal, bVal, cVal;
165 for(;number < halfPoints; number++){
167 aVal = _mm_loadu_ps((
float *) aPtr);
168 bVal = _mm_loadu_ps((
float *) bPtr);
170 cVal = _mm_add_ps(aVal, bVal);
172 _mm_storeu_ps((
float*) cPtr, cVal);
179 number = halfPoints * 2;
180 for(;number < num_points; number++){
181 *cPtr++ = (*aPtr++) + (*bPtr++);
187 #ifdef LV_HAVE_GENERIC 191 const lv_32fc_t* bVector,
unsigned int num_points)
196 unsigned int number = 0;
198 for(number = 0; number < num_points; number++){
199 *cPtr++ = (*aPtr++) + (*bPtr++);
206 #include <xmmintrin.h> 211 unsigned int number = 0;
212 const unsigned int halfPoints = num_points / 2;
218 __m128 aVal, bVal, cVal;
219 for(;number < halfPoints; number++){
220 aVal = _mm_load_ps((
float *) aPtr);
221 bVal = _mm_load_ps((
float *) bPtr);
223 cVal = _mm_add_ps(aVal, bVal);
225 _mm_store_ps((
float *) cPtr,cVal);
232 number = halfPoints * 2;
233 for(;number < num_points; number++){
234 *cPtr++ = (*aPtr++) + (*bPtr++);
241 #include <arm_neon.h> 245 const lv_32fc_t* bVector,
unsigned int num_points)
247 unsigned int number = 0;
248 const unsigned int halfPoints = num_points / 2;
253 float32x4_t aVal, bVal, cVal;
254 for(number=0; number < halfPoints; number++){
256 aVal = vld1q_f32((
const float32_t*)(aPtr));
257 bVal = vld1q_f32((
const float32_t*)(bPtr));
262 cVal = vaddq_f32(aVal, bVal);
264 vst1q_f32((
float*)(cPtr),cVal);
271 number = halfPoints * 2;
272 for(;number < num_points; number++){
273 *cPtr++ = (*aPtr++) + (*bPtr++);
static void volk_32fc_x2_add_32fc_u_neon(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:244
static void volk_32fc_x2_add_32fc_u_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:80
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:39
static void volk_32fc_x2_add_32fc_a_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:117
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_x2_add_32fc_u_sse(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:154
static void volk_32fc_x2_add_32fc_a_sse(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:209
static void volk_32fc_x2_add_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, const lv_32fc_t *bVector, unsigned int num_points)
Definition: volk_32fc_x2_add_32fc.h:190