#ifndef INCLUDED_volk_64f_x2_add_64f_H
#define INCLUDED_volk_64f_x2_add_64f_H

/*
 * Element-wise addition of two double-precision vectors:
 *
 *     cVector[i] = aVector[i] + bVector[i]    for 0 <= i < num_points
 *
 * Every kernel below shares the same parameters:
 *   cVector    - output buffer, receives num_points sums
 *   aVector    - first input buffer, num_points doubles
 *   bVector    - second input buffer, num_points doubles
 *   num_points - number of elements to process; 0 is a no-op
 *
 * The "_u_" kernels use unaligned loads/stores and accept any pointers.
 * The "_a_" kernels use aligned loads/stores: all three buffers must be
 * 16-byte aligned for SSE2 and 32-byte aligned for AVX.
 *
 * NOTE(review): only the LV_HAVE_GENERIC guard is confirmed; the
 * LV_HAVE_SSE2 / LV_HAVE_AVX guard names follow the same convention and
 * should be checked against the build configuration.
 */

#ifdef LV_HAVE_GENERIC

/* Portable scalar implementation: one addition per element. */
static inline void volk_64f_x2_add_64f_generic(double *cVector,
                                               const double *aVector,
                                               const double *bVector,
                                               unsigned int num_points)
{
    double *cPtr = cVector;
    const double *aPtr = aVector;
    const double *bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}

#endif /* LV_HAVE_GENERIC */


/*
 * Unaligned versions
 */

#ifdef LV_HAVE_SSE2

#include <emmintrin.h>

/* SSE2, unaligned buffers: two doubles per iteration, scalar tail. */
static inline void volk_64f_x2_add_64f_u_sse2(double *cVector,
                                              const double *aVector,
                                              const double *bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double *cPtr = cVector;
    const double *aPtr = aVector;
    const double *bPtr = bVector;

    __m128d aVal, bVal, cVal;
    for (; number < half_points; number++) {
        aVal = _mm_loadu_pd(aPtr);
        bVal = _mm_loadu_pd(bPtr);

        cVal = _mm_add_pd(aVal, bVal);

        _mm_storeu_pd(cPtr, cVal);

        /* Step past the two doubles just processed so the next iteration
         * and the scalar tail below continue at the right element. */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Handle the odd trailing element, if any. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}

#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_AVX

#include <immintrin.h>

/* AVX, unaligned buffers: four doubles per iteration, scalar tail. */
static inline void volk_64f_x2_add_64f_u_avx(double *cVector,
                                             const double *aVector,
                                             const double *bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;

    double *cPtr = cVector;
    const double *aPtr = aVector;
    const double *bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarter_points; number++) {
        aVal = _mm256_loadu_pd(aPtr);
        bVal = _mm256_loadu_pd(bPtr);

        cVal = _mm256_add_pd(aVal, bVal);

        _mm256_storeu_pd(cPtr, cVal);

        /* Step past the four doubles just processed. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Handle the up-to-three trailing elements. */
    number = quarter_points * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */


/*
 * Aligned versions
 */

#ifdef LV_HAVE_SSE2

#include <emmintrin.h>

/* SSE2, 16-byte aligned buffers: two doubles per iteration, scalar tail. */
static inline void volk_64f_x2_add_64f_a_sse2(double *cVector,
                                              const double *aVector,
                                              const double *bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double *cPtr = cVector;
    const double *aPtr = aVector;
    const double *bPtr = bVector;

    __m128d aVal, bVal, cVal;
    for (; number < half_points; number++) {
        aVal = _mm_load_pd(aPtr);
        bVal = _mm_load_pd(bPtr);

        cVal = _mm_add_pd(aVal, bVal);

        _mm_store_pd(cPtr, cVal);

        /* Advancing by two doubles (16 bytes) keeps the pointers aligned. */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Handle the odd trailing element, if any. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}

#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_AVX

#include <immintrin.h>

/* AVX, 32-byte aligned buffers: four doubles per iteration, scalar tail. */
static inline void volk_64f_x2_add_64f_a_avx(double *cVector,
                                             const double *aVector,
                                             const double *bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;

    double *cPtr = cVector;
    const double *aPtr = aVector;
    const double *bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarter_points; number++) {
        aVal = _mm256_load_pd(aPtr);
        bVal = _mm256_load_pd(bPtr);

        cVal = _mm256_add_pd(aVal, bVal);

        _mm256_store_pd(cPtr, cVal);

        /* Advancing by four doubles (32 bytes) keeps the pointers aligned. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Handle the up-to-three trailing elements. */
    number = quarter_points * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_64f_x2_add_64f_H */
/*
 * Cross-reference index carried over from the generated source listing
 * (the "Definition" locations are line numbers in that listing):
 *
 * static void volk_64f_x2_add_64f_u_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
 * Definition: volk_64f_x2_add_64f.h:104
 * static void volk_64f_x2_add_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
 * Definition: volk_64f_x2_add_64f.h:184
 * static void volk_64f_x2_add_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
 * Definition: volk_64f_x2_add_64f.h:222
 * static void volk_64f_x2_add_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
 * Definition: volk_64f_x2_add_64f.h:80
 * static void volk_64f_x2_add_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
 * Definition: volk_64f_x2_add_64f.h:142
 */