73 #ifndef INCLUDED_volk_32f_64f_add_64f_H
74 #define INCLUDED_volk_32f_64f_add_64f_H
78 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Portable reference implementation: adds a float vector to a double
 *        vector element by element and stores the sums as doubles.
 *
 * For every index i in [0, num_points): cVector[i] = (double)aVector[i] + bVector[i].
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First addend, single precision; num_points floats are read.
 * \param bVector    Second addend, double precision; num_points doubles are read.
 * \param num_points Number of elements to process. Zero leaves cVector untouched.
 */
static inline void volk_32f_64f_add_64f_generic(double* cVector,
                                                const float* aVector,
                                                const double* bVector,
                                                unsigned int num_points)
{
    double* cPtr = cVector;
    const float* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        /* Widen the float operand first so the addition is done in double. */
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}
/*!
 * \brief NEON implementation: adds a float vector to a double vector element
 *        by element and stores the sums as doubles.
 *
 * Processes two elements per iteration using 128-bit double-precision
 * registers, then finishes any odd trailing element with scalar code.
 * The intrinsics used here are declared in <arm_neon.h>.
 *
 * \param cVector    Output buffer; num_points doubles are written.
 * \param aVector    First addend, single precision; num_points floats are read.
 * \param bVector    Second addend, double precision; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_64f_add_64f_neon(double* cVector,
                                             const float* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double* cPtr = cVector;
    const float* aPtr = aVector;
    const double* bPtr = bVector;

    float64x2_t aVal, bVal, cVal;
    float32x2_t aVal1;
    for (number = 0; number < half_points; number++) {
        /* Load two floats and two doubles into NEON registers. */
        aVal1 = vld1_f32(aPtr);
        bVal = vld1q_f64(bPtr);
        /* Hint the next pair of inputs; has no effect on the result. */
        __VOLK_PREFETCH(aPtr + 2);
        __VOLK_PREFETCH(bPtr + 2);
        aPtr += 2;
        bPtr += 2;

        /* Widen the two floats to doubles. */
        aVal = vcvt_f64_f32(aVal1);
        /* Vector add. */
        cVal = vaddq_f64(aVal, bVal);
        /* Store the two sums to the output buffer. */
        vst1q_f64(cPtr, cVal);

        cPtr += 2;
    }

    /* Scalar tail: at most one element remains when num_points is odd. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}
143 #include <immintrin.h>
144 #include <xmmintrin.h>
/*!
 * \brief AVX implementation (unaligned loads/stores): adds a float vector to
 *        a double vector element by element and stores the sums as doubles.
 *
 * Processes eight elements per iteration: one 256-bit load of floats is split
 * into two 128-bit halves, each half is widened to four doubles, and the two
 * resulting 256-bit vectors are added to the matching doubles from bVector.
 * Remaining elements (num_points % 8) are handled by a scalar loop.
 *
 * \param cVector    Output buffer; num_points doubles are written. No alignment required.
 * \param aVector    First addend, single precision; num_points floats are read.
 * \param bVector    Second addend, double precision; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_64f_add_64f_u_avx(double* cVector,
                                              const float* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double* cPtr = cVector;
    const float* aPtr = aVector;
    const double* bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats, and the eight matching doubles as two vectors of four. */
        aVal = _mm256_loadu_ps(aPtr);
        bVal1 = _mm256_loadu_pd(bPtr);
        bVal2 = _mm256_loadu_pd(bPtr + 4);

        /* Split the floats into low and high groups of four. */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        /* Widen each group of four floats to four doubles. */
        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        /* Store the eight sums to the output buffer. */
        _mm256_storeu_pd(cPtr, cVal1);
        _mm256_storeu_pd(cPtr + 4, cVal2);

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the elements that do not fill a full group of eight. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}
196 #include <immintrin.h>
197 #include <xmmintrin.h>
/*!
 * \brief AVX implementation (aligned loads/stores): adds a float vector to a
 *        double vector element by element and stores the sums as doubles.
 *
 * Same algorithm as the unaligned AVX variant, but uses the aligned
 * load/store intrinsics (_mm256_load_ps, _mm256_load_pd, _mm256_store_pd),
 * which require 32-byte aligned addresses. All three buffers must therefore
 * be 32-byte aligned. Remaining elements (num_points % 8) are handled by a
 * scalar loop.
 *
 * \param cVector    Output buffer, 32-byte aligned; num_points doubles are written.
 * \param aVector    First addend, 32-byte aligned; num_points floats are read.
 * \param bVector    Second addend, 32-byte aligned; num_points doubles are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_64f_add_64f_a_avx(double* cVector,
                                              const float* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double* cPtr = cVector;
    const float* aPtr = aVector;
    const double* bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats, and the eight matching doubles as two vectors of four. */
        aVal = _mm256_load_ps(aPtr);
        bVal1 = _mm256_load_pd(bPtr);
        bVal2 = _mm256_load_pd(bPtr + 4);

        /* Split the floats into low and high groups of four. */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        /* Widen each group of four floats to four doubles. */
        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        /* Store the eight sums to the output buffer. */
        _mm256_store_pd(cPtr, cVal1);
        _mm256_store_pd(cPtr + 4, cVal2);

        /* Advancing by 8 elements keeps every pointer 32-byte aligned. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the elements that do not fill a full group of eight. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}
static void volk_32f_64f_add_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:199
static void volk_32f_64f_add_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:146
static void volk_32f_64f_add_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:80
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62