#ifndef INCLUDED_volk_32f_64f_add_64f_H
#define INCLUDED_volk_32f_64f_add_64f_H

#ifdef LV_HAVE_GENERIC

/**
 * Element-wise sum of a float vector and a double vector, portable C version.
 *
 * Computes cVector[i] = (double)aVector[i] + bVector[i] for every
 * i in [0, num_points).
 *
 * @param cVector    Output buffer; receives num_points doubles.
 * @param aVector    First input, num_points single-precision values.
 * @param bVector    Second input, num_points double-precision values.
 * @param num_points Number of elements to process; 0 leaves cVector untouched.
 */
static inline void volk_32f_64f_add_64f_generic(double *cVector,
                                                const float *aVector,
                                                const double *bVector,
                                                unsigned int num_points)
{
    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        /* Widen the float operand first so the addition is done in double. */
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_GENERIC */


/*
 * NOTE(review): the feature-guard name for the two AVX kernels is not visible
 * in the listing this header was recovered from; LV_HAVE_AVX mirrors the
 * LV_HAVE_GENERIC pattern above -- confirm against the build configuration.
 */
#ifdef LV_HAVE_AVX

#include <immintrin.h>
#include <xmmintrin.h>

/**
 * Element-wise sum of a float vector and a double vector, AVX version using
 * unaligned loads and stores.
 *
 * Computes cVector[i] = (double)aVector[i] + bVector[i] for every
 * i in [0, num_points). Elements are handled eight at a time; the remaining
 * num_points % 8 elements are handled by a scalar loop.
 *
 * @param cVector    Output buffer; receives num_points doubles.
 * @param aVector    First input, num_points single-precision values.
 * @param bVector    Second input, num_points double-precision values.
 * @param num_points Number of elements to process.
 */
static inline void volk_32f_64f_add_64f_u_avx(double *cVector,
                                              const float *aVector,
                                              const double *bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats fill one 256-bit register; eight doubles need two. */
        aVal = _mm256_loadu_ps(aPtr);
        bVal1 = _mm256_loadu_pd(bPtr);
        bVal2 = _mm256_loadu_pd(bPtr + 4);

        /* Split the floats into low/high groups of four ... */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        /* ... and widen each group to four doubles. */
        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        _mm256_storeu_pd(cPtr, cVal1);
        _mm256_storeu_pd(cPtr + 4, cVal2);

        /* Step to the next group of eight; without this the loop would keep
         * rewriting the first eight outputs and the tail below would start
         * from element 0. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the last num_points % 8 elements. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_AVX

#include <immintrin.h>
#include <xmmintrin.h>

/**
 * Element-wise sum of a float vector and a double vector, AVX version using
 * aligned loads and stores.
 *
 * Computes cVector[i] = (double)aVector[i] + bVector[i] for every
 * i in [0, num_points). Elements are handled eight at a time; the remaining
 * num_points % 8 elements are handled by a scalar loop.
 *
 * The aligned load/store intrinsics used here require 32-byte aligned
 * addresses, so cVector, aVector and bVector must all be 32-byte aligned.
 *
 * @param cVector    Output buffer; receives num_points doubles.
 * @param aVector    First input, num_points single-precision values.
 * @param bVector    Second input, num_points double-precision values.
 * @param num_points Number of elements to process.
 */
static inline void volk_32f_64f_add_64f_a_avx(double *cVector,
                                              const float *aVector,
                                              const double *bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats fill one 256-bit register; eight doubles need two. */
        aVal = _mm256_load_ps(aPtr);
        bVal1 = _mm256_load_pd(bPtr);
        bVal2 = _mm256_load_pd(bPtr + 4);

        /* Split the floats into low/high groups of four ... */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        /* ... and widen each group to four doubles. */
        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        _mm256_store_pd(cPtr, cVal1);
        _mm256_store_pd(cPtr + 4, cVal2);

        /* Step to the next group of eight; this also keeps every pointer on
         * a 32-byte boundary (8 floats = 32 bytes, 8 doubles = 64 bytes). */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the last num_points % 8 elements. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_32f_64f_add_64f_H */
/*
 * Definition index (cross-references from the generated documentation):
 *
 *   static void volk_32f_64f_add_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *     Definition: volk_32f_64f_add_64f.h:80
 *
 *   static void volk_32f_64f_add_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *     Definition: volk_32f_64f_add_64f.h:106
 *
 *   static void volk_32f_64f_add_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *     Definition: volk_32f_64f_add_64f.h:157
 */