63 #ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
64 #define INCLUDED_volk_32f_accumulator_s32f_a_H
70 #include <immintrin.h>
/*
 * Sum num_points floats from inputBuffer into *result using AVX, 8 lanes at a
 * time, with aligned loads.
 *
 * result       receives the accumulated sum (written exactly once, at the end).
 * inputBuffer  samples to add up; must be 32-byte aligned because the vector
 *              loop reads it with _mm256_load_ps.
 * num_points   number of floats to accumulate; 0 yields a result of 0.
 *
 * The vector loop keeps eight independent partial sums, so rounding can differ
 * from a strictly sequential summation of the same data.
 */
static inline void volk_32f_accumulator_s32f_a_avx(float* result,
                                                   const float* inputBuffer,
                                                   unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float* aPtr = inputBuffer;
    float tempBuffer[8];
    float returnValue;
    unsigned int number;

    __m256 accumulator = _mm256_setzero_ps();

    /* One partial sum per lane; each pass consumes eight consecutive floats. */
    for (number = 0; number < eighthPoints; number++) {
        accumulator = _mm256_add_ps(accumulator, _mm256_load_ps(aPtr));
        aPtr += 8;
    }

    /* Unaligned store, so the scratch array needs no alignment attribute. */
    _mm256_storeu_ps(tempBuffer, accumulator);

    /* Fold the lanes in index order, starting from lane 0. */
    returnValue = tempBuffer[0];
    for (number = 1; number < 8; number++) {
        returnValue += tempBuffer[number];
    }

    /* Scalar tail: the (num_points % 8) samples the vector loop did not cover. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        returnValue += *aPtr++;
    }

    *result = returnValue;
}
113 #include <immintrin.h>
/*
 * Sum num_points floats from inputBuffer into *result using AVX, 8 lanes at a
 * time, with unaligned loads.
 *
 * result       receives the accumulated sum (written exactly once, at the end).
 * inputBuffer  samples to add up; no alignment requirement, since the vector
 *              loop reads it with _mm256_loadu_ps.
 * num_points   number of floats to accumulate; 0 yields a result of 0.
 *
 * The vector loop keeps eight independent partial sums, so rounding can differ
 * from a strictly sequential summation of the same data.
 */
static inline void volk_32f_accumulator_s32f_u_avx(float* result,
                                                   const float* inputBuffer,
                                                   unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float* aPtr = inputBuffer;
    float tempBuffer[8];
    float returnValue;
    unsigned int number;

    __m256 accumulator = _mm256_setzero_ps();

    /* One partial sum per lane; each pass consumes eight consecutive floats. */
    for (number = 0; number < eighthPoints; number++) {
        accumulator = _mm256_add_ps(accumulator, _mm256_loadu_ps(aPtr));
        aPtr += 8;
    }

    /* Unaligned store, so the scratch array needs no alignment attribute. */
    _mm256_storeu_ps(tempBuffer, accumulator);

    /* Fold the lanes in index order, starting from lane 0. */
    returnValue = tempBuffer[0];
    for (number = 1; number < 8; number++) {
        returnValue += tempBuffer[number];
    }

    /* Scalar tail: the (num_points % 8) samples the vector loop did not cover. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        returnValue += *aPtr++;
    }

    *result = returnValue;
}
156 #include <xmmintrin.h>
/*
 * Sum num_points floats from inputBuffer into *result using SSE, 4 lanes at a
 * time, with aligned loads.
 *
 * result       receives the accumulated sum (written exactly once, at the end).
 * inputBuffer  samples to add up; must be 16-byte aligned because the vector
 *              loop reads it with _mm_load_ps.
 * num_points   number of floats to accumulate; 0 yields a result of 0.
 *
 * The vector loop keeps four independent partial sums, so rounding can differ
 * from a strictly sequential summation of the same data.
 */
static inline void volk_32f_accumulator_s32f_a_sse(float* result,
                                                   const float* inputBuffer,
                                                   unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* aPtr = inputBuffer;
    float tempBuffer[4];
    float returnValue;
    unsigned int number;

    __m128 accumulator = _mm_setzero_ps();

    /* One partial sum per lane; each pass consumes four consecutive floats. */
    for (number = 0; number < quarterPoints; number++) {
        accumulator = _mm_add_ps(accumulator, _mm_load_ps(aPtr));
        aPtr += 4;
    }

    /* Unaligned store, so the scratch array needs no alignment attribute. */
    _mm_storeu_ps(tempBuffer, accumulator);

    /* Fold the lanes in index order, starting from lane 0. */
    returnValue = tempBuffer[0];
    for (number = 1; number < 4; number++) {
        returnValue += tempBuffer[number];
    }

    /* Scalar tail: the (num_points % 4) samples the vector loop did not cover. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        returnValue += *aPtr++;
    }

    *result = returnValue;
}
195 #include <xmmintrin.h>
/*
 * Sum num_points floats from inputBuffer into *result using SSE, 4 lanes at a
 * time, with unaligned loads.
 *
 * result       receives the accumulated sum (written exactly once, at the end).
 * inputBuffer  samples to add up; no alignment requirement.
 * num_points   number of floats to accumulate; 0 yields a result of 0.
 *
 * The vector loop keeps four independent partial sums, so rounding can differ
 * from a strictly sequential summation of the same data.
 */
static inline void volk_32f_accumulator_s32f_u_sse(float* result,
                                                   const float* inputBuffer,
                                                   unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* aPtr = inputBuffer;
    float tempBuffer[4];
    float returnValue;
    unsigned int number;

    __m128 accumulator = _mm_setzero_ps();

    /*
     * This is the unaligned kernel, so the input must be read with
     * _mm_loadu_ps; the aligned _mm_load_ps faults when inputBuffer is not on a
     * 16-byte boundary.
     */
    for (number = 0; number < quarterPoints; number++) {
        accumulator = _mm_add_ps(accumulator, _mm_loadu_ps(aPtr));
        aPtr += 4;
    }

    /* Unaligned store, so the scratch array needs no alignment attribute. */
    _mm_storeu_ps(tempBuffer, accumulator);

    /* Fold the lanes in index order, starting from lane 0. */
    returnValue = tempBuffer[0];
    for (number = 1; number < 4; number++) {
        returnValue += tempBuffer[number];
    }

    /* Scalar tail: the (num_points % 4) samples the vector loop did not cover. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        returnValue += *aPtr++;
    }

    *result = returnValue;
}
232 #ifdef LV_HAVE_GENERIC
/*
 * Portable reference implementation: add num_points floats from inputBuffer
 * sequentially, in index order, and store the total in *result.
 *
 * result       receives the accumulated sum (written exactly once, at the end).
 * inputBuffer  samples to add up; read only, no alignment requirement.
 * num_points   number of floats to accumulate; 0 yields a result of 0.
 */
static inline void volk_32f_accumulator_s32f_generic(float* result,
                                                     const float* inputBuffer,
                                                     unsigned int num_points)
{
    float returnValue = 0;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        returnValue += inputBuffer[number];
    }

    *result = returnValue;
}
static void volk_32f_accumulator_s32f_a_avx(float *result, const float *inputBuffer, unsigned int num_points)
Definition: volk_32f_accumulator_s32f.h:72
static void volk_32f_accumulator_s32f_u_sse(float *result, const float *inputBuffer, unsigned int num_points)
Definition: volk_32f_accumulator_s32f.h:197
static void volk_32f_accumulator_s32f_generic(float *result, const float *inputBuffer, unsigned int num_points)
Definition: volk_32f_accumulator_s32f.h:233
static void volk_32f_accumulator_s32f_u_avx(float *result, const float *inputBuffer, unsigned int num_points)
Definition: volk_32f_accumulator_s32f.h:115
static void volk_32f_accumulator_s32f_a_sse(float *result, const float *inputBuffer, unsigned int num_points)
Definition: volk_32f_accumulator_s32f.h:158
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56