// Listing fragment of the aligned AVX kernel. The footer of this listing places
// volk_32f_s32f_32f_fm_detect_32f_a_avx at header line 65, which falls inside the
// first line below. The leading integers are line numbers of the original header;
// skipped numbers (signature, braces, declarations, some statements) are absent here.
// NOTE(review): no num_points guard is visible in this fragment, while the other three
// fragments show one. Confirm the header has it: num_points is unsigned, so
// num_points-1 below would wrap around for num_points == 0.
56 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H 57 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H 63 #include <immintrin.h> 69 unsigned int number = 1;
// Number of 8-float groups, computed from num_points-1 rather than num_points.
73 const unsigned int eighthPoints = (num_points-1) / 8;
// Working cursors over the output and input buffers.
75 float* outPtr = outputVector;
76 const float* inPtr = inputVector;
// +bound and -bound broadcast to all 8 lanes for the lane-wise comparisons.
77 __m256 upperBound = _mm256_set1_ps(bound);
78 __m256 lowerBound = _mm256_set1_ps(-bound);
// Per-lane corrections: -2*bound for lanes above upperBound, +2*bound for lanes below lowerBound.
82 __m256 posBoundAdjust = _mm256_set1_ps(-2*bound);
83 __m256 negBoundAdjust = _mm256_set1_ps(2*bound);
// First output: *saveValue takes the place of the previous sample. A single
// correction of 2*bound is applied when the difference lies outside [-bound, bound].
85 *outPtr = *inPtr - *saveValue;
86 if (*outPtr > bound) *outPtr -= 2*bound;
87 if (*outPtr < -bound) *outPtr += 2*bound;
// Scalar pass for j = 1 .. min(8, num_points)-1: difference of adjacent input samples
// with the same single correction.
// NOTE(review): no pointer increments appear in this listing; presumably inPtr and
// outPtr advance after each element. Confirm against the full header.
90 for (j = 1; j < ( (8 < num_points) ? 8 : num_points); j++) {
91 *outPtr = *(inPtr) - *(inPtr-1);
92 if (*outPtr > bound) *outPtr -= 2*bound;
93 if (*outPtr < -bound) *outPtr += 2*bound;
// Vector pass: number continues from its initial value 1 up to eighthPoints-1.
98 for (; number < eighthPoints; number++) {
// Unaligned load starting one float before inPtr, then an aligned load at inPtr
// (_mm256_load_ps requires a 32-byte aligned address).
100 next3old1 = _mm256_loadu_ps((
float*) (inPtr-1));
101 next4 = _mm256_load_ps(inPtr);
// Lane-wise difference between each of the 8 samples and the sample before it.
104 next3old1 = _mm256_sub_ps(next4, next3old1);
// Predicate 14 is greater-than (ordered, signaling) in the Intel encoding. The compare
// gives an all-ones mask in lanes above upperBound; AND keeps -2*bound only there.
106 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, 14);
107 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
// Predicate 1 is less-than (ordered, signaling). next4 is reused as scratch for the
// mask of lanes below lowerBound, which keeps +2*bound only there.
108 next4 = _mm256_cmp_ps(next3old1, lowerBound, 1);
109 next4 = _mm256_and_ps(next4, negBoundAdjust);
// Merge both corrections (zero in unaffected lanes), apply them, and store with an
// aligned store (_mm256_store_ps requires a 32-byte aligned address).
110 boundAdjust = _mm256_or_ps(next4, boundAdjust);
112 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
113 _mm256_store_ps(outPtr,next3old1);
// Scalar tail: starts at index max(8, 8*eighthPoints) and runs to num_points-1.
117 for (number = (8 > (eighthPoints*8) ? 8 : (8 * eighthPoints)); number < num_points; number++) {
118 *outPtr = *(inPtr) - *(inPtr-1);
119 if (*outPtr > bound) *outPtr -= 2*bound;
120 if (*outPtr < -bound) *outPtr += 2*bound;
// Overwrite *saveValue with the last input sample.
125 *saveValue = inputVector[num_points-1];
// Listing fragment of the aligned SSE kernel. The footer of this listing places
// volk_32f_s32f_32f_fm_detect_32f_a_sse at header line 133, which falls inside the
// first line below. The leading integers are line numbers of the original header;
// skipped numbers (signature, braces, declarations, some statements) are absent here.
// The guarded statement of the num_points check is not visible (presumably an early
// return; confirm against the full header).
131 #include <xmmintrin.h> 134 if (num_points < 1) {
137 unsigned int number = 1;
// Number of 4-float groups, computed from num_points-1 rather than num_points.
141 const unsigned int quarterPoints = (num_points-1) / 4;
// Working cursors over the output and input buffers.
143 float* outPtr = outputVector;
144 const float* inPtr = inputVector;
// +bound and -bound broadcast to all 4 lanes for the lane-wise comparisons.
145 __m128 upperBound = _mm_set_ps1(bound);
146 __m128 lowerBound = _mm_set_ps1(-bound);
// Per-lane corrections: -2*bound for lanes above upperBound, +2*bound for lanes below lowerBound.
150 __m128 posBoundAdjust = _mm_set_ps1(-2*bound);
151 __m128 negBoundAdjust = _mm_set_ps1(2*bound);
// First output: *saveValue takes the place of the previous sample. A single
// correction of 2*bound is applied when the difference lies outside [-bound, bound].
153 *outPtr = *inPtr - *saveValue;
154 if (*outPtr > bound) *outPtr -= 2*bound;
155 if (*outPtr < -bound) *outPtr += 2*bound;
// Scalar pass for j = 1 .. min(4, num_points)-1: difference of adjacent input samples
// with the same single correction.
// NOTE(review): no pointer increments appear in this listing; presumably inPtr and
// outPtr advance after each element. Confirm against the full header.
158 for (j = 1; j < ( (4 < num_points) ? 4 : num_points); j++) {
159 *outPtr = *(inPtr) - *(inPtr-1);
160 if (*outPtr > bound) *outPtr -= 2*bound;
161 if (*outPtr < -bound) *outPtr += 2*bound;
// Vector pass: number continues from its initial value 1 up to quarterPoints-1.
166 for (; number < quarterPoints; number++) {
// Unaligned load starting one float before inPtr, then an aligned load at inPtr
// (_mm_load_ps requires a 16-byte aligned address).
168 next3old1 = _mm_loadu_ps((
float*) (inPtr-1));
169 next4 = _mm_load_ps(inPtr);
// Lane-wise difference between each of the 4 samples and the sample before it.
172 next3old1 = _mm_sub_ps(next4, next3old1);
// All-ones mask in lanes above upperBound; AND keeps -2*bound only in those lanes.
174 boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
175 boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
// next4 is reused as scratch: mask of lanes below lowerBound, keeping +2*bound there.
176 next4 = _mm_cmplt_ps(next3old1, lowerBound);
177 next4 = _mm_and_ps(next4, negBoundAdjust);
// Merge both corrections (zero in unaffected lanes), apply them, and store with an
// aligned store (_mm_store_ps requires a 16-byte aligned address).
178 boundAdjust = _mm_or_ps(next4, boundAdjust);
180 next3old1 = _mm_add_ps(next3old1, boundAdjust);
181 _mm_store_ps(outPtr,next3old1);
// Scalar tail: starts at index max(4, 4*quarterPoints) and runs to num_points-1.
185 for (number = (4 > (quarterPoints*4) ? 4 : (4 * quarterPoints)); number < num_points; number++) {
186 *outPtr = *(inPtr) - *(inPtr-1);
187 if (*outPtr > bound) *outPtr -= 2*bound;
188 if (*outPtr < -bound) *outPtr += 2*bound;
// Overwrite *saveValue with the last input sample.
193 *saveValue = inputVector[num_points-1];
// Listing fragment of the portable scalar kernel, compiled only when LV_HAVE_GENERIC
// is defined. The footer of this listing places volk_32f_s32f_32f_fm_detect_32f_generic
// at header line 199, which falls inside the first line below. The leading integers are
// line numbers of the original header; skipped numbers are absent from this listing.
// The guarded statement of the num_points check is not visible (presumably an early
// return; confirm against the full header).
197 #ifdef LV_HAVE_GENERIC 200 if (num_points < 1) {
// The loop below re-initialises number to 1; the 0 is not read in the visible lines.
203 unsigned int number = 0;
// Working cursors over the output and input buffers.
204 float* outPtr = outputVector;
205 const float* inPtr = inputVector;
// First output: *saveValue takes the place of the previous sample. A single
// correction of 2*bound is applied when the difference lies outside [-bound, bound].
208 *outPtr = *inPtr - *saveValue;
209 if (*outPtr > bound) *outPtr -= 2*bound;
210 if (*outPtr < -bound) *outPtr += 2*bound;
// Remaining outputs: difference of adjacent input samples with the same correction.
// NOTE(review): no pointer increments appear in this listing; presumably inPtr and
// outPtr advance after each element. Confirm against the full header.
214 for (number = 1; number < num_points; number++) {
215 *outPtr = *(inPtr) - *(inPtr-1);
216 if (*outPtr > bound) *outPtr -= 2*bound;
217 if (*outPtr < -bound) *outPtr += 2*bound;
// Overwrite *saveValue with the last input sample.
222 *saveValue = inputVector[num_points-1];
// Listing fragment of the unaligned AVX kernel. The footer of this listing places
// volk_32f_s32f_32f_fm_detect_32f_u_avx at header line 241, which falls inside the
// first line below. The leading integers are line numbers of the original header;
// skipped numbers (signature, braces, declarations, some statements) are absent here.
// The guarded statement of the num_points check is not visible (presumably an early
// return; confirm against the full header).
232 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H 233 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H 235 #include <inttypes.h> 239 #include <immintrin.h> 242 if (num_points < 1) {
245 unsigned int number = 1;
// Number of 8-float groups, computed from num_points-1 rather than num_points.
249 const unsigned int eighthPoints = (num_points-1) / 8;
// Working cursors over the output and input buffers.
251 float* outPtr = outputVector;
252 const float* inPtr = inputVector;
// +bound and -bound broadcast to all 8 lanes for the lane-wise comparisons.
253 __m256 upperBound = _mm256_set1_ps(bound);
254 __m256 lowerBound = _mm256_set1_ps(-bound);
// Per-lane corrections: -2*bound for lanes above upperBound, +2*bound for lanes below lowerBound.
258 __m256 posBoundAdjust = _mm256_set1_ps(-2*bound);
259 __m256 negBoundAdjust = _mm256_set1_ps(2*bound);
// First output: *saveValue takes the place of the previous sample. A single
// correction of 2*bound is applied when the difference lies outside [-bound, bound].
261 *outPtr = *inPtr - *saveValue;
262 if (*outPtr > bound) *outPtr -= 2*bound;
263 if (*outPtr < -bound) *outPtr += 2*bound;
// Scalar pass for j = 1 .. min(8, num_points)-1: difference of adjacent input samples
// with the same single correction.
// NOTE(review): no pointer increments appear in this listing; presumably inPtr and
// outPtr advance after each element. Confirm against the full header.
266 for (j = 1; j < ( (8 < num_points) ? 8 : num_points); j++) {
267 *outPtr = *(inPtr) - *(inPtr-1);
268 if (*outPtr > bound) *outPtr -= 2*bound;
269 if (*outPtr < -bound) *outPtr += 2*bound;
// Vector pass: number continues from its initial value 1 up to eighthPoints-1.
274 for (; number < eighthPoints; number++) {
// Both loads are unaligned here: one starting a float before inPtr, one at inPtr.
276 next3old1 = _mm256_loadu_ps((
float*) (inPtr-1));
277 next4 = _mm256_loadu_ps(inPtr);
// Lane-wise difference between each of the 8 samples and the sample before it.
280 next3old1 = _mm256_sub_ps(next4, next3old1);
// Predicate 14 is greater-than (ordered, signaling) in the Intel encoding. The compare
// gives an all-ones mask in lanes above upperBound; AND keeps -2*bound only there.
282 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, 14);
283 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
// Predicate 1 is less-than (ordered, signaling). next4 is reused as scratch for the
// mask of lanes below lowerBound, which keeps +2*bound only there.
284 next4 = _mm256_cmp_ps(next3old1, lowerBound, 1);
285 next4 = _mm256_and_ps(next4, negBoundAdjust);
// Merge both corrections (zero in unaffected lanes), apply them, and write the result
// with an unaligned store.
286 boundAdjust = _mm256_or_ps(next4, boundAdjust);
288 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
289 _mm256_storeu_ps(outPtr,next3old1);
// Scalar tail: starts at index max(8, 8*eighthPoints) and runs to num_points-1.
293 for (number = (8 > (eighthPoints*8) ? 8 : (8 * eighthPoints)); number < num_points; number++) {
294 *outPtr = *(inPtr) - *(inPtr-1);
295 if (*outPtr > bound) *outPtr -= 2*bound;
296 if (*outPtr < -bound) *outPtr += 2*bound;
// Overwrite *saveValue with the last input sample.
301 *saveValue = inputVector[num_points-1];
static void volk_32f_s32f_32f_fm_detect_32f_u_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:241
static void volk_32f_s32f_32f_fm_detect_32f_a_sse(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:133
static void volk_32f_s32f_32f_fm_detect_32f_a_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:65
static void volk_32f_s32f_32f_fm_detect_32f_generic(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:199