74 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H 75 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H 82 #include <smmintrin.h> 84 #ifdef LV_HAVE_LIB_SIMDMATH 88 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(
float* outputVector,
const lv_32fc_t* complexVector,
const float normalizeFactor,
unsigned int num_points){
89 const float* complexVectorPtr = (
float*)complexVector;
90 float* outPtr = outputVector;
92 unsigned int number = 0;
93 const float invNormalizeFactor = 1.0 / normalizeFactor;
95 #ifdef LV_HAVE_LIB_SIMDMATH 96 const unsigned int quarterPoints = num_points / 4;
97 __m128 testVector = _mm_set_ps1(2*M_PI);
98 __m128 correctVector = _mm_set_ps1(M_PI);
99 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
101 __m128 complex1, complex2, iValue, qValue;
104 for (; number < quarterPoints; number++) {
106 complex1 = _mm_load_ps(complexVectorPtr);
107 complexVectorPtr += 4;
108 complex2 = _mm_load_ps(complexVectorPtr);
109 complexVectorPtr += 4;
111 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
112 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
114 phase = atan2f4(qValue, iValue);
117 keepMask = _mm_cmpneq_ps(phase,testVector);
118 phase = _mm_blendv_ps(correctVector, phase, keepMask);
120 phase = _mm_mul_ps(phase, vNormalizeFactor);
121 _mm_store_ps((
float*)outPtr, phase);
124 number = quarterPoints * 4;
127 for (; number < num_points; number++) {
128 const float real = *complexVectorPtr++;
129 const float imag = *complexVectorPtr++;
130 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
137 #include <xmmintrin.h> 139 #ifdef LV_HAVE_LIB_SIMDMATH 140 #include <simdmath.h> 144 const float* complexVectorPtr = (
float*)complexVector;
145 float* outPtr = outputVector;
147 unsigned int number = 0;
148 const float invNormalizeFactor = 1.0 / normalizeFactor;
150 #ifdef LV_HAVE_LIB_SIMDMATH 151 const unsigned int quarterPoints = num_points / 4;
152 __m128 testVector = _mm_set_ps1(2*M_PI);
153 __m128 correctVector = _mm_set_ps1(M_PI);
154 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
156 __m128 complex1, complex2, iValue, qValue;
160 for (; number < quarterPoints; number++) {
162 complex1 = _mm_load_ps(complexVectorPtr);
163 complexVectorPtr += 4;
164 complex2 = _mm_load_ps(complexVectorPtr);
165 complexVectorPtr += 4;
167 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
168 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
170 phase = atan2f4(qValue, iValue);
173 keepMask = _mm_cmpneq_ps(phase,testVector);
174 phase = _mm_and_ps(phase, keepMask);
175 mask = _mm_andnot_ps(keepMask, correctVector);
176 phase = _mm_or_ps(phase, mask);
178 phase = _mm_mul_ps(phase, vNormalizeFactor);
179 _mm_store_ps((
float*)outPtr, phase);
182 number = quarterPoints * 4;
185 for (; number < num_points; number++) {
186 const float real = *complexVectorPtr++;
187 const float imag = *complexVectorPtr++;
188 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
193 #ifdef LV_HAVE_GENERIC 196 float* outPtr = outputVector;
197 const float* inPtr = (
float*)inputVector;
198 const float invNormalizeFactor = 1.0 / normalizeFactor;
200 for ( number = 0; number < num_points; number++) {
201 const float real = *inPtr++;
202 const float imag = *inPtr++;
203 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:143
static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:195
float complex lv_32fc_t
Definition: volk_complex.h:61