71 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H 72 #define INCLUDED_volk_32f_s32f_power_32f_a_H 79 #include <tmmintrin.h> 81 #ifdef LV_HAVE_LIB_SIMDMATH 86 volk_32f_s32f_power_32f_a_sse4_1(
float* cVector,
const float* aVector,
87 const float power,
unsigned int num_points)
89 unsigned int number = 0;
91 float* cPtr = cVector;
92 const float* aPtr = aVector;
94 #ifdef LV_HAVE_LIB_SIMDMATH 95 const unsigned int quarterPoints = num_points / 4;
96 __m128 vPower = _mm_set_ps1(power);
97 __m128 zeroValue = _mm_setzero_ps();
100 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
101 __m128 onesMask = _mm_set_ps1(1);
104 for(;number < quarterPoints; number++){
106 aVal = _mm_load_ps(aPtr);
107 signMask = _mm_cmplt_ps(aVal, zeroValue);
108 negatedValues = _mm_sub_ps(zeroValue, aVal);
109 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
112 cVal = powf4(aVal, vPower);
114 cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
116 _mm_store_ps(cPtr,cVal);
122 number = quarterPoints * 4;
125 for(;number < num_points; number++){
126 *cPtr++ = powf((*aPtr++), power);
134 #include <xmmintrin.h> 136 #ifdef LV_HAVE_LIB_SIMDMATH 137 #include <simdmath.h> 142 const float power,
unsigned int num_points)
144 unsigned int number = 0;
146 float* cPtr = cVector;
147 const float* aPtr = aVector;
149 #ifdef LV_HAVE_LIB_SIMDMATH 150 const unsigned int quarterPoints = num_points / 4;
151 __m128 vPower = _mm_set_ps1(power);
152 __m128 zeroValue = _mm_setzero_ps();
154 __m128 negatedValues;
155 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
156 __m128 onesMask = _mm_set_ps1(1);
159 for(;number < quarterPoints; number++){
161 aVal = _mm_load_ps(aPtr);
162 signMask = _mm_cmplt_ps(aVal, zeroValue);
163 negatedValues = _mm_sub_ps(zeroValue, aVal);
164 aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
167 cVal = powf4(aVal, vPower);
169 cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
171 _mm_store_ps(cPtr,cVal);
177 number = quarterPoints * 4;
180 for(;number < num_points; number++){
181 *cPtr++ = powf((*aPtr++), power);
188 #ifdef LV_HAVE_GENERIC 192 const float power,
unsigned int num_points)
194 float* cPtr = cVector;
195 const float* aPtr = aVector;
196 unsigned int number = 0;
198 for(number = 0; number < num_points; number++){
199 *cPtr++ = powf((*aPtr++), power);
/*
 * Cross-reference entries from the generated source listing (the line
 * numbers refer to that listing of volk_32f_s32f_power_32f.h):
 *
 *   static void volk_32f_s32f_power_32f_a_sse(float *cVector, const float *aVector,
 *                                             const float power, unsigned int num_points)
 *     Definition: volk_32f_s32f_power_32f.h:141
 *
 *   static void volk_32f_s32f_power_32f_generic(float *cVector, const float *aVector,
 *                                               const float power, unsigned int num_points)
 *     Definition: volk_32f_s32f_power_32f.h:191
 */