56 #ifndef INCLUDE_VOLK_VOLK_NEON_INTRINSICS_H_
57 #define INCLUDE_VOLK_VOLK_NEON_INTRINSICS_H_
/*
 * Fragment of a static inline helper returning float32x4_t. Its name,
 * parameter list, braces and return statement are not visible in this view.
 *
 * The visible statements square cmplxValue.val[0] and cmplxValue.val[1]
 * lane by lane and add the two products, so each lane of `result` holds
 * val[0]^2 + val[1]^2.
 *
 * NOTE(review): the locals are named iValue/qValue, so val[0]/val[1] are
 * presumably the de-interleaved in-phase (real) and quadrature (imaginary)
 * parts of four complex samples - confirm against the callers.
 */
63 static inline float32x4_t
66 float32x4_t iValue, qValue, result;
/* val[0] * val[0], per lane */
67 iValue = vmulq_f32(cmplxValue.val[0], cmplxValue.val[0]);
/* val[1] * val[1], per lane */
68 qValue = vmulq_f32(cmplxValue.val[1], cmplxValue.val[1]);
/* sum of the two squares, per lane */
69 result = vaddq_f32(iValue, qValue);
/*
 * Fragment: the enclosing function's signature and braces are not visible in
 * this view.
 *
 * Computes a per-lane approximation of 1/sqrt(x) and returns it.
 * vrsqrteq_f32 supplies the initial hardware estimate; each of the two
 * following statements is one refinement step of the form
 *     e = e * vrsqrtsq_f32(x * e, e)
 * where vrsqrtsq_f32(a, b) is the NEON reciprocal-square-root step
 * (3 - a * b) / 2. The two steps are identical and must stay in this order,
 * since each consumes the estimate produced by the previous one.
 *
 * NOTE(review): no special handling of x <= 0 is visible here - confirm what
 * callers expect for zero or negative lanes.
 */
/* initial estimate of 1/sqrt(x) */
76 float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
/* first refinement step */
77 sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
/* second refinement step */
78 sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
80 return sqrt_reciprocal;
/*
 * Fragment of a static inline helper returning float32x4x2_t. Its name,
 * parameter list, braces, the declaration of c_val and the return statement
 * are not visible in this view.
 *
 * The visible statements form, lane by lane:
 *     c_val.val[0] = a_val.val[0] * b_val.val[0] - a_val.val[1] * b_val.val[1]
 *     c_val.val[1] = a_val.val[0] * b_val.val[1] + a_val.val[1] * b_val.val[0]
 * which is the complex product (a0 + j*a1) * (b0 + j*b1) when val[0] holds
 * the real parts and val[1] the imaginary parts.
 *
 * NOTE(review): that real/imaginary layout is inferred from the tmp_real /
 * tmp_imag naming - confirm against the callers.
 */
84 static inline float32x4x2_t
87 float32x4x2_t tmp_real;
88 float32x4x2_t tmp_imag;
/* the two products that make up the real part */
93 tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
95 tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]);
/* the two cross products that make up the imaginary part */
98 tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]);
100 tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
/* real = a0*b0 - a1*b1 */
102 c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
/* imag = a0*b1 + a1*b0 */
103 c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
/*
 * Fragment: the enclosing function's signature, braces and return statement
 * are not visible in this view.
 *
 * Evaluates a degree-7 polynomial in x, per lane, from the eight vectors in
 * coeffs[]. vmlaq_f32(a, b, c) yields a + b * c, so the statements below
 * expand to
 *     res = coeffs[0]       + coeffs[4] * x
 *         + coeffs[2] * x^2 + coeffs[6] * x^3
 *         + coeffs[1] * x^4 + coeffs[5] * x^5
 *         + coeffs[3] * x^6 + coeffs[7] * x^7
 * Note that coeffs[] is therefore NOT stored in ascending power order; any
 * coefficient table passed in has to follow this interleaved layout.
 */
/* four independent linear terms: c[k] + c[k + 4] * x */
110 float32x4_t cA = vmlaq_f32(coeffs[0], coeffs[4], x);
111 float32x4_t cB = vmlaq_f32(coeffs[2], coeffs[6], x);
112 float32x4_t cC = vmlaq_f32(coeffs[1], coeffs[5], x);
113 float32x4_t cD = vmlaq_f32(coeffs[3], coeffs[7], x);
/* powers of x used to combine the linear terms */
114 float32x4_t x2 = vmulq_f32(x, x);
115 float32x4_t x4 = vmulq_f32(x2, x2);
/* (cA + cB * x^2) + (cC + cD * x^2) * x^4 */
116 float32x4_t res = vmlaq_f32(vmlaq_f32(cA, cB, x2), vmlaq_f32(cC, cD, x2), x4);
/*
 * Fragment: the enclosing function's signature and braces, the closing of the
 * log_tab initializer, the statement that first assigns `poly`, and the
 * return statement are not visible in this view.
 *
 * The visible statements split each lane of x into an integer exponent m and
 * a remainder val with that exponent removed, then add m * ln(2) to `poly`.
 *
 * NOTE(review): presumably `poly` is a polynomial approximation of ln(val)
 * built from log_tab, making the final result
 * ln(x) = ln(val) + m * ln(2) - confirm against the missing lines.
 */
/* eight polynomial coefficients, each broadcast to all four lanes */
124 const float32x4_t log_tab[8] = {
125 vdupq_n_f32(-2.29561495781f),
126 vdupq_n_f32(-2.47071170807f),
127 vdupq_n_f32(-5.68692588806f),
128 vdupq_n_f32(-0.165253549814f),
129 vdupq_n_f32(5.17591238022f),
130 vdupq_n_f32(0.844007015228f),
131 vdupq_n_f32(4.58445882797f),
132 vdupq_n_f32(0.0141278216615f),
/* 127 is the exponent bias of IEEE 754 single precision */
135 const int32x4_t CONST_127 = vdupq_n_s32(127);
/* ln(2) */
136 const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f);
/*
 * m = (raw bits of x >> 23) - 127. The shift is a logical shift of the full
 * 32-bit pattern, so the sign bit is not masked off: m is the unbiased
 * exponent only for lanes where x is non-negative.
 */
139 int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127);
/*
 * val = raw bits of x minus (m << 23), reinterpreted as float: this subtracts
 * m from the exponent field and leaves the mantissa bits untouched.
 */
140 float32x4_t
val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23)));
/* poly += m * ln(2) */
146 poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2);