Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
55 #ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H
56 #define INCLUDED_volk_32fc_s32f_power_32fc_a_H
57 
58 #include <inttypes.h>
59 #include <math.h>
60 #include <stdio.h>
61 
64  const float power)
65 {
66  const float arg = power * atan2f(lv_creal(exp), lv_cimag(exp));
67  const float mag =
68  powf(lv_creal(exp) * lv_creal(exp) + lv_cimag(exp) * lv_cimag(exp), power / 2);
69  return mag * lv_cmake(-cosf(arg), sinf(arg));
70 }
71 
72 #ifdef LV_HAVE_SSE
73 #include <xmmintrin.h>
74 
75 #ifdef LV_HAVE_LIB_SIMDMATH
76 #include <simdmath.h>
77 #endif /* LV_HAVE_LIB_SIMDMATH */
78 
79 static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector,
80  const lv_32fc_t* aVector,
81  const float power,
82  unsigned int num_points)
83 {
84  unsigned int number = 0;
85 
86  lv_32fc_t* cPtr = cVector;
87  const lv_32fc_t* aPtr = aVector;
88 
89 #ifdef LV_HAVE_LIB_SIMDMATH
90  const unsigned int quarterPoints = num_points / 4;
91  __m128 vPower = _mm_set_ps1(power);
92 
93  __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue;
94  for (; number < quarterPoints; number++) {
95 
96  cplxValue1 = _mm_load_ps((float*)aPtr);
97  aPtr += 2;
98 
99  cplxValue2 = _mm_load_ps((float*)aPtr);
100  aPtr += 2;
101 
102  // Convert to polar coordinates
103 
104  // Arrange in i1i2i3i4 format
105  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
106  // Arrange in q1q2q3q4 format
107  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
108 
109  phase = atan2f4(qValue, iValue); // Calculate the Phase
110 
111  magnitude = _mm_sqrt_ps(
112  _mm_add_ps(_mm_mul_ps(iValue, iValue),
113  _mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square
114  // rooting the added I2 and Q2 values
115 
116  // Now calculate the power of the polar coordinate data
117  magnitude = powf4(magnitude, vPower); // Take the magnitude to the specified power
118 
119  phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified power
120 
121  // Convert back to cartesian coordinates
122  iValue = _mm_mul_ps(cosf4(phase),
123  magnitude); // Multiply the cos of the phase by the magnitude
124  qValue = _mm_mul_ps(sinf4(phase),
125  magnitude); // Multiply the sin of the phase by the magnitude
126 
127  cplxValue1 =
128  _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two i & q values
129  cplxValue2 =
130  _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two i & q values
131 
132  _mm_store_ps((float*)cPtr,
133  cplxValue1); // Store the results back into the C container
134 
135  cPtr += 2;
136 
137  _mm_store_ps((float*)cPtr,
138  cplxValue2); // Store the results back into the C container
139 
140  cPtr += 2;
141  }
142 
143  number = quarterPoints * 4;
144 #endif /* LV_HAVE_LIB_SIMDMATH */
145 
146  for (; number < num_points; number++) {
147  *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
148  }
149 }
150 #endif /* LV_HAVE_SSE */
151 
152 
153 #ifdef LV_HAVE_GENERIC
154 
155 static inline void volk_32fc_s32f_power_32fc_generic(lv_32fc_t* cVector,
156  const lv_32fc_t* aVector,
157  const float power,
158  unsigned int num_points)
159 {
160  lv_32fc_t* cPtr = cVector;
161  const lv_32fc_t* aPtr = aVector;
162  unsigned int number = 0;
163 
164  for (number = 0; number < num_points; number++) {
165  *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
166  }
167 }
168 
169 #endif /* LV_HAVE_GENERIC */
170 
171 
172 #endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */
lv_cimag
#define lv_cimag(x)
Definition: volk_complex.h:94
volk_32fc_s32f_power_32fc_a_sse
static void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t *cVector, const lv_32fc_t *aVector, const float power, unsigned int num_points)
Definition: volk_32fc_s32f_power_32fc.h:79
__volk_s32fc_s32f_power_s32fc_a
static lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, const float power)
raise a complex float to a real float power
Definition: volk_32fc_s32f_power_32fc.h:63
lv_cmake
#define lv_cmake(r, i)
Definition: volk_complex.h:73
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70
volk_32fc_s32f_power_32fc_generic
static void volk_32fc_s32f_power_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, const float power, unsigned int num_points)
Definition: volk_32fc_s32f_power_32fc.h:155
lv_creal
#define lv_creal(x)
Definition: volk_complex.h:92