Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32fc_s32f_x2_power_spectral_density_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
55 #ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
56 #define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
57 
58 #include <inttypes.h>
59 #include <math.h>
60 #include <stdio.h>
61 
62 #ifdef LV_HAVE_AVX
63 #include <immintrin.h>
64 
65 #ifdef LV_HAVE_LIB_SIMDMATH
66 #include <simdmath.h>
67 #endif /* LV_HAVE_LIB_SIMDMATH */
68 
69 static inline void
71  const lv_32fc_t* complexFFTInput,
72  const float normalizationFactor,
73  const float rbw,
74  unsigned int num_points)
75 {
76  const float* inputPtr = (const float*)complexFFTInput;
77  float* destPtr = logPowerOutput;
78  uint64_t number = 0;
79  const float iRBW = 1.0 / rbw;
80  const float iNormalizationFactor = 1.0 / normalizationFactor;
81 
82 #ifdef LV_HAVE_LIB_SIMDMATH
83  __m256 magScalar = _mm256_set1_ps(10.0);
84  magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
85 
86  __m256 invRBW = _mm256_set1_ps(iRBW);
87 
88  __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
89 
90  __m256 power;
91  __m256 input1, input2;
92  const uint64_t eighthPoints = num_points / 8;
93  for (; number < eighthPoints; number++) {
94  // Load the complex values
95  input1 = _mm256_load_ps(inputPtr);
96  inputPtr += 8;
97  input2 = _mm256_load_ps(inputPtr);
98  inputPtr += 8;
99 
100  // Apply the normalization factor
101  input1 = _mm256_mul_ps(input1, invNormalizationFactor);
102  input2 = _mm256_mul_ps(input2, invNormalizationFactor);
103 
104  // Multiply each value by itself
105  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
106  input1 = _mm256_mul_ps(input1, input1);
107  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
108  input2 = _mm256_mul_ps(input2, input2);
109 
110  // Horizontal add, to add (r*r) + (i*i) for each complex value
111  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
112  inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
113  inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
114 
115  power = _mm256_hadd_ps(inputVal1, inputVal2);
116 
117  // Divide by the rbw
118  power = _mm256_mul_ps(power, invRBW);
119 
120  // Calculate the natural log power
121  power = logf4(power);
122 
123  // Convert to log10 and multiply by 10.0
124  power = _mm256_mul_ps(power, magScalar);
125 
126  // Store the floating point results
127  _mm256_store_ps(destPtr, power);
128 
129  destPtr += 8;
130  }
131 
132  number = eighthPoints * 8;
133 #endif /* LV_HAVE_LIB_SIMDMATH */
134  // Calculate the FFT for any remaining points
135  for (; number < num_points; number++) {
136  // Calculate dBm
137  // 50 ohm load assumption
138  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
139  // 75 ohm load assumption
140  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
141 
142  const float real = *inputPtr++ * iNormalizationFactor;
143  const float imag = *inputPtr++ * iNormalizationFactor;
144 
145  *destPtr = 10.0 * log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
146  destPtr++;
147  }
148 }
149 #endif /* LV_HAVE_AVX */
150 
151 #ifdef LV_HAVE_SSE3
152 #include <pmmintrin.h>
153 
154 
155 #ifdef LV_HAVE_LIB_SIMDMATH
156 #include <simdmath.h>
157 #endif /* LV_HAVE_LIB_SIMDMATH */
158 
159 static inline void
161  const lv_32fc_t* complexFFTInput,
162  const float normalizationFactor,
163  const float rbw,
164  unsigned int num_points)
165 {
166  const float* inputPtr = (const float*)complexFFTInput;
167  float* destPtr = logPowerOutput;
168  uint64_t number = 0;
169  const float iRBW = 1.0 / rbw;
170  const float iNormalizationFactor = 1.0 / normalizationFactor;
171 
172 #ifdef LV_HAVE_LIB_SIMDMATH
173  __m128 magScalar = _mm_set_ps1(10.0);
174  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
175 
176  __m128 invRBW = _mm_set_ps1(iRBW);
177 
178  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
179 
180  __m128 power;
181  __m128 input1, input2;
182  const uint64_t quarterPoints = num_points / 4;
183  for (; number < quarterPoints; number++) {
184  // Load the complex values
185  input1 = _mm_load_ps(inputPtr);
186  inputPtr += 4;
187  input2 = _mm_load_ps(inputPtr);
188  inputPtr += 4;
189 
190  // Apply the normalization factor
191  input1 = _mm_mul_ps(input1, invNormalizationFactor);
192  input2 = _mm_mul_ps(input2, invNormalizationFactor);
193 
194  // Multiply each value by itself
195  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
196  input1 = _mm_mul_ps(input1, input1);
197  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
198  input2 = _mm_mul_ps(input2, input2);
199 
200  // Horizontal add, to add (r*r) + (i*i) for each complex value
201  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
202  power = _mm_hadd_ps(input1, input2);
203 
204  // Divide by the rbw
205  power = _mm_mul_ps(power, invRBW);
206 
207  // Calculate the natural log power
208  power = logf4(power);
209 
210  // Convert to log10 and multiply by 10.0
211  power = _mm_mul_ps(power, magScalar);
212 
213  // Store the floating point results
214  _mm_store_ps(destPtr, power);
215 
216  destPtr += 4;
217  }
218 
219  number = quarterPoints * 4;
220 #endif /* LV_HAVE_LIB_SIMDMATH */
221  // Calculate the FFT for any remaining points
222  for (; number < num_points; number++) {
223  // Calculate dBm
224  // 50 ohm load assumption
225  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
226  // 75 ohm load assumption
227  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
228 
229  const float real = *inputPtr++ * iNormalizationFactor;
230  const float imag = *inputPtr++ * iNormalizationFactor;
231 
232  *destPtr = 10.0 * log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
233  destPtr++;
234  }
235 }
236 #endif /* LV_HAVE_SSE3 */
237 
238 
239 #ifdef LV_HAVE_GENERIC
240 
241 static inline void
243  const lv_32fc_t* complexFFTInput,
244  const float normalizationFactor,
245  const float rbw,
246  unsigned int num_points)
247 {
248  // Calculate the Power of the complex point
249  const float* inputPtr = (float*)complexFFTInput;
250  float* realFFTDataPointsPtr = logPowerOutput;
251  unsigned int point;
252  const float invRBW = 1.0 / rbw;
253  const float iNormalizationFactor = 1.0 / normalizationFactor;
254 
255  for (point = 0; point < num_points; point++) {
256  // Calculate dBm
257  // 50 ohm load assumption
258  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
259  // 75 ohm load assumption
260  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
261 
262  const float real = *inputPtr++ * iNormalizationFactor;
263  const float imag = *inputPtr++ * iNormalizationFactor;
264 
265  *realFFTDataPointsPtr =
266  10.0 * log10f((((real * real) + (imag * imag)) + 1e-20) * invRBW);
267 
268  realFFTDataPointsPtr++;
269  }
270 }
271 #endif /* LV_HAVE_GENERIC */
272 
273 #endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
volk_32fc_s32f_x2_power_spectral_density_32f_generic
static void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:242
volk_32fc_s32f_x2_power_spectral_density_32f_a_avx
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:70
volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:160
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70