Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_s32f_x2_power_spectral_density_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
54 #ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
55 #define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
56 
57 #include <inttypes.h>
58 #include <stdio.h>
59 #include <math.h>
60 
61 #ifdef LV_HAVE_AVX
62 #include <immintrin.h>
63 
64 #ifdef LV_HAVE_LIB_SIMDMATH
65 #include <simdmath.h>
66 #endif /* LV_HAVE_LIB_SIMDMATH */
67 
68 static inline void
69 volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float* logPowerOutput, const lv_32fc_t* complexFFTInput,
70  const float normalizationFactor, const float rbw,
71  unsigned int num_points)
72 {
73  const float* inputPtr = (const float*)complexFFTInput;
74  float* destPtr = logPowerOutput;
75  uint64_t number = 0;
76  const float iRBW = 1.0 / rbw;
77  const float iNormalizationFactor = 1.0 / normalizationFactor;
78 
79 #ifdef LV_HAVE_LIB_SIMDMATH
80  __m256 magScalar = _mm256_set1_ps(10.0);
81  magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
82 
83  __m256 invRBW = _mm256_set1_ps(iRBW);
84 
85  __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
86 
87  __m256 power;
88  __m256 input1, input2;
89  const uint64_t eighthPoints = num_points / 8;
90  for(;number < eighthPoints; number++){
91  // Load the complex values
92  input1 =_mm256_load_ps(inputPtr);
93  inputPtr += 8;
94  input2 =_mm256_load_ps(inputPtr);
95  inputPtr += 8;
96 
97  // Apply the normalization factor
98  input1 = _mm256_mul_ps(input1, invNormalizationFactor);
99  input2 = _mm256_mul_ps(input2, invNormalizationFactor);
100 
101  // Multiply each value by itself
102  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
103  input1 = _mm256_mul_ps(input1, input1);
104  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
105  input2 = _mm256_mul_ps(input2, input2);
106 
107  // Horizontal add, to add (r*r) + (i*i) for each complex value
108  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
109  inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
110  inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
111 
112  power = _mm256_hadd_ps(inputVal1, inputVal2);
113 
114  // Divide by the rbw
115  power = _mm256_mul_ps(power, invRBW);
116 
117  // Calculate the natural log power
118  power = logf4(power);
119 
120  // Convert to log10 and multiply by 10.0
121  power = _mm256_mul_ps(power, magScalar);
122 
123  // Store the floating point results
124  _mm256_store_ps(destPtr, power);
125 
126  destPtr += 8;
127  }
128 
129  number = eighthPoints*8;
130 #endif /* LV_HAVE_LIB_SIMDMATH */
131  // Calculate the FFT for any remaining points
132  for(; number < num_points; number++){
133  // Calculate dBm
134  // 50 ohm load assumption
135  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
136  // 75 ohm load assumption
137  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
138 
139  const float real = *inputPtr++ * iNormalizationFactor;
140  const float imag = *inputPtr++ * iNormalizationFactor;
141 
142  *destPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
143  destPtr++;
144  }
145 
146 }
147 #endif /* LV_HAVE_AVX */
148 
149 #ifdef LV_HAVE_SSE3
150 #include <pmmintrin.h>
151 
152 
153 
154 #ifdef LV_HAVE_LIB_SIMDMATH
155 #include <simdmath.h>
156 #endif /* LV_HAVE_LIB_SIMDMATH */
157 
158 static inline void
159 volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput,
160  const float normalizationFactor, const float rbw,
161  unsigned int num_points)
162 {
163  const float* inputPtr = (const float*)complexFFTInput;
164  float* destPtr = logPowerOutput;
165  uint64_t number = 0;
166  const float iRBW = 1.0 / rbw;
167  const float iNormalizationFactor = 1.0 / normalizationFactor;
168 
169 #ifdef LV_HAVE_LIB_SIMDMATH
170  __m128 magScalar = _mm_set_ps1(10.0);
171  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
172 
173  __m128 invRBW = _mm_set_ps1(iRBW);
174 
175  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
176 
177  __m128 power;
178  __m128 input1, input2;
179  const uint64_t quarterPoints = num_points / 4;
180  for(;number < quarterPoints; number++){
181  // Load the complex values
182  input1 =_mm_load_ps(inputPtr);
183  inputPtr += 4;
184  input2 =_mm_load_ps(inputPtr);
185  inputPtr += 4;
186 
187  // Apply the normalization factor
188  input1 = _mm_mul_ps(input1, invNormalizationFactor);
189  input2 = _mm_mul_ps(input2, invNormalizationFactor);
190 
191  // Multiply each value by itself
192  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
193  input1 = _mm_mul_ps(input1, input1);
194  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
195  input2 = _mm_mul_ps(input2, input2);
196 
197  // Horizontal add, to add (r*r) + (i*i) for each complex value
198  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
199  power = _mm_hadd_ps(input1, input2);
200 
201  // Divide by the rbw
202  power = _mm_mul_ps(power, invRBW);
203 
204  // Calculate the natural log power
205  power = logf4(power);
206 
207  // Convert to log10 and multiply by 10.0
208  power = _mm_mul_ps(power, magScalar);
209 
210  // Store the floating point results
211  _mm_store_ps(destPtr, power);
212 
213  destPtr += 4;
214  }
215 
216  number = quarterPoints*4;
217 #endif /* LV_HAVE_LIB_SIMDMATH */
218  // Calculate the FFT for any remaining points
219  for(; number < num_points; number++){
220  // Calculate dBm
221  // 50 ohm load assumption
222  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
223  // 75 ohm load assumption
224  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
225 
226  const float real = *inputPtr++ * iNormalizationFactor;
227  const float imag = *inputPtr++ * iNormalizationFactor;
228 
229  *destPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
230  destPtr++;
231  }
232 
233 }
234 #endif /* LV_HAVE_SSE3 */
235 
236 
237 #ifdef LV_HAVE_GENERIC
238 
239 static inline void
240 volk_32fc_s32f_x2_power_spectral_density_32f_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput,
241  const float normalizationFactor, const float rbw,
242  unsigned int num_points)
243 {
244  // Calculate the Power of the complex point
245  const float* inputPtr = (float*)complexFFTInput;
246  float* realFFTDataPointsPtr = logPowerOutput;
247  unsigned int point;
248  const float invRBW = 1.0 / rbw;
249  const float iNormalizationFactor = 1.0 / normalizationFactor;
250 
251  for(point = 0; point < num_points; point++){
252  // Calculate dBm
253  // 50 ohm load assumption
254  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
255  // 75 ohm load assumption
256  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
257 
258  const float real = *inputPtr++ * iNormalizationFactor;
259  const float imag = *inputPtr++ * iNormalizationFactor;
260 
261  *realFFTDataPointsPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * invRBW);
262 
263  realFFTDataPointsPtr++;
264  }
265 }
266 #endif /* LV_HAVE_GENERIC */
267 
268 #endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
static void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:240
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:159
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:69