Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
53 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
54 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
55 
56 #include <inttypes.h>
57 #include <stdio.h>
58 #include <math.h>
59 
60 #ifdef LV_HAVE_SSE3
61 #include <pmmintrin.h>
62 
63 #ifdef LV_HAVE_LIB_SIMDMATH
64 #include <simdmath.h>
65 #endif /* LV_HAVE_LIB_SIMDMATH */
66 
67 static inline void
68 volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput, const lv_32fc_t* complexFFTInput,
69  const float normalizationFactor, unsigned int num_points)
70 {
71  const float* inputPtr = (const float*)complexFFTInput;
72  float* destPtr = logPowerOutput;
73  uint64_t number = 0;
74  const float iNormalizationFactor = 1.0 / normalizationFactor;
75 #ifdef LV_HAVE_LIB_SIMDMATH
76  __m128 magScalar = _mm_set_ps1(10.0);
77  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
78 
79  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
80 
81  __m128 power;
82  __m128 input1, input2;
83  const uint64_t quarterPoints = num_points / 4;
84  for(;number < quarterPoints; number++){
85  // Load the complex values
86  input1 =_mm_load_ps(inputPtr);
87  inputPtr += 4;
88  input2 =_mm_load_ps(inputPtr);
89  inputPtr += 4;
90 
91  // Apply the normalization factor
92  input1 = _mm_mul_ps(input1, invNormalizationFactor);
93  input2 = _mm_mul_ps(input2, invNormalizationFactor);
94 
95  // Multiply each value by itself
96  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
97  input1 = _mm_mul_ps(input1, input1);
98  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
99  input2 = _mm_mul_ps(input2, input2);
100 
101  // Horizontal add, to add (r*r) + (i*i) for each complex value
102  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
103  power = _mm_hadd_ps(input1, input2);
104 
105  // Calculate the natural log power
106  power = logf4(power);
107 
108  // Convert to log10 and multiply by 10.0
109  power = _mm_mul_ps(power, magScalar);
110 
111  // Store the floating point results
112  _mm_store_ps(destPtr, power);
113 
114  destPtr += 4;
115  }
116 
117  number = quarterPoints*4;
118 #endif /* LV_HAVE_LIB_SIMDMATH */
119  // Calculate the FFT for any remaining points
120 
121  for(; number < num_points; number++){
122  // Calculate dBm
123  // 50 ohm load assumption
124  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
125  // 75 ohm load assumption
126  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
127 
128  const float real = *inputPtr++ * iNormalizationFactor;
129  const float imag = *inputPtr++ * iNormalizationFactor;
130 
131  *destPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
132 
133  destPtr++;
134  }
135 
136 }
137 #endif /* LV_HAVE_SSE3 */
138 
139 
140 #ifdef LV_HAVE_GENERIC
141 
142 static inline void
143 volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput, const lv_32fc_t* complexFFTInput,
144  const float normalizationFactor, unsigned int num_points)
145 {
146  // Calculate the Power of the complex point
147  const float* inputPtr = (float*)complexFFTInput;
148  float* realFFTDataPointsPtr = logPowerOutput;
149  const float iNormalizationFactor = 1.0 / normalizationFactor;
150  unsigned int point;
151  for(point = 0; point < num_points; point++){
152  // Calculate dBm
153  // 50 ohm load assumption
154  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
155  // 75 ohm load assumption
156  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
157 
158  const float real = *inputPtr++ * iNormalizationFactor;
159  const float imag = *inputPtr++ * iNormalizationFactor;
160 
161  *realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
162  realFFTDataPointsPtr++;
163  }
164 }
165 #endif /* LV_HAVE_GENERIC */
166 
167 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
static void volk_32fc_s32f_power_spectrum_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:68
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:143
float complex lv_32fc_t
Definition: volk_complex.h:61