Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_s32f_normalize.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32f_s32f_normalize_a_H
71 #define INCLUDED_volk_32f_s32f_normalize_a_H
72 
73 #include <inttypes.h>
74 #include <stdio.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void volk_32f_s32f_normalize_a_avx(float* vecBuffer, const float scalar, unsigned int num_points){
80  unsigned int number = 0;
81  float* inputPtr = vecBuffer;
82 
83  const float invScalar = 1.0 / scalar;
84  __m256 vecScalar = _mm256_set1_ps(invScalar);
85 
86  __m256 input1;
87 
88  const uint64_t eighthPoints = num_points / 8;
89  for(;number < eighthPoints; number++){
90 
91  input1 = _mm256_load_ps(inputPtr);
92 
93  input1 = _mm256_mul_ps(input1, vecScalar);
94 
95  _mm256_store_ps(inputPtr, input1);
96 
97  inputPtr += 8;
98  }
99 
100  number = eighthPoints*8;
101  for(; number < num_points; number++){
102  *inputPtr *= invScalar;
103  inputPtr++;
104  }
105 }
106 #endif /* LV_HAVE_AVX */
107 
108 #ifdef LV_HAVE_SSE
109 #include <xmmintrin.h>
110 
/*
 * Scales every element of vecBuffer in place by 1/scalar using SSE.
 *
 * vecBuffer   buffer of num_points floats, overwritten with the result.
 *             It is accessed with the aligned load/store intrinsics, so it
 *             must satisfy their 16-byte alignment requirement.
 * scalar      divisor; its reciprocal is computed once and used as a
 *             multiplier for all elements.
 * num_points  number of floats in vecBuffer.
 */
static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, const float scalar, unsigned int num_points){
  /* The reciprocal is evaluated in double precision and then narrowed. */
  const float reciprocal = (float)(1.0 / scalar);
  const __m128 factor = _mm_set_ps1(reciprocal);

  /* Number of complete 4-float groups that can go through the vector path. */
  const unsigned int blockCount = num_points / 4;
  unsigned int i;

  for(i = 0; i < blockCount; i++){
    float* block = vecBuffer + 4 * i;
    _mm_store_ps(block, _mm_mul_ps(_mm_load_ps(block), factor));
  }

  /* Scalar tail: the up-to-three elements left after the last full group. */
  for(i = blockCount * 4; i < num_points; i++){
    vecBuffer[i] *= reciprocal;
  }
}
138 #endif /* LV_HAVE_SSE */
139 
140 #ifdef LV_HAVE_GENERIC
141 
/*
 * Portable implementation: scales every element of vecBuffer in place
 * by 1/scalar.
 *
 * vecBuffer   buffer of num_points floats, overwritten with the result.
 * scalar      divisor; its reciprocal is computed once and used as a
 *             multiplier for all elements.
 * num_points  number of floats in vecBuffer.
 */
static inline void volk_32f_s32f_normalize_generic(float* vecBuffer, const float scalar, unsigned int num_points){
  /* The reciprocal is evaluated in double precision and then narrowed. */
  const float reciprocal = (float)(1.0 / scalar);
  unsigned int i;

  for(i = 0; i < num_points; i++){
    vecBuffer[i] *= reciprocal;
  }
}
151 #endif /* LV_HAVE_GENERIC */
152 
153 #ifdef LV_HAVE_ORC
154 
/* Kernel declared here and defined elsewhere; takes separate dst and src buffers. */
extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, float* src, const float scalar, unsigned int num_points);

/*
 * Wrapper around volk_32f_s32f_normalize_a_orc_impl that operates in place.
 *
 * vecBuffer   passed as both destination and source of the kernel.
 * scalar      divisor; note that the kernel receives its reciprocal
 *             (1/scalar), not the divisor itself.
 * num_points  forwarded unchanged to the kernel.
 */
static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer, const float scalar, unsigned int num_points){
  const float reciprocal = (float)(1.0 / scalar);
  volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, reciprocal, num_points);
}
160 #endif /* LV_HAVE_ORC */
161 
162 #endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
163 
164 #ifndef INCLUDED_volk_32f_s32f_normalize_u_H
165 #define INCLUDED_volk_32f_s32f_normalize_u_H
166 
167 #include <inttypes.h>
168 #include <stdio.h>
169 #ifdef LV_HAVE_AVX
170 #include <immintrin.h>
171 
172 static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer, const float scalar, unsigned int num_points){
173  unsigned int number = 0;
174  float* inputPtr = vecBuffer;
175 
176  const float invScalar = 1.0 / scalar;
177  __m256 vecScalar = _mm256_set1_ps(invScalar);
178 
179  __m256 input1;
180 
181  const uint64_t eighthPoints = num_points / 8;
182  for(;number < eighthPoints; number++){
183 
184  input1 = _mm256_loadu_ps(inputPtr);
185 
186  input1 = _mm256_mul_ps(input1, vecScalar);
187 
188  _mm256_storeu_ps(inputPtr, input1);
189 
190  inputPtr += 8;
191  }
192 
193  number = eighthPoints*8;
194  for(; number < num_points; number++){
195  *inputPtr *= invScalar;
196  inputPtr++;
197  }
198 }
199 #endif /* LV_HAVE_AVX */
200 
201 
202 #endif /* INCLUDED_volk_32f_s32f_normalize_u_H */
static void volk_32f_s32f_normalize_generic(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:142
static void volk_32f_s32f_normalize_u_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:172
static void volk_32f_s32f_normalize_a_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:79
static void volk_32f_s32f_normalize_a_sse(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:111