Vector Optimized Library of Kernels  2.5.1
Architecture-tuned implementations of math kernels
volk_16ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
57 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
58 
59 #include <inttypes.h>
60 #include <stdio.h>
61 #include <volk/volk_common.h>
62 
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65 
66 static inline void
67 volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
68  const lv_16sc_t* complexVector,
69  const float scalar,
70  unsigned int num_points)
71 {
72  float* iBufferPtr = iBuffer;
73 
74  unsigned int number = 0;
75  const unsigned int eighthPoints = num_points / 8;
76 
77  __m256 iFloatValue;
78 
79  const float iScalar = 1.0 / scalar;
80  __m256 invScalar = _mm256_set1_ps(iScalar);
81  __m256i complexVal, iIntVal;
82  __m128i complexVal128;
83  int8_t* complexVectorPtr = (int8_t*)complexVector;
84 
85  __m256i moveMask = _mm256_set_epi8(0x80,
86  0x80,
87  0x80,
88  0x80,
89  0x80,
90  0x80,
91  0x80,
92  0x80,
93  13,
94  12,
95  9,
96  8,
97  5,
98  4,
99  1,
100  0,
101  0x80,
102  0x80,
103  0x80,
104  0x80,
105  0x80,
106  0x80,
107  0x80,
108  0x80,
109  13,
110  12,
111  9,
112  8,
113  5,
114  4,
115  1,
116  0);
117 
118  for (; number < eighthPoints; number++) {
119  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
120  complexVectorPtr += 32;
121  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
122  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
123  complexVal128 = _mm256_extracti128_si256(complexVal, 0);
124 
125  iIntVal = _mm256_cvtepi16_epi32(complexVal128);
126  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
127 
128  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
129 
130  _mm256_store_ps(iBufferPtr, iFloatValue);
131 
132  iBufferPtr += 8;
133  }
134 
135  number = eighthPoints * 8;
136  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
137  for (; number < num_points; number++) {
138  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
139  sixteenTComplexVectorPtr++;
140  }
141 }
142 #endif /* LV_HAVE_AVX2 */
143 
144 #ifdef LV_HAVE_SSE4_1
145 #include <smmintrin.h>
146 
147 static inline void
148 volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
149  const lv_16sc_t* complexVector,
150  const float scalar,
151  unsigned int num_points)
152 {
153  float* iBufferPtr = iBuffer;
154 
155  unsigned int number = 0;
156  const unsigned int quarterPoints = num_points / 4;
157 
158  __m128 iFloatValue;
159 
160  const float iScalar = 1.0 / scalar;
161  __m128 invScalar = _mm_set_ps1(iScalar);
162  __m128i complexVal, iIntVal;
163  int8_t* complexVectorPtr = (int8_t*)complexVector;
164 
165  __m128i moveMask = _mm_set_epi8(
166  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
167 
168  for (; number < quarterPoints; number++) {
169  complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
170  complexVectorPtr += 16;
171  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
172 
173  iIntVal = _mm_cvtepi16_epi32(complexVal);
174  iFloatValue = _mm_cvtepi32_ps(iIntVal);
175 
176  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
177 
178  _mm_store_ps(iBufferPtr, iFloatValue);
179 
180  iBufferPtr += 4;
181  }
182 
183  number = quarterPoints * 4;
184  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
185  for (; number < num_points; number++) {
186  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
187  sixteenTComplexVectorPtr++;
188  }
189 }
190 #endif /* LV_HAVE_SSE4_1 */
191 
192 #ifdef LV_HAVE_SSE
193 #include <xmmintrin.h>
194 
195 static inline void
197  const lv_16sc_t* complexVector,
198  const float scalar,
199  unsigned int num_points)
200 {
201  float* iBufferPtr = iBuffer;
202 
203  unsigned int number = 0;
204  const unsigned int quarterPoints = num_points / 4;
205  __m128 iValue;
206 
207  const float iScalar = 1.0 / scalar;
208  __m128 invScalar = _mm_set_ps1(iScalar);
209  int16_t* complexVectorPtr = (int16_t*)complexVector;
210 
211  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
212 
213  for (; number < quarterPoints; number++) {
214  floatBuffer[0] = (float)(*complexVectorPtr);
215  complexVectorPtr += 2;
216  floatBuffer[1] = (float)(*complexVectorPtr);
217  complexVectorPtr += 2;
218  floatBuffer[2] = (float)(*complexVectorPtr);
219  complexVectorPtr += 2;
220  floatBuffer[3] = (float)(*complexVectorPtr);
221  complexVectorPtr += 2;
222 
223  iValue = _mm_load_ps(floatBuffer);
224 
225  iValue = _mm_mul_ps(iValue, invScalar);
226 
227  _mm_store_ps(iBufferPtr, iValue);
228 
229  iBufferPtr += 4;
230  }
231 
232  number = quarterPoints * 4;
233  complexVectorPtr = (int16_t*)&complexVector[number];
234  for (; number < num_points; number++) {
235  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
236  complexVectorPtr++;
237  }
238 }
239 #endif /* LV_HAVE_SSE */
240 
241 #ifdef LV_HAVE_GENERIC
242 static inline void
244  const lv_16sc_t* complexVector,
245  const float scalar,
246  unsigned int num_points)
247 {
248  unsigned int number = 0;
249  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
250  float* iBufferPtr = iBuffer;
251  const float invScalar = 1.0 / scalar;
252  for (number = 0; number < num_points; number++) {
253  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
254  complexVectorPtr++;
255  }
256 }
257 #endif /* LV_HAVE_GENERIC */
258 
259 
260 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
261 
262 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
263 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
264 
265 #include <inttypes.h>
266 #include <stdio.h>
267 #include <volk/volk_common.h>
268 
269 #ifdef LV_HAVE_AVX2
270 #include <immintrin.h>
271 
272 static inline void
273 volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
274  const lv_16sc_t* complexVector,
275  const float scalar,
276  unsigned int num_points)
277 {
278  float* iBufferPtr = iBuffer;
279 
280  unsigned int number = 0;
281  const unsigned int eighthPoints = num_points / 8;
282 
283  __m256 iFloatValue;
284 
285  const float iScalar = 1.0 / scalar;
286  __m256 invScalar = _mm256_set1_ps(iScalar);
287  __m256i complexVal, iIntVal;
288  __m128i complexVal128;
289  int8_t* complexVectorPtr = (int8_t*)complexVector;
290 
291  __m256i moveMask = _mm256_set_epi8(0x80,
292  0x80,
293  0x80,
294  0x80,
295  0x80,
296  0x80,
297  0x80,
298  0x80,
299  13,
300  12,
301  9,
302  8,
303  5,
304  4,
305  1,
306  0,
307  0x80,
308  0x80,
309  0x80,
310  0x80,
311  0x80,
312  0x80,
313  0x80,
314  0x80,
315  13,
316  12,
317  9,
318  8,
319  5,
320  4,
321  1,
322  0);
323 
324  for (; number < eighthPoints; number++) {
325  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326  complexVectorPtr += 32;
327  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
328  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
329  complexVal128 = _mm256_extracti128_si256(complexVal, 0);
330 
331  iIntVal = _mm256_cvtepi16_epi32(complexVal128);
332  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
333 
334  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
335 
336  _mm256_storeu_ps(iBufferPtr, iFloatValue);
337 
338  iBufferPtr += 8;
339  }
340 
341  number = eighthPoints * 8;
342  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
343  for (; number < num_points; number++) {
344  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
345  sixteenTComplexVectorPtr++;
346  }
347 }
348 #endif /* LV_HAVE_AVX2 */
349 
350 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:243
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:196
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
short complex lv_16sc_t
Definition: volk_complex.h:62