Vector Optimized Library of Kernels  2.5.0
Architecture-tuned implementations of math kernels
volk_8ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
55 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
56 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
57 
58 #include <inttypes.h>
59 #include <stdio.h>
60 #include <volk/volk_common.h>
61 
62 #ifdef LV_HAVE_AVX2
63 #include <immintrin.h>
64 
65 static inline void
66 volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
67  const lv_8sc_t* complexVector,
68  const float scalar,
69  unsigned int num_points)
70 {
71  float* iBufferPtr = iBuffer;
72 
73  unsigned int number = 0;
74  const unsigned int sixteenthPoints = num_points / 16;
75  __m256 iFloatValue;
76 
77  const float iScalar = 1.0 / scalar;
78  __m256 invScalar = _mm256_set1_ps(iScalar);
79  __m256i complexVal, iIntVal;
80  int8_t* complexVectorPtr = (int8_t*)complexVector;
81 
82  __m256i moveMask = _mm256_set_epi8(0x80,
83  0x80,
84  0x80,
85  0x80,
86  0x80,
87  0x80,
88  0x80,
89  0x80,
90  14,
91  12,
92  10,
93  8,
94  6,
95  4,
96  2,
97  0,
98  0x80,
99  0x80,
100  0x80,
101  0x80,
102  0x80,
103  0x80,
104  0x80,
105  0x80,
106  14,
107  12,
108  10,
109  8,
110  6,
111  4,
112  2,
113  0);
114  for (; number < sixteenthPoints; number++) {
115  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
116  complexVectorPtr += 32;
117  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
118 
119  iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
120  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
121  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
122  _mm256_store_ps(iBufferPtr, iFloatValue);
123  iBufferPtr += 8;
124 
125  complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
126  iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
127  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
128  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
129  _mm256_store_ps(iBufferPtr, iFloatValue);
130  iBufferPtr += 8;
131  }
132 
133  number = sixteenthPoints * 16;
134  for (; number < num_points; number++) {
135  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
136  complexVectorPtr++;
137  }
138 }
139 #endif /* LV_HAVE_AVX2 */
140 
141 
142 #ifdef LV_HAVE_SSE4_1
143 #include <smmintrin.h>
144 
145 static inline void
146 volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
147  const lv_8sc_t* complexVector,
148  const float scalar,
149  unsigned int num_points)
150 {
151  float* iBufferPtr = iBuffer;
152 
153  unsigned int number = 0;
154  const unsigned int eighthPoints = num_points / 8;
155  __m128 iFloatValue;
156 
157  const float iScalar = 1.0 / scalar;
158  __m128 invScalar = _mm_set_ps1(iScalar);
159  __m128i complexVal, iIntVal;
160  int8_t* complexVectorPtr = (int8_t*)complexVector;
161 
162  __m128i moveMask = _mm_set_epi8(
163  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
164 
165  for (; number < eighthPoints; number++) {
166  complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
167  complexVectorPtr += 16;
168  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
169 
170  iIntVal = _mm_cvtepi8_epi32(complexVal);
171  iFloatValue = _mm_cvtepi32_ps(iIntVal);
172 
173  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
174 
175  _mm_store_ps(iBufferPtr, iFloatValue);
176 
177  iBufferPtr += 4;
178 
179  complexVal = _mm_srli_si128(complexVal, 4);
180  iIntVal = _mm_cvtepi8_epi32(complexVal);
181  iFloatValue = _mm_cvtepi32_ps(iIntVal);
182 
183  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
184 
185  _mm_store_ps(iBufferPtr, iFloatValue);
186 
187  iBufferPtr += 4;
188  }
189 
190  number = eighthPoints * 8;
191  for (; number < num_points; number++) {
192  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
193  complexVectorPtr++;
194  }
195 }
196 #endif /* LV_HAVE_SSE4_1 */
197 
198 
199 #ifdef LV_HAVE_SSE
200 #include <xmmintrin.h>
201 
202 static inline void
204  const lv_8sc_t* complexVector,
205  const float scalar,
206  unsigned int num_points)
207 {
208  float* iBufferPtr = iBuffer;
209 
210  unsigned int number = 0;
211  const unsigned int quarterPoints = num_points / 4;
212  __m128 iValue;
213 
214  const float iScalar = 1.0 / scalar;
215  __m128 invScalar = _mm_set_ps1(iScalar);
216  int8_t* complexVectorPtr = (int8_t*)complexVector;
217 
218  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
219 
220  for (; number < quarterPoints; number++) {
221  floatBuffer[0] = (float)(*complexVectorPtr);
222  complexVectorPtr += 2;
223  floatBuffer[1] = (float)(*complexVectorPtr);
224  complexVectorPtr += 2;
225  floatBuffer[2] = (float)(*complexVectorPtr);
226  complexVectorPtr += 2;
227  floatBuffer[3] = (float)(*complexVectorPtr);
228  complexVectorPtr += 2;
229 
230  iValue = _mm_load_ps(floatBuffer);
231 
232  iValue = _mm_mul_ps(iValue, invScalar);
233 
234  _mm_store_ps(iBufferPtr, iValue);
235 
236  iBufferPtr += 4;
237  }
238 
239  number = quarterPoints * 4;
240  for (; number < num_points; number++) {
241  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
242  complexVectorPtr++;
243  }
244 }
245 #endif /* LV_HAVE_SSE */
246 
247 
248 #ifdef LV_HAVE_GENERIC
249 
250 static inline void
252  const lv_8sc_t* complexVector,
253  const float scalar,
254  unsigned int num_points)
255 {
256  unsigned int number = 0;
257  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
258  float* iBufferPtr = iBuffer;
259  const float invScalar = 1.0 / scalar;
260  for (number = 0; number < num_points; number++) {
261  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
262  complexVectorPtr++;
263  }
264 }
265 #endif /* LV_HAVE_GENERIC */
266 
267 
268 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
269 
270 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
271 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
272 
273 #include <inttypes.h>
274 #include <stdio.h>
275 #include <volk/volk_common.h>
276 
277 #ifdef LV_HAVE_AVX2
278 #include <immintrin.h>
279 
280 static inline void
281 volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
282  const lv_8sc_t* complexVector,
283  const float scalar,
284  unsigned int num_points)
285 {
286  float* iBufferPtr = iBuffer;
287 
288  unsigned int number = 0;
289  const unsigned int sixteenthPoints = num_points / 16;
290  __m256 iFloatValue;
291 
292  const float iScalar = 1.0 / scalar;
293  __m256 invScalar = _mm256_set1_ps(iScalar);
294  __m256i complexVal, iIntVal;
295  __m128i hcomplexVal;
296  int8_t* complexVectorPtr = (int8_t*)complexVector;
297 
298  __m256i moveMask = _mm256_set_epi8(0x80,
299  0x80,
300  0x80,
301  0x80,
302  0x80,
303  0x80,
304  0x80,
305  0x80,
306  14,
307  12,
308  10,
309  8,
310  6,
311  4,
312  2,
313  0,
314  0x80,
315  0x80,
316  0x80,
317  0x80,
318  0x80,
319  0x80,
320  0x80,
321  0x80,
322  14,
323  12,
324  10,
325  8,
326  6,
327  4,
328  2,
329  0);
330 
331  for (; number < sixteenthPoints; number++) {
332  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
333  complexVectorPtr += 32;
334  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
335 
336  hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
337  iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
338  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
339 
340  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
341 
342  _mm256_storeu_ps(iBufferPtr, iFloatValue);
343 
344  iBufferPtr += 8;
345 
346  hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
347  iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
348  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
349 
350  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
351 
352  _mm256_storeu_ps(iBufferPtr, iFloatValue);
353 
354  iBufferPtr += 8;
355  }
356 
357  number = sixteenthPoints * 16;
358  for (; number < num_points; number++) {
359  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
360  complexVectorPtr++;
361  }
362 }
363 #endif /* LV_HAVE_AVX2 */
364 
365 
366 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:251
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:203
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61