Vector Optimized Library of Kernels 2.0
Architecture-tuned implementations of math kernels
volk_8ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
55 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
56 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
57 
58 #include <volk/volk_common.h>
59 #include <inttypes.h>
60 #include <stdio.h>
61 
62 #ifdef LV_HAVE_AVX2
63 #include <immintrin.h>
64 
65 static inline void
66 volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer, const lv_8sc_t* complexVector,
67  const float scalar, unsigned int num_points)
68 {
69  float* iBufferPtr = iBuffer;
70 
71  unsigned int number = 0;
72  const unsigned int sixteenthPoints = num_points / 16;
73  __m256 iFloatValue;
74 
75  const float iScalar= 1.0 / scalar;
76  __m256 invScalar = _mm256_set1_ps(iScalar);
77  __m256i complexVal, iIntVal;
78  int8_t* complexVectorPtr = (int8_t*)complexVector;
79 
80  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
81  14, 12, 10, 8, 6, 4, 2, 0,
82  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
83  14, 12, 10, 8, 6, 4, 2, 0);
84  for(;number < sixteenthPoints; number++){
85  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
86  complexVectorPtr += 32;
87  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
88 
89  iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
90  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
91  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
92  _mm256_store_ps(iBufferPtr, iFloatValue);
93  iBufferPtr += 8;
94 
95  complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
96  iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
97  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
98  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
99  _mm256_store_ps(iBufferPtr, iFloatValue);
100  iBufferPtr += 8;
101  }
102 
103  number = sixteenthPoints * 16;
104  for(; number < num_points; number++){
105  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
106  complexVectorPtr++;
107  }
108 
109 }
110 #endif /* LV_HAVE_AVX2 */
111 
112 
113 #ifdef LV_HAVE_SSE4_1
114 #include <smmintrin.h>
115 
116 static inline void
117 volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_8sc_t* complexVector,
118  const float scalar, unsigned int num_points)
119 {
120  float* iBufferPtr = iBuffer;
121 
122  unsigned int number = 0;
123  const unsigned int eighthPoints = num_points / 8;
124  __m128 iFloatValue;
125 
126  const float iScalar= 1.0 / scalar;
127  __m128 invScalar = _mm_set_ps1(iScalar);
128  __m128i complexVal, iIntVal;
129  int8_t* complexVectorPtr = (int8_t*)complexVector;
130 
131  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
132 
133  for(;number < eighthPoints; number++){
134  complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
135  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
136 
137  iIntVal = _mm_cvtepi8_epi32(complexVal);
138  iFloatValue = _mm_cvtepi32_ps(iIntVal);
139 
140  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
141 
142  _mm_store_ps(iBufferPtr, iFloatValue);
143 
144  iBufferPtr += 4;
145 
146  complexVal = _mm_srli_si128(complexVal, 4);
147  iIntVal = _mm_cvtepi8_epi32(complexVal);
148  iFloatValue = _mm_cvtepi32_ps(iIntVal);
149 
150  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
151 
152  _mm_store_ps(iBufferPtr, iFloatValue);
153 
154  iBufferPtr += 4;
155  }
156 
157  number = eighthPoints * 8;
158  for(; number < num_points; number++){
159  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
160  complexVectorPtr++;
161  }
162 
163 }
164 #endif /* LV_HAVE_SSE4_1 */
165 
166 
167 #ifdef LV_HAVE_SSE
168 #include <xmmintrin.h>
169 
170 static inline void
171 volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_8sc_t* complexVector,
172  const float scalar, unsigned int num_points)
173 {
174  float* iBufferPtr = iBuffer;
175 
176  unsigned int number = 0;
177  const unsigned int quarterPoints = num_points / 4;
178  __m128 iValue;
179 
180  const float iScalar= 1.0 / scalar;
181  __m128 invScalar = _mm_set_ps1(iScalar);
182  int8_t* complexVectorPtr = (int8_t*)complexVector;
183 
184  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
185 
186  for(;number < quarterPoints; number++){
187  floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
188  floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
189  floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
190  floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
191 
192  iValue = _mm_load_ps(floatBuffer);
193 
194  iValue = _mm_mul_ps(iValue, invScalar);
195 
196  _mm_store_ps(iBufferPtr, iValue);
197 
198  iBufferPtr += 4;
199  }
200 
201  number = quarterPoints * 4;
202  for(; number < num_points; number++){
203  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
204  complexVectorPtr++;
205  }
206 
207 }
208 #endif /* LV_HAVE_SSE */
209 
210 
211 #ifdef LV_HAVE_GENERIC
212 
213 static inline void
214 volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer, const lv_8sc_t* complexVector,
215  const float scalar, unsigned int num_points)
216 {
217  unsigned int number = 0;
218  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
219  float* iBufferPtr = iBuffer;
220  const float invScalar = 1.0 / scalar;
221  for(number = 0; number < num_points; number++){
222  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
223  complexVectorPtr++;
224  }
225 }
226 #endif /* LV_HAVE_GENERIC */
227 
228 
229 
230 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
231 
232 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
233 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
234 
235 #include <volk/volk_common.h>
236 #include <inttypes.h>
237 #include <stdio.h>
238 
239 #ifdef LV_HAVE_AVX2
240 #include <immintrin.h>
241 
242 static inline void
243 volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, const lv_8sc_t* complexVector,
244  const float scalar, unsigned int num_points)
245 {
246  float* iBufferPtr = iBuffer;
247 
248  unsigned int number = 0;
249  const unsigned int sixteenthPoints = num_points / 16;
250  __m256 iFloatValue;
251 
252  const float iScalar= 1.0 / scalar;
253  __m256 invScalar = _mm256_set1_ps(iScalar);
254  __m256i complexVal, iIntVal;
255  __m128i hcomplexVal;
256  int8_t* complexVectorPtr = (int8_t*)complexVector;
257 
258  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
259 
260  for(;number < sixteenthPoints; number++){
261  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
262  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
263 
264  hcomplexVal = _mm256_extracti128_si256(complexVal,0);
265  iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
266  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
267 
268  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
269 
270  _mm256_storeu_ps(iBufferPtr, iFloatValue);
271 
272  iBufferPtr += 8;
273 
274  hcomplexVal = _mm256_extracti128_si256(complexVal,1);
275  iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
276  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
277 
278  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
279 
280  _mm256_storeu_ps(iBufferPtr, iFloatValue);
281 
282  iBufferPtr += 8;
283  }
284 
285  number = sixteenthPoints * 16;
286  for(; number < num_points; number++){
287  *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
288  complexVectorPtr++;
289  }
290 
291 }
292 #endif /* LV_HAVE_AVX2 */
293 
294 
295 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:171
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:214
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57