Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_16ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
57 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
58 
59 #include <volk/volk_common.h>
60 #include <inttypes.h>
61 #include <stdio.h>
62 
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65 
66 static inline void
67 volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer, const lv_16sc_t* complexVector,
68  const float scalar, unsigned int num_points)
69 {
70  float* iBufferPtr = iBuffer;
71 
72  unsigned int number = 0;
73  const unsigned int eighthPoints = num_points / 8;
74 
75  __m256 iFloatValue;
76 
77  const float iScalar= 1.0 / scalar;
78  __m256 invScalar = _mm256_set1_ps(iScalar);
79  __m256i complexVal, iIntVal;
80  __m128i complexVal128;
81  int8_t* complexVectorPtr = (int8_t*)complexVector;
82 
83  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
84 
85  for(;number < eighthPoints; number++){
86  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
87  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
88  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
89  complexVal128 = _mm256_extracti128_si256(complexVal, 0);
90 
91  iIntVal = _mm256_cvtepi16_epi32(complexVal128);
92  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
93 
94  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
95 
96  _mm256_store_ps(iBufferPtr, iFloatValue);
97 
98  iBufferPtr += 8;
99  }
100 
101  number = eighthPoints * 8;
102  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
103  for(; number < num_points; number++){
104  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
105  sixteenTComplexVectorPtr++;
106  }
107 
108 }
109 #endif /* LV_HAVE_AVX2 */
110 
111 #ifdef LV_HAVE_SSE4_1
112 #include <smmintrin.h>
113 
114 static inline void
115 volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, const lv_16sc_t* complexVector,
116  const float scalar, unsigned int num_points)
117 {
118  float* iBufferPtr = iBuffer;
119 
120  unsigned int number = 0;
121  const unsigned int quarterPoints = num_points / 4;
122 
123  __m128 iFloatValue;
124 
125  const float iScalar= 1.0 / scalar;
126  __m128 invScalar = _mm_set_ps1(iScalar);
127  __m128i complexVal, iIntVal;
128  int8_t* complexVectorPtr = (int8_t*)complexVector;
129 
130  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
131 
132  for(;number < quarterPoints; number++){
133  complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
134  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
135 
136  iIntVal = _mm_cvtepi16_epi32(complexVal);
137  iFloatValue = _mm_cvtepi32_ps(iIntVal);
138 
139  iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
140 
141  _mm_store_ps(iBufferPtr, iFloatValue);
142 
143  iBufferPtr += 4;
144  }
145 
146  number = quarterPoints * 4;
147  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
148  for(; number < num_points; number++){
149  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
150  sixteenTComplexVectorPtr++;
151  }
152 
153 }
154 #endif /* LV_HAVE_SSE4_1 */
155 
156 #ifdef LV_HAVE_SSE
157 #include <xmmintrin.h>
158 
159 static inline void
160 volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, const lv_16sc_t* complexVector,
161  const float scalar, unsigned int num_points)
162 {
163  float* iBufferPtr = iBuffer;
164 
165  unsigned int number = 0;
166  const unsigned int quarterPoints = num_points / 4;
167  __m128 iValue;
168 
169  const float iScalar = 1.0/scalar;
170  __m128 invScalar = _mm_set_ps1(iScalar);
171  int16_t* complexVectorPtr = (int16_t*)complexVector;
172 
173  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
174 
175  for(;number < quarterPoints; number++){
176  floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
177  floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
178  floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
179  floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
180 
181  iValue = _mm_load_ps(floatBuffer);
182 
183  iValue = _mm_mul_ps(iValue, invScalar);
184 
185  _mm_store_ps(iBufferPtr, iValue);
186 
187  iBufferPtr += 4;
188  }
189 
190  number = quarterPoints * 4;
191  complexVectorPtr = (int16_t*)&complexVector[number];
192  for(; number < num_points; number++){
193  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
194  complexVectorPtr++;
195  }
196 
197 }
198 #endif /* LV_HAVE_SSE */
199 
200 #ifdef LV_HAVE_GENERIC
201 static inline void
202 volk_16ic_s32f_deinterleave_real_32f_generic(float* iBuffer, const lv_16sc_t* complexVector,
203  const float scalar, unsigned int num_points)
204 {
205  unsigned int number = 0;
206  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
207  float* iBufferPtr = iBuffer;
208  const float invScalar = 1.0 / scalar;
209  for(number = 0; number < num_points; number++){
210  *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
211  complexVectorPtr++;
212  }
213 }
214 #endif /* LV_HAVE_GENERIC */
215 
216 
217 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
218 
219 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
220 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
221 
222 #include <volk/volk_common.h>
223 #include <inttypes.h>
224 #include <stdio.h>
225 
226 #ifdef LV_HAVE_AVX2
227 #include <immintrin.h>
228 
229 static inline void
230 volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, const lv_16sc_t* complexVector,
231  const float scalar, unsigned int num_points)
232 {
233  float* iBufferPtr = iBuffer;
234 
235  unsigned int number = 0;
236  const unsigned int eighthPoints = num_points / 8;
237 
238  __m256 iFloatValue;
239 
240  const float iScalar= 1.0 / scalar;
241  __m256 invScalar = _mm256_set1_ps(iScalar);
242  __m256i complexVal, iIntVal;
243  __m128i complexVal128;
244  int8_t* complexVectorPtr = (int8_t*)complexVector;
245 
246  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
247 
248  for(;number < eighthPoints; number++){
249  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
250  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
251  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
252  complexVal128 = _mm256_extracti128_si256(complexVal, 0);
253 
254  iIntVal = _mm256_cvtepi16_epi32(complexVal128);
255  iFloatValue = _mm256_cvtepi32_ps(iIntVal);
256 
257  iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
258 
259  _mm256_storeu_ps(iBufferPtr, iFloatValue);
260 
261  iBufferPtr += 8;
262  }
263 
264  number = eighthPoints * 8;
265  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
266  for(; number < num_points; number++){
267  *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
268  sixteenTComplexVectorPtr++;
269  }
270 
271 }
272 #endif /* LV_HAVE_AVX2 */
273 
274 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:160
short complex lv_16sc_t
Definition: volk_complex.h:58
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:202