Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_8ic_deinterleave_real_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
53 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
54 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
55 
56 #include <inttypes.h>
57 #include <stdio.h>
58 
59 
60 #ifdef LV_HAVE_AVX2
61 #include <immintrin.h>
62 
63 static inline void
64 volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer, const lv_8sc_t* complexVector,
65  unsigned int num_points)
66 {
67  unsigned int number = 0;
68  const int8_t* complexVectorPtr = (int8_t*)complexVector;
69  int16_t* iBufferPtr = iBuffer;
70  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
71  __m256i complexVal, outputVal;
72  __m128i outputVal0;
73 
74  unsigned int sixteenthPoints = num_points / 16;
75 
76  for(number = 0; number < sixteenthPoints; number++){
77  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
78 
79  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
80  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
81 
82  outputVal0 = _mm256_extractf128_si256(complexVal, 0);
83 
84  outputVal = _mm256_cvtepi8_epi16(outputVal0);
85  outputVal = _mm256_slli_epi16(outputVal, 7);
86 
87  _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
88 
89  iBufferPtr += 16;
90  }
91 
92  number = sixteenthPoints * 16;
93  for(; number < num_points; number++){
94  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
95  complexVectorPtr++;
96  }
97 }
98 #endif /* LV_HAVE_AVX2 */
99 
100 #ifdef LV_HAVE_SSE4_1
101 #include <smmintrin.h>
102 
103 static inline void
104 volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, const lv_8sc_t* complexVector,
105  unsigned int num_points)
106 {
107  unsigned int number = 0;
108  const int8_t* complexVectorPtr = (int8_t*)complexVector;
109  int16_t* iBufferPtr = iBuffer;
110  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
111  __m128i complexVal, outputVal;
112 
113  unsigned int eighthPoints = num_points / 8;
114 
115  for(number = 0; number < eighthPoints; number++){
116  complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
117 
118  complexVal = _mm_shuffle_epi8(complexVal, moveMask);
119 
120  outputVal = _mm_cvtepi8_epi16(complexVal);
121  outputVal = _mm_slli_epi16(outputVal, 7);
122 
123  _mm_store_si128((__m128i*)iBufferPtr, outputVal);
124  iBufferPtr += 8;
125  }
126 
127  number = eighthPoints * 8;
128  for(; number < num_points; number++){
129  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
130  complexVectorPtr++;
131  }
132 }
133 #endif /* LV_HAVE_SSE4_1 */
134 
135 
136 #ifdef LV_HAVE_AVX
137 #include <immintrin.h>
138 
139 static inline void
140 volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer, const lv_8sc_t* complexVector,
141  unsigned int num_points)
142 {
143  unsigned int number = 0;
144  const int8_t* complexVectorPtr = (int8_t*)complexVector;
145  int16_t* iBufferPtr = iBuffer;
146  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
147  __m256i complexVal, outputVal;
148  __m128i complexVal1, complexVal0, outputVal1, outputVal0;
149 
150  unsigned int sixteenthPoints = num_points / 16;
151 
152  for(number = 0; number < sixteenthPoints; number++){
153  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
154 
155  complexVal1 = _mm256_extractf128_si256(complexVal, 1);
156  complexVal0 = _mm256_extractf128_si256(complexVal, 0);
157 
158  outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
159  outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
160 
161  outputVal1 = _mm_cvtepi8_epi16(outputVal1);
162  outputVal1 = _mm_slli_epi16(outputVal1, 7);
163  outputVal0 = _mm_cvtepi8_epi16(outputVal0);
164  outputVal0 = _mm_slli_epi16(outputVal0, 7);
165 
166  __m256i dummy = _mm256_setzero_si256();
167  outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
168  outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
169  _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
170 
171  iBufferPtr += 16;
172  }
173 
174  number = sixteenthPoints * 16;
175  for(; number < num_points; number++){
176  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
177  complexVectorPtr++;
178  }
179 }
180 #endif /* LV_HAVE_AVX */
181 
182 
183 #ifdef LV_HAVE_GENERIC
184 
185 static inline void
186 volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer, const lv_8sc_t* complexVector,
187  unsigned int num_points)
188 {
189  unsigned int number = 0;
190  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
191  int16_t* iBufferPtr = iBuffer;
192  for(number = 0; number < num_points; number++){
193  *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
194  complexVectorPtr++;
195  }
196 }
197 #endif /* LV_HAVE_GENERIC */
198 
199 
200 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
201 
202 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
203 #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
204 
205 #include <inttypes.h>
206 #include <stdio.h>
207 
208 
209 #ifdef LV_HAVE_AVX2
210 #include <immintrin.h>
211 
212 static inline void
213 volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer, const lv_8sc_t* complexVector,
214  unsigned int num_points)
215 {
216  unsigned int number = 0;
217  const int8_t* complexVectorPtr = (int8_t*)complexVector;
218  int16_t* iBufferPtr = iBuffer;
219  __m256i moveMask = _mm256_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
220  __m256i complexVal, outputVal;
221  __m128i outputVal0;
222 
223  unsigned int sixteenthPoints = num_points / 16;
224 
225  for(number = 0; number < sixteenthPoints; number++){
226  complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
227 
228  complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
229  complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
230 
231  outputVal0 = _mm256_extractf128_si256(complexVal, 0);
232 
233  outputVal = _mm256_cvtepi8_epi16(outputVal0);
234  outputVal = _mm256_slli_epi16(outputVal, 7);
235 
236  _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
237 
238  iBufferPtr += 16;
239  }
240 
241  number = sixteenthPoints * 16;
242  for(; number < num_points; number++){
243  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
244  complexVectorPtr++;
245  }
246 }
247 #endif /* LV_HAVE_AVX2 */
248 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:140
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:186
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57