Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
71 #define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
72 
73 #include <inttypes.h>
74 #include <stdio.h>
75 
76 #ifdef LV_HAVE_AVX2
77 #include <immintrin.h>
78 
79 static inline void
80 volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer, const lv_32fc_t* complexVector,
81  unsigned int num_points)
82 {
83  unsigned int number = 0;
84  const unsigned int eighthPoints = num_points / 8;
85 
86  const float* complexVectorPtr = (const float*)complexVector;
87  float* iBufferPtr = iBuffer;
88 
89  __m256 cplxValue1, cplxValue2;
90  __m256 iValue;
91  __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
92  for(;number < eighthPoints; number++){
93 
94  cplxValue1 = _mm256_load_ps(complexVectorPtr);
95  complexVectorPtr += 8;
96 
97  cplxValue2 = _mm256_load_ps(complexVectorPtr);
98  complexVectorPtr += 8;
99 
100  // Arrange in i1i2i3i4 format
101  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
102  iValue = _mm256_permutevar8x32_ps(iValue,idx);
103 
104  _mm256_store_ps(iBufferPtr, iValue);
105 
106  iBufferPtr += 8;
107  }
108 
109  number = eighthPoints * 8;
110  for(; number < num_points; number++){
111  *iBufferPtr++ = *complexVectorPtr++;
112  complexVectorPtr++;
113  }
114 }
115 #endif /* LV_HAVE_AVX2 */
116 
117 #ifdef LV_HAVE_SSE
118 #include <xmmintrin.h>
119 
120 static inline void
121 volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const lv_32fc_t* complexVector,
122  unsigned int num_points)
123 {
124  unsigned int number = 0;
125  const unsigned int quarterPoints = num_points / 4;
126 
127  const float* complexVectorPtr = (const float*)complexVector;
128  float* iBufferPtr = iBuffer;
129 
130  __m128 cplxValue1, cplxValue2, iValue;
131  for(;number < quarterPoints; number++){
132 
133  cplxValue1 = _mm_load_ps(complexVectorPtr);
134  complexVectorPtr += 4;
135 
136  cplxValue2 = _mm_load_ps(complexVectorPtr);
137  complexVectorPtr += 4;
138 
139  // Arrange in i1i2i3i4 format
140  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
141 
142  _mm_store_ps(iBufferPtr, iValue);
143 
144  iBufferPtr += 4;
145  }
146 
147  number = quarterPoints * 4;
148  for(; number < num_points; number++){
149  *iBufferPtr++ = *complexVectorPtr++;
150  complexVectorPtr++;
151  }
152 }
153 #endif /* LV_HAVE_SSE */
154 
155 
156 #ifdef LV_HAVE_GENERIC
157 
158 static inline void
159 volk_32fc_deinterleave_real_32f_generic(float* iBuffer, const lv_32fc_t* complexVector,
160  unsigned int num_points)
161 {
162  unsigned int number = 0;
163  const float* complexVectorPtr = (float*)complexVector;
164  float* iBufferPtr = iBuffer;
165  for(number = 0; number < num_points; number++){
166  *iBufferPtr++ = *complexVectorPtr++;
167  complexVectorPtr++;
168  }
169 }
170 #endif /* LV_HAVE_GENERIC */
171 
172 
173 #ifdef LV_HAVE_NEON
174 #include <arm_neon.h>
175 
/*
 * NEON kernel: copies the first float of every interleaved
 * (real, imaginary) pair in complexVector into iBuffer.
 *
 * iBuffer       - output, receives num_points floats
 * complexVector - input, read as 2 * num_points interleaved floats
 * num_points    - number of complex samples to process
 *
 * Samples beyond the last full group of four are handled one at a time
 * by the scalar loop at the end.
 */
static inline void
volk_32fc_deinterleave_real_32f_neon(float* iBuffer, const lv_32fc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    // Cast to const float* so the input's const qualifier is preserved
    // (a plain float* cast would silently discard it).
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        // vld2q_f32 de-interleaves eight floats: val[0] receives the
        // even-indexed elements, val[1] the odd-indexed ones.
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
    }

    // Scalar tail: keep one float, skip the next.
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++;
    }
}
198 #endif /* LV_HAVE_NEON */
199 
200 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
201 
202 
203 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
204 #define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
205 
206 #include <inttypes.h>
207 #include <stdio.h>
208 
209 #ifdef LV_HAVE_AVX2
210 #include <immintrin.h>
211 
212 static inline void
213 volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer, const lv_32fc_t* complexVector,
214  unsigned int num_points)
215 {
216  unsigned int number = 0;
217  const unsigned int eighthPoints = num_points / 8;
218 
219  const float* complexVectorPtr = (const float*)complexVector;
220  float* iBufferPtr = iBuffer;
221 
222  __m256 cplxValue1, cplxValue2;
223  __m256 iValue;
224  __m256i idx = _mm256_set_epi32(7,6,3,2,5,4,1,0);
225  for(;number < eighthPoints; number++){
226 
227  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228  complexVectorPtr += 8;
229 
230  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231  complexVectorPtr += 8;
232 
233  // Arrange in i1i2i3i4 format
234  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
235  iValue = _mm256_permutevar8x32_ps(iValue,idx);
236 
237  _mm256_storeu_ps(iBufferPtr, iValue);
238 
239  iBufferPtr += 8;
240  }
241 
242  number = eighthPoints * 8;
243  for(; number < num_points; number++){
244  *iBufferPtr++ = *complexVectorPtr++;
245  complexVectorPtr++;
246  }
247 }
248 #endif /* LV_HAVE_AVX2 */
249 
250 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */
static void volk_32fc_deinterleave_real_32f_generic(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:159
static void volk_32fc_deinterleave_real_32f_neon(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:177
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_deinterleave_real_32f_a_sse(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:121