Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_32f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
74 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
79 #ifdef LV_HAVE_AVX
80 #include <immintrin.h>
81 static inline void
82 volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector,
83  unsigned int num_points)
84 {
85  const float* complexVectorPtr = (float*)complexVector;
86  float* iBufferPtr = iBuffer;
87  float* qBufferPtr = qBuffer;
88 
89  unsigned int number = 0;
90  // Mask for real and imaginary parts
91  const unsigned int eighthPoints = num_points / 8;
92  __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
93  for(;number < eighthPoints; number++){
94  cplxValue1 = _mm256_load_ps(complexVectorPtr);
95  complexVectorPtr += 8;
96 
97  cplxValue2 = _mm256_load_ps(complexVectorPtr);
98  complexVectorPtr += 8;
99 
100  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
101  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
102 
103  // Arrange in i1i2i3i4 format
104  iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
105  // Arrange in q1q2q3q4 format
106  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
107 
108  _mm256_store_ps(iBufferPtr, iValue);
109  _mm256_store_ps(qBufferPtr, qValue);
110 
111  iBufferPtr += 8;
112  qBufferPtr += 8;
113  }
114 
115  number = eighthPoints * 8;
116  for(; number < num_points; number++){
117  *iBufferPtr++ = *complexVectorPtr++;
118  *qBufferPtr++ = *complexVectorPtr++;
119  }
120 }
121 #endif /* LV_HAVE_AVX */
122 
123 #ifdef LV_HAVE_SSE
124 #include <xmmintrin.h>
125 
126 static inline void
127 volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector,
128  unsigned int num_points)
129 {
130  const float* complexVectorPtr = (float*)complexVector;
131  float* iBufferPtr = iBuffer;
132  float* qBufferPtr = qBuffer;
133 
134  unsigned int number = 0;
135  const unsigned int quarterPoints = num_points / 4;
136  __m128 cplxValue1, cplxValue2, iValue, qValue;
137  for(;number < quarterPoints; number++){
138  cplxValue1 = _mm_load_ps(complexVectorPtr);
139  complexVectorPtr += 4;
140 
141  cplxValue2 = _mm_load_ps(complexVectorPtr);
142  complexVectorPtr += 4;
143 
144  // Arrange in i1i2i3i4 format
145  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
146  // Arrange in q1q2q3q4 format
147  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
148 
149  _mm_store_ps(iBufferPtr, iValue);
150  _mm_store_ps(qBufferPtr, qValue);
151 
152  iBufferPtr += 4;
153  qBufferPtr += 4;
154  }
155 
156  number = quarterPoints * 4;
157  for(; number < num_points; number++){
158  *iBufferPtr++ = *complexVectorPtr++;
159  *qBufferPtr++ = *complexVectorPtr++;
160  }
161 }
162 #endif /* LV_HAVE_SSE */
163 
164 
165 #ifdef LV_HAVE_NEON
166 #include <arm_neon.h>
167 
/*
 * Deinterleave num_points complex floats into separate real and imaginary
 * arrays using NEON.
 *
 * iBuffer        output: receives num_points real parts
 * qBuffer        output: receives num_points imaginary parts
 * complexVector  input: num_points complex values, read as interleaved floats
 * num_points     number of complex values to process
 */
static inline void
volk_32fc_deinterleave_32f_x2_neon(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector,
                                   unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarter_points = num_points / 4;
  /* Read the complex input as a flat float stream: re0, im0, re1, im1, ...
   * The cast keeps the const qualifier of the input. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;
  float32x4x2_t complexInput;

  /* Vector loop: 4 complex values (8 floats) per iteration. vld2q_f32 is a
   * de-interleaving load: val[0] gets the even-indexed floats (real parts),
   * val[1] the odd-indexed floats (imaginary parts). */
  for(number = 0; number < quarter_points; number++){
    complexInput = vld2q_f32(complexVectorPtr);
    vst1q_f32( iBufferPtr, complexInput.val[0] );
    vst1q_f32( qBufferPtr, complexInput.val[1] );
    complexVectorPtr += 8;
    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail: the remaining num_points % 4 values. */
  for(number = quarter_points*4; number < num_points; number++){
    *iBufferPtr++ = *complexVectorPtr++;
    *qBufferPtr++ = *complexVectorPtr++;
  }
}
193 #endif /* LV_HAVE_NEON */
194 
195 
196 #ifdef LV_HAVE_GENERIC
197 
198 static inline void
199 volk_32fc_deinterleave_32f_x2_generic(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector,
200  unsigned int num_points)
201 {
202  const float* complexVectorPtr = (float*)complexVector;
203  float* iBufferPtr = iBuffer;
204  float* qBufferPtr = qBuffer;
205  unsigned int number;
206  for(number = 0; number < num_points; number++){
207  *iBufferPtr++ = *complexVectorPtr++;
208  *qBufferPtr++ = *complexVectorPtr++;
209  }
210 }
211 #endif /* LV_HAVE_GENERIC */
212 
213 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
214 
215 
216 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
217 #define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
218 
219 #include <inttypes.h>
220 #include <stdio.h>
221 
222 #ifdef LV_HAVE_AVX
223 #include <immintrin.h>
224 static inline void
225 volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer, float* qBuffer, const lv_32fc_t* complexVector,
226  unsigned int num_points)
227 {
228  const float* complexVectorPtr = (float*)complexVector;
229  float* iBufferPtr = iBuffer;
230  float* qBufferPtr = qBuffer;
231 
232  unsigned int number = 0;
233  // Mask for real and imaginary parts
234  const unsigned int eighthPoints = num_points / 8;
235  __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
236  for(;number < eighthPoints; number++){
237  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
238  complexVectorPtr += 8;
239 
240  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
241  complexVectorPtr += 8;
242 
243  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
244  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
245 
246  // Arrange in i1i2i3i4 format
247  iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
248  // Arrange in q1q2q3q4 format
249  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
250 
251  _mm256_storeu_ps(iBufferPtr, iValue);
252  _mm256_storeu_ps(qBufferPtr, qValue);
253 
254  iBufferPtr += 8;
255  qBufferPtr += 8;
256  }
257 
258  number = eighthPoints * 8;
259  for(; number < num_points; number++){
260  *iBufferPtr++ = *complexVectorPtr++;
261  *qBufferPtr++ = *complexVectorPtr++;
262  }
263 }
264 #endif /* LV_HAVE_AVX */
265 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:169
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:127
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:225
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:199
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:82
float complex lv_32fc_t
Definition: volk_complex.h:61