Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_imag_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
71 #define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
72 
73 #include <inttypes.h>
74 #include <stdio.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void
80 volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer, const lv_32fc_t* complexVector,
81  unsigned int num_points)
82 {
83  unsigned int number = 0;
84  const unsigned int eighthPoints = num_points / 8;
85  const float* complexVectorPtr = (const float*)complexVector;
86  float* qBufferPtr = qBuffer;
87 
88  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
89  for(;number < eighthPoints; number++){
90 
91  cplxValue1 = _mm256_load_ps(complexVectorPtr);
92  complexVectorPtr += 8;
93 
94  cplxValue2 = _mm256_load_ps(complexVectorPtr);
95  complexVectorPtr += 8;
96 
97  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
98  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
99 
100  // Arrange in q1q2q3q4 format
101  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
102 
103  _mm256_store_ps(qBufferPtr, qValue);
104 
105  qBufferPtr += 8;
106  }
107 
108  number = eighthPoints * 8;
109  for(; number < num_points; number++){
110  complexVectorPtr++;
111  *qBufferPtr++ = *complexVectorPtr++;
112  }
113 }
114 #endif /* LV_HAVE_AVX */
115 
116 #ifdef LV_HAVE_SSE
117 #include <xmmintrin.h>
118 
119 static inline void
120 volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const lv_32fc_t* complexVector,
121  unsigned int num_points)
122 {
123  unsigned int number = 0;
124  const unsigned int quarterPoints = num_points / 4;
125 
126  const float* complexVectorPtr = (const float*)complexVector;
127  float* qBufferPtr = qBuffer;
128 
129  __m128 cplxValue1, cplxValue2, iValue;
130  for(;number < quarterPoints; number++){
131 
132  cplxValue1 = _mm_load_ps(complexVectorPtr);
133  complexVectorPtr += 4;
134 
135  cplxValue2 = _mm_load_ps(complexVectorPtr);
136  complexVectorPtr += 4;
137 
138  // Arrange in q1q2q3q4 format
139  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
140 
141  _mm_store_ps(qBufferPtr, iValue);
142 
143  qBufferPtr += 4;
144  }
145 
146  number = quarterPoints * 4;
147  for(; number < num_points; number++){
148  complexVectorPtr++;
149  *qBufferPtr++ = *complexVectorPtr++;
150  }
151 }
152 #endif /* LV_HAVE_SSE */
153 
154 #ifdef LV_HAVE_NEON
155 #include <arm_neon.h>
156 
/*
 * Copies the imaginary (Q) part of each complex sample in complexVector
 * into qBuffer, four samples per iteration, using NEON.
 *
 * qBuffer       - destination; num_points floats are written
 * complexVector - source; read as interleaved (real, imag) float pairs
 * num_points    - number of complex samples to process
 *
 * Samples left over after the last full group of four are handled one
 * at a time.
 */
static inline void
volk_32fc_deinterleave_imag_32f_neon(float* qBuffer, const lv_32fc_t* complexVector,
                                     unsigned int num_points)
{
  unsigned int number;
  const unsigned int quarter_points = num_points / 4;
  /* Keep the const qualifier of the input when viewing it as floats */
  const float* complexVectorPtr = (const float*)complexVector;
  float* qBufferPtr = qBuffer;
  float32x4x2_t complexInput;

  for (number = 0; number < quarter_points; number++) {
    /* vld2q_f32 de-interleaves 8 floats: val[0] gets the even-indexed
     * elements, val[1] the odd-indexed ones (the Q values here) */
    complexInput = vld2q_f32(complexVectorPtr);
    vst1q_f32(qBufferPtr, complexInput.val[1]);
    complexVectorPtr += 8;
    qBufferPtr += 4;
  }

  /* Scalar tail: skip the first float of each pair, copy the second */
  for (number = quarter_points * 4; number < num_points; number++) {
    complexVectorPtr++;
    *qBufferPtr++ = *complexVectorPtr++;
  }
}
179 #endif /* LV_HAVE_NEON */
180 
181 #ifdef LV_HAVE_GENERIC
182 
183 static inline void
184 volk_32fc_deinterleave_imag_32f_generic(float* qBuffer, const lv_32fc_t* complexVector,
185  unsigned int num_points)
186 {
187  unsigned int number = 0;
188  const float* complexVectorPtr = (float*)complexVector;
189  float* qBufferPtr = qBuffer;
190  for(number = 0; number < num_points; number++){
191  complexVectorPtr++;
192  *qBufferPtr++ = *complexVectorPtr++;
193  }
194 }
195 #endif /* LV_HAVE_GENERIC */
196 
197 
198 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
199 
200 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
201 #define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
202 
203 #include <inttypes.h>
204 #include <stdio.h>
205 
206 #ifdef LV_HAVE_AVX
207 #include <immintrin.h>
208 
209 static inline void
210 volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer, const lv_32fc_t* complexVector,
211  unsigned int num_points)
212 {
213  unsigned int number = 0;
214  const unsigned int eighthPoints = num_points / 8;
215  const float* complexVectorPtr = (const float*)complexVector;
216  float* qBufferPtr = qBuffer;
217 
218  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
219  for(;number < eighthPoints; number++){
220 
221  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
222  complexVectorPtr += 8;
223 
224  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
225  complexVectorPtr += 8;
226 
227  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
228  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
229 
230  // Arrange in q1q2q3q4 format
231  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
232 
233  _mm256_storeu_ps(qBufferPtr, qValue);
234 
235  qBufferPtr += 8;
236  }
237 
238  number = eighthPoints * 8;
239  for(; number < num_points; number++){
240  complexVectorPtr++;
241  *qBufferPtr++ = *complexVectorPtr++;
242  }
243 }
244 #endif /* LV_HAVE_AVX */
245 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */
static void volk_32fc_deinterleave_imag_32f_u_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:210
static void volk_32fc_deinterleave_imag_32f_a_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:80
static void volk_32fc_deinterleave_imag_32f_a_sse(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:120
static void volk_32fc_deinterleave_imag_32f_generic(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:184
static void volk_32fc_deinterleave_imag_32f_neon(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:158
float complex lv_32fc_t
Definition: volk_complex.h:61