Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32f_x2_interleave_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
74 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
79 #ifdef LV_HAVE_AVX
80 #include <immintrin.h>
81 
82 static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
83  const float* iBuffer,
84  const float* qBuffer,
85  unsigned int num_points)
86 {
87  unsigned int number = 0;
88  float* complexVectorPtr = (float*)complexVector;
89  const float* iBufferPtr = iBuffer;
90  const float* qBufferPtr = qBuffer;
91 
92  const uint64_t eighthPoints = num_points / 8;
93 
94  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
95  for (; number < eighthPoints; number++) {
96  iValue = _mm256_load_ps(iBufferPtr);
97  qValue = _mm256_load_ps(qBufferPtr);
98 
99  // Interleaves the lower two values in the i and q variables into one buffer
100  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
101  // Interleaves the upper two values in the i and q variables into one buffer
102  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
103 
104  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
105  _mm256_store_ps(complexVectorPtr, cplxValue);
106  complexVectorPtr += 8;
107 
108  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
109  _mm256_store_ps(complexVectorPtr, cplxValue);
110  complexVectorPtr += 8;
111 
112  iBufferPtr += 8;
113  qBufferPtr += 8;
114  }
115 
116  number = eighthPoints * 8;
117  for (; number < num_points; number++) {
118  *complexVectorPtr++ = *iBufferPtr++;
119  *complexVectorPtr++ = *qBufferPtr++;
120  }
121 }
122 
123 #endif /* LV_HAVE_AVX */
124 
125 #ifdef LV_HAVE_SSE
126 #include <xmmintrin.h>
127 
128 static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
129  const float* iBuffer,
130  const float* qBuffer,
131  unsigned int num_points)
132 {
133  unsigned int number = 0;
134  float* complexVectorPtr = (float*)complexVector;
135  const float* iBufferPtr = iBuffer;
136  const float* qBufferPtr = qBuffer;
137 
138  const uint64_t quarterPoints = num_points / 4;
139 
140  __m128 iValue, qValue, cplxValue;
141  for (; number < quarterPoints; number++) {
142  iValue = _mm_load_ps(iBufferPtr);
143  qValue = _mm_load_ps(qBufferPtr);
144 
145  // Interleaves the lower two values in the i and q variables into one buffer
146  cplxValue = _mm_unpacklo_ps(iValue, qValue);
147  _mm_store_ps(complexVectorPtr, cplxValue);
148  complexVectorPtr += 4;
149 
150  // Interleaves the upper two values in the i and q variables into one buffer
151  cplxValue = _mm_unpackhi_ps(iValue, qValue);
152  _mm_store_ps(complexVectorPtr, cplxValue);
153  complexVectorPtr += 4;
154 
155  iBufferPtr += 4;
156  qBufferPtr += 4;
157  }
158 
159  number = quarterPoints * 4;
160  for (; number < num_points; number++) {
161  *complexVectorPtr++ = *iBufferPtr++;
162  *complexVectorPtr++ = *qBufferPtr++;
163  }
164 }
165 #endif /* LV_HAVE_SSE */
166 
167 
168 #ifdef LV_HAVE_NEON
169 #include <arm_neon.h>
170 
171 static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
172  const float* iBuffer,
173  const float* qBuffer,
174  unsigned int num_points)
175 {
176  unsigned int quarter_points = num_points / 4;
177  unsigned int number;
178  float* complexVectorPtr = (float*)complexVector;
179 
180  float32x4x2_t complex_vec;
181  for (number = 0; number < quarter_points; ++number) {
182  complex_vec.val[0] = vld1q_f32(iBuffer);
183  complex_vec.val[1] = vld1q_f32(qBuffer);
184  vst2q_f32(complexVectorPtr, complex_vec);
185  iBuffer += 4;
186  qBuffer += 4;
187  complexVectorPtr += 8;
188  }
189 
190  for (number = quarter_points * 4; number < num_points; ++number) {
191  *complexVectorPtr++ = *iBuffer++;
192  *complexVectorPtr++ = *qBuffer++;
193  }
194 }
195 #endif /* LV_HAVE_NEON */
196 
197 
198 #ifdef LV_HAVE_GENERIC
199 
200 static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
201  const float* iBuffer,
202  const float* qBuffer,
203  unsigned int num_points)
204 {
205  float* complexVectorPtr = (float*)complexVector;
206  const float* iBufferPtr = iBuffer;
207  const float* qBufferPtr = qBuffer;
208  unsigned int number;
209 
210  for (number = 0; number < num_points; number++) {
211  *complexVectorPtr++ = *iBufferPtr++;
212  *complexVectorPtr++ = *qBufferPtr++;
213  }
214 }
215 #endif /* LV_HAVE_GENERIC */
216 
217 
218 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
219 
220 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
221 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
222 
223 #include <inttypes.h>
224 #include <stdio.h>
225 
226 #ifdef LV_HAVE_AVX
227 #include <immintrin.h>
228 
229 static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
230  const float* iBuffer,
231  const float* qBuffer,
232  unsigned int num_points)
233 {
234  unsigned int number = 0;
235  float* complexVectorPtr = (float*)complexVector;
236  const float* iBufferPtr = iBuffer;
237  const float* qBufferPtr = qBuffer;
238 
239  const uint64_t eighthPoints = num_points / 8;
240 
241  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
242  for (; number < eighthPoints; number++) {
243  iValue = _mm256_loadu_ps(iBufferPtr);
244  qValue = _mm256_loadu_ps(qBufferPtr);
245 
246  // Interleaves the lower two values in the i and q variables into one buffer
247  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
248  // Interleaves the upper two values in the i and q variables into one buffer
249  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
250 
251  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
252  _mm256_storeu_ps(complexVectorPtr, cplxValue);
253  complexVectorPtr += 8;
254 
255  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
256  _mm256_storeu_ps(complexVectorPtr, cplxValue);
257  complexVectorPtr += 8;
258 
259  iBufferPtr += 8;
260  qBufferPtr += 8;
261  }
262 
263  number = eighthPoints * 8;
264  for (; number < num_points; number++) {
265  *complexVectorPtr++ = *iBufferPtr++;
266  *complexVectorPtr++ = *qBufferPtr++;
267  }
268 }
269 #endif /* LV_HAVE_AVX */
270 
271 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
volk_32f_x2_interleave_32fc_neon
static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:171
volk_32f_x2_interleave_32fc_a_avx
static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:82
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70
volk_32f_x2_interleave_32fc_a_sse
static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:128
volk_32f_x2_interleave_32fc_u_avx
static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:229
volk_32f_x2_interleave_32fc_generic
static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:200