Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_x2_interleave_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
74 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
79 #ifdef LV_HAVE_AVX
80 #include <immintrin.h>
81 
82 static inline void
83 volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector, const float* iBuffer,
84  const float* qBuffer, unsigned int num_points)
85 {
86  unsigned int number = 0;
87  float* complexVectorPtr = (float*)complexVector;
88  const float* iBufferPtr = iBuffer;
89  const float* qBufferPtr = qBuffer;
90 
91  const uint64_t eighthPoints = num_points / 8;
92 
93  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
94  for(;number < eighthPoints; number++){
95  iValue = _mm256_load_ps(iBufferPtr);
96  qValue = _mm256_load_ps(qBufferPtr);
97 
98  // Interleaves the lower two values in the i and q variables into one buffer
99  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
100  // Interleaves the upper two values in the i and q variables into one buffer
101  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
102 
103  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
104  _mm256_store_ps(complexVectorPtr, cplxValue);
105  complexVectorPtr += 8;
106 
107  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
108  _mm256_store_ps(complexVectorPtr, cplxValue);
109  complexVectorPtr += 8;
110 
111  iBufferPtr += 8;
112  qBufferPtr += 8;
113  }
114 
115  number = eighthPoints * 8;
116  for(; number < num_points; number++){
117  *complexVectorPtr++ = *iBufferPtr++;
118  *complexVectorPtr++ = *qBufferPtr++;
119  }
120 }
121 
122 #endif /* LV_HAVE_AVX */
123 
124 #ifdef LV_HAVE_SSE
125 #include <xmmintrin.h>
126 
127 static inline void
128 volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, const float* iBuffer,
129  const float* qBuffer, unsigned int num_points)
130 {
131  unsigned int number = 0;
132  float* complexVectorPtr = (float*)complexVector;
133  const float* iBufferPtr = iBuffer;
134  const float* qBufferPtr = qBuffer;
135 
136  const uint64_t quarterPoints = num_points / 4;
137 
138  __m128 iValue, qValue, cplxValue;
139  for(;number < quarterPoints; number++){
140  iValue = _mm_load_ps(iBufferPtr);
141  qValue = _mm_load_ps(qBufferPtr);
142 
143  // Interleaves the lower two values in the i and q variables into one buffer
144  cplxValue = _mm_unpacklo_ps(iValue, qValue);
145  _mm_store_ps(complexVectorPtr, cplxValue);
146  complexVectorPtr += 4;
147 
148  // Interleaves the upper two values in the i and q variables into one buffer
149  cplxValue = _mm_unpackhi_ps(iValue, qValue);
150  _mm_store_ps(complexVectorPtr, cplxValue);
151  complexVectorPtr += 4;
152 
153  iBufferPtr += 4;
154  qBufferPtr += 4;
155  }
156 
157  number = quarterPoints * 4;
158  for(; number < num_points; number++){
159  *complexVectorPtr++ = *iBufferPtr++;
160  *complexVectorPtr++ = *qBufferPtr++;
161  }
162 }
163 #endif /* LV_HAVE_SSE */
164 
165 
166 #ifdef LV_HAVE_NEON
167 #include <arm_neon.h>
168 
/*!
 * \brief Interleaves two float buffers into one buffer of complex samples
 *        using NEON.
 *
 * For every n in [0, num_points) the float storage behind complexVector
 * receives iBuffer[n] followed by qBuffer[n].
 *
 * \param complexVector Output buffer, written as 2 * num_points floats.
 * \param iBuffer       Source of the first float of each output pair.
 * \param qBuffer       Source of the second float of each output pair.
 * \param num_points    Number of pairs to produce.
 */
static inline void
volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector, const float* iBuffer,
                                 const float* qBuffer, unsigned int num_points)
{
  float* out = (float*)complexVector;
  const float* inPhase = iBuffer;
  const float* quadrature = qBuffer;

  /* Whole groups of 4 samples go through the vector loop; the rest is scalar. */
  unsigned int groupsLeft = num_points / 4;
  unsigned int tailLeft = num_points % 4;

  while(groupsLeft != 0) {
    float32x4x2_t pairs;
    pairs.val[0] = vld1q_f32(inPhase);
    pairs.val[1] = vld1q_f32(quadrature);

    /* The 2-way structured store alternates val[0] and val[1] elements,
     * so the interleave happens as part of the store itself. */
    vst2q_f32(out, pairs);

    inPhase += 4;
    quadrature += 4;
    out += 8;
    --groupsLeft;
  }

  /* Scalar tail for the samples that did not fill a vector. */
  while(tailLeft != 0) {
    out[0] = *inPhase++;
    out[1] = *quadrature++;
    out += 2;
    --tailLeft;
  }
}
192 #endif /* LV_HAVE_NEON */
193 
194 
195 #ifdef LV_HAVE_GENERIC
196 
197 static inline void
198 volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector, const float* iBuffer,
199  const float* qBuffer, unsigned int num_points)
200 {
201  float* complexVectorPtr = (float*)complexVector;
202  const float* iBufferPtr = iBuffer;
203  const float* qBufferPtr = qBuffer;
204  unsigned int number;
205 
206  for(number = 0; number < num_points; number++){
207  *complexVectorPtr++ = *iBufferPtr++;
208  *complexVectorPtr++ = *qBufferPtr++;
209  }
210 }
211 #endif /* LV_HAVE_GENERIC */
212 
213 
214 
215 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
216 
217 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
218 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
219 
220 #include <inttypes.h>
221 #include <stdio.h>
222 
223 #ifdef LV_HAVE_AVX
224 #include <immintrin.h>
225 
226 static inline void
227 volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector, const float* iBuffer,
228  const float* qBuffer, unsigned int num_points)
229 {
230  unsigned int number = 0;
231  float* complexVectorPtr = (float*)complexVector;
232  const float* iBufferPtr = iBuffer;
233  const float* qBufferPtr = qBuffer;
234 
235  const uint64_t eighthPoints = num_points / 8;
236 
237  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
238  for(;number < eighthPoints; number++){
239  iValue = _mm256_loadu_ps(iBufferPtr);
240  qValue = _mm256_loadu_ps(qBufferPtr);
241 
242  // Interleaves the lower two values in the i and q variables into one buffer
243  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
244  // Interleaves the upper two values in the i and q variables into one buffer
245  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
246 
247  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
248  _mm256_storeu_ps(complexVectorPtr, cplxValue);
249  complexVectorPtr += 8;
250 
251  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
252  _mm256_storeu_ps(complexVectorPtr, cplxValue);
253  complexVectorPtr += 8;
254 
255  iBufferPtr += 8;
256  qBufferPtr += 8;
257  }
258 
259  number = eighthPoints * 8;
260  for(; number < num_points; number++){
261  *complexVectorPtr++ = *iBufferPtr++;
262  *complexVectorPtr++ = *qBufferPtr++;
263  }
264 }
265 #endif /* LV_HAVE_AVX */
266 
267 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:170
static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:83
static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:128
static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:227
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:198