Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_x2_s32f_interleave_16ic.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
75 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
76 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
77 
78 #include <volk/volk_common.h>
79 #include <inttypes.h>
80 #include <stdio.h>
81 
82 #ifdef LV_HAVE_AVX2
83 #include <immintrin.h>
84 
85 static inline void
86 volk_32f_x2_s32f_interleave_16ic_a_avx2(lv_16sc_t* complexVector, const float* iBuffer,
87  const float* qBuffer, const float scalar, unsigned int num_points)
88 {
89  unsigned int number = 0;
90  const float* iBufferPtr = iBuffer;
91  const float* qBufferPtr = qBuffer;
92 
93  __m256 vScalar = _mm256_set1_ps(scalar);
94 
95  const unsigned int eighthPoints = num_points / 8;
96 
97  __m256 iValue, qValue, cplxValue1, cplxValue2;
98  __m256i intValue1, intValue2;
99 
100  int16_t* complexVectorPtr = (int16_t*)complexVector;
101 
102  for(;number < eighthPoints; number++){
103  iValue = _mm256_load_ps(iBufferPtr);
104  qValue = _mm256_load_ps(qBufferPtr);
105 
106  // Interleaves the lower two values in the i and q variables into one buffer
107  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
108  cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
109 
110  // Interleaves the upper two values in the i and q variables into one buffer
111  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
112  cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
113 
114  intValue1 = _mm256_cvtps_epi32(cplxValue1);
115  intValue2 = _mm256_cvtps_epi32(cplxValue2);
116 
117  intValue1 = _mm256_packs_epi32(intValue1, intValue2);
118 
119  _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
120  complexVectorPtr += 16;
121 
122  iBufferPtr += 8;
123  qBufferPtr += 8;
124  }
125 
126  number = eighthPoints * 8;
127  complexVectorPtr = (int16_t*)(&complexVector[number]);
128  for(; number < num_points; number++){
129  *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
130  *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
131  }
132 }
133 #endif /* LV_HAVE_AVX2 */
134 
135 
136 #ifdef LV_HAVE_SSE2
137 #include <emmintrin.h>
138 
139 static inline void
140 volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector, const float* iBuffer,
141  const float* qBuffer, const float scalar, unsigned int num_points)
142 {
143  unsigned int number = 0;
144  const float* iBufferPtr = iBuffer;
145  const float* qBufferPtr = qBuffer;
146 
147  __m128 vScalar = _mm_set_ps1(scalar);
148 
149  const unsigned int quarterPoints = num_points / 4;
150 
151  __m128 iValue, qValue, cplxValue1, cplxValue2;
152  __m128i intValue1, intValue2;
153 
154  int16_t* complexVectorPtr = (int16_t*)complexVector;
155 
156  for(;number < quarterPoints; number++){
157  iValue = _mm_load_ps(iBufferPtr);
158  qValue = _mm_load_ps(qBufferPtr);
159 
160  // Interleaves the lower two values in the i and q variables into one buffer
161  cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
162  cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
163 
164  // Interleaves the upper two values in the i and q variables into one buffer
165  cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
166  cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
167 
168  intValue1 = _mm_cvtps_epi32(cplxValue1);
169  intValue2 = _mm_cvtps_epi32(cplxValue2);
170 
171  intValue1 = _mm_packs_epi32(intValue1, intValue2);
172 
173  _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
174  complexVectorPtr += 8;
175 
176  iBufferPtr += 4;
177  qBufferPtr += 4;
178  }
179 
180  number = quarterPoints * 4;
181  complexVectorPtr = (int16_t*)(&complexVector[number]);
182  for(; number < num_points; number++){
183  *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
184  *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
185  }
186 }
187 #endif /* LV_HAVE_SSE2 */
188 
189 
190 #ifdef LV_HAVE_SSE
191 #include <xmmintrin.h>
192 
193 static inline void
194 volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector, const float* iBuffer,
195  const float* qBuffer, const float scalar, unsigned int num_points)
196 {
197  unsigned int number = 0;
198  const float* iBufferPtr = iBuffer;
199  const float* qBufferPtr = qBuffer;
200 
201  __m128 vScalar = _mm_set_ps1(scalar);
202 
203  const unsigned int quarterPoints = num_points / 4;
204 
205  __m128 iValue, qValue, cplxValue;
206 
207  int16_t* complexVectorPtr = (int16_t*)complexVector;
208 
209  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
210 
211  for(;number < quarterPoints; number++){
212  iValue = _mm_load_ps(iBufferPtr);
213  qValue = _mm_load_ps(qBufferPtr);
214 
215  // Interleaves the lower two values in the i and q variables into one buffer
216  cplxValue = _mm_unpacklo_ps(iValue, qValue);
217  cplxValue = _mm_mul_ps(cplxValue, vScalar);
218 
219  _mm_store_ps(floatBuffer, cplxValue);
220 
221  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
222  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
223  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
224  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
225 
226  // Interleaves the upper two values in the i and q variables into one buffer
227  cplxValue = _mm_unpackhi_ps(iValue, qValue);
228  cplxValue = _mm_mul_ps(cplxValue, vScalar);
229 
230  _mm_store_ps(floatBuffer, cplxValue);
231 
232  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
233  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
234  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
235  *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
236 
237  iBufferPtr += 4;
238  qBufferPtr += 4;
239  }
240 
241  number = quarterPoints * 4;
242  complexVectorPtr = (int16_t*)(&complexVector[number]);
243  for(; number < num_points; number++){
244  *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
245  *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
246  }
247 }
248 #endif /* LV_HAVE_SSE */
249 
250 
251 #ifdef LV_HAVE_GENERIC
252 
253 static inline void
254 volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector, const float* iBuffer,
255  const float* qBuffer, const float scalar, unsigned int num_points)
256 {
257  int16_t* complexVectorPtr = (int16_t*)complexVector;
258  const float* iBufferPtr = iBuffer;
259  const float* qBufferPtr = qBuffer;
260  unsigned int number = 0;
261 
262  for(number = 0; number < num_points; number++){
263  *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
264  *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
265  }
266 }
267 #endif /* LV_HAVE_GENERIC */
268 
269 
270 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
271 
272 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
273 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
274 
275 #include <volk/volk_common.h>
276 #include <inttypes.h>
277 #include <stdio.h>
278 
279 #ifdef LV_HAVE_AVX2
280 #include <immintrin.h>
281 
282 static inline void
283 volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVector, const float* iBuffer,
284  const float* qBuffer, const float scalar, unsigned int num_points)
285 {
286  unsigned int number = 0;
287  const float* iBufferPtr = iBuffer;
288  const float* qBufferPtr = qBuffer;
289 
290  __m256 vScalar = _mm256_set1_ps(scalar);
291 
292  const unsigned int eighthPoints = num_points / 8;
293 
294  __m256 iValue, qValue, cplxValue1, cplxValue2;
295  __m256i intValue1, intValue2;
296 
297  int16_t* complexVectorPtr = (int16_t*)complexVector;
298 
299  for(;number < eighthPoints; number++){
300  iValue = _mm256_loadu_ps(iBufferPtr);
301  qValue = _mm256_loadu_ps(qBufferPtr);
302 
303  // Interleaves the lower two values in the i and q variables into one buffer
304  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
305  cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
306 
307  // Interleaves the upper two values in the i and q variables into one buffer
308  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
309  cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
310 
311  intValue1 = _mm256_cvtps_epi32(cplxValue1);
312  intValue2 = _mm256_cvtps_epi32(cplxValue2);
313 
314  intValue1 = _mm256_packs_epi32(intValue1, intValue2);
315 
316  _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
317  complexVectorPtr += 16;
318 
319  iBufferPtr += 8;
320  qBufferPtr += 8;
321  }
322 
323  number = eighthPoints * 8;
324  complexVectorPtr = (int16_t*)(&complexVector[number]);
325  for(; number < num_points; number++){
326  *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
327  *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
328  }
329 }
330 #endif /* LV_HAVE_AVX2 */
331 
332 
333 #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */
static void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:254
short complex lv_16sc_t
Definition: volk_complex.h:58
static float rintf(float x)
Definition: config.h:31
static void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:140
static void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:194
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:33