Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_8ic_x2_s32f_multiply_conjugate_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
56 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
57 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
58 
59 #include <inttypes.h>
60 #include <stdio.h>
61 #include <volk/volk_complex.h>
62 
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65 
66 static inline void
67 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_avx2(lv_32fc_t* cVector, const lv_8sc_t* aVector,
68  const lv_8sc_t* bVector, const float scalar,
69  unsigned int num_points)
70 {
71  unsigned int number = 0;
72  const unsigned int oneEigthPoints = num_points / 8;
73 
74  __m256i x, y, realz, imagz;
75  __m256 ret, retlo, rethi;
76  lv_32fc_t* c = cVector;
77  const lv_8sc_t* a = aVector;
78  const lv_8sc_t* b = bVector;
79  __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
80 
81  __m256 invScalar = _mm256_set1_ps(1.0/scalar);
82 
83  for(;number < oneEigthPoints; number++){
84  // Convert 8 bit values into 16 bit values
85  x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
86  y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
87 
88  // Calculate the ar*cr - ai*(-ci) portions
89  realz = _mm256_madd_epi16(x,y);
90 
91  // Calculate the complex conjugate of the cr + ci j values
92  y = _mm256_sign_epi16(y, conjugateSign);
93 
94  // Shift the order of the cr and ci values
95  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
96 
97  // Calculate the ar*(-ci) + cr*(ai)
98  imagz = _mm256_madd_epi16(x,y);
99 
100  // Interleave real and imaginary and then convert to float values
101  retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
102 
103  // Normalize the floating point values
104  retlo = _mm256_mul_ps(retlo, invScalar);
105 
106  // Interleave real and imaginary and then convert to float values
107  rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
108 
109  // Normalize the floating point values
110  rethi = _mm256_mul_ps(rethi, invScalar);
111 
112  ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
113  _mm256_store_ps((float*)c, ret);
114  c += 4;
115 
116  ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
117  _mm256_store_ps((float*)c, ret);
118  c += 4;
119 
120  a += 8;
121  b += 8;
122  }
123 
124  number = oneEigthPoints * 8;
125  float* cFloatPtr = (float*)&cVector[number];
126  int8_t* a8Ptr = (int8_t*)&aVector[number];
127  int8_t* b8Ptr = (int8_t*)&bVector[number];
128  for(; number < num_points; number++){
129  float aReal = (float)*a8Ptr++;
130  float aImag = (float)*a8Ptr++;
131  lv_32fc_t aVal = lv_cmake(aReal, aImag );
132  float bReal = (float)*b8Ptr++;
133  float bImag = (float)*b8Ptr++;
134  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
135  lv_32fc_t temp = aVal * bVal;
136 
137  *cFloatPtr++ = lv_creal(temp) / scalar;
138  *cFloatPtr++ = lv_cimag(temp) / scalar;
139  }
140 }
141 #endif /* LV_HAVE_AVX2*/
142 
143 
144 #ifdef LV_HAVE_SSE4_1
145 #include <smmintrin.h>
146 
147 static inline void
148 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector,
149  const lv_8sc_t* bVector, const float scalar,
150  unsigned int num_points)
151 {
152  unsigned int number = 0;
153  const unsigned int quarterPoints = num_points / 4;
154 
155  __m128i x, y, realz, imagz;
156  __m128 ret;
157  lv_32fc_t* c = cVector;
158  const lv_8sc_t* a = aVector;
159  const lv_8sc_t* b = bVector;
160  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
161 
162  __m128 invScalar = _mm_set_ps1(1.0/scalar);
163 
164  for(;number < quarterPoints; number++){
165  // Convert into 8 bit values into 16 bit values
166  x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
167  y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
168 
169  // Calculate the ar*cr - ai*(-ci) portions
170  realz = _mm_madd_epi16(x,y);
171 
172  // Calculate the complex conjugate of the cr + ci j values
173  y = _mm_sign_epi16(y, conjugateSign);
174 
175  // Shift the order of the cr and ci values
176  y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
177 
178  // Calculate the ar*(-ci) + cr*(ai)
179  imagz = _mm_madd_epi16(x,y);
180 
181  // Interleave real and imaginary and then convert to float values
182  ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
183 
184  // Normalize the floating point values
185  ret = _mm_mul_ps(ret, invScalar);
186 
187  // Store the floating point values
188  _mm_store_ps((float*)c, ret);
189  c += 2;
190 
191  // Interleave real and imaginary and then convert to float values
192  ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
193 
194  // Normalize the floating point values
195  ret = _mm_mul_ps(ret, invScalar);
196 
197  // Store the floating point values
198  _mm_store_ps((float*)c, ret);
199  c += 2;
200 
201  a += 4;
202  b += 4;
203  }
204 
205  number = quarterPoints * 4;
206  float* cFloatPtr = (float*)&cVector[number];
207  int8_t* a8Ptr = (int8_t*)&aVector[number];
208  int8_t* b8Ptr = (int8_t*)&bVector[number];
209  for(; number < num_points; number++){
210  float aReal = (float)*a8Ptr++;
211  float aImag = (float)*a8Ptr++;
212  lv_32fc_t aVal = lv_cmake(aReal, aImag );
213  float bReal = (float)*b8Ptr++;
214  float bImag = (float)*b8Ptr++;
215  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
216  lv_32fc_t temp = aVal * bVal;
217 
218  *cFloatPtr++ = lv_creal(temp) / scalar;
219  *cFloatPtr++ = lv_cimag(temp) / scalar;
220  }
221 }
222 #endif /* LV_HAVE_SSE4_1 */
223 
224 
225 #ifdef LV_HAVE_GENERIC
226 
227 static inline void
229  const lv_8sc_t* bVector, const float scalar,
230  unsigned int num_points)
231 {
232  unsigned int number = 0;
233  float* cPtr = (float*)cVector;
234  const float invScalar = 1.0 / scalar;
235  int8_t* a8Ptr = (int8_t*)aVector;
236  int8_t* b8Ptr = (int8_t*)bVector;
237  for(number = 0; number < num_points; number++){
238  float aReal = (float)*a8Ptr++;
239  float aImag = (float)*a8Ptr++;
240  lv_32fc_t aVal = lv_cmake(aReal, aImag );
241  float bReal = (float)*b8Ptr++;
242  float bImag = (float)*b8Ptr++;
243  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
244  lv_32fc_t temp = aVal * bVal;
245 
246  *cPtr++ = (lv_creal(temp) * invScalar);
247  *cPtr++ = (lv_cimag(temp) * invScalar);
248  }
249 }
250 #endif /* LV_HAVE_GENERIC */
251 
252 
253 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
254 
255 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H
256 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H
257 
258 #include <inttypes.h>
259 #include <stdio.h>
260 #include <volk/volk_complex.h>
261 
262 #ifdef LV_HAVE_AVX2
263 #include <immintrin.h>
264 
265 static inline void
266 volk_8ic_x2_s32f_multiply_conjugate_32fc_u_avx2(lv_32fc_t* cVector, const lv_8sc_t* aVector,
267  const lv_8sc_t* bVector, const float scalar,
268  unsigned int num_points)
269 {
270  unsigned int number = 0;
271  const unsigned int oneEigthPoints = num_points / 8;
272 
273  __m256i x, y, realz, imagz;
274  __m256 ret, retlo, rethi;
275  lv_32fc_t* c = cVector;
276  const lv_8sc_t* a = aVector;
277  const lv_8sc_t* b = bVector;
278  __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
279 
280  __m256 invScalar = _mm256_set1_ps(1.0/scalar);
281 
282  for(;number < oneEigthPoints; number++){
283  // Convert 8 bit values into 16 bit values
284  x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
285  y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
286 
287  // Calculate the ar*cr - ai*(-ci) portions
288  realz = _mm256_madd_epi16(x,y);
289 
290  // Calculate the complex conjugate of the cr + ci j values
291  y = _mm256_sign_epi16(y, conjugateSign);
292 
293  // Shift the order of the cr and ci values
294  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
295 
296  // Calculate the ar*(-ci) + cr*(ai)
297  imagz = _mm256_madd_epi16(x,y);
298 
299  // Interleave real and imaginary and then convert to float values
300  retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
301 
302  // Normalize the floating point values
303  retlo = _mm256_mul_ps(retlo, invScalar);
304 
305  // Interleave real and imaginary and then convert to float values
306  rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
307 
308  // Normalize the floating point values
309  rethi = _mm256_mul_ps(rethi, invScalar);
310 
311  ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
312  _mm256_storeu_ps((float*)c, ret);
313  c += 4;
314 
315  ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
316  _mm256_storeu_ps((float*)c, ret);
317  c += 4;
318 
319  a += 8;
320  b += 8;
321  }
322 
323  number = oneEigthPoints * 8;
324  float* cFloatPtr = (float*)&cVector[number];
325  int8_t* a8Ptr = (int8_t*)&aVector[number];
326  int8_t* b8Ptr = (int8_t*)&bVector[number];
327  for(; number < num_points; number++){
328  float aReal = (float)*a8Ptr++;
329  float aImag = (float)*a8Ptr++;
330  lv_32fc_t aVal = lv_cmake(aReal, aImag );
331  float bReal = (float)*b8Ptr++;
332  float bImag = (float)*b8Ptr++;
333  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
334  lv_32fc_t temp = aVal * bVal;
335 
336  *cFloatPtr++ = lv_creal(temp) / scalar;
337  *cFloatPtr++ = lv_cimag(temp) / scalar;
338  }
339 }
340 #endif /* LV_HAVE_AVX2*/
341 
342 
343 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H */
static void volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_x2_s32f_multiply_conjugate_32fc.h:228
#define lv_cmake(r, i)
Definition: volk_complex.h:64
float complex lv_32fc_t
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:83
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57
#define lv_cimag(x)
Definition: volk_complex.h:85