Vector Optimized Library of Kernels  2.5.0
Architecture-tuned implementations of math kernels
volk_8ic_x2_multiply_conjugate_16ic.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
24 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <volk/volk_complex.h>
29 
30 #ifdef LV_HAVE_AVX2
31 #include <immintrin.h>
40 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector,
41  const lv_8sc_t* aVector,
42  const lv_8sc_t* bVector,
43  unsigned int num_points)
44 {
45  unsigned int number = 0;
46  const unsigned int quarterPoints = num_points / 8;
47 
48  __m256i x, y, realz, imagz;
49  lv_16sc_t* c = cVector;
50  const lv_8sc_t* a = aVector;
51  const lv_8sc_t* b = bVector;
52  __m256i conjugateSign =
53  _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
54 
55  for (; number < quarterPoints; number++) {
56  // Convert 8 bit values into 16 bit values
57  x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
58  y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
59 
60  // Calculate the ar*cr - ai*(-ci) portions
61  realz = _mm256_madd_epi16(x, y);
62 
63  // Calculate the complex conjugate of the cr + ci j values
64  y = _mm256_sign_epi16(y, conjugateSign);
65 
66  // Shift the order of the cr and ci values
67  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
68  _MM_SHUFFLE(2, 3, 0, 1));
69 
70  // Calculate the ar*(-ci) + cr*(ai)
71  imagz = _mm256_madd_epi16(x, y);
72 
73  // Perform the addition of products
74 
75  _mm256_store_si256((__m256i*)c,
76  _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
77  _mm256_unpackhi_epi32(realz, imagz)));
78 
79  a += 8;
80  b += 8;
81  c += 8;
82  }
83 
84  number = quarterPoints * 8;
85  int16_t* c16Ptr = (int16_t*)&cVector[number];
86  int8_t* a8Ptr = (int8_t*)&aVector[number];
87  int8_t* b8Ptr = (int8_t*)&bVector[number];
88  for (; number < num_points; number++) {
89  float aReal = (float)*a8Ptr++;
90  float aImag = (float)*a8Ptr++;
91  lv_32fc_t aVal = lv_cmake(aReal, aImag);
92  float bReal = (float)*b8Ptr++;
93  float bImag = (float)*b8Ptr++;
94  lv_32fc_t bVal = lv_cmake(bReal, -bImag);
95  lv_32fc_t temp = aVal * bVal;
96 
97  *c16Ptr++ = (int16_t)lv_creal(temp);
98  *c16Ptr++ = (int16_t)lv_cimag(temp);
99  }
100 }
101 #endif /* LV_HAVE_AVX2 */
102 
103 
104 #ifdef LV_HAVE_SSE4_1
105 #include <smmintrin.h>
114 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector,
115  const lv_8sc_t* aVector,
116  const lv_8sc_t* bVector,
117  unsigned int num_points)
118 {
119  unsigned int number = 0;
120  const unsigned int quarterPoints = num_points / 4;
121 
122  __m128i x, y, realz, imagz;
123  lv_16sc_t* c = cVector;
124  const lv_8sc_t* a = aVector;
125  const lv_8sc_t* b = bVector;
126  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
127 
128  for (; number < quarterPoints; number++) {
129  // Convert into 8 bit values into 16 bit values
130  x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
131  y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
132 
133  // Calculate the ar*cr - ai*(-ci) portions
134  realz = _mm_madd_epi16(x, y);
135 
136  // Calculate the complex conjugate of the cr + ci j values
137  y = _mm_sign_epi16(y, conjugateSign);
138 
139  // Shift the order of the cr and ci values
140  y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
141  _MM_SHUFFLE(2, 3, 0, 1));
142 
143  // Calculate the ar*(-ci) + cr*(ai)
144  imagz = _mm_madd_epi16(x, y);
145 
146  _mm_store_si128((__m128i*)c,
147  _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz),
148  _mm_unpackhi_epi32(realz, imagz)));
149 
150  a += 4;
151  b += 4;
152  c += 4;
153  }
154 
155  number = quarterPoints * 4;
156  int16_t* c16Ptr = (int16_t*)&cVector[number];
157  int8_t* a8Ptr = (int8_t*)&aVector[number];
158  int8_t* b8Ptr = (int8_t*)&bVector[number];
159  for (; number < num_points; number++) {
160  float aReal = (float)*a8Ptr++;
161  float aImag = (float)*a8Ptr++;
162  lv_32fc_t aVal = lv_cmake(aReal, aImag);
163  float bReal = (float)*b8Ptr++;
164  float bImag = (float)*b8Ptr++;
165  lv_32fc_t bVal = lv_cmake(bReal, -bImag);
166  lv_32fc_t temp = aVal * bVal;
167 
168  *c16Ptr++ = (int16_t)lv_creal(temp);
169  *c16Ptr++ = (int16_t)lv_cimag(temp);
170  }
171 }
172 #endif /* LV_HAVE_SSE4_1 */
173 
174 #ifdef LV_HAVE_GENERIC
184  const lv_8sc_t* aVector,
185  const lv_8sc_t* bVector,
186  unsigned int num_points)
187 {
188  unsigned int number = 0;
189  int16_t* c16Ptr = (int16_t*)cVector;
190  int8_t* a8Ptr = (int8_t*)aVector;
191  int8_t* b8Ptr = (int8_t*)bVector;
192  for (number = 0; number < num_points; number++) {
193  float aReal = (float)*a8Ptr++;
194  float aImag = (float)*a8Ptr++;
195  lv_32fc_t aVal = lv_cmake(aReal, aImag);
196  float bReal = (float)*b8Ptr++;
197  float bImag = (float)*b8Ptr++;
198  lv_32fc_t bVal = lv_cmake(bReal, -bImag);
199  lv_32fc_t temp = aVal * bVal;
200 
201  *c16Ptr++ = (int16_t)lv_creal(temp);
202  *c16Ptr++ = (int16_t)lv_cimag(temp);
203  }
204 }
205 #endif /* LV_HAVE_GENERIC */
206 
207 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
208 
209 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
210 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
211 
212 #include <inttypes.h>
213 #include <stdio.h>
214 #include <volk/volk_complex.h>
215 
216 #ifdef LV_HAVE_AVX2
217 #include <immintrin.h>
226 static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector,
227  const lv_8sc_t* aVector,
228  const lv_8sc_t* bVector,
229  unsigned int num_points)
230 {
231  unsigned int number = 0;
232  const unsigned int oneEigthPoints = num_points / 8;
233 
234  __m256i x, y, realz, imagz;
235  lv_16sc_t* c = cVector;
236  const lv_8sc_t* a = aVector;
237  const lv_8sc_t* b = bVector;
238  __m256i conjugateSign =
239  _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
240 
241  for (; number < oneEigthPoints; number++) {
242  // Convert 8 bit values into 16 bit values
243  x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
244  y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
245 
246  // Calculate the ar*cr - ai*(-ci) portions
247  realz = _mm256_madd_epi16(x, y);
248 
249  // Calculate the complex conjugate of the cr + ci j values
250  y = _mm256_sign_epi16(y, conjugateSign);
251 
252  // Shift the order of the cr and ci values
253  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
254  _MM_SHUFFLE(2, 3, 0, 1));
255 
256  // Calculate the ar*(-ci) + cr*(ai)
257  imagz = _mm256_madd_epi16(x, y);
258 
259  // Perform the addition of products
260 
261  _mm256_storeu_si256((__m256i*)c,
262  _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
263  _mm256_unpackhi_epi32(realz, imagz)));
264 
265  a += 8;
266  b += 8;
267  c += 8;
268  }
269 
270  number = oneEigthPoints * 8;
271  int16_t* c16Ptr = (int16_t*)&cVector[number];
272  int8_t* a8Ptr = (int8_t*)&aVector[number];
273  int8_t* b8Ptr = (int8_t*)&bVector[number];
274  for (; number < num_points; number++) {
275  float aReal = (float)*a8Ptr++;
276  float aImag = (float)*a8Ptr++;
277  lv_32fc_t aVal = lv_cmake(aReal, aImag);
278  float bReal = (float)*b8Ptr++;
279  float bImag = (float)*b8Ptr++;
280  lv_32fc_t bVal = lv_cmake(bReal, -bImag);
281  lv_32fc_t temp = aVal * bVal;
282 
283  *c16Ptr++ = (int16_t)lv_creal(temp);
284  *c16Ptr++ = (int16_t)lv_cimag(temp);
285  }
286 }
287 #endif /* LV_HAVE_AVX2 */
288 
289 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */
static void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, unsigned int num_points)
Multiplies one complex vector by the complex conjugate of a second complex vector and stores the result in cVector.
Definition: volk_8ic_x2_multiply_conjugate_16ic.h:183
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_cmake(r, i)
Definition: volk_complex.h:68
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65
short complex lv_16sc_t
Definition: volk_complex.h:62