Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_8ic_x2_multiply_conjugate_16ic.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
24 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <volk/volk_complex.h>
29 
30 #ifdef LV_HAVE_AVX2
31 #include <immintrin.h>
39 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
40  unsigned int number = 0;
41  const unsigned int quarterPoints = num_points / 8;
42 
43  __m256i x, y, realz, imagz;
44  lv_16sc_t* c = cVector;
45  const lv_8sc_t* a = aVector;
46  const lv_8sc_t* b = bVector;
47  __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
48 
49  for(;number < quarterPoints; number++){
50  // Convert 8 bit values into 16 bit values
51  x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
52  y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
53 
54  // Calculate the ar*cr - ai*(-ci) portions
55  realz = _mm256_madd_epi16(x,y);
56 
57  // Calculate the complex conjugate of the cr + ci j values
58  y = _mm256_sign_epi16(y, conjugateSign);
59 
60  // Shift the order of the cr and ci values
61  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
62 
63  // Calculate the ar*(-ci) + cr*(ai)
64  imagz = _mm256_madd_epi16(x,y);
65 
66  // Perfrom the addition of products
67 
68  _mm256_store_si256((__m256i*)c, _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), _mm256_unpackhi_epi32(realz, imagz)));
69 
70  a += 8;
71  b += 8;
72  c += 8;
73  }
74 
75  number = quarterPoints * 8;
76  int16_t* c16Ptr = (int16_t*)&cVector[number];
77  int8_t* a8Ptr = (int8_t*)&aVector[number];
78  int8_t* b8Ptr = (int8_t*)&bVector[number];
79  for(; number < num_points; number++){
80  float aReal = (float)*a8Ptr++;
81  float aImag = (float)*a8Ptr++;
82  lv_32fc_t aVal = lv_cmake(aReal, aImag );
83  float bReal = (float)*b8Ptr++;
84  float bImag = (float)*b8Ptr++;
85  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
86  lv_32fc_t temp = aVal * bVal;
87 
88  *c16Ptr++ = (int16_t)lv_creal(temp);
89  *c16Ptr++ = (int16_t)lv_cimag(temp);
90  }
91 }
92 #endif /* LV_HAVE_AVX2 */
93 
94 
95 #ifdef LV_HAVE_SSE4_1
96 #include <smmintrin.h>
104 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
105  unsigned int number = 0;
106  const unsigned int quarterPoints = num_points / 4;
107 
108  __m128i x, y, realz, imagz;
109  lv_16sc_t* c = cVector;
110  const lv_8sc_t* a = aVector;
111  const lv_8sc_t* b = bVector;
112  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
113 
114  for(;number < quarterPoints; number++){
115  // Convert into 8 bit values into 16 bit values
116  x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
117  y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
118 
119  // Calculate the ar*cr - ai*(-ci) portions
120  realz = _mm_madd_epi16(x,y);
121 
122  // Calculate the complex conjugate of the cr + ci j values
123  y = _mm_sign_epi16(y, conjugateSign);
124 
125  // Shift the order of the cr and ci values
126  y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
127 
128  // Calculate the ar*(-ci) + cr*(ai)
129  imagz = _mm_madd_epi16(x,y);
130 
131  _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz)));
132 
133  a += 4;
134  b += 4;
135  c += 4;
136  }
137 
138  number = quarterPoints * 4;
139  int16_t* c16Ptr = (int16_t*)&cVector[number];
140  int8_t* a8Ptr = (int8_t*)&aVector[number];
141  int8_t* b8Ptr = (int8_t*)&bVector[number];
142  for(; number < num_points; number++){
143  float aReal = (float)*a8Ptr++;
144  float aImag = (float)*a8Ptr++;
145  lv_32fc_t aVal = lv_cmake(aReal, aImag );
146  float bReal = (float)*b8Ptr++;
147  float bImag = (float)*b8Ptr++;
148  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
149  lv_32fc_t temp = aVal * bVal;
150 
151  *c16Ptr++ = (int16_t)lv_creal(temp);
152  *c16Ptr++ = (int16_t)lv_cimag(temp);
153  }
154 }
155 #endif /* LV_HAVE_SSE4_1 */
156 
157 #ifdef LV_HAVE_GENERIC
158 
165 static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
166  unsigned int number = 0;
167  int16_t* c16Ptr = (int16_t*)cVector;
168  int8_t* a8Ptr = (int8_t*)aVector;
169  int8_t* b8Ptr = (int8_t*)bVector;
170  for(number =0; number < num_points; number++){
171  float aReal = (float)*a8Ptr++;
172  float aImag = (float)*a8Ptr++;
173  lv_32fc_t aVal = lv_cmake(aReal, aImag );
174  float bReal = (float)*b8Ptr++;
175  float bImag = (float)*b8Ptr++;
176  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
177  lv_32fc_t temp = aVal * bVal;
178 
179  *c16Ptr++ = (int16_t)lv_creal(temp);
180  *c16Ptr++ = (int16_t)lv_cimag(temp);
181  }
182 }
183 #endif /* LV_HAVE_GENERIC */
184 
185 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
186 
187 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
188 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
189 
190 #include <inttypes.h>
191 #include <stdio.h>
192 #include <volk/volk_complex.h>
193 
194 #ifdef LV_HAVE_AVX2
195 #include <immintrin.h>
203 static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
204  unsigned int number = 0;
205  const unsigned int oneEigthPoints = num_points / 8;
206 
207  __m256i x, y, realz, imagz;
208  lv_16sc_t* c = cVector;
209  const lv_8sc_t* a = aVector;
210  const lv_8sc_t* b = bVector;
211  __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
212 
213  for(;number < oneEigthPoints; number++){
214  // Convert 8 bit values into 16 bit values
215  x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
216  y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
217 
218  // Calculate the ar*cr - ai*(-ci) portions
219  realz = _mm256_madd_epi16(x,y);
220 
221  // Calculate the complex conjugate of the cr + ci j values
222  y = _mm256_sign_epi16(y, conjugateSign);
223 
224  // Shift the order of the cr and ci values
225  y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
226 
227  // Calculate the ar*(-ci) + cr*(ai)
228  imagz = _mm256_madd_epi16(x,y);
229 
230  // Perfrom the addition of products
231 
232  _mm256_storeu_si256((__m256i*)c, _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), _mm256_unpackhi_epi32(realz, imagz)));
233 
234  a += 8;
235  b += 8;
236  c += 8;
237  }
238 
239  number = oneEigthPoints * 8;
240  int16_t* c16Ptr = (int16_t*)&cVector[number];
241  int8_t* a8Ptr = (int8_t*)&aVector[number];
242  int8_t* b8Ptr = (int8_t*)&bVector[number];
243  for(; number < num_points; number++){
244  float aReal = (float)*a8Ptr++;
245  float aImag = (float)*a8Ptr++;
246  lv_32fc_t aVal = lv_cmake(aReal, aImag );
247  float bReal = (float)*b8Ptr++;
248  float bImag = (float)*b8Ptr++;
249  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
250  lv_32fc_t temp = aVal * bVal;
251 
252  *c16Ptr++ = (int16_t)lv_creal(temp);
253  *c16Ptr++ = (int16_t)lv_cimag(temp);
254  }
255 }
256 #endif /* LV_HAVE_AVX2 */
257 
258 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */
short complex lv_16sc_t
Definition: volk_complex.h:58
#define lv_cmake(r, i)
Definition: volk_complex.h:64
static void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, unsigned int num_points)
Multiplies one complex vector by the complex conjugate of the second complex vector and stores the results in the third vector.
Definition: volk_8ic_x2_multiply_conjugate_16ic.h:165
float complex lv_32fc_t
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:83
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57
#define lv_cimag(x)
Definition: volk_complex.h:85