/*
 * Aligned AVX2 kernel. For each point it combines the interleaved
 * (real, imag) int8 sample of aVector with the conjugated sample of bVector
 * and writes 16-bit results through c.
 *
 * NOTE(review): this listing looks like an extraction with source lines
 * missing. The declarations of a, b and c, the per-iteration pointer
 * advances, and the end of the scalar tail loop are not visible here.
 * Presumably a/b/c are cursors into aVector/bVector/cVector - confirm
 * against the full source.
 */
23 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H 24 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H 31 #include <immintrin.h> 39 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(
lv_16sc_t* cVector,
const lv_8sc_t* aVector,
const lv_8sc_t* bVector,
unsigned int num_points){
40 unsigned int number = 0;
/* Count of full vector iterations. Despite the name this is num_points / 8:
 * one 16-byte load below covers 8 points of two int8 values each. */
41 const unsigned int quarterPoints = num_points / 8;
43 __m256i x, y, realz, imagz;
/* _mm256_set_epi16 lists elements from highest to lowest, so even-indexed
 * 16-bit elements get +1 and odd-indexed ones get -1. */
47 __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
49 for(;number < quarterPoints; number++){
/* Load 16 int8 values and sign-extend them to 16-bit elements.
 * _mm_load_si128 requires a 16-byte aligned address. */
51 x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
52 y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
/* madd multiplies adjacent 16-bit pairs and adds each pair into a 32-bit
 * element: aReal*bReal + aImag*bImag per point. */
55 realz = _mm256_madd_epi16(x,y);
/* Negate the odd-indexed (imaginary) elements of y. */
58 y = _mm256_sign_epi16(y, conjugateSign);
/* Swap each adjacent pair of 16-bit elements: (bReal, -bImag) becomes
 * (-bImag, bReal). */
61 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
/* aReal*(-bImag) + aImag*bReal per point. */
64 imagz = _mm256_madd_epi16(x,y);
/* Interleave the 32-bit real/imag results, then pack them to 16 bits with
 * signed saturation. _mm256_store_si256 requires a 32-byte aligned address. */
68 _mm256_store_si256((__m256i*)c, _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), _mm256_unpackhi_epi32(realz, imagz)));
/* Scalar tail: resume at the first point not covered by the vector loop. */
75 number = quarterPoints * 8;
76 int16_t* c16Ptr = (int16_t*)&cVector[number];
77 int8_t* a8Ptr = (int8_t*)&aVector[number];
78 int8_t* b8Ptr = (int8_t*)&bVector[number];
79 for(; number < num_points; number++){
/* Each point is read as two consecutive int8 values: real, then imaginary. */
80 float aReal = (float)*a8Ptr++;
81 float aImag = (float)*a8Ptr++;
83 float bReal = (float)*b8Ptr++;
84 float bImag = (float)*b8Ptr++;
/*
 * Aligned SSE4.1 kernel. For each point it combines the interleaved
 * (real, imag) int8 sample of aVector with the conjugated sample of bVector
 * and writes 16-bit results, 4 points per vector iteration.
 *
 * NOTE(review): this listing looks like an extraction with source lines
 * missing. The declarations of a, b and c, the per-iteration pointer
 * advances, the definition of temp, and the closing braces are not visible
 * here - confirm against the full source.
 */
96 #include <smmintrin.h> 104 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(
lv_16sc_t* cVector,
const lv_8sc_t* aVector,
const lv_8sc_t* bVector,
unsigned int num_points){
105 unsigned int number = 0;
/* Count of full vector iterations: one 8-byte load below covers 4 points. */
106 const unsigned int quarterPoints = num_points / 4;
108 __m128i x, y, realz, imagz;
/* _mm_set_epi16 lists elements from highest to lowest, so even-indexed
 * 16-bit elements get +1 and odd-indexed ones get -1. */
112 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
114 for(;number < quarterPoints; number++){
/* Load the low 8 bytes (8 int8 values) and sign-extend them to 16 bits. */
116 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
117 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
/* madd multiplies adjacent 16-bit pairs and adds each pair into a 32-bit
 * element: aReal*bReal + aImag*bImag per point. */
120 realz = _mm_madd_epi16(x,y);
/* Negate the odd-indexed (imaginary) elements of y. */
123 y = _mm_sign_epi16(y, conjugateSign);
/* Swap each adjacent pair of 16-bit elements: (bReal, -bImag) becomes
 * (-bImag, bReal). */
126 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
/* aReal*(-bImag) + aImag*bReal per point. */
129 imagz = _mm_madd_epi16(x,y);
/* Interleave the 32-bit real/imag results, then pack them to 16 bits with
 * signed saturation. _mm_store_si128 requires a 16-byte aligned address. */
131 _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz)));
/* Scalar tail: resume at the first point not covered by the vector loop. */
138 number = quarterPoints * 4;
139 int16_t* c16Ptr = (int16_t*)&cVector[number];
140 int8_t* a8Ptr = (int8_t*)&aVector[number];
141 int8_t* b8Ptr = (int8_t*)&bVector[number];
142 for(; number < num_points; number++){
/* Each point is read as two consecutive int8 values: real, then imaginary. */
143 float aReal = (float)*a8Ptr++;
144 float aImag = (float)*a8Ptr++;
146 float bReal = (float)*b8Ptr++;
147 float bImag = (float)*b8Ptr++;
/* temp is not defined in the visible lines; presumably it holds the product
 * of the a sample and the conjugated b sample - TODO confirm.
 * NOTE(review): these stores are plain casts to int16_t, whereas the vector
 * path above packs with saturation - verify that out-of-range results agree. */
151 *c16Ptr++ = (int16_t)
lv_creal(temp);
152 *c16Ptr++ = (int16_t)
lv_cimag(temp);
/*
 * Body fragment of the generic (scalar) kernel.
 *
 * NOTE(review): the function signature, the definition of temp, and the
 * closing braces are not visible in this listing - confirm against the
 * full source.
 */
157 #ifdef LV_HAVE_GENERIC 166 unsigned int number = 0;
/* View the complex buffers as flat arrays of scalar components. */
167 int16_t* c16Ptr = (int16_t*)cVector;
168 int8_t* a8Ptr = (int8_t*)aVector;
169 int8_t* b8Ptr = (int8_t*)bVector;
/* number is already zero-initialized above; the loop resets it again. */
170 for(number =0; number < num_points; number++){
/* Each point is read as two consecutive int8 values: real, then imaginary. */
171 float aReal = (float)*a8Ptr++;
172 float aImag = (float)*a8Ptr++;
174 float bReal = (float)*b8Ptr++;
175 float bImag = (float)*b8Ptr++;
/* temp is not defined in the visible lines; presumably it holds the product
 * of the a sample and the conjugated b sample - TODO confirm.
 * Its real and imaginary parts are cast to int16_t and stored back to back. */
179 *c16Ptr++ = (int16_t)
lv_creal(temp);
180 *c16Ptr++ = (int16_t)
lv_cimag(temp);
/*
 * Unaligned AVX2 kernel. It performs the same per-point arithmetic as the
 * vector loop below shows, but uses unaligned loads and stores
 * (_mm_loadu_si128 / _mm256_storeu_si256), so the visible accesses place no
 * alignment requirement on the buffers.
 *
 * NOTE(review): this listing looks like an extraction with source lines
 * missing. The declarations of a, b and c, the per-iteration pointer
 * advances, the definition of temp, and the closing braces are not visible
 * here - confirm against the full source.
 */
187 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H 188 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H 190 #include <inttypes.h> 195 #include <immintrin.h> 203 static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(
lv_16sc_t* cVector,
const lv_8sc_t* aVector,
const lv_8sc_t* bVector,
unsigned int num_points){
204 unsigned int number = 0;
/* Count of full vector iterations: one 16-byte load below covers 8 points.
 * ("Eigth" in the identifier is a misspelling of "Eighth".) */
205 const unsigned int oneEigthPoints = num_points / 8;
207 __m256i x, y, realz, imagz;
/* _mm256_set_epi16 lists elements from highest to lowest, so even-indexed
 * 16-bit elements get +1 and odd-indexed ones get -1. */
211 __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
213 for(;number < oneEigthPoints; number++){
/* Load 16 int8 values (no alignment requirement) and sign-extend them to
 * 16-bit elements. */
215 x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
216 y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
/* madd multiplies adjacent 16-bit pairs and adds each pair into a 32-bit
 * element: aReal*bReal + aImag*bImag per point. */
219 realz = _mm256_madd_epi16(x,y);
/* Negate the odd-indexed (imaginary) elements of y. */
222 y = _mm256_sign_epi16(y, conjugateSign);
/* Swap each adjacent pair of 16-bit elements: (bReal, -bImag) becomes
 * (-bImag, bReal). */
225 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
/* aReal*(-bImag) + aImag*bReal per point. */
228 imagz = _mm256_madd_epi16(x,y);
/* Interleave the 32-bit real/imag results, then pack them to 16 bits with
 * signed saturation; the store is unaligned. */
232 _mm256_storeu_si256((__m256i*)c, _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz), _mm256_unpackhi_epi32(realz, imagz)));
/* Scalar tail: resume at the first point not covered by the vector loop. */
239 number = oneEigthPoints * 8;
240 int16_t* c16Ptr = (int16_t*)&cVector[number];
241 int8_t* a8Ptr = (int8_t*)&aVector[number];
242 int8_t* b8Ptr = (int8_t*)&bVector[number];
243 for(; number < num_points; number++){
/* Each point is read as two consecutive int8 values: real, then imaginary. */
244 float aReal = (float)*a8Ptr++;
245 float aImag = (float)*a8Ptr++;
247 float bReal = (float)*b8Ptr++;
248 float bImag = (float)*b8Ptr++;
/* temp is not defined in the visible lines; presumably it holds the product
 * of the a sample and the conjugated b sample - TODO confirm.
 * NOTE(review): these stores are plain casts to int16_t, whereas the vector
 * path above packs with saturation - verify that out-of-range results agree. */
252 *c16Ptr++ = (int16_t)
lv_creal(temp);
253 *c16Ptr++ = (int16_t)
lv_cimag(temp);
short complex lv_16sc_t
Definition: volk_complex.h:58
#define lv_cmake(r, i)
Definition: volk_complex.h:64
static void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, unsigned int num_points)
Multiplies one complex vector by the complex conjugate of the second complex vector and stores t...
Definition: volk_8ic_x2_multiply_conjugate_16ic.h:165
float complex lv_32fc_t
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:83
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57
#define lv_cimag(x)
Definition: volk_complex.h:85