/*
 * AVX2 kernel (aligned loads/stores): for each complex point computes
 * a * conj(b) from 8-bit integer complex inputs, converts the result to
 * float and scales it by 1/scalar, writing interleaved (real, imag) floats.
 *
 * Parameters as visible in the signature below:
 *   cVector    - output buffer of lv_32fc_t
 *   aVector    - first input buffer of lv_8sc_t
 *   bVector    - second input buffer of lv_8sc_t (the operand that is conjugated)
 *   scalar     - divisor applied to every output value
 *   num_points - number of complex points to process
 *
 * NOTE(review): this text looks like an extracted source listing. The leading
 * integers on each line appear to be listing line numbers, and several
 * original lines are absent: the return type, the braces, and the
 * declarations of `a`, `b`, `c` and `temp` are not visible here. Confirm
 * every hedged remark below against the complete file.
 */
56 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H 57 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H 64 #include <immintrin.h> 67 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_avx2(
lv_32fc_t* cVector,
const lv_8sc_t* aVector,
68 const lv_8sc_t* bVector,
const float scalar,
69 unsigned int num_points)
71 unsigned int number = 0;
// Number of full 8-point groups handled by the vector loop.
72 const unsigned int oneEigthPoints = num_points / 8;
74 __m256i x, y, realz, imagz;
75 __m256 ret, retlo, rethi;
// _mm256_set_epi16 lists elements from highest to lowest, so element 0 is +1
// and element 1 is -1: even 16-bit slots keep their sign, odd slots are negated.
79 __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
// Reciprocal is evaluated in double (1.0 is a double literal), then narrowed
// to float and broadcast to all eight lanes.
81 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
83 for(;number < oneEigthPoints; number++){
// Load 16 bytes from each input and sign-extend every 8-bit value to 16 bits.
// _mm_load_si128 requires the address to be 16-byte aligned.
// NOTE(review): `a` and `b` are not declared in the visible lines; presumably
// cursors into aVector / bVector - confirm.
85 x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
86 y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
// madd multiplies adjacent 16-bit pairs and adds them into 32 bits:
// per point this yields ar*br + ai*bi, the real part of a * conj(b).
89 realz = _mm256_madd_epi16(x,y);
// Negate the odd (imaginary) slots of y: (br, bi) -> (br, -bi).
92 y = _mm256_sign_epi16(y, conjugateSign);
// Swap each adjacent pair of 16-bit elements: (br, -bi) -> (-bi, br).
95 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
// Per point this yields ar*(-bi) + ai*br, the imaginary part of a * conj(b).
98 imagz = _mm256_madd_epi16(x,y);
// Interleave the lower two (real, imag) pairs of each 128-bit lane and convert to float.
101 retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
104 retlo = _mm256_mul_ps(retlo, invScalar);
// Same for the upper two (real, imag) pairs of each 128-bit lane.
107 rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
110 rethi = _mm256_mul_ps(rethi, invScalar);
// Selector 0b00100000 joins the low 128-bit lanes of retlo and rethi, which
// restores sequential order for the first four points of the group.
112 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
// _mm256_store_ps requires the address to be 32-byte aligned.
113 _mm256_store_ps((
float*)c, ret);
// Selector 0b00110001 joins the high 128-bit lanes: the last four points.
116 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
// NOTE(review): no update of `a`, `b` or `c` is visible between or after the
// two stores. Unless the pointers are advanced in lines missing from this
// view, this store overwrites the previous one - confirm against the full file.
117 _mm256_store_ps((
float*)c, ret);
// Scalar tail: handle the num_points % 8 points the vector loop did not cover.
124 number = oneEigthPoints * 8;
125 float* cFloatPtr = (
float*)&cVector[number];
// The casts drop the const qualifier of the inputs; the pointers are only read through.
126 int8_t* a8Ptr = (int8_t*)&aVector[number];
127 int8_t* b8Ptr = (int8_t*)&bVector[number];
128 for(; number < num_points; number++){
// Each point is two consecutive int8 values (real, then imaginary), widened to float.
129 float aReal = (float)*a8Ptr++;
130 float aImag = (float)*a8Ptr++;
132 float bReal = (float)*b8Ptr++;
133 float bImag = (float)*b8Ptr++;
// NOTE(review): `temp` is not defined in the visible lines; presumably the
// complex product a * conj(b) built from the four floats above - confirm.
// The tail divides by scalar directly while the vector loop multiplies by the
// precomputed reciprocal, so the two paths can differ in rounding.
137 *cFloatPtr++ =
lv_creal(temp) / scalar;
138 *cFloatPtr++ =
lv_cimag(temp) / scalar;
/*
 * SSE4.1 kernel (aligned stores): for each complex point computes
 * a * conj(b) from 8-bit integer complex inputs, converts the result to
 * float and scales it by 1/scalar. Four points are processed per iteration.
 *
 * Parameters as visible in the signature below:
 *   cVector    - output buffer of lv_32fc_t
 *   aVector    - first input buffer of lv_8sc_t
 *   bVector    - second input buffer of lv_8sc_t (the operand that is conjugated)
 *   scalar     - divisor applied to every output value
 *   num_points - number of complex points to process
 *
 * NOTE(review): this text looks like an extracted source listing. The leading
 * integers appear to be listing line numbers, and the return type, braces and
 * the declarations of `a`, `b`, `c`, `ret` and `temp` are not visible here.
 * Confirm every hedged remark below against the complete file.
 */
144 #ifdef LV_HAVE_SSE4_1 145 #include <smmintrin.h> 148 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(
lv_32fc_t* cVector,
const lv_8sc_t* aVector,
149 const lv_8sc_t* bVector,
const float scalar,
150 unsigned int num_points)
152 unsigned int number = 0;
// Number of full 4-point groups handled by the vector loop.
153 const unsigned int quarterPoints = num_points / 4;
155 __m128i x, y, realz, imagz;
// _mm_set_epi16 lists elements from highest to lowest, so element 0 is +1 and
// element 1 is -1: even 16-bit slots keep their sign, odd slots are negated.
160 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
// Reciprocal is evaluated in double (1.0 is a double literal), then narrowed
// to float and broadcast to all four lanes.
162 __m128 invScalar = _mm_set_ps1(1.0/scalar);
164 for(;number < quarterPoints; number++){
// Load 8 bytes from each input and sign-extend every 8-bit value to 16 bits.
// NOTE(review): `a` and `b` are not declared in the visible lines; presumably
// cursors into aVector / bVector - confirm.
166 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
167 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
// madd multiplies adjacent 16-bit pairs and adds them into 32 bits:
// per point this yields ar*br + ai*bi, the real part of a * conj(b).
170 realz = _mm_madd_epi16(x,y);
// Negate the odd (imaginary) slots of y: (br, bi) -> (br, -bi).
173 y = _mm_sign_epi16(y, conjugateSign);
// Swap each adjacent pair of 16-bit elements: (br, -bi) -> (-bi, br).
176 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
// Per point this yields ar*(-bi) + ai*br, the imaginary part of a * conj(b).
179 imagz = _mm_madd_epi16(x,y);
// Interleave (real, imag) for the first two points and convert to float.
182 ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
185 ret = _mm_mul_ps(ret, invScalar);
// _mm_store_ps requires the address to be 16-byte aligned.
188 _mm_store_ps((
float*)c, ret);
// Interleave (real, imag) for the last two points and convert to float.
192 ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
195 ret = _mm_mul_ps(ret, invScalar);
// NOTE(review): no update of `a`, `b` or `c` is visible between or after the
// two stores. Unless the pointers are advanced in lines missing from this
// view, this store overwrites the previous one - confirm against the full file.
198 _mm_store_ps((
float*)c, ret);
// Scalar tail: handle the num_points % 4 points the vector loop did not cover.
205 number = quarterPoints * 4;
206 float* cFloatPtr = (
float*)&cVector[number];
// The casts drop the const qualifier of the inputs; the pointers are only read through.
207 int8_t* a8Ptr = (int8_t*)&aVector[number];
208 int8_t* b8Ptr = (int8_t*)&bVector[number];
209 for(; number < num_points; number++){
// Each point is two consecutive int8 values (real, then imaginary), widened to float.
210 float aReal = (float)*a8Ptr++;
211 float aImag = (float)*a8Ptr++;
213 float bReal = (float)*b8Ptr++;
214 float bImag = (float)*b8Ptr++;
// NOTE(review): `temp` is not defined in the visible lines; presumably the
// complex product a * conj(b) built from the four floats above - confirm.
// The tail divides by scalar directly while the vector loop multiplies by the
// precomputed reciprocal, so the two paths can differ in rounding.
218 *cFloatPtr++ =
lv_creal(temp) / scalar;
219 *cFloatPtr++ =
lv_cimag(temp) / scalar;
/*
 * Portable scalar kernel guarded by LV_HAVE_GENERIC: walks num_points complex
 * points, reads each operand as two int8 values, and writes the real and
 * imaginary parts of `temp` multiplied by 1/scalar as interleaved floats.
 *
 * NOTE(review): the function name and the cVector / aVector parameters are
 * not present in the visible lines (the listing jumps from 225 to 229), and
 * `temp` is never defined here. Presumably `temp` is a * conj(b), matching
 * the SIMD kernels in this file - confirm against the complete file.
 */
225 #ifdef LV_HAVE_GENERIC 229 const lv_8sc_t* bVector,
const float scalar,
230 unsigned int num_points)
232 unsigned int number = 0;
// The output is addressed as a flat float array: two floats per complex point.
233 float* cPtr = (
float*)cVector;
// Reciprocal is evaluated in double (1.0 is a double literal) and narrowed to
// float once, so the loop multiplies instead of dividing.
234 const float invScalar = 1.0 / scalar;
// The casts drop the const qualifier of the inputs; the pointers are only read through.
235 int8_t* a8Ptr = (int8_t*)aVector;
236 int8_t* b8Ptr = (int8_t*)bVector;
237 for(number = 0; number < num_points; number++){
// Each point is two consecutive int8 values (real, then imaginary), widened to float.
238 float aReal = (float)*a8Ptr++;
239 float aImag = (float)*a8Ptr++;
241 float bReal = (float)*b8Ptr++;
242 float bImag = (float)*b8Ptr++;
// Store the scaled real part, then the scaled imaginary part.
246 *cPtr++ = (
lv_creal(temp) * invScalar);
247 *cPtr++ = (
lv_cimag(temp) * invScalar);
/*
 * AVX2 kernel (unaligned loads/stores): for each complex point computes
 * a * conj(b) from 8-bit integer complex inputs, converts the result to
 * float and scales it by 1/scalar, writing interleaved (real, imag) floats.
 * Uses _mm_loadu_si128 / _mm256_storeu_ps, which accept unaligned addresses.
 *
 * Parameters as visible in the signature below:
 *   cVector    - output buffer of lv_32fc_t
 *   aVector    - first input buffer of lv_8sc_t
 *   bVector    - second input buffer of lv_8sc_t (the operand that is conjugated)
 *   scalar     - divisor applied to every output value
 *   num_points - number of complex points to process
 *
 * NOTE(review): this text looks like an extracted source listing. The leading
 * integers appear to be listing line numbers, and the return type, braces and
 * the declarations of `a`, `b`, `c` and `temp` are not visible here. Confirm
 * every hedged remark below against the complete file.
 */
255 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H 256 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H 258 #include <inttypes.h> 263 #include <immintrin.h> 266 volk_8ic_x2_s32f_multiply_conjugate_32fc_u_avx2(
lv_32fc_t* cVector,
const lv_8sc_t* aVector,
267 const lv_8sc_t* bVector,
const float scalar,
268 unsigned int num_points)
270 unsigned int number = 0;
// Number of full 8-point groups handled by the vector loop.
271 const unsigned int oneEigthPoints = num_points / 8;
273 __m256i x, y, realz, imagz;
274 __m256 ret, retlo, rethi;
// _mm256_set_epi16 lists elements from highest to lowest, so element 0 is +1
// and element 1 is -1: even 16-bit slots keep their sign, odd slots are negated.
278 __m256i conjugateSign = _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
// Reciprocal is evaluated in double (1.0 is a double literal), then narrowed
// to float and broadcast to all eight lanes.
280 __m256 invScalar = _mm256_set1_ps(1.0/scalar);
282 for(;number < oneEigthPoints; number++){
// Load 16 bytes from each input (no alignment requirement) and sign-extend
// every 8-bit value to 16 bits.
// NOTE(review): `a` and `b` are not declared in the visible lines; presumably
// cursors into aVector / bVector - confirm.
284 x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
285 y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
// madd multiplies adjacent 16-bit pairs and adds them into 32 bits:
// per point this yields ar*br + ai*bi, the real part of a * conj(b).
288 realz = _mm256_madd_epi16(x,y);
// Negate the odd (imaginary) slots of y: (br, bi) -> (br, -bi).
291 y = _mm256_sign_epi16(y, conjugateSign);
// Swap each adjacent pair of 16-bit elements: (br, -bi) -> (-bi, br).
294 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
// Per point this yields ar*(-bi) + ai*br, the imaginary part of a * conj(b).
297 imagz = _mm256_madd_epi16(x,y);
// Interleave the lower two (real, imag) pairs of each 128-bit lane and convert to float.
300 retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
303 retlo = _mm256_mul_ps(retlo, invScalar);
// Same for the upper two (real, imag) pairs of each 128-bit lane.
306 rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
309 rethi = _mm256_mul_ps(rethi, invScalar);
// Selector 0b00100000 joins the low 128-bit lanes of retlo and rethi, which
// restores sequential order for the first four points of the group.
311 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
312 _mm256_storeu_ps((
float*)c, ret);
// Selector 0b00110001 joins the high 128-bit lanes: the last four points.
315 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
// NOTE(review): no update of `a`, `b` or `c` is visible between or after the
// two stores. Unless the pointers are advanced in lines missing from this
// view, this store overwrites the previous one - confirm against the full file.
316 _mm256_storeu_ps((
float*)c, ret);
// Scalar tail: handle the num_points % 8 points the vector loop did not cover.
323 number = oneEigthPoints * 8;
324 float* cFloatPtr = (
float*)&cVector[number];
// The casts drop the const qualifier of the inputs; the pointers are only read through.
325 int8_t* a8Ptr = (int8_t*)&aVector[number];
326 int8_t* b8Ptr = (int8_t*)&bVector[number];
327 for(; number < num_points; number++){
// Each point is two consecutive int8 values (real, then imaginary), widened to float.
328 float aReal = (float)*a8Ptr++;
329 float aImag = (float)*a8Ptr++;
331 float bReal = (float)*b8Ptr++;
332 float bImag = (float)*b8Ptr++;
// NOTE(review): `temp` is not defined in the visible lines; presumably the
// complex product a * conj(b) built from the four floats above - confirm.
// The tail divides by scalar directly while the vector loop multiplies by the
// precomputed reciprocal, so the two paths can differ in rounding.
336 *cFloatPtr++ =
lv_creal(temp) / scalar;
337 *cFloatPtr++ =
lv_cimag(temp) / scalar;
static void volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_x2_s32f_multiply_conjugate_32fc.h:228
#define lv_cmake(r, i)
Definition: volk_complex.h:64
float complex lv_32fc_t
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:83
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:57
#define lv_cimag(x)
Definition: volk_complex.h:85