Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_sse3_intrinsics.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2015 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*
24  * This file is intended to hold SSE3 helper routines that are composed from
25  * several intrinsics. Use them in VOLK kernels to avoid copy-and-paste duplication.
26  */
27 
28 #ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
29 #define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
30 #include <pmmintrin.h>
31 
/*
 * Multiply two pairs of packed single-precision complex numbers.
 *
 * Both operands hold two complex values in interleaved (re, im) order:
 *   x = ar, ai, br, bi
 *   y = cr, ci, dr, di
 *
 * Returns (a * c), (b * d) in the same interleaved layout:
 *   ar*cr - ai*ci, ai*cr + ar*ci, br*dr - bi*di, bi*dr + br*di
 */
static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y)
{
    /* Broadcast the real parts of y across each complex slot: cr,cr,dr,dr */
    const __m128 y_real = _mm_moveldup_ps(y);
    /* Broadcast the imaginary parts of y across each slot: ci,ci,di,di */
    const __m128 y_imag = _mm_movehdup_ps(y);

    /* ar*cr, ai*cr, br*dr, bi*dr */
    const __m128 prod_real = _mm_mul_ps(x, y_real);

    /* Swap re/im within each complex value of x: ai,ar,bi,br (selector 0xB1) */
    const __m128 x_swapped = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));

    /* ai*ci, ar*ci, bi*di, br*di */
    const __m128 prod_imag = _mm_mul_ps(x_swapped, y_imag);

    /* addsub subtracts in the even lanes and adds in the odd lanes */
    return _mm_addsub_ps(prod_real, prod_imag);
}
43 
44 static inline __m128
45 _mm_complexconjugatemul_ps(__m128 x, __m128 y)
46 {
47  const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
48  y = _mm_xor_ps(y, conjugator); // conjugate y
49  return _mm_complexmul_ps(x, y);
50 }
51 
/*
 * Compute the squared magnitude of four packed complex numbers.
 *
 * cplxValue1 and cplxValue2 each hold two complex values in interleaved
 * (re, im) order. The result holds re*re + im*im for the two values of
 * cplxValue1 in the low two lanes, followed by the two values of
 * cplxValue2 in the high two lanes.
 */
static inline __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
{
    /* Square every component: re^2, im^2, re^2, im^2 */
    const __m128 squares_lo = _mm_mul_ps(cplxValue1, cplxValue1);
    const __m128 squares_hi = _mm_mul_ps(cplxValue2, cplxValue2);

    /* Horizontal add sums each adjacent (re^2, im^2) pair */
    return _mm_hadd_ps(squares_lo, squares_hi);
}
58 
59 static inline __m128
60 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
61  return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
62 }
63 
64 #endif /* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:53
static __m128 _mm_complexmul_ps(__m128 x, __m128 y)
Definition: volk_sse3_intrinsics.h:33
static __m128 _mm_complexconjugatemul_ps(__m128 x, __m128 y)
Definition: volk_sse3_intrinsics.h:45
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:60