Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32fc_conjugate_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
68 #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
69 #define INCLUDED_volk_32fc_conjugate_32fc_u_H
70 
71 #include <float.h>
72 #include <inttypes.h>
73 #include <stdio.h>
74 #include <volk/volk_complex.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector,
80  const lv_32fc_t* aVector,
81  unsigned int num_points)
82 {
83  unsigned int number = 0;
84  const unsigned int quarterPoints = num_points / 4;
85 
86  __m256 x;
87  lv_32fc_t* c = cVector;
88  const lv_32fc_t* a = aVector;
89 
90  __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
91 
92  for (; number < quarterPoints; number++) {
93 
94  x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
95 
96  x = _mm256_xor_ps(x, conjugator); // conjugate register
97 
98  _mm256_storeu_ps((float*)c, x); // Store the results back into the C container
99 
100  a += 4;
101  c += 4;
102  }
103 
104  number = quarterPoints * 4;
105 
106  for (; number < num_points; number++) {
107  *c++ = lv_conj(*a++);
108  }
109 }
110 #endif /* LV_HAVE_AVX */
111 
112 #ifdef LV_HAVE_SSE3
113 #include <pmmintrin.h>
114 
115 static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector,
116  const lv_32fc_t* aVector,
117  unsigned int num_points)
118 {
119  unsigned int number = 0;
120  const unsigned int halfPoints = num_points / 2;
121 
122  __m128 x;
123  lv_32fc_t* c = cVector;
124  const lv_32fc_t* a = aVector;
125 
126  __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
127 
128  for (; number < halfPoints; number++) {
129 
130  x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
131 
132  x = _mm_xor_ps(x, conjugator); // conjugate register
133 
134  _mm_storeu_ps((float*)c, x); // Store the results back into the C container
135 
136  a += 2;
137  c += 2;
138  }
139 
140  if ((num_points % 2) != 0) {
141  *c = lv_conj(*a);
142  }
143 }
144 #endif /* LV_HAVE_SSE3 */
145 
146 #ifdef LV_HAVE_GENERIC
147 
148 static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector,
149  const lv_32fc_t* aVector,
150  unsigned int num_points)
151 {
152  lv_32fc_t* cPtr = cVector;
153  const lv_32fc_t* aPtr = aVector;
154  unsigned int number = 0;
155 
156  for (number = 0; number < num_points; number++) {
157  *cPtr++ = lv_conj(*aPtr++);
158  }
159 }
160 #endif /* LV_HAVE_GENERIC */
161 
162 
163 #endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
164 #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
165 #define INCLUDED_volk_32fc_conjugate_32fc_a_H
166 
167 #include <float.h>
168 #include <inttypes.h>
169 #include <stdio.h>
170 #include <volk/volk_complex.h>
171 
172 #ifdef LV_HAVE_AVX
173 #include <immintrin.h>
174 
175 static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector,
176  const lv_32fc_t* aVector,
177  unsigned int num_points)
178 {
179  unsigned int number = 0;
180  const unsigned int quarterPoints = num_points / 4;
181 
182  __m256 x;
183  lv_32fc_t* c = cVector;
184  const lv_32fc_t* a = aVector;
185 
186  __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
187 
188  for (; number < quarterPoints; number++) {
189 
190  x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
191 
192  x = _mm256_xor_ps(x, conjugator); // conjugate register
193 
194  _mm256_store_ps((float*)c, x); // Store the results back into the C container
195 
196  a += 4;
197  c += 4;
198  }
199 
200  number = quarterPoints * 4;
201 
202  for (; number < num_points; number++) {
203  *c++ = lv_conj(*a++);
204  }
205 }
206 #endif /* LV_HAVE_AVX */
207 
208 #ifdef LV_HAVE_SSE3
209 #include <pmmintrin.h>
210 
211 static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector,
212  const lv_32fc_t* aVector,
213  unsigned int num_points)
214 {
215  unsigned int number = 0;
216  const unsigned int halfPoints = num_points / 2;
217 
218  __m128 x;
219  lv_32fc_t* c = cVector;
220  const lv_32fc_t* a = aVector;
221 
222  __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
223 
224  for (; number < halfPoints; number++) {
225 
226  x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
227 
228  x = _mm_xor_ps(x, conjugator); // conjugate register
229 
230  _mm_store_ps((float*)c, x); // Store the results back into the C container
231 
232  a += 2;
233  c += 2;
234  }
235 
236  if ((num_points % 2) != 0) {
237  *c = lv_conj(*a);
238  }
239 }
240 #endif /* LV_HAVE_SSE3 */
241 
242 #ifdef LV_HAVE_NEON
243 #include <arm_neon.h>
244 
245 static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector,
246  const lv_32fc_t* aVector,
247  unsigned int num_points)
248 {
249  unsigned int number;
250  const unsigned int quarterPoints = num_points / 4;
251 
252  float32x4x2_t x;
253  lv_32fc_t* c = cVector;
254  const lv_32fc_t* a = aVector;
255 
256  for (number = 0; number < quarterPoints; number++) {
257  __VOLK_PREFETCH(a + 4);
258  x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di
259 
260  // xor the imaginary lane
261  x.val[1] = vnegq_f32(x.val[1]);
262 
263  vst2q_f32((float*)c, x); // Store the results back into the C container
264 
265  a += 4;
266  c += 4;
267  }
268 
269  for (number = quarterPoints * 4; number < num_points; number++) {
270  *c++ = lv_conj(*a++);
271  }
272 }
273 #endif /* LV_HAVE_NEON */
274 
275 
276 #ifdef LV_HAVE_GENERIC
277 
278 static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector,
279  const lv_32fc_t* aVector,
280  unsigned int num_points)
281 {
282  lv_32fc_t* cPtr = cVector;
283  const lv_32fc_t* aPtr = aVector;
284  unsigned int number = 0;
285 
286  for (number = 0; number < num_points; number++) {
287  *cPtr++ = lv_conj(*aPtr++);
288  }
289 }
290 #endif /* LV_HAVE_GENERIC */
291 
292 
293 #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
volk_32fc_conjugate_32fc_u_sse3
static void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:115
volk_32fc_conjugate_32fc_u_avx
static void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:79
volk_32fc_conjugate_32fc_a_neon
static void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:245
volk_32fc_conjugate_32fc_a_generic
static void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:278
__VOLK_PREFETCH
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
lv_conj
#define lv_conj(x)
Definition: volk_complex.h:96
volk_32fc_conjugate_32fc_a_avx
static void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:175
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70
volk_complex.h
volk_32fc_conjugate_32fc_a_sse3
static void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:211
volk_32fc_conjugate_32fc_generic
static void volk_32fc_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:148