Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_conjugate_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
68 #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
69 #define INCLUDED_volk_32fc_conjugate_32fc_u_H
70 
71 #include <inttypes.h>
72 #include <stdio.h>
73 #include <volk/volk_complex.h>
74 #include <float.h>
75 
76 #ifdef LV_HAVE_AVX
77 #include <immintrin.h>
78 
79 static inline void
80 volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
81 {
82  unsigned int number = 0;
83  const unsigned int quarterPoints = num_points / 4;
84 
85  __m256 x;
86  lv_32fc_t* c = cVector;
87  const lv_32fc_t* a = aVector;
88 
89  __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
90 
91  for(;number < quarterPoints; number++){
92 
93  x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
94 
95  x = _mm256_xor_ps(x, conjugator); // conjugate register
96 
97  _mm256_storeu_ps((float*)c,x); // Store the results back into the C container
98 
99  a += 4;
100  c += 4;
101  }
102 
103  number = quarterPoints * 4;
104 
105  for(;number < num_points; number++) {
106  *c++ = lv_conj(*a++);
107  }
108 }
109 #endif /* LV_HAVE_AVX */
110 
111 #ifdef LV_HAVE_SSE3
112 #include <pmmintrin.h>
113 
114 static inline void
115 volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
116 {
117  unsigned int number = 0;
118  const unsigned int halfPoints = num_points / 2;
119 
120  __m128 x;
121  lv_32fc_t* c = cVector;
122  const lv_32fc_t* a = aVector;
123 
124  __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
125 
126  for(;number < halfPoints; number++){
127 
128  x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
129 
130  x = _mm_xor_ps(x, conjugator); // conjugate register
131 
132  _mm_storeu_ps((float*)c,x); // Store the results back into the C container
133 
134  a += 2;
135  c += 2;
136  }
137 
138  if((num_points % 2) != 0) {
139  *c = lv_conj(*a);
140  }
141 }
142 #endif /* LV_HAVE_SSE3 */
143 
144 #ifdef LV_HAVE_GENERIC
145 
146 static inline void
147 volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
148 {
149  lv_32fc_t* cPtr = cVector;
150  const lv_32fc_t* aPtr = aVector;
151  unsigned int number = 0;
152 
153  for(number = 0; number < num_points; number++){
154  *cPtr++ = lv_conj(*aPtr++);
155  }
156 }
157 #endif /* LV_HAVE_GENERIC */
158 
159 
160 #endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
161 #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
162 #define INCLUDED_volk_32fc_conjugate_32fc_a_H
163 
164 #include <inttypes.h>
165 #include <stdio.h>
166 #include <volk/volk_complex.h>
167 #include <float.h>
168 
169 #ifdef LV_HAVE_AVX
170 #include <immintrin.h>
171 
172 static inline void
173 volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
174 {
175  unsigned int number = 0;
176  const unsigned int quarterPoints = num_points / 4;
177 
178  __m256 x;
179  lv_32fc_t* c = cVector;
180  const lv_32fc_t* a = aVector;
181 
182  __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
183 
184  for(;number < quarterPoints; number++){
185 
186  x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
187 
188  x = _mm256_xor_ps(x, conjugator); // conjugate register
189 
190  _mm256_store_ps((float*)c,x); // Store the results back into the C container
191 
192  a += 4;
193  c += 4;
194  }
195 
196  number = quarterPoints * 4;
197 
198  for(;number < num_points; number++) {
199  *c++ = lv_conj(*a++);
200  }
201 }
202 #endif /* LV_HAVE_AVX */
203 
204 #ifdef LV_HAVE_SSE3
205 #include <pmmintrin.h>
206 
207 static inline void
208 volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
209 {
210  unsigned int number = 0;
211  const unsigned int halfPoints = num_points / 2;
212 
213  __m128 x;
214  lv_32fc_t* c = cVector;
215  const lv_32fc_t* a = aVector;
216 
217  __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
218 
219  for(;number < halfPoints; number++){
220 
221  x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
222 
223  x = _mm_xor_ps(x, conjugator); // conjugate register
224 
225  _mm_store_ps((float*)c,x); // Store the results back into the C container
226 
227  a += 2;
228  c += 2;
229  }
230 
231  if((num_points % 2) != 0) {
232  *c = lv_conj(*a);
233  }
234 }
235 #endif /* LV_HAVE_SSE3 */
236 
237 #ifdef LV_HAVE_NEON
238 #include <arm_neon.h>
239 
240 static inline void
241 volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
242 {
243  unsigned int number;
244  const unsigned int quarterPoints = num_points / 4;
245 
246  float32x4x2_t x;
247  lv_32fc_t* c = cVector;
248  const lv_32fc_t* a = aVector;
249 
250  for(number=0; number < quarterPoints; number++){
251  __VOLK_PREFETCH(a+4);
252  x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di
253 
254  // xor the imaginary lane
255  x.val[1] = vnegq_f32( x.val[1]);
256 
257  vst2q_f32((float*)c,x); // Store the results back into the C container
258 
259  a += 4;
260  c += 4;
261  }
262 
263  for(number=quarterPoints*4; number < num_points; number++){
264  *c++ = lv_conj(*a++);
265  }
266 }
267 #endif /* LV_HAVE_NEON */
268 
269 
270 #ifdef LV_HAVE_GENERIC
271 
272 static inline void
273 volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points)
274 {
275  lv_32fc_t* cPtr = cVector;
276  const lv_32fc_t* aPtr = aVector;
277  unsigned int number = 0;
278 
279  for(number = 0; number < num_points; number++){
280  *cPtr++ = lv_conj(*aPtr++);
281  }
282 }
283 #endif /* LV_HAVE_GENERIC */
284 
285 
286 #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
static void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:80
static void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:208
#define lv_conj(x)
Definition: volk_complex.h:87
static void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:115
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:39
static void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:173
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:147
static void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:241
static void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:273