Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_binary_slicer_32i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
71 #define INCLUDED_volk_32f_binary_slicer_32i_H
72 
73 
74 #ifdef LV_HAVE_GENERIC
75 
/*!
 * \brief Binary slicer, portable reference implementation (explicit branch).
 *
 * For each of the first num_points samples, stores 1 in cVector when the
 * sample is greater than or equal to zero and 0 otherwise.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_generic(int* cVector, const float* aVector, unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        int decision = 0;

        /* The test is deliberately ">= 0" (not the negation of "< 0"):
         * a NaN sample fails every ordered comparison and must map to 0. */
        if (aVector[idx] >= 0) {
            decision = 1;
        }

        cVector[idx] = decision;
    }
}
92 #endif /* LV_HAVE_GENERIC */
93 
94 
95 #ifdef LV_HAVE_GENERIC
96 
/*!
 * \brief Binary slicer, portable implementation without an if/else.
 *
 * Stores the int value of the comparison (sample >= 0) directly, so each
 * output is 1 for samples greater than or equal to zero and 0 otherwise.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_generic_branchless(int* cVector, const float* aVector, unsigned int num_points)
{
    const float* in = aVector;
    const float* const in_end = aVector + num_points;
    int* out = cVector;

    while (in != in_end) {
        /* A relational expression in C already evaluates to the int 0 or 1. */
        *out = (*in >= 0);
        ++in;
        ++out;
    }
}
108 #endif /* LV_HAVE_GENERIC */
109 
110 
111 #ifdef LV_HAVE_SSE2
112 #include <emmintrin.h>
113 
/*!
 * \brief Binary slicer, SSE2 implementation for aligned buffers.
 *
 * For each of the first num_points samples, stores 1 in cVector when the
 * sample is greater than or equal to zero and 0 otherwise. Samples are
 * handled four at a time; any remainder is handled by a scalar loop.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 *                   Accessed with _mm_store_si128, so it must be 16-byte
 *                   aligned.
 * \param aVector    Input buffer; num_points floats are read. Accessed with
 *                   _mm_load_ps, so it must be 16-byte aligned.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_a_sse2(int* cVector, const float* aVector, unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);

    for (number = 0; number < quarter_points; number++) {
        const __m128 a_val = _mm_load_ps(aPtr);

        /* Each lane becomes 0xFFFFFFFF where a_val >= 0 and 0 elsewhere. */
        const __m128 ge_mask = _mm_cmpge_ps(a_val, zero_val);

        /* Reinterpret the mask bits as integers instead of converting them
         * with _mm_cvtps_epi32: the all-ones mask is a NaN bit pattern, and a
         * float->int conversion of it raises the invalid-operation flag and
         * only yields a usable value through the "integer indefinite"
         * result. Shifting the raw mask right by 31 leaves exactly 1 or 0. */
        const __m128i binary_i = _mm_srli_epi32(_mm_castps_si128(ge_mask), 31);

        _mm_store_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the last num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++ >= 0) ? 1 : 0;
    }
}
149 #endif /* LV_HAVE_SSE2 */
150 
151 
152 #ifdef LV_HAVE_AVX
153 #include <immintrin.h>
154 
/*!
 * \brief Binary slicer, AVX implementation for aligned buffers.
 *
 * For each of the first num_points samples, stores 1 in cVector when the
 * sample is greater than or equal to zero and 0 otherwise. Samples are
 * handled eight at a time; any remainder is handled by a scalar loop.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 *                   Accessed with _mm256_store_si256, so it must be 32-byte
 *                   aligned.
 * \param aVector    Input buffer; num_points floats are read. Accessed with
 *                   _mm256_load_ps, so it must be 32-byte aligned.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_a_avx(int* cVector, const float* aVector, unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    unsigned int i;

    for (i = 0; i < eighth_points; i++) {
        const float* src = aVector + 8 * i;
        int* dst = cVector + 8 * i;

        const __m256 samples = _mm256_load_ps(src);

        /* _CMP_GE_OS (predicate 13): ordered, signaling "greater or equal".
         * Lanes that pass become all-ones, the others all-zeros. */
        const __m256 ge_mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f or 0.0f per lane, which then converts
         * exactly to the integer 1 or 0. */
        const __m256 one_or_zero = _mm256_and_ps(ge_mask, ones);

        _mm256_store_si256((__m256i*)dst, _mm256_cvtps_epi32(one_or_zero));
    }

    /* Scalar tail for the last num_points % 8 samples. */
    for (i = eighth_points * 8; i < num_points; i++) {
        cVector[i] = (aVector[i] >= 0) ? 1 : 0;
    }
}
191 #endif /* LV_HAVE_AVX */
192 
193 
194 #ifdef LV_HAVE_SSE2
195 #include <emmintrin.h>
196 
/*!
 * \brief Binary slicer, SSE2 implementation for unaligned buffers.
 *
 * For each of the first num_points samples, stores 1 in cVector when the
 * sample is greater than or equal to zero and 0 otherwise. Samples are
 * handled four at a time with unaligned loads and stores; any remainder is
 * handled by a scalar loop.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_u_sse2(int* cVector, const float* aVector, unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);

    for (number = 0; number < quarter_points; number++) {
        const __m128 a_val = _mm_loadu_ps(aPtr);

        /* Each lane becomes 0xFFFFFFFF where a_val >= 0 and 0 elsewhere. */
        const __m128 ge_mask = _mm_cmpge_ps(a_val, zero_val);

        /* Reinterpret the mask bits as integers instead of converting them
         * with _mm_cvtps_epi32: the all-ones mask is a NaN bit pattern, and a
         * float->int conversion of it raises the invalid-operation flag and
         * only yields a usable value through the "integer indefinite"
         * result. Shifting the raw mask right by 31 leaves exactly 1 or 0. */
        const __m128i binary_i = _mm_srli_epi32(_mm_castps_si128(ge_mask), 31);

        _mm_storeu_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the last num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++ >= 0) ? 1 : 0;
    }
}
232 #endif /* LV_HAVE_SSE2 */
233 
234 
235 #ifdef LV_HAVE_AVX
236 #include <immintrin.h>
237 
/*!
 * \brief Binary slicer, AVX implementation for unaligned buffers.
 *
 * For each of the first num_points samples, stores 1 in cVector when the
 * sample is greater than or equal to zero and 0 otherwise. Samples are
 * handled eight at a time with unaligned loads and stores; any remainder is
 * handled by a scalar loop.
 *
 * \param cVector    Output buffer; num_points ints are written, each 0 or 1.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of samples to slice. Zero is allowed and writes
 *                   nothing.
 */
static inline void
volk_32f_binary_slicer_32i_u_avx(int* cVector, const float* aVector, unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    unsigned int i;

    for (i = 0; i < eighth_points; i++) {
        const float* src = aVector + 8 * i;
        int* dst = cVector + 8 * i;

        const __m256 samples = _mm256_loadu_ps(src);

        /* _CMP_GE_OS (predicate 13): ordered, signaling "greater or equal".
         * Lanes that pass become all-ones, the others all-zeros. */
        const __m256 ge_mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f or 0.0f per lane, which then converts
         * exactly to the integer 1 or 0. */
        const __m256 one_or_zero = _mm256_and_ps(ge_mask, ones);

        _mm256_storeu_si256((__m256i*)dst, _mm256_cvtps_epi32(one_or_zero));
    }

    /* Scalar tail for the last num_points % 8 samples. */
    for (i = eighth_points * 8; i < num_points; i++) {
        cVector[i] = (aVector[i] >= 0) ? 1 : 0;
    }
}
274 #endif /* LV_HAVE_AVX */
275 
276 
277 #endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
static void volk_32f_binary_slicer_32i_generic_branchless(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:98
static void volk_32f_binary_slicer_32i_generic(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:77
static void volk_32f_binary_slicer_32i_u_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:198
static void volk_32f_binary_slicer_32i_u_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:239
static void volk_32f_binary_slicer_32i_a_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:115
static void volk_32f_binary_slicer_32i_a_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:156