Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32f_binary_slicer_32i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
70 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H
71 #define INCLUDED_volk_32f_binary_slicer_32i_H
72 
73 
74 #ifdef LV_HAVE_GENERIC
75 
/*!
 * \brief Binary slicer, portable C implementation.
 *
 * For every index i in [0, num_points), stores 1 in cVector[i] when
 * aVector[i] >= 0 and 0 otherwise.
 *
 * \param cVector    Output buffer; num_points ints are written.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_generic(int* cVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        /* Non-negative samples (including -0.0f) slice to 1, the rest to 0. */
        cVector[idx] = (aVector[idx] >= 0) ? 1 : 0;
    }
}
92 #endif /* LV_HAVE_GENERIC */
93 
94 
95 #ifdef LV_HAVE_GENERIC
96 
/*!
 * \brief Binary slicer, portable C implementation without an if/else.
 *
 * For each of the num_points input samples, stores the integer result of
 * the comparison (sample >= 0) -- i.e. 1 or 0 -- in the matching output slot.
 *
 * \param cVector    Output buffer; num_points ints are written.
 * \param aVector    Input buffer; num_points floats are read.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_generic_branchless(int* cVector,
                                                                 const float* aVector,
                                                                 unsigned int num_points)
{
    const float* src = aVector;
    int* dst = cVector;
    unsigned int remaining;

    for (remaining = num_points; remaining != 0; remaining--) {
        /* A C comparison already evaluates to exactly 0 or 1. */
        const int bit = (*src >= 0);
        *dst = bit;
        src++;
        dst++;
    }
}
109 #endif /* LV_HAVE_GENERIC */
110 
111 
112 #ifdef LV_HAVE_SSE2
113 #include <emmintrin.h>
114 
/*!
 * \brief Binary slicer, SSE2 kernel using aligned loads and stores.
 *
 * For every index i in [0, num_points), stores 1 in cVector[i] when
 * aVector[i] >= 0 and 0 otherwise. Four samples are handled per vector
 * iteration; the remaining (num_points % 4) samples use scalar code.
 *
 * \param cVector    Output buffer; num_points ints are written. Accessed with
 *                   _mm_store_si128, so it must be 16-byte aligned.
 * \param aVector    Input buffer; num_points floats are read. Accessed with
 *                   _mm_load_ps, so it must be 16-byte aligned.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);
    __m128 a_val, res_f;
    __m128i binary_i;

    for (number = 0; number < quarter_points; number++) {
        a_val = _mm_load_ps(aPtr);

        /* Each lane becomes all ones where a >= 0 and all zeros otherwise. */
        res_f = _mm_cmpge_ps(a_val, zero_val);

        /*
         * Reinterpret the mask bits as integers instead of converting them.
         * An all-ones lane is a NaN bit pattern, so a float->int conversion
         * would only yield the wanted top bit via the "integer indefinite"
         * result and would raise the invalid-operation flag for every
         * non-negative sample. The cast is a pure bit reinterpretation.
         */
        binary_i = _mm_srli_epi32(_mm_castps_si128(res_f), 31);

        _mm_store_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        if (*aPtr++ >= 0) {
            *cPtr++ = 1;
        } else {
            *cPtr++ = 0;
        }
    }
}
150 #endif /* LV_HAVE_SSE2 */
151 
152 
153 #ifdef LV_HAVE_AVX
154 #include <immintrin.h>
155 
/*!
 * \brief Binary slicer, AVX kernel using aligned loads and stores.
 *
 * For every index i in [0, num_points), stores 1 in cVector[i] when
 * aVector[i] >= 0 and 0 otherwise. Eight samples are handled per vector
 * iteration; the remaining (num_points % 8) samples use scalar code.
 *
 * \param cVector    Output buffer; num_points ints are written. Accessed with
 *                   _mm256_store_si256, so it must be 32-byte aligned.
 * \param aVector    Input buffer; num_points floats are read. Accessed with
 *                   _mm256_load_ps, so it must be 32-byte aligned.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    unsigned int i;

    for (i = 0; i < eighth_points; i++) {
        const __m256 samples = _mm256_load_ps(aVector + 8 * i);

        /* All-ones lanes where sample >= 0, all-zero lanes elsewhere. */
        const __m256 mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f or 0.0f per lane, which converts to 1 or 0. */
        const __m256i sliced = _mm256_cvtps_epi32(_mm256_and_ps(mask, ones));

        _mm256_store_si256((__m256i*)(cVector + 8 * i), sliced);
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (i = eighth_points * 8; i < num_points; i++) {
        cVector[i] = (aVector[i] >= 0) ? 1 : 0;
    }
}
192 #endif /* LV_HAVE_AVX */
193 
194 
195 #ifdef LV_HAVE_SSE2
196 #include <emmintrin.h>
197 
/*!
 * \brief Binary slicer, SSE2 kernel using unaligned loads and stores.
 *
 * For every index i in [0, num_points), stores 1 in cVector[i] when
 * aVector[i] >= 0 and 0 otherwise. Four samples are handled per vector
 * iteration; the remaining (num_points % 4) samples use scalar code.
 *
 * \param cVector    Output buffer; num_points ints are written
 *                   (via _mm_storeu_si128, no alignment requirement).
 * \param aVector    Input buffer; num_points floats are read
 *                   (via _mm_loadu_ps, no alignment requirement).
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);
    __m128 a_val, res_f;
    __m128i binary_i;

    for (number = 0; number < quarter_points; number++) {
        a_val = _mm_loadu_ps(aPtr);

        /* Each lane becomes all ones where a >= 0 and all zeros otherwise. */
        res_f = _mm_cmpge_ps(a_val, zero_val);

        /*
         * Reinterpret the mask bits as integers instead of converting them.
         * An all-ones lane is a NaN bit pattern, so a float->int conversion
         * would only yield the wanted top bit via the "integer indefinite"
         * result and would raise the invalid-operation flag for every
         * non-negative sample. The cast is a pure bit reinterpretation.
         */
        binary_i = _mm_srli_epi32(_mm_castps_si128(res_f), 31);

        _mm_storeu_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        if (*aPtr++ >= 0) {
            *cPtr++ = 1;
        } else {
            *cPtr++ = 0;
        }
    }
}
233 #endif /* LV_HAVE_SSE2 */
234 
235 
236 #ifdef LV_HAVE_AVX
237 #include <immintrin.h>
238 
/*!
 * \brief Binary slicer, AVX kernel using unaligned loads and stores.
 *
 * For every index i in [0, num_points), stores 1 in cVector[i] when
 * aVector[i] >= 0 and 0 otherwise. Eight samples are handled per vector
 * iteration; the remaining (num_points % 8) samples use scalar code.
 *
 * \param cVector    Output buffer; num_points ints are written
 *                   (via _mm256_storeu_si256, no alignment requirement).
 * \param aVector    Input buffer; num_points floats are read
 *                   (via _mm256_loadu_ps, no alignment requirement).
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    unsigned int i;

    for (i = 0; i < eighth_points; i++) {
        const __m256 samples = _mm256_loadu_ps(aVector + 8 * i);

        /* All-ones lanes where sample >= 0, all-zero lanes elsewhere. */
        const __m256 mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f or 0.0f per lane, which converts to 1 or 0. */
        const __m256i sliced = _mm256_cvtps_epi32(_mm256_and_ps(mask, ones));

        _mm256_storeu_si256((__m256i*)(cVector + 8 * i), sliced);
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (i = eighth_points * 8; i < num_points; i++) {
        cVector[i] = (aVector[i] >= 0) ? 1 : 0;
    }
}
275 #endif /* LV_HAVE_AVX */
276 
277 
278 #endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
volk_32f_binary_slicer_32i_a_avx
static void volk_32f_binary_slicer_32i_a_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:156
volk_32f_binary_slicer_32i_u_sse2
static void volk_32f_binary_slicer_32i_u_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:198
volk_32f_binary_slicer_32i_generic
static void volk_32f_binary_slicer_32i_generic(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:76
volk_32f_binary_slicer_32i_u_avx
static void volk_32f_binary_slicer_32i_u_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:239
volk_32f_binary_slicer_32i_generic_branchless
static void volk_32f_binary_slicer_32i_generic_branchless(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:97
volk_32f_binary_slicer_32i_a_sse2
static void volk_32f_binary_slicer_32i_a_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:115