70 #ifndef INCLUDED_volk_32f_binary_slicer_32i_H 71 #define INCLUDED_volk_32f_binary_slicer_32i_H 74 #ifdef LV_HAVE_GENERIC 80 const float* aPtr = aVector;
81 unsigned int number = 0;
83 for(number = 0; number < num_points; number++){
95 #ifdef LV_HAVE_GENERIC 101 const float* aPtr = aVector;
102 unsigned int number = 0;
104 for(number = 0; number < num_points; number++){
105 *cPtr++ = (*aPtr++ >= 0);
112 #include <emmintrin.h> 118 const float* aPtr = aVector;
119 unsigned int number = 0;
121 unsigned int quarter_points = num_points / 4;
123 __m128i res_i, binary_i;
125 zero_val = _mm_set1_ps (0.0f);
127 for(number = 0; number < quarter_points; number++){
128 a_val = _mm_load_ps(aPtr);
130 res_f = _mm_cmpge_ps (a_val, zero_val);
131 res_i = _mm_cvtps_epi32 (res_f);
132 binary_i = _mm_srli_epi32 (res_i, 31);
134 _mm_store_si128((__m128i*)cPtr, binary_i);
140 for(number = quarter_points * 4; number < num_points; number++){
153 #include <immintrin.h> 159 const float* aPtr = aVector;
160 unsigned int number = 0;
162 unsigned int quarter_points = num_points / 8;
163 __m256 a_val, res_f, binary_f;
165 __m256 zero_val, one_val;
166 zero_val = _mm256_set1_ps (0.0f);
167 one_val = _mm256_set1_ps (1.0f);
169 for(number = 0; number < quarter_points; number++){
170 a_val = _mm256_load_ps(aPtr);
172 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
173 binary_f = _mm256_and_ps (res_f, one_val);
174 binary_i = _mm256_cvtps_epi32(binary_f);
176 _mm256_store_si256((__m256i *)cPtr, binary_i);
182 for(number = quarter_points * 8; number < num_points; number++){
195 #include <emmintrin.h> 201 const float* aPtr = aVector;
202 unsigned int number = 0;
204 unsigned int quarter_points = num_points / 4;
206 __m128i res_i, binary_i;
208 zero_val = _mm_set1_ps (0.0f);
210 for(number = 0; number < quarter_points; number++){
211 a_val = _mm_loadu_ps(aPtr);
213 res_f = _mm_cmpge_ps (a_val, zero_val);
214 res_i = _mm_cvtps_epi32 (res_f);
215 binary_i = _mm_srli_epi32 (res_i, 31);
217 _mm_storeu_si128((__m128i*)cPtr, binary_i);
223 for(number = quarter_points * 4; number < num_points; number++){
236 #include <immintrin.h> 242 const float* aPtr = aVector;
243 unsigned int number = 0;
245 unsigned int quarter_points = num_points / 8;
246 __m256 a_val, res_f, binary_f;
248 __m256 zero_val, one_val;
249 zero_val = _mm256_set1_ps (0.0f);
250 one_val = _mm256_set1_ps (1.0f);
252 for(number = 0; number < quarter_points; number++){
253 a_val = _mm256_loadu_ps(aPtr);
255 res_f = _mm256_cmp_ps (a_val, zero_val, 13);
256 binary_f = _mm256_and_ps (res_f, one_val);
257 binary_i = _mm256_cvtps_epi32(binary_f);
259 _mm256_storeu_si256((__m256i*)cPtr, binary_i);
265 for(number = quarter_points * 8; number < num_points; number++){
static void volk_32f_binary_slicer_32i_generic_branchless(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:98
static void volk_32f_binary_slicer_32i_generic(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:77
static void volk_32f_binary_slicer_32i_u_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:198
static void volk_32f_binary_slicer_32i_u_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:239
static void volk_32f_binary_slicer_32i_a_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:115
static void volk_32f_binary_slicer_32i_a_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:156