Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_64u_popcnt.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
60 #ifndef INCLUDED_volk_64u_popcnt_a_H
61 #define INCLUDED_volk_64u_popcnt_a_H
62 
63 #include <stdio.h>
64 #include <inttypes.h>
65 
66 
67 #ifdef LV_HAVE_GENERIC
68 
69 
/*!
 * \brief Counts the set bits of a 64-bit word (portable implementation).
 *
 * The word is handled as two 32-bit halves. Each half is reduced with a
 * branch-free parallel bit count and the two partial counts are added.
 * The original author's note states this approach was chosen because it is
 * faster than a lookup table.
 *
 * \param ret   Destination for the bit count (0..64); dereferenced
 *              unconditionally, so it must point to valid storage.
 * \param value Word whose set bits are counted.
 */
static inline void
volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
  uint64_t total = 0;
  unsigned int shift;

  /* shift == 0 selects the low half, shift == 32 the high half. */
  for (shift = 0; shift < 64; shift += 32) {
    uint32_t bits = (uint32_t)(value >> shift);

    /* Sum adjacent bits into 2-bit fields, then 2-bit fields into nibbles. */
    bits = (bits & 0x55555555u) + ((bits >> 1) & 0x55555555u);
    bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
    /* Each byte now receives the count of its own eight bits (0..8). */
    bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;
    /* Fold the four byte counts together; upper bytes hold partial sums
     * that the final mask discards. */
    bits += bits >> 8;
    /* The total for 32 bits is at most 32, which fits in six bits. */
    bits = (bits + (bits >> 16)) & 0x0000003Fu;

    total += bits;
  }

  *ret = total;
}
97 
98 #endif /*LV_HAVE_GENERIC*/
99 
100 
101 #if LV_HAVE_SSE4_2 && LV_HAVE_64
102 
103 #include <nmmintrin.h>
104 
105 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
106 {
107  *ret = _mm_popcnt_u64(value);
108 }
109 
110 #endif /*LV_HAVE_SSE4_2 && LV_HAVE_64*/
111 
112 
113 #if LV_HAVE_NEON
114 #include <arm_neon.h>
/*!
 * \brief Counts the set bits of a 64-bit word using NEON intrinsics.
 *
 * The word is loaded as eight bytes, the bits of each byte are counted,
 * and the byte counts are widened and pairwise-added (8 -> 4 -> 2 -> 1
 * lanes) until a single 64-bit total remains.
 *
 * \param ret   Destination for the bit count; written with vst1_u64.
 * \param value Word whose set bits are counted.
 */
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
{
  /* Reinterpret the word as eight bytes. The cast keeps the const
   * qualifier: vld1_u8 only reads through the pointer. */
  const uint8x8_t input_val = vld1_u8((const uint8_t*)&value);

  /* Per-byte population count. */
  const uint8x8_t count8x8_val = vcnt_u8(input_val);

  /* Pairwise widening adds collapse eight byte counts into one total. */
  const uint16x4_t count16x4_val = vpaddl_u8(count8x8_val);
  const uint32x2_t count32x2_val = vpaddl_u16(count16x4_val);
  const uint64x1_t count64x1_val = vpaddl_u32(count32x2_val);

  vst1_u64(ret, count64x1_val);
}
131 #endif /*LV_HAVE_NEON*/
132 
133 
134 #endif /*INCLUDED_volk_64u_popcnt_a_H*/
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:71
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:115