Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_64u_popcnt.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
60 #ifndef INCLUDED_volk_64u_popcnt_a_H
61 #define INCLUDED_volk_64u_popcnt_a_H
62 
63 #include <inttypes.h>
64 #include <stdio.h>
65 
66 
67 #ifdef LV_HAVE_GENERIC
68 
69 
/*!
 * \brief Counts the set bits of one 32-bit word (private helper).
 *
 * Branch-free parallel reduction: adjacent bit fields are summed pairwise,
 * doubling the field width at each step (1 -> 2 -> 4 -> 8 bits), then the
 * four byte sums are folded into the low byte.
 * (Original author's note: this is faster than a lookup table.)
 *
 * \param word 32-bit word to examine.
 * \return Number of bits set in \p word (0..32).
 */
static inline uint32_t volk_64u_popcnt_generic_word32(uint32_t word)
{
    word = (word & 0x55555555u) + ((word >> 1) & 0x55555555u); /* 2-bit sums */
    word = (word & 0x33333333u) + ((word >> 2) & 0x33333333u); /* 4-bit sums */
    word = (word + (word >> 4)) & 0x0F0F0F0Fu;                 /* 8-bit sums */
    word += word >> 8;
    word += word >> 16;
    /* Only the low 6 bits are meaningful: the maximum count, 32, fits in them. */
    return word & 0x0000003Fu;
}

/*!
 * \brief Portable population count of a 64-bit value.
 *
 * The input is split into its low and high 32-bit halves, each half is
 * counted independently, and the two counts are summed.
 *
 * \param ret   Output location; receives the number of set bits in \p value.
 * \param value 64-bit word whose set bits are counted.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    const uint32_t low_half = (uint32_t)(value & 0x00000000FFFFFFFFull);
    const uint32_t high_half = (uint32_t)(value >> 32);

    *ret = (uint64_t)volk_64u_popcnt_generic_word32(low_half) +
           (uint64_t)volk_64u_popcnt_generic_word32(high_half);
}
96 
97 #endif /*LV_HAVE_GENERIC*/
98 
99 
100 #if LV_HAVE_SSE4_2 && LV_HAVE_64
101 
102 #include <nmmintrin.h>
103 
104 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
105 {
106  *ret = _mm_popcnt_u64(value);
107 }
108 
109 #endif /*LV_HAVE_SSE4_2 && LV_HAVE_64*/
110 
111 
112 #if LV_HAVE_NEON
113 #include <arm_neon.h>
/*!
 * \brief Population count of a 64-bit value using NEON intrinsics.
 *
 * The value is loaded as eight byte lanes, `vcnt_u8` yields a per-byte bit
 * count, and three widening pairwise additions (`vpaddl_u8`, `vpaddl_u16`,
 * `vpaddl_u32`) fold the eight counts into a single 64-bit total.
 *
 * \param ret   Output location; receives the number of set bits in \p value.
 * \param value 64-bit word whose set bits are counted.
 */
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
{
    /* View the input as bytes without discarding its const qualifier. */
    const uint8x8_t input_bytes = vld1_u8((const uint8_t*)&value);

    const uint8x8_t count_per_byte = vcnt_u8(input_bytes);
    const uint16x4_t count_per_u16 = vpaddl_u8(count_per_byte);
    const uint32x2_t count_per_u32 = vpaddl_u16(count_per_u16);
    const uint64x1_t count_total = vpaddl_u32(count_per_u32);

    vst1_u64(ret, count_total);
}
130 #endif /*LV_HAVE_NEON*/
131 
132 
133 #endif /*INCLUDED_volk_64u_popcnt_a_H*/
volk_64u_popcnt_generic
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:70
volk_64u_popcnt_neon
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:114