/*
 * Portable C population count of a 64-bit word, as far as this excerpt shows:
 * the low and the high 32-bit halves of `value` are each reduced to a bit
 * count with the same five-step parallel-summation sequence.
 *
 * NOTE(review): this text is a rendered source listing. The leading numbers
 * are listing line numbers fused into the code, and the function header, the
 * braces, the final combination of retVal64 with the high-half count and the
 * store of the result are not visible here. Presumably this is the body of
 * volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value), which is
 * cross-referenced at the end of the listing -- confirm against the real header.
 */
60 #ifndef INCLUDED_volk_64u_popcnt_a_H 61 #define INCLUDED_volk_64u_popcnt_a_H 67 #ifdef LV_HAVE_GENERIC 77 uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFFull);
/* Step 1: add each even bit to its odd neighbour; every 2-bit field now holds
 * the number of set bits (0..2) of that pair. */
79 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
/* Step 2: add neighbouring 2-bit fields; every 4-bit field holds 0..4. */
80 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
/* Step 3: add neighbouring nibbles, then mask so that every byte holds its
 * own bit count (0..8) with a clean upper nibble. */
81 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
/* Step 4: add neighbouring bytes. No mask is applied, so the upper bytes
 * carry partial sums that the final mask below discards. */
82 retVal = (retVal + (retVal >> 8));
/* Step 5: add the two 16-bit halves and keep the low 6 bits, which is enough
 * to represent the maximum possible count of 32. */
83 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/* Park the low-half count in a 64-bit variable so retVal can be reused. */
84 uint64_t retVal64 = retVal;
/* Repeat the same five steps on the high 32 bits of `value`. */
87 retVal = (uint32_t)((value & 0xFFFFFFFF00000000ull) >> 32);
88 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
89 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
90 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
91 retVal = (retVal + (retVal >> 8));
/* retVal now holds the high-half count (0..32). The statements that combine
 * it with retVal64 are not part of this excerpt. */
92 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/*
 * SSE4.2 variant: counts the set bits of `value` with a single
 * _mm_popcnt_u64 intrinsic (declared in <nmmintrin.h>) and writes the count
 * through `ret`.
 *
 * The block is compiled only when both LV_HAVE_SSE4_2 and LV_HAVE_64 evaluate
 * to non-zero in the preprocessor; the 64-bit intrinsic is presumably the
 * reason for the LV_HAVE_64 condition -- confirm against the build system.
 *
 * NOTE(review): the leading numbers are listing line numbers fused into the
 * code, and the function braces and the closing #endif are not visible in
 * this excerpt.
 */
101 #if LV_HAVE_SSE4_2 && LV_HAVE_64 103 #include <nmmintrin.h> 105 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret,
const uint64_t value)
/* The intrinsic's result is stored directly into the caller's output word. */
107 *ret = _mm_popcnt_u64(value);
/*
 * NEON variant: treats the 64-bit `value` as eight bytes, counts the set bits
 * of every byte, then sums the eight counts with three widening pairwise adds
 * and stores the resulting 64-bit total through `ret`.
 *
 * NOTE(review): the leading numbers are listing line numbers fused into the
 * code, and the function header and braces are not visible in this excerpt.
 * Presumably this is the body of volk_64u_popcnt_neon(uint64_t *ret,
 * const uint64_t value), which is cross-referenced at the end of the listing
 * -- confirm against the real header, including its preprocessor guard.
 */
114 #include <arm_neon.h> 117 uint8x8_t input_val, count8x8_val;
118 uint16x4_t count16x4_val;
119 uint32x2_t count32x2_val;
120 uint64x1_t count64x1_val;
/* Load the eight bytes of `value` into a 64-bit vector register.
 * NOTE(review): the cast to `unsigned char *` drops the const qualifier the
 * cross-referenced signature puts on `value`; the data is only read here. */
122 input_val = vld1_u8((
unsigned char *) &value);
/* Per-lane population count: each of the 8 byte lanes becomes 0..8. */
123 count8x8_val = vcnt_u8(input_val);
/* Widening pairwise adds: 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64, so the
 * single remaining lane holds the sum of all eight byte counts. */
124 count16x4_val = vpaddl_u8(count8x8_val);
125 count32x2_val = vpaddl_u16(count16x4_val);
126 count64x1_val = vpaddl_u32(count32x2_val);
/* Store the 64-bit total into the caller's output word. */
127 vst1_u64(ret, count64x1_val);
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:71
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:115