41 #ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H
/* 256 byte values: the entry at index i is i with its eight bits in reverse
 * order (index 0x01 -> 0x80, 0x02 -> 0x40, 0x03 -> 0xC0, ..., 0xFE -> 0x7F,
 * 0xFF -> 0xFF).
 * NOTE(review): the declaration that owns this initializer list is outside
 * this excerpt, so its name and element type are not shown here. */
90 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0,
91 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8,
92 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94,
93 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
94 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2,
95 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
96 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86,
97 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
98 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
99 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1,
100 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99,
101 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
102 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
103 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3,
104 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B,
105 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
106 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7,
107 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
108 0x3F, 0xBF, 0x7F, 0xFF
110 #ifdef LV_HAVE_GENERIC
113 unsigned int num_points)
/* Iteration counter; the loop below runs num_points times. */
117 unsigned int number = 0;
118 for (; number < num_points; ++number) {
/* Mirror the 32 fields of the current element: output field bNN receives
 * input field b(31-NN), for NN = 00 .. 31.
 * NOTE(review): in_ptr, out_ptr and the b00..b31 fields are declared outside
 * this excerpt; presumably they are one-bit bitfields overlaying a 32-bit
 * word -- confirm. How the pointers advance between iterations is not
 * visible here either. */
119 out_ptr->
b00 = in_ptr->
b31;
120 out_ptr->
b01 = in_ptr->
b30;
121 out_ptr->
b02 = in_ptr->
b29;
122 out_ptr->
b03 = in_ptr->
b28;
123 out_ptr->
b04 = in_ptr->
b27;
124 out_ptr->
b05 = in_ptr->
b26;
125 out_ptr->
b06 = in_ptr->
b25;
126 out_ptr->
b07 = in_ptr->
b24;
127 out_ptr->
b08 = in_ptr->
b23;
128 out_ptr->
b09 = in_ptr->
b22;
129 out_ptr->
b10 = in_ptr->
b21;
130 out_ptr->
b11 = in_ptr->
b20;
131 out_ptr->
b12 = in_ptr->
b19;
132 out_ptr->
b13 = in_ptr->
b18;
133 out_ptr->
b14 = in_ptr->
b17;
134 out_ptr->
b15 = in_ptr->
b16;
/* Second half: output fields b16..b31 take input fields b15..b00. */
135 out_ptr->
b16 = in_ptr->
b15;
136 out_ptr->
b17 = in_ptr->
b14;
137 out_ptr->
b18 = in_ptr->
b13;
138 out_ptr->
b19 = in_ptr->
b12;
139 out_ptr->
b20 = in_ptr->
b11;
140 out_ptr->
b21 = in_ptr->
b10;
141 out_ptr->
b22 = in_ptr->
b09;
142 out_ptr->
b23 = in_ptr->
b08;
143 out_ptr->
b24 = in_ptr->
b07;
144 out_ptr->
b25 = in_ptr->
b06;
145 out_ptr->
b26 = in_ptr->
b05;
146 out_ptr->
b27 = in_ptr->
b04;
147 out_ptr->
b28 = in_ptr->
b03;
148 out_ptr->
b29 = in_ptr->
b02;
149 out_ptr->
b30 = in_ptr->
b01;
150 out_ptr->
b31 = in_ptr->
b00;
157 #ifdef LV_HAVE_GENERIC
160 unsigned int num_points)
/* Read and write cursors start at the caller's `in` and `out` buffers. */
162 const uint32_t* in_ptr = in;
163 uint32_t* out_ptr = out;
/* The loop runs num_points times; its body is not visible in this excerpt. */
164 unsigned int number = 0;
165 for (; number < num_points; ++number) {
212 #ifdef LV_HAVE_GENERIC
/* Read and write cursors start at the caller's `in` and `out` buffers. */
216 const uint32_t* in_ptr = in;
217 uint32_t* out_ptr = out;
/* The loop runs num_points times; its body is not visible in this excerpt. */
218 unsigned int number = 0;
219 for (; number < num_points; ++number) {
232 #ifdef LV_HAVE_GENERIC
/* Read and write cursors start at the caller's `in` and `out` buffers. */
236 const uint32_t* in_ptr = in;
237 uint32_t* out_ptr = out;
240 unsigned int number = 0;
241 for (; number < num_points; ++number) {
/* Address the current input and output words one byte at a time.
 * (in8 and out8 are declared outside this excerpt.) */
242 in8 = (
const uint8_t*)in_ptr;
243 out8 = (uint8_t*)out_ptr;
/* Bit-reverse each byte with 64-bit arithmetic and store it at the opposite
 * byte offset (0 <-> 3, 1 <-> 2), which mirrors all 32 bits of the word:
 *   - multiplying by 0x80200802 places shifted copies of the byte in a
 *     64-bit value,
 *   - the mask 0x0884422110 keeps one bit from each position, already at its
 *     mirrored offset,
 *   - multiplying by 0x0101010101 sums those bits into bits 32..39,
 *   - `>> 32` moves them down; the store through the byte pointer keeps the
 *     low 8 bits.
 * `*` binds tighter than `>>`, so the shift applies to the whole product. */
244 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
245 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
246 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
247 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
254 #ifdef LV_HAVE_GENERIC
/* Read and write cursors start at the caller's `in` and `out` buffers. */
259 const uint32_t* in_ptr = in;
260 uint32_t* out_ptr = out;
263 unsigned int number = 0;
264 for (; number < num_points; ++number) {
/* Address the current input and output words one byte at a time.
 * (in8 and out8 are declared outside this excerpt.) */
265 in8 = (
const uint8_t*)in_ptr;
266 out8 = (uint8_t*)out_ptr;
/* Bit-reverse each byte with a 64-bit multiply, mask and modulus, and store
 * it at the opposite byte offset (0 <-> 3, 1 <-> 2), which mirrors all 32
 * bits of the word:
 *   - multiplying by 0x0202020202 makes five shifted copies of the byte,
 *   - the mask 0x010884422010 keeps each bit once, at its mirrored offset
 *     within a 10-bit group,
 *   - `% 1023` (1023 == 2^10 - 1) folds the 10-bit groups together, leaving
 *     the reversed byte.
 * `*` binds tighter than `&`, so the mask applies to the product. */
267 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
268 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
269 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
270 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
278 #ifdef LV_HAVE_GENERIC
281 unsigned int num_points)
/* Read and write cursors start at the caller's `in` and `out` buffers. */
283 const uint32_t* in_ptr = in;
284 uint32_t* out_ptr = out;
285 unsigned int number = 0;
286 for (; number < num_points; ++number) {
/* Work on a copy of the current input word. Five swap stages follow, from
 * the widest groups to the narrowest; together they mirror all 32 bits.
 * (Where tmp is written back is not visible in this excerpt.) */
287 uint32_t tmp = *in_ptr;
/* Stage 1: exchange the two 16-bit halves. */
291 tmp = (tmp << 16) | (tmp >> 16);
/* Stage 2: exchange adjacent bytes; the mask evaluates to 0x00FF00FF. */
296 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Stage 3: exchange adjacent nibbles; the mask evaluates to 0x0F0F0F0F. */
300 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
301 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Stage 4: exchange adjacent bit pairs. */
306 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Stage 5: exchange adjacent single bits. */
311 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
319 #ifdef LV_HAVE_GENERIC
322 unsigned int num_points)
/* Read and write cursors start at the caller's `in` and `out` buffers. */
325 const uint32_t* in_ptr = in;
326 uint32_t* out_ptr = out;
327 unsigned int number = 0;
328 for (; number < num_points; ++number) {
/* Work on a copy of the current input word. Five swap stages follow, from
 * the narrowest groups to the widest; together they mirror all 32 bits.
 * (Where tmp is written back is not visible in this excerpt.) */
329 uint32_t tmp = *in_ptr;
/* Stage 1: exchange adjacent single bits. */
330 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
/* Stage 2: exchange adjacent bit pairs. */
331 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Stage 3: exchange adjacent nibbles; the mask evaluates to 0x0F0F0F0F. */
332 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
333 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Stage 4: exchange adjacent bytes; the mask evaluates to 0x00FF00FF. */
334 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Stage 5: exchange the two 16-bit halves. */
335 tmp = (tmp << 16) | (tmp >> 16);
344 #ifdef LV_HAVE_NEONV8
345 #include <arm_neon.h>
/* NEON variant: mirrors the bit order of 32-bit words, four words per vector
 * iteration.
 *
 * out        - destination buffer, written through vst1q_u32
 * in         - source buffer, read through vld1q_u32
 * num_points - number of 32-bit words; bounds both loops below
 *
 * NOTE(review): the return type, the braces, the pointer increments and the
 * body of the tail loop are outside this excerpt. */
348 volk_32u_reverse_32u_neonv8(uint32_t* out,
const uint32_t* in,
unsigned int num_points)
350 const uint32_t* in_ptr = in;
351 uint32_t* out_ptr = out;
/* Byte-permutation table: within every group of four bytes the order is
 * reversed (3,2,1,0 / 7,6,5,4 / ...). */
353 const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
/* Number of full four-word vectors in the input. */
355 const unsigned int quarterPoints = num_points / 4;
356 unsigned int number = 0;
357 for (; number < quarterPoints; ++number) {
/* Load four words, reverse the bits inside every byte (vrbitq_u8), then
 * reverse the byte order inside every word via the table lookup
 * (vqtbl1q_u8 with idx); the two steps combined mirror each 32-bit word. */
359 uint32x4_t x = vld1q_u32(in_ptr);
361 vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32(x)), idx));
362 vst1q_u32(out_ptr, z);
/* Tail: resume at the first word not covered by a full vector and continue
 * up to num_points. */
366 number = quarterPoints * 4;
367 for (; number < num_points; ++number) {
/* The asm fragment below issues the ARM `rbit` instruction (reverse the bit
 * order of a register) with *in_ptr as the input operand and *out_ptr as the
 * output operand.
 * NOTE(review): the trailing backslashes suggest the fragment is the body of
 * a multi-line macro whose #define line is outside this excerpt -- confirm. */
379 #include <arm_neon.h>
382 __VOLK_ASM("rbit %[result], %[value]" \
383 : [result] "=r"(*out_ptr) \
384 : [value] "r"(*in_ptr) \
393 const uint32_t* in_ptr = in;
394 uint32_t* out_ptr = out;
/* Main loop count is num_points / 8.
 * NOTE(review): presumably eight words are handled per pass; the loop body
 * is not visible in this excerpt. */
395 const unsigned int eighthPoints = num_points / 8;
396 unsigned int number = 0;
397 for (; number < eighthPoints; ++number) {
/* Tail: resume at index eighthPoints * 8 and continue up to num_points. */
408 number = eighthPoints * 8;
409 for (; number < num_points; ++number) {