Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
24 #define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
25 
#include <string.h>
#include <volk/volk.h>
#include <volk/volk_8u_x4_conv_k7_r2_8u.h>
#include <volk/volk_malloc.h>
29 
/*
 * Two views of the same decision storage: the decisions form a bit
 * vector that can be addressed either byte-wise or word-wise.
 */
typedef union {
    unsigned char *t; /* byte-granular view */
    unsigned int *w;  /* word-granular view, used to extract single bits */
} p_decision_t;
35 
/*
 * Return the parity of x using a byte-indexed lookup table.
 *
 * x       value whose bits are folded down into a single byte
 * Partab  lookup table indexed by a byte value (0..255); the callers in
 *         this file fill entry i with the bit parity of i
 *
 * The fold is done on an unsigned copy so that right shifts are well
 * defined for negative inputs, and the result is masked to 8 bits so the
 * table index can never leave the 0..255 range.  For inputs below 256 the
 * result is identical to an unmasked lookup.
 */
static inline int parity(int x, unsigned char* Partab)
{
    unsigned int folded = (unsigned int)x;

    folded ^= folded >> 16;
    folded ^= folded >> 8;
    return Partab[folded & 0xffu];
}
42 
/*
 * Trace the decision history backwards and emit the decoded bits.
 *
 * data       output buffer; receives one bit (0 or 1) per byte and must
 *            hold at least nbits entries
 * nbits      number of bits to trace back
 * endstate   state to start the traceback from (reduced modulo 64)
 * tailsize   number of decision rows to skip at the start of decisions
 * decisions  decision history; one row of 64 bits (8 bytes) per step,
 *            with at least tailsize + nbits rows
 *
 * Returns the encoder state reached after the first K-1 = 6 traceback
 * steps.  When nbits is smaller than 6 that point is never reached, and
 * the (reduced) starting state is returned instead of an indeterminate
 * value.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int numstates = 1u << 6;
    const unsigned int row_bytes = numstates / 8; /* 64 decision bits per step */
    const unsigned int constraint_len = 7;
    const unsigned int framebits = nbits;
    /* Offset applied (modulo framebits) to the output position. */
    const unsigned int dif = tailsize - (constraint_len - 1);
    /* Look past the tail rows. */
    const unsigned char* rows = decisions + tailsize * row_bytes;

    unsigned int state = endstate % numstates;
    unsigned int tail_state = state;
    unsigned int t = nbits;

    while (t-- != 0) {
        unsigned int word;
        unsigned int bit;

        /* Fetch the word holding this state's decision bit.  memcpy avoids
         * reading the byte buffer through an incompatible pointer type. */
        memcpy(&word, rows + t * row_bytes + (state / 32) * sizeof word, sizeof word);
        bit = (word >> (state % 32)) & 1u;

        /* Shift the decided bit into the top of the 6-bit state register. */
        state = (state >> 1) | (bit << (constraint_len - 2));
        data[(t + dif) % framebits] = (unsigned char)bit;

        /* Remember the state once the first K-1 steps have been undone. */
        if (framebits - t == constraint_len - 1) {
            tail_state = state;
        }
    }

    return (int)tail_state;
}
103 
104 
105 #if LV_HAVE_SSE3
106 
107 #include <emmintrin.h>
108 #include <mmintrin.h>
109 #include <pmmintrin.h>
110 #include <stdio.h>
111 #include <xmmintrin.h>
112 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_spiral with a
 * three-argument signature.
 *
 * syms       input symbols, handed to the kernel unchanged
 * dec        output; receives framebits / 2 - 6 decoded bits, one per byte
 * framebits  frame length; frames shorter than 12 are ignored because
 *            framebits / 2 - 6 would wrap around as an unsigned value
 *
 * All working buffers are allocated for the current call and released
 * before returning, so the decision buffer always matches framebits (a
 * buffer sized once on the first call would overflow on a later, longer
 * frame).  If an allocation fails the function returns without touching
 * dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
                                                      unsigned char* dec,
                                                      unsigned int framebits)
{
    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };
    unsigned char Partab[256];
    int s, p;

    if (framebits < 2 * excess) {
        return;
    }

    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    unsigned char* X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
    unsigned char* Branchtab =
        (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
    unsigned char* D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());

    if (X != NULL && Branchtab != NULL && D != NULL) {
        /* Second half of the metric buffer holds the new metrics. */
        unsigned char* Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (p = 0; p < 256; p++) {
            int cnt = 0;
            int ti;
            for (ti = p; ti != 0; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[p] = cnt & 1;
        }

        /* Initialize the branch table */
        for (s = 0; s < d_numstates / 2; s++) {
            for (p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        /* unbias the old_metrics */
        memset(X, 31, d_numstates);

        /* initialize decisions */
        memset(D, 0, decision_bytes);

        volk_8u_x4_conv_k7_r2_8u_spiral(
            Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

        /* Start the traceback from the state with the smallest metric. */
        unsigned int min = X[0];
        int best = 0;
        for (s = 1; s < d_numstates; ++s) {
            if (X[s] < min) {
                min = X[s];
                best = s;
            }
        }

        chainback_viterbi(dec, framebits / 2 - excess, best, excess, D);
    }

    /* NOTE(review): volk_free is not assumed to accept NULL here. */
    if (X != NULL) {
        volk_free(X);
    }
    if (Branchtab != NULL) {
        volk_free(Branchtab);
    }
    if (D != NULL) {
        volk_free(D);
    }
}
187 
188 #endif /*LV_HAVE_SSE3*/
189 
190 
191 #if LV_HAVE_AVX2
192 
193 #include <immintrin.h>
194 #include <stdio.h>
195 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_avx2 with a
 * three-argument signature.
 *
 * syms       input symbols, handed to the kernel unchanged
 * dec        output; receives framebits / 2 - 6 decoded bits, one per byte
 * framebits  frame length; frames shorter than 12 are ignored because
 *            framebits / 2 - 6 would wrap around as an unsigned value
 *
 * All working buffers are allocated for the current call and released
 * before returning, so the decision buffer always matches framebits (a
 * buffer sized once on the first call would overflow on a later, longer
 * frame).  If an allocation fails the function returns without touching
 * dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
                                                    unsigned char* dec,
                                                    unsigned int framebits)
{
    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };
    unsigned char Partab[256];
    int s, p;

    if (framebits < 2 * excess) {
        return;
    }

    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    unsigned char* X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
    unsigned char* Branchtab =
        (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
    unsigned char* D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());

    if (X != NULL && Branchtab != NULL && D != NULL) {
        /* Second half of the metric buffer holds the new metrics. */
        unsigned char* Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (p = 0; p < 256; p++) {
            int cnt = 0;
            int ti;
            for (ti = p; ti != 0; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[p] = cnt & 1;
        }

        /* Initialize the branch table */
        for (s = 0; s < d_numstates / 2; s++) {
            for (p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        /* unbias the old_metrics */
        memset(X, 31, d_numstates);

        /* initialize decisions */
        memset(D, 0, decision_bytes);

        volk_8u_x4_conv_k7_r2_8u_avx2(
            Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

        /* Start the traceback from the state with the smallest metric. */
        unsigned int min = X[0];
        int best = 0;
        for (s = 1; s < d_numstates; ++s) {
            if (X[s] < min) {
                min = X[s];
                best = s;
            }
        }

        chainback_viterbi(dec, framebits / 2 - excess, best, excess, D);
    }

    /* NOTE(review): volk_free is not assumed to accept NULL here. */
    if (X != NULL) {
        volk_free(X);
    }
    if (Branchtab != NULL) {
        volk_free(Branchtab);
    }
    if (D != NULL) {
        volk_free(D);
    }
}
270 
271 #endif /*LV_HAVE_AVX2*/
272 
273 
274 #if LV_HAVE_GENERIC
275 
276 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_generic with a
 * three-argument signature.
 *
 * syms       input symbols, handed to the kernel unchanged
 * dec        output; receives framebits / 2 - 6 decoded bits, one per byte
 * framebits  frame length; frames shorter than 12 are ignored because
 *            framebits / 2 - 6 would wrap around as an unsigned value
 *
 * All working buffers are allocated for the current call and released
 * before returning, so the decision buffer always matches framebits (a
 * buffer sized once on the first call would overflow on a later, longer
 * frame).  If an allocation fails the function returns without touching
 * dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
                                                       unsigned char* dec,
                                                       unsigned int framebits)
{
    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };
    unsigned char Partab[256];
    int s, p;

    if (framebits < 2 * excess) {
        return;
    }

    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    unsigned char* X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
    unsigned char* Branchtab =
        (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
    unsigned char* D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());

    if (X != NULL && Branchtab != NULL && D != NULL) {
        /* Second half of the metric buffer holds the new metrics. */
        unsigned char* Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (p = 0; p < 256; p++) {
            int cnt = 0;
            int ti;
            for (ti = p; ti != 0; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[p] = cnt & 1;
        }

        /* Initialize the branch table */
        for (s = 0; s < d_numstates / 2; s++) {
            for (p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        /* unbias the old_metrics */
        memset(X, 31, d_numstates);

        /* initialize decisions */
        memset(D, 0, decision_bytes);

        volk_8u_x4_conv_k7_r2_8u_generic(
            Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

        /* Start the traceback from the state with the smallest metric. */
        unsigned int min = X[0];
        int best = 0;
        for (s = 1; s < d_numstates; ++s) {
            if (X[s] < min) {
                min = X[s];
                best = s;
            }
        }

        chainback_viterbi(dec, framebits / 2 - excess, best, excess, D);
    }

    /* NOTE(review): volk_free is not assumed to accept NULL here. */
    if (X != NULL) {
        volk_free(X);
    }
    if (Branchtab != NULL) {
        volk_free(Branchtab);
    }
    if (D != NULL) {
        volk_free(D);
    }
}
352 
353 #endif /* LV_HAVE_GENERIC */
354 
355 #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
volk_get_alignment
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:102
volk_8u_x4_conv_k7_r2_8u_generic
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:636
parity
static int parity(int x, unsigned char *Partab)
Definition: volk_8u_conv_k7_r2puppet_8u.h:36
volk_8u_x4_conv_k7_r2_8u_spiral
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:341
p_decision_t::w
unsigned int * w
Definition: volk_8u_conv_k7_r2puppet_8u.h:33
i
for i
Definition: volk_config_fixed.tmpl.h:25
volk_malloc
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:51
plot_best_vs_generic.data
data
Definition: plot_best_vs_generic.py:36
p_decision_t::t
unsigned char * t
Definition: volk_8u_conv_k7_r2puppet_8u.h:32
volk_8u_x4_conv_k7_r2_8u.h
volk_8u_conv_k7_r2puppet_8u_spiral
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:113
p_decision_t
Definition: volk_8u_conv_k7_r2puppet_8u.h:30
chainback_viterbi
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition: volk_8u_conv_k7_r2puppet_8u.h:43
volk_8u_conv_k7_r2puppet_8u_generic
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:277