Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
24 #define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
25 
26 #include <volk/volk.h>
28 #include <string.h>
29 
/*
 * Two views of the same decision-record address: the record is a bit
 * vector that can be walked either byte-wise (t) or word-wise (w).
 */
typedef union
{
    unsigned char *t; /* byte view of the decision bits */
    unsigned int *w;  /* word view of the decision bits */
} p_decision_t;
35 
/*
 * Return the parity (XOR of all bits) of x using a byte-parity lookup table.
 *
 * x      - value whose bit parity is wanted
 * Partab - table where Partab[b] holds the parity of byte value b; it is
 *          indexed with values in the range 0..255
 *
 * The four bytes of x are XOR-folded into the low byte, and only that byte
 * is used as the table index, so the lookup can never leave the table.
 */
static inline int parity(int x, unsigned char* Partab)
{
    /* Fold in unsigned arithmetic so negative inputs shift predictably. */
    unsigned int folded = (unsigned int)x;

    folded ^= (folded >> 16);
    folded ^= (folded >> 8);

    /* Only the low byte carries the folded parity; the rest is leftover. */
    return Partab[folded & 0xffu];
}
42 
/*
 * Trace back through stored Viterbi decisions and write the decoded bits.
 *
 * data      - output buffer; one decoded bit (0 or 1) is stored per byte at
 *             index (step + tailsize - (K - 1)) modulo nbits
 * nbits     - number of trellis steps to trace back
 * endstate  - state the traceback starts from (reduced modulo 64)
 * tailsize  - number of decision records skipped at the start of decisions
 * decisions - decision records, 8 bytes (one bit per state) per step, read
 *             as two unsigned int words per record
 *
 * Returns the state reached after the first K - 1 traceback steps. When
 * nbits is smaller than K - 1 those steps never complete and the starting
 * state (after the modulo reduction) is returned instead.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int numstates = (1u << 6);
    const unsigned int record_bytes = numstates / 8; /* bytes per decision record */
    const unsigned int k_len = 7;                    /* constraint length K */
    const unsigned int framebits = nbits;

    /* Look past the tail records. */
    const unsigned char* records = decisions + (size_t)tailsize * record_bytes;

    /* Output index offset; kept unsigned so the wrap-around matches the
     * unsigned arithmetic used for the index below. */
    const unsigned int dif = tailsize - (k_len - 1);

    unsigned int steps_done = 0;
    int retval;

    endstate %= numstates;

    /* Defined result even when fewer than K - 1 steps are traced back. */
    retval = (int)endstate;

    /* The store into data[] happens on every step; nbits counts down so the
     * newest decision record is consumed first. */
    while(nbits-- != 0) {
        unsigned int word;
        unsigned int bit;

        /* Read the decision word through memcpy: the record buffer is a
         * byte array and may be neither aligned nor typed as unsigned int. */
        memcpy(&word,
               records + (size_t)nbits * record_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        bit = (word >> (endstate % 32)) & 1u;

        /* Shift the decided bit in at the top of the K - 1 bit state. */
        endstate = (endstate >> 1) | (bit << (k_len - 2));
        data[(nbits + dif) % framebits] = (unsigned char)bit;

        /* Remember the state once the first K - 1 steps have been undone. */
        if(++steps_done == k_len - 1) {
            retval = (int)endstate;
        }
    }

    return retval;
}
102 
103 
104 #if LV_HAVE_SSE3
105 
106 #include <pmmintrin.h>
107 #include <emmintrin.h>
108 #include <xmmintrin.h>
109 #include <mmintrin.h>
110 #include <stdio.h>
111 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_spiral and then runs
 * chainback_viterbi on its decisions.
 *
 * syms      - input symbols handed to the kernel
 * dec       - output; receives the bits written by chainback_viterbi
 * framebits - frame size; framebits/2 - 6 trellis steps are decoded, with 6
 *             extra steps passed to the kernel as "excess"
 *
 * Work buffers and lookup tables live in function-static variables. The
 * tables are built on the first successful call; the decision buffer is
 * reallocated whenever a call needs more room than is currently held. If an
 * allocation fails, or framebits is too small to leave any steps after the
 * excess is subtracted, the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms, unsigned char* dec, unsigned int framebits) {

    static int tables_ready = 0;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = {79, 109};

    size_t needed;
    unsigned int min;
    int i, state;

    /* framebits/2 - excess is unsigned; refuse frames where it would wrap. */
    if(framebits/2 < excess) {
        return;
    }

    if(!tables_ready) {
        int cnt, ti;

        if(X == NULL) {
            X = (unsigned char*)volk_malloc(2*d_numstates, volk_get_alignment());
        }
        if(Branchtab == NULL) {
            Branchtab = (unsigned char*)volk_malloc(d_numstates/2*rate, volk_get_alignment());
        }
        if(X == NULL || Branchtab == NULL) {
            return; /* allocation failed; retried on the next call */
        }
        /* X and Y are the two halves of a single allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for(i = 0; i < 256; i++) {
            cnt = 0;
            for(ti = i; ti; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[i] = cnt & 1;
        }

        /* Initialize the branch table */
        for(state = 0; state < d_numstates/2; state++) {
            for(i = 0; i < rate; i++) {
                const int poly = d_polys[i];
                /* magnitude computed by hand: no <stdlib.h> is included here */
                const int mag = (poly < 0) ? -poly : poly;
                Branchtab[i*d_numstates/2+state] =
                    ((poly < 0) ^ parity((2*state) & mag, Partab)) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Grow the decision buffer when this frame needs more than is held. */
    needed = (size_t)(d_numstates/8) * ((size_t)framebits + 6);
    if(needed > D_capacity) {
        if(D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        D_capacity = (D != NULL) ? needed : 0;
        if(D == NULL) {
            return;
        }
    }

    //unbias the old_metrics
    memset(X, 31, d_numstates);

    volk_8u_x4_conv_k7_r2_8u_spiral(Y, X, syms, D, framebits/2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    min = X[0];
    state = 0;
    for(i = 0; i < d_numstates; ++i) {
        if(X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits/2 - excess, state, excess, D);

    return;
}
176 
177 #endif /*LV_HAVE_SSE3*/
178 
179 
180 #if LV_HAVE_AVX2
181 
182 #include <immintrin.h>
183 #include <stdio.h>
184 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_avx2 and then runs
 * chainback_viterbi on its decisions.
 *
 * syms      - input symbols handed to the kernel
 * dec       - output; receives the bits written by chainback_viterbi
 * framebits - frame size; framebits/2 - 6 trellis steps are decoded, with 6
 *             extra steps passed to the kernel as "excess"
 *
 * Work buffers and lookup tables live in function-static variables. The
 * tables are built on the first successful call; the decision buffer is
 * reallocated whenever a call needs more room than is currently held. If an
 * allocation fails, or framebits is too small to leave any steps after the
 * excess is subtracted, the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms, unsigned char* dec, unsigned int framebits) {

    static int tables_ready = 0;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = {79, 109};

    size_t needed;
    unsigned int min;
    int i, state;

    /* framebits/2 - excess is unsigned; refuse frames where it would wrap. */
    if(framebits/2 < excess) {
        return;
    }

    if(!tables_ready) {
        int cnt, ti;

        if(X == NULL) {
            X = (unsigned char*)volk_malloc(2*d_numstates, volk_get_alignment());
        }
        if(Branchtab == NULL) {
            Branchtab = (unsigned char*)volk_malloc(d_numstates/2*rate, volk_get_alignment());
        }
        if(X == NULL || Branchtab == NULL) {
            return; /* allocation failed; retried on the next call */
        }
        /* X and Y are the two halves of a single allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for(i = 0; i < 256; i++) {
            cnt = 0;
            for(ti = i; ti; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[i] = cnt & 1;
        }

        /* Initialize the branch table */
        for(state = 0; state < d_numstates/2; state++) {
            for(i = 0; i < rate; i++) {
                const int poly = d_polys[i];
                /* magnitude computed by hand: no <stdlib.h> is included here */
                const int mag = (poly < 0) ? -poly : poly;
                Branchtab[i*d_numstates/2+state] =
                    ((poly < 0) ^ parity((2*state) & mag, Partab)) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Grow the decision buffer when this frame needs more than is held. */
    needed = (size_t)(d_numstates/8) * ((size_t)framebits + 6);
    if(needed > D_capacity) {
        if(D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        D_capacity = (D != NULL) ? needed : 0;
        if(D == NULL) {
            return;
        }
    }

    //unbias the old_metrics
    memset(X, 31, d_numstates);

    volk_8u_x4_conv_k7_r2_8u_avx2(Y, X, syms, D, framebits/2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    min = X[0];
    state = 0;
    for(i = 0; i < d_numstates; ++i) {
        if(X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits/2 - excess, state, excess, D);

    return;
}
249 
250 #endif /*LV_HAVE_AVX2*/
251 
252 
253 
254 #if LV_HAVE_GENERIC
255 
256 
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_generic and then runs
 * chainback_viterbi on its decisions.
 *
 * syms      - input symbols handed to the kernel
 * dec       - output; receives the bits written by chainback_viterbi
 * framebits - frame size; framebits/2 - 6 trellis steps are decoded, with 6
 *             extra steps passed to the kernel as "excess"
 *
 * Work buffers and lookup tables live in function-static variables. The
 * tables are built on the first successful call; the decision buffer is
 * reallocated whenever a call needs more room than is currently held. If an
 * allocation fails, or framebits is too small to leave any steps after the
 * excess is subtracted, the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms, unsigned char* dec, unsigned int framebits) {

    static int tables_ready = 0;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = {79, 109};

    size_t needed;
    unsigned int min;
    int i, state;

    /* framebits/2 - excess is unsigned; refuse frames where it would wrap. */
    if(framebits/2 < excess) {
        return;
    }

    if(!tables_ready) {
        int cnt, ti;

        if(X == NULL) {
            X = (unsigned char*)volk_malloc(2*d_numstates, volk_get_alignment());
        }
        if(Branchtab == NULL) {
            Branchtab = (unsigned char*)volk_malloc(d_numstates/2*rate, volk_get_alignment());
        }
        if(X == NULL || Branchtab == NULL) {
            return; /* allocation failed; retried on the next call */
        }
        /* X and Y are the two halves of a single allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for(i = 0; i < 256; i++) {
            cnt = 0;
            for(ti = i; ti; ti >>= 1) {
                cnt += ti & 1;
            }
            Partab[i] = cnt & 1;
        }

        /* Initialize the branch table */
        for(state = 0; state < d_numstates/2; state++) {
            for(i = 0; i < rate; i++) {
                const int poly = d_polys[i];
                /* magnitude computed by hand: no <stdlib.h> is included here */
                const int mag = (poly < 0) ? -poly : poly;
                Branchtab[i*d_numstates/2+state] =
                    ((poly < 0) ^ parity((2*state) & mag, Partab)) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Grow the decision buffer when this frame needs more than is held. */
    needed = (size_t)(d_numstates/8) * ((size_t)framebits + 6);
    if(needed > D_capacity) {
        if(D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        D_capacity = (D != NULL) ? needed : 0;
        if(D == NULL) {
            return;
        }
    }

    //unbias the old_metrics
    memset(X, 31, d_numstates);

    volk_8u_x4_conv_k7_r2_8u_generic(Y, X, syms, D, framebits/2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric in X. */
    min = X[0];
    state = 0;
    for(i = 0; i < d_numstates; ++i) {
        if(X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits/2 - excess, state, excess, D);

    return;
}
328 
329 #endif /* LV_HAVE_GENERIC */
330 
331 #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
unsigned char * t
Definition: volk_8u_conv_k7_r2puppet_8u.h:32
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:102
Definition: volk_8u_conv_k7_r2puppet_8u.h:30
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:326
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:616
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition: volk_8u_conv_k7_r2puppet_8u.h:43
for i
Definition: volk_config_fixed.tmpl.h:25
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:93
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:257
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:112
static int parity(int x, unsigned char *Partab)
Definition: volk_8u_conv_k7_r2puppet_8u.h:36
unsigned int * w
Definition: volk_8u_conv_k7_r2puppet_8u.h:33
data
Definition: plot_best_vs_generic.py:36