Vector Optimized Library of Kernels  2.1
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
24 #define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
25 
26 #include <volk/volk.h>
28 #include <string.h>
29 
/*
 * Two views of the same decision storage: the bits of one decision
 * vector can be addressed byte-wise (t) or word-wise (w).
 */
typedef union {
    unsigned char *t; /* byte-granular view */
    unsigned int *w;  /* word-granular view */
} p_decision_t;
35 
/*
 * Return the parity (XOR of all bits) of the low 32 bits of x.
 *
 * x      - value whose bit parity is wanted.
 * Partab - 256-entry lookup table where Partab[b] holds the parity of byte b.
 *
 * The upper bytes are folded into the low byte by XOR and the result is
 * masked to 8 bits before the lookup, so the table index is always within
 * 0..255. The folding is done on an unsigned copy so that negative inputs
 * do not depend on the behavior of right-shifting a signed value.
 */
static inline int parity(int x, unsigned char* Partab)
{
    unsigned int folded = (unsigned int)x;

    folded ^= (folded >> 16);
    folded ^= (folded >> 8);
    return Partab[folded & 0xffu];
}
42 
/*
 * Trace back through stored Viterbi decisions (K = 7, 64 states) and write
 * the decoded bits.
 *
 * data      - output; receives one byte per decoded bit (value 0 or 1).
 *             The bit decoded at step s is stored at index
 *             (s + tailsize - (K - 1)) % nbits, computed in unsigned
 *             arithmetic exactly as before.
 *             NOTE(review): for tailsize < K - 1 that unsigned wrap-around
 *             is not a true mathematical modulus; the callers in this file
 *             always pass tailsize == 6.
 * nbits     - number of decision steps to trace back.
 * endstate  - state to start the trace from (reduced modulo 64).
 * tailsize  - number of leading decision steps to skip in `decisions`.
 * decisions - decision storage, 64 bits (8 bytes) per step; must hold at
 *             least (tailsize + nbits) steps.
 *
 * Returns the encoder state reached after the first K - 1 = 6 traceback
 * steps. When nbits < 6 the (reduced) starting state is returned; the
 * previous code returned an uninitialized value in that case.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int num_states = (1u << 6);
    const unsigned int step_bytes = num_states / 8; /* one bit per state */
    const unsigned int k_len = 7;
    const unsigned int framebits = nbits;
    /* Same value the original kept in a signed `dif` and then added to an
     * unsigned index; held unsigned here so the arithmetic is explicit. */
    const unsigned int dif = tailsize - (k_len - 1);

    /* Look past the tail. */
    const unsigned char* d = decisions + (size_t)tailsize * step_bytes;

    unsigned int retval;
    unsigned int traced = 0;
    unsigned int step = nbits;

    endstate %= num_states;
    retval = endstate;

    /* The store into data[] happens on every step; walking from the last
     * decision to the first reproduces the original two-loop order. */
    while (step-- != 0) {
        unsigned int word;
        unsigned int k;

        /* Copy the 32-bit decision word out of the byte buffer instead of
         * dereferencing a punned unsigned int pointer: this gives the same
         * value without alignment or aliasing assumptions. */
        memcpy(&word,
               d + (size_t)step * step_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        k = (word >> (endstate % 32)) & 1u;

        endstate = (endstate >> 1) | (k << (k_len - 2));
        data[(step + dif) % framebits] = (unsigned char)k;

        /* Remember the state once K - 1 bits have been traced. */
        if (++traced == k_len - 1) {
            retval = endstate;
        }
    }

    return (int)retval;
}
102 
103 
104 #if LV_HAVE_SSE3
105 
106 #include <pmmintrin.h>
107 #include <emmintrin.h>
108 #include <xmmintrin.h>
109 #include <mmintrin.h>
110 #include <stdio.h>
111 
/*
 * Puppet wrapper around volk_8u_x4_conv_k7_r2_8u_spiral.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives one byte (0 or 1) per decoded bit, written
 *             by chainback_viterbi for framebits/2 - 6 bits.
 * framebits - frame size; the kernel is run on framebits/2 - 6 bits plus
 *             6 excess bits.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer D is also
 * static, but it is grown whenever a call needs more room than any earlier
 * call did (previously it kept the size of the very first call, so a later
 * call with a larger framebits wrote past its end).
 *
 * The function returns without touching dec when framebits < 12 (the
 * kernel length framebits/2 - 6 would wrap around) or when an allocation
 * fails.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms, unsigned char* dec, unsigned int framebits) {
    static int once = 1;
    static unsigned char* D;
    static size_t D_capacity; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = {79, 109};

    /* framebits/2 - excess is unsigned; refuse frames that would wrap it. */
    if (framebits / 2 < excess) {
        return;
    }

    if (once) {
        unsigned char* metrics = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches = (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        int state, i;
        int cnt, ti;

        if (metrics == NULL || branches == NULL) {
            /* Pointers are checked before release; nothing is retained, so
             * a later call can retry the one-time setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    (d_polys[i] < 0) ^ parity((2 * state) & abs(d_polys[i]), Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for THIS frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        unsigned char* grown = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = d_bytes;
    }

    /* unbias the old_metrics */
    memset(X, 31, d_numstates);

    /* initialize decisions */
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_spiral(Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest entry in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
179 
180 #endif /*LV_HAVE_SSE3*/
181 
182 
183 #if LV_HAVE_AVX2
184 
185 #include <immintrin.h>
186 #include <stdio.h>
187 
/*
 * Puppet wrapper around volk_8u_x4_conv_k7_r2_8u_avx2.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives one byte (0 or 1) per decoded bit, written
 *             by chainback_viterbi for framebits/2 - 6 bits.
 * framebits - frame size; the kernel is run on framebits/2 - 6 bits plus
 *             6 excess bits.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer D is also
 * static, but it is grown whenever a call needs more room than any earlier
 * call did (previously it kept the size of the very first call, so a later
 * call with a larger framebits wrote past its end).
 *
 * The function returns without touching dec when framebits < 12 (the
 * kernel length framebits/2 - 6 would wrap around) or when an allocation
 * fails.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms, unsigned char* dec, unsigned int framebits) {
    static int once = 1;
    static unsigned char* D;
    static size_t D_capacity; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = {79, 109};

    /* framebits/2 - excess is unsigned; refuse frames that would wrap it. */
    if (framebits / 2 < excess) {
        return;
    }

    if (once) {
        unsigned char* metrics = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches = (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        int state, i;
        int cnt, ti;

        if (metrics == NULL || branches == NULL) {
            /* Pointers are checked before release; nothing is retained, so
             * a later call can retry the one-time setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    (d_polys[i] < 0) ^ parity((2 * state) & abs(d_polys[i]), Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for THIS frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        unsigned char* grown = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = d_bytes;
    }

    /* unbias the old_metrics */
    memset(X, 31, d_numstates);

    /* initialize decisions */
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_avx2(Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest entry in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
255 
256 #endif /*LV_HAVE_AVX2*/
257 
258 
259 
260 #if LV_HAVE_GENERIC
261 
262 
/*
 * Puppet wrapper around volk_8u_x4_conv_k7_r2_8u_generic.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives one byte (0 or 1) per decoded bit, written
 *             by chainback_viterbi for framebits/2 - 6 bits.
 * framebits - frame size; the kernel is run on framebits/2 - 6 bits plus
 *             6 excess bits.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer D is also
 * static, but it is grown whenever a call needs more room than any earlier
 * call did (previously it kept the size of the very first call, so a later
 * call with a larger framebits wrote past its end).
 *
 * The function returns without touching dec when framebits < 12 (the
 * kernel length framebits/2 - 6 would wrap around) or when an allocation
 * fails.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms, unsigned char* dec, unsigned int framebits) {
    static int once = 1;
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned char* D;
    static size_t D_capacity; /* bytes currently allocated for D */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = {79, 109};

    /* framebits/2 - excess is unsigned; refuse frames that would wrap it. */
    if (framebits / 2 < excess) {
        return;
    }

    if (once) {
        unsigned char* metrics = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches = (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        int state, i;
        int cnt, ti;

        if (metrics == NULL || branches == NULL) {
            /* Pointers are checked before release; nothing is retained, so
             * a later call can retry the one-time setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    (d_polys[i] < 0) ^ parity((2 * state) & abs(d_polys[i]), Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Make sure the decision buffer is large enough for THIS frame. */
    const size_t d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (d_bytes > D_capacity) {
        unsigned char* grown = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = d_bytes;
    }

    /* unbias the old_metrics */
    memset(X, 31, d_numstates);

    /* initialize decisions */
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_generic(Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest entry in X. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
334 
335 #endif /* LV_HAVE_GENERIC */
336 
337 #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
unsigned char * t
Definition: volk_8u_conv_k7_r2puppet_8u.h:32
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:102
Definition: volk_8u_conv_k7_r2puppet_8u.h:30
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:326
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:616
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition: volk_8u_conv_k7_r2puppet_8u.h:43
for i
Definition: volk_config_fixed.tmpl.h:25
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:93
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:263
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:112
static int parity(int x, unsigned char *Partab)
Definition: volk_8u_conv_k7_r2puppet_8u.h:36
unsigned int * w
Definition: volk_8u_conv_k7_r2puppet_8u.h:33
data
Definition: plot_best_vs_generic.py:36