1 /* K=15 r=1/6 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions 2 * 8-bit offset-binary soft decision samples 3 * Copyright Mar 2004, Phil Karn, KA9Q 4 * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 */ 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <memory.h> 9 #include <limits.h> 10 #include "fec.h" 11 12 typedef union { unsigned char c[128][16]; vector unsigned char v[128]; } decision_t; 13 typedef union { unsigned short s[16384]; vector unsigned short v[2048]; } metric_t; 14 15 static union branchtab615 { unsigned short s[8192]; vector unsigned short v[1024];} Branchtab615[6]; 16 static int Init = 0; 17 18 /* State info for instance of Viterbi decoder */ 19 struct v615 { 20 metric_t metrics1; /* path metric buffer 1 */ 21 metric_t metrics2; /* path metric buffer 2 */ 22 void *dp; /* Pointer to current decision */ 23 metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 24 void *decisions; /* Beginning of decisions for block */ 25 }; 26 27 /* Initialize Viterbi decoder for start of new frame */ 28 int init_viterbi615_av(void *p,int starting_state){ 29 struct v615 *vp = p; 30 int i; 31 32 if(p == NULL) 33 return -1; 34 35 for(i=0;i<2048;i++) 36 vp->metrics1.v[i] = (vector unsigned short)(5000); 37 38 vp->old_metrics = &vp->metrics1; 39 vp->new_metrics = &vp->metrics2; 40 vp->dp = vp->decisions; 41 vp->old_metrics->s[starting_state & 16383] = 0; /* Bias known start state */ 42 return 0; 43 } 44 45 /* Create a new instance of a Viterbi decoder */ 46 void *create_viterbi615_av(int len){ 47 struct v615 *vp; 48 49 if(!Init){ 50 int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; 51 set_viterbi615_polynomial_av(polys); 52 } 53 vp = (struct v615 *)malloc(sizeof(struct v615)); 54 vp->decisions = malloc(sizeof(decision_t)*(len+14)); 55 init_viterbi615_av(vp,0); 56 return vp; 57 } 58 59 void set_viterbi615_polynomial_av(int polys[6]){ 60 int state; 61 int i; 62 63 for(state=0;state < 8192;state++){ 64 for(i=0;i<6;i++) 65 Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; 66 } 67 Init++; 68 } 69 70 71 /* Viterbi chainback */ 72 int chainback_viterbi615_av( 73 void *p, 74 unsigned char *data, /* Decoded output data */ 75 unsigned int nbits, /* Number of data bits */ 76 unsigned int endstate){ /* Terminal encoder state */ 77 struct v615 *vp = p; 78 decision_t *d = (decision_t *)vp->decisions; 79 int path_metric; 80 81 endstate %= 16384; 82 83 path_metric = vp->old_metrics->s[endstate]; 84 85 /* The store into data[] only needs to be done every 8 bits. 86 * But this avoids a conditional branch, and the writes will 87 * combine in the cache anyway 88 */ 89 d += 14; /* Look past tail */ 90 while(nbits-- != 0){ 91 int k; 92 93 k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 1 : 0; 94 endstate = (k << 13) | (endstate >> 1); 95 data[nbits>>3] = endstate >> 6; 96 } 97 return path_metric; 98 } 99 100 /* Delete instance of a Viterbi decoder */ 101 void delete_viterbi615_av(void *p){ 102 struct v615 *vp = p; 103 104 if(vp != NULL){ 105 free(vp->decisions); 106 free(vp); 107 } 108 } 109 110 int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits){ 111 struct v615 *vp = p; 112 decision_t *d = (decision_t *)vp->dp; 113 int path_metric = 0; 114 vector unsigned char decisions = (vector unsigned char)(0); 115 116 while(nbits--){ 117 vector unsigned short symv,sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; 118 vector unsigned char s; 119 void *tmp; 120 int i; 121 122 /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ 123 s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms)); 124 125 symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */ 126 sym0v = vec_splat(symv,0); 127 sym1v = vec_splat(symv,1); 128 sym2v = vec_splat(symv,2); 129 sym3v = vec_splat(symv,3); 130 sym4v = vec_splat(symv,4); 131 sym5v = vec_splat(symv,5); 132 syms += 6; 133 134 for(i=0;i<1024;i++){ 135 vector bool short decision0,decision1; 136 vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; 137 138 /* Form branch metrics 139 * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, 140 * the XOR operations constitute conditional negation. 141 * metric and m_metric (-metric) are in the range 0-1530 142 */ 143 m0 = vec_add(vec_xor(Branchtab615[0].v[i],sym0v),vec_xor(Branchtab615[1].v[i],sym1v)); 144 m1 = vec_add(vec_xor(Branchtab615[2].v[i],sym2v),vec_xor(Branchtab615[3].v[i],sym3v)); 145 m2 = vec_add(vec_xor(Branchtab615[4].v[i],sym4v),vec_xor(Branchtab615[5].v[i],sym5v)); 146 metric = vec_add(m0,m1); 147 metric = vec_add(metric,m2); 148 m_metric = vec_sub((vector unsigned short)(1530),metric); 149 150 /* Add branch metrics to path metrics */ 151 m0 = vec_adds(vp->old_metrics->v[i],metric); 152 m3 = vec_adds(vp->old_metrics->v[1024+i],metric); 153 m1 = vec_adds(vp->old_metrics->v[1024+i],m_metric); 154 m2 = vec_adds(vp->old_metrics->v[i],m_metric); 155 156 /* Compare and select */ 157 decision0 = vec_cmpgt(m0,m1); 158 decision1 = vec_cmpgt(m2,m3); 159 survivor0 = vec_min(m0,m1); 160 survivor1 = vec_min(m2,m3); 161 162 /* Store decisions and survivors. 163 * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in 164 * a funny interleaved fashion that we undo in the chainback function. 165 */ 166 decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */ 167 168 /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting 169 * 0xff is equivalent to adding 1, which sets the lsb. 170 */ 171 decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1))); 172 173 vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1); 174 vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1); 175 176 if((i % 8) == 7){ 177 /* We've accumulated a total of 128 decisions, stash and start again */ 178 d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */ 179 } 180 } 181 #if 0 182 /* Experimentally determine metric spread 183 * The results are fixed for a given code and input symbol size 184 */ 185 { 186 int i; 187 vector unsigned short min_metric; 188 vector unsigned short max_metric; 189 union { vector unsigned short v; unsigned short s[8];} t; 190 int minimum,maximum; 191 static int max_spread = 0; 192 193 min_metric = max_metric = vp->new_metrics->v[0]; 194 for(i=1;i<2048;i++){ 195 min_metric = vec_min(min_metric,vp->new_metrics->v[i]); 196 max_metric = vec_max(max_metric,vp->new_metrics->v[i]); 197 } 198 min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8)); 199 max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8)); 200 min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4)); 201 max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4)); 202 min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2)); 203 max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2)); 204 205 t.v = min_metric; 206 minimum = t.s[0]; 207 t.v = max_metric; 208 maximum = t.s[0]; 209 if(maximum-minimum > max_spread){ 210 max_spread = maximum-minimum; 211 printf("metric spread = %d\n",max_spread); 212 } 213 } 214 #endif 215 216 /* Renormalize if necessary. This deserves some explanation. 217 218 * The maximum possible spread, found by experiment, for 4-bit symbols is 405; for 8 bit symbols, it's 12750. 219 * So by looking at one arbitrary metric we can tell if any of them have possibly saturated. 220 * However, this is very conservative. Large spreads occur only at very high Eb/No, where 221 * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor. 222 223 * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric 224 * by not not normalizing when we should are extremely low. So either way, the risk to performance is small. 225 226 * All this is borne out by experiment. 227 */ 228 if(vp->new_metrics->s[0] >= USHRT_MAX-12750){ 229 vector unsigned short scale; 230 union { vector unsigned short v; unsigned short s[8];} t; 231 232 /* Find smallest metric and splat */ 233 scale = vp->new_metrics->v[0]; 234 for(i=1;i<2048;i++) 235 scale = vec_min(scale,vp->new_metrics->v[i]); 236 237 scale = vec_min(scale,vec_sld(scale,scale,8)); 238 scale = vec_min(scale,vec_sld(scale,scale,4)); 239 scale = vec_min(scale,vec_sld(scale,scale,2)); 240 241 /* Subtract it from all metrics 242 * Work backwards to try to improve the cache hit ratio, assuming LRU 243 */ 244 for(i=2047;i>=0;i--) 245 vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale); 246 t.v = scale; 247 path_metric += t.s[0]; 248 } 249 d++; 250 /* Swap pointers to old and new metrics */ 251 tmp = vp->old_metrics; 252 vp->old_metrics = vp->new_metrics; 253 vp->new_metrics = tmp; 254 } 255 vp->dp = d; 256 return path_metric; 257 } 258