1 /* K=9 r=1/2 Viterbi decoder for SSE 2 * Copyright Feb 2004, Phil Karn, KA9Q 3 * May be used under the terms of the GNU Lesser General Public License (LGPL) 4 */ 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <memory.h> 8 #include <xmmintrin.h> 9 #include "fec.h" 10 11 typedef union { unsigned char w[256]; __m64 v[32];} metric_t; 12 typedef union { unsigned long w[8]; unsigned char c[32]; __m64 v[4];} decision_t; 13 14 union branchtab29 { unsigned char c[128]; } Branchtab29_sse[2]; 15 static int Init = 0; 16 17 /* State info for instance of Viterbi decoder 18 * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 19 */ 20 struct v29 { 21 metric_t metrics1; /* path metric buffer 1 */ 22 metric_t metrics2; /* path metric buffer 2 */ 23 decision_t *dp; /* Pointer to current decision */ 24 metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 25 decision_t *decisions; /* Beginning of decisions for block */ 26 }; 27 28 /* Create a new instance of a Viterbi decoder */ 29 void *create_viterbi29_sse(int len){ 30 struct v29 *vp; 31 32 if(!Init){ 33 int polys[2] = { V29POLYA,V29POLYB }; 34 35 set_viterbi29_polynomial_sse(polys); 36 } 37 if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) 38 return NULL; 39 if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ 40 free(vp); 41 return NULL; 42 } 43 init_viterbi29(vp,0); 44 return vp; 45 } 46 47 void set_viterbi29_polynomial_sse(int polys[2]){ 48 int state; 49 50 for(state=0;state < 128;state++){ 51 Branchtab29_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 52 Branchtab29_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; 53 } 54 Init++; 55 } 56 57 /* Initialize Viterbi decoder for start of new frame */ 58 int init_viterbi29_sse(void *p,int starting_state){ 59 struct v29 *vp = p; 60 int i; 61 62 if(p == NULL) 63 return -1; 64 for(i=0;i<256;i++) 65 vp->metrics1.w[i] = 200; 66 67 vp->old_metrics = &vp->metrics1; 68 vp->new_metrics = &vp->metrics2; 69 vp->dp = vp->decisions; 70 vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ 71 return 0; 72 } 73 74 /* Viterbi chainback */ 75 int chainback_viterbi29_sse( 76 void *p, 77 unsigned char *data, /* Decoded output data */ 78 unsigned int nbits, /* Number of data bits */ 79 unsigned int endstate){ /* Terminal encoder state */ 80 struct v29 *vp = p; 81 decision_t *d; 82 83 if(p == NULL) 84 return -1; 85 d = vp->decisions; 86 /* Make room beyond the end of the encoder register so we can 87 * accumulate a full byte of decoded data 88 */ 89 endstate %= 256; 90 91 /* The store into data[] only needs to be done every 8 bits. 92 * But this avoids a conditional branch, and the writes will 93 * combine in the cache anyway 94 */ 95 d += 8; /* Look past tail */ 96 while(nbits-- != 0){ 97 int k; 98 99 k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; 100 data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 101 } 102 return 0; 103 } 104 105 106 /* Delete instance of a Viterbi decoder */ 107 void delete_viterbi29_sse(void *p){ 108 struct v29 *vp = p; 109 110 if(vp != NULL){ 111 free(vp->decisions); 112 free(vp); 113 } 114 } 115