Home | History | Annotate | Download | only in fec
      1 /* K=9 r=1/2 Viterbi decoder for SSE
      2  * Copyright Feb 2004, Phil Karn, KA9Q
      3  * May be used under the terms of the GNU Lesser General Public License (LGPL)
      4  */
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <memory.h>
      8 #include <xmmintrin.h>
      9 #include "fec.h"
     10 
     11 typedef union { unsigned char w[256]; __m64 v[32];} metric_t;
     12 typedef union { unsigned long w[8]; unsigned char c[32]; __m64 v[4];} decision_t;
     13 
     14 union branchtab29 { unsigned char c[128]; } Branchtab29_sse[2];
     15 static int Init = 0;
     16 
     17 /* State info for instance of Viterbi decoder
     18  * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
     19  */
     20 struct v29 {
     21   metric_t metrics1; /* path metric buffer 1 */
     22   metric_t metrics2; /* path metric buffer 2 */
     23   decision_t *dp;          /* Pointer to current decision */
     24   metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
     25   decision_t *decisions;   /* Beginning of decisions for block */
     26 };
     27 
     28 /* Create a new instance of a Viterbi decoder */
     29 void *create_viterbi29_sse(int len){
     30   struct v29 *vp;
     31 
     32   if(!Init){
     33     int polys[2] = { V29POLYA,V29POLYB };
     34 
     35     set_viterbi29_polynomial_sse(polys);
     36   }
     37   if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL)
     38     return NULL;
     39   if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){
     40     free(vp);
     41     return NULL;
     42   }
     43   init_viterbi29(vp,0);
     44   return vp;
     45 }
     46 
     47 void set_viterbi29_polynomial_sse(int polys[2]){
     48   int state;
     49 
     50   for(state=0;state < 128;state++){
     51     Branchtab29_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0;
     52     Branchtab29_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0;
     53   }
     54   Init++;
     55 }
     56 
     57 /* Initialize Viterbi decoder for start of new frame */
     58 int init_viterbi29_sse(void *p,int starting_state){
     59   struct v29 *vp = p;
     60   int i;
     61 
     62   if(p == NULL)
     63     return -1;
     64   for(i=0;i<256;i++)
     65     vp->metrics1.w[i] = 200;
     66 
     67   vp->old_metrics = &vp->metrics1;
     68   vp->new_metrics = &vp->metrics2;
     69   vp->dp = vp->decisions;
     70   vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */
     71   return 0;
     72 }
     73 
     74 /* Viterbi chainback */
     75 int chainback_viterbi29_sse(
     76       void *p,
     77       unsigned char *data, /* Decoded output data */
     78       unsigned int nbits, /* Number of data bits */
     79       unsigned int endstate){ /* Terminal encoder state */
     80   struct v29 *vp = p;
     81   decision_t *d;
     82 
     83   if(p == NULL)
     84     return -1;
     85   d = vp->decisions;
     86   /* Make room beyond the end of the encoder register so we can
     87    * accumulate a full byte of decoded data
     88    */
     89   endstate %= 256;
     90 
     91   /* The store into data[] only needs to be done every 8 bits.
     92    * But this avoids a conditional branch, and the writes will
     93    * combine in the cache anyway
     94    */
     95   d += 8; /* Look past tail */
     96   while(nbits-- != 0){
     97     int k;
     98 
     99     k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1;
    100     data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
    101   }
    102   return 0;
    103 }
    104 
    105 
    106 /* Delete instance of a Viterbi decoder */
    107 void delete_viterbi29_sse(void *p){
    108   struct v29 *vp = p;
    109 
    110   if(vp != NULL){
    111     free(vp->decisions);
    112     free(vp);
    113   }
    114 }
    115