Home | History | Annotate | Download | only in fec
      1 /* 16-bit signed integer dot product
      2  * MMX assisted version; also for SSE
      3  *
      4  * Copyright 2004 Phil Karn
      5  * May be used under the terms of the GNU Lesser General Public License (LGPL)
      6  */
#include <stdint.h>
#include <stdlib.h>
#include "fec.h"
      9 
     10 struct dotprod {
     11   int len; /* Number of coefficients */
     12 
     13   /* On a MMX or SSE machine, these hold 4 copies of the coefficients,
     14    * preshifted by 0,1,2,3 words to meet all possible input data
     15    * alignments (see Intel ap559 on MMX dot products).
     16    */
     17   signed short *coeffs[4];
     18 };
     19 long dotprod_mmx_assist(signed short *a,signed short *b,int cnt);
     20 
     21 /* Create and return a descriptor for use with the dot product function */
     22 void *initdp_mmx(signed short coeffs[],int len){
     23   struct dotprod *dp;
     24   int i,j;
     25 
     26 
     27   if(len == 0)
     28     return NULL;
     29 
     30   dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
     31   dp->len = len;
     32 
     33   /* Make 4 copies of coefficients, one for each data alignment */
     34   for(i=0;i<4;i++){
     35     dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4,
     36 					   4*sizeof(signed short));
     37     for(j=0;j<len;j++)
     38       dp->coeffs[i][j+i] = coeffs[j];
     39   }
     40   return (void *)dp;
     41 }
     42 
     43 
     44 /* Free a dot product descriptor created earlier */
     45 void freedp_mmx(void *p){
     46   struct dotprod *dp = (struct dotprod *)p;
     47   int i;
     48 
     49   for(i=0;i<4;i++)
     50     if(dp->coeffs[i] != NULL)
     51       free(dp->coeffs[i]);
     52   free(dp);
     53 }
     54 
     55 /* Compute a dot product given a descriptor and an input array
     56  * The length is taken from the descriptor
     57  */
     58 long dotprod_mmx(void *p,signed short a[]){
     59   struct dotprod *dp = (struct dotprod *)p;
     60   int al;
     61   signed short *ar;
     62 
     63   /* Round input data address down to 8 byte boundary
     64    * NB: depending on the alignment of a[], memory
     65    * before a[] will be accessed. The contents don't matter since they'll
     66    * be multiplied by zero coefficients. I can't conceive of any
     67    * situation where this could cause a segfault since memory protection
     68    * in the x86 machines is done on much larger boundaries
     69    */
     70   ar = (signed short *)((int)a & ~7);
     71 
     72   /* Choose one of 4 sets of pre-shifted coefficients. al is both the
     73    * index into dp->coeffs[] and the number of 0 words padded onto
     74    * that coefficients array for alignment purposes
     75    */
     76   al = a - ar;
     77 
     78   /* Call assembler routine to do the work, passing number of 4-word blocks */
     79   return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1);
     80 }
     81 
     82