Home | History | Annotate | Download | only in fec
      1 /* 16-bit signed integer dot product
      2  * SSE2 version
      3  * Copyright 2004 Phil Karn
      4  * May be used under the terms of the GNU Lesser General Public License (LGPL)
      5  */
      6 #define _XOPEN_SOURCE 600
      7 #include <stdlib.h>
      8 #include <memory.h>
      9 #include "fec.h"
     10 
     11 struct dotprod {
     12   int len; /* Number of coefficients */
     13 
     14   /* On a SSE2 machine, these hold 8 copies of the coefficients,
     15    * preshifted by 0,1,..7 words to meet all possible input data
     16    * alignments (see Intel ap559 on MMX dot products).
     17    */
     18   signed short *coeffs[8];
     19 };
     20 
     21 long dotprod_sse2_assist(signed short *a,signed short *b,int cnt);
     22 
     23 /* Create and return a descriptor for use with the dot product function */
     24 void *initdp_sse2(signed short coeffs[],int len){
     25   struct dotprod *dp;
     26   int i,j,blksize;
     27 
     28   if(len == 0)
     29     return NULL;
     30 
     31   dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
     32   dp->len = len;
     33 
     34   /* Make 8 copies of coefficients, one for each data alignment,
     35    * each aligned to 16-byte boundary
     36    */
     37   for(i=0;i<8;i++){
     38     blksize = (1+(len+i-1)/8) * 8*sizeof(signed short);
     39     posix_memalign((void **)&dp->coeffs[i],16,blksize);
     40     memset(dp->coeffs[i],0,blksize);
     41     for(j=0;j<len;j++)
     42       dp->coeffs[i][j+i] = coeffs[j];
     43   }
     44   return (void *)dp;
     45 }
     46 
     47 
     48 /* Free a dot product descriptor created earlier */
     49 void freedp_sse2(void *p){
     50   struct dotprod *dp = (struct dotprod *)p;
     51   int i;
     52 
     53   for(i=0;i<8;i++)
     54     if(dp->coeffs[i] != NULL)
     55       free(dp->coeffs[i]);
     56   free(dp);
     57 }
     58 
     59 /* Compute a dot product given a descriptor and an input array
     60  * The length is taken from the descriptor
     61  */
     62 long dotprod_sse2(void *p,signed short a[]){
     63   struct dotprod *dp = (struct dotprod *)p;
     64   int al;
     65   signed short *ar;
     66 
     67   ar = (signed short *)((int)a & ~15);
     68   al = a - ar;
     69 
     70   /* Call assembler routine to do the work, passing number of 8-word blocks */
     71   return dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1);
     72 }
     73