1 /* 16-bit signed integer dot product 2 * SSE2 version 3 * Copyright 2004 Phil Karn 4 * May be used under the terms of the GNU Lesser General Public License (LGPL) 5 */ 6 #define _XOPEN_SOURCE 600 7 #include <stdlib.h> 8 #include <memory.h> 9 #include "fec.h" 10 11 struct dotprod { 12 int len; /* Number of coefficients */ 13 14 /* On a SSE2 machine, these hold 8 copies of the coefficients, 15 * preshifted by 0,1,..7 words to meet all possible input data 16 * alignments (see Intel ap559 on MMX dot products). 17 */ 18 signed short *coeffs[8]; 19 }; 20 21 long dotprod_sse2_assist(signed short *a,signed short *b,int cnt); 22 23 /* Create and return a descriptor for use with the dot product function */ 24 void *initdp_sse2(signed short coeffs[],int len){ 25 struct dotprod *dp; 26 int i,j,blksize; 27 28 if(len == 0) 29 return NULL; 30 31 dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 32 dp->len = len; 33 34 /* Make 8 copies of coefficients, one for each data alignment, 35 * each aligned to 16-byte boundary 36 */ 37 for(i=0;i<8;i++){ 38 blksize = (1+(len+i-1)/8) * 8*sizeof(signed short); 39 posix_memalign((void **)&dp->coeffs[i],16,blksize); 40 memset(dp->coeffs[i],0,blksize); 41 for(j=0;j<len;j++) 42 dp->coeffs[i][j+i] = coeffs[j]; 43 } 44 return (void *)dp; 45 } 46 47 48 /* Free a dot product descriptor created earlier */ 49 void freedp_sse2(void *p){ 50 struct dotprod *dp = (struct dotprod *)p; 51 int i; 52 53 for(i=0;i<8;i++) 54 if(dp->coeffs[i] != NULL) 55 free(dp->coeffs[i]); 56 free(dp); 57 } 58 59 /* Compute a dot product given a descriptor and an input array 60 * The length is taken from the descriptor 61 */ 62 long dotprod_sse2(void *p,signed short a[]){ 63 struct dotprod *dp = (struct dotprod *)p; 64 int al; 65 signed short *ar; 66 67 ar = (signed short *)((int)a & ~15); 68 al = a - ar; 69 70 /* Call assembler routine to do the work, passing number of 8-word blocks */ 71 return dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1); 72 } 73