1 /* 16-bit signed integer dot product 2 * MMX assisted version; also for SSE 3 * 4 * Copyright 2004 Phil Karn 5 * May be used under the terms of the GNU Lesser General Public License (LGPL) 6 */ 7 #include <stdlib.h> 8 #include "fec.h" 9 10 struct dotprod { 11 int len; /* Number of coefficients */ 12 13 /* On a MMX or SSE machine, these hold 4 copies of the coefficients, 14 * preshifted by 0,1,2,3 words to meet all possible input data 15 * alignments (see Intel ap559 on MMX dot products). 16 */ 17 signed short *coeffs[4]; 18 }; 19 long dotprod_mmx_assist(signed short *a,signed short *b,int cnt); 20 21 /* Create and return a descriptor for use with the dot product function */ 22 void *initdp_mmx(signed short coeffs[],int len){ 23 struct dotprod *dp; 24 int i,j; 25 26 27 if(len == 0) 28 return NULL; 29 30 dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); 31 dp->len = len; 32 33 /* Make 4 copies of coefficients, one for each data alignment */ 34 for(i=0;i<4;i++){ 35 dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4, 36 4*sizeof(signed short)); 37 for(j=0;j<len;j++) 38 dp->coeffs[i][j+i] = coeffs[j]; 39 } 40 return (void *)dp; 41 } 42 43 44 /* Free a dot product descriptor created earlier */ 45 void freedp_mmx(void *p){ 46 struct dotprod *dp = (struct dotprod *)p; 47 int i; 48 49 for(i=0;i<4;i++) 50 if(dp->coeffs[i] != NULL) 51 free(dp->coeffs[i]); 52 free(dp); 53 } 54 55 /* Compute a dot product given a descriptor and an input array 56 * The length is taken from the descriptor 57 */ 58 long dotprod_mmx(void *p,signed short a[]){ 59 struct dotprod *dp = (struct dotprod *)p; 60 int al; 61 signed short *ar; 62 63 /* Round input data address down to 8 byte boundary 64 * NB: depending on the alignment of a[], memory 65 * before a[] will be accessed. The contents don't matter since they'll 66 * be multiplied by zero coefficients. I can't conceive of any 67 * situation where this could cause a segfault since memory protection 68 * in the x86 machines is done on much larger boundaries 69 */ 70 ar = (signed short *)((int)a & ~7); 71 72 /* Choose one of 4 sets of pre-shifted coefficients. al is both the 73 * index into dp->coeffs[] and the number of 0 words padded onto 74 * that coefficients array for alignment purposes 75 */ 76 al = a - ar; 77 78 /* Call assembler routine to do the work, passing number of 4-word blocks */ 79 return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1); 80 } 81 82