Home | History | Annotate | Download | only in fec
      1 /* Compute the sum of the squares of a vector of signed shorts
      2 
      3  * The SSE2 and MMX assist routines both operate on multiples of
      4  * 8 words; they differ only in their alignment requirements (8 bytes
      5  * for MMX, 16 bytes for SSE2)
      6 
      7  * Copyright 2004 Phil Karn, KA9Q
      8  * May be used under the terms of the GNU Lesser Public License (LGPL)
      9  */

#include <stdint.h>

     11 long long sumsq_sse2_assist(signed short *,int);
     12 
     13 long long sumsq_sse2(signed short *in,int cnt){
     14   long long sum = 0;
     15 
     16   /* Handle stuff before the next 8-byte boundary */
     17   while(((int)in & 15) != 0 && cnt != 0){
     18     sum += (long)in[0] * in[0];
     19     in++;
     20     cnt--;
     21   }
     22   sum += sumsq_sse2_assist(in,cnt);
     23   in += cnt & ~7;
     24   cnt &= 7;
     25 
     26   /* Handle up to 7 trailing words */
     27   while(cnt != 0){
     28     sum += (long)in[0] * in[0];
     29     in++;
     30     cnt--;
     31   }
     32   return sum;
     33 }
     34