/* Compute the sum of the squares of a vector of signed shorts

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */

#include <stdint.h>

/* Assist routine defined outside this file.  Per the header comment it
 * operates on multiples of 8 words and needs a 16-byte aligned pointer;
 * the caller below therefore handles the unaligned head and the
 * (cnt mod 8) tail itself.
 */
long long sumsq_sse2_assist(signed short *,int);

enum {
  SSE2_ALIGN_MASK = 15, /* low address bits that must be zero (16-byte alignment) */
  SSE2_GROUP_MASK = 7   /* low count bits left over after whole groups of 8 words */
};

/* Scalar fallback: sum of squares of the n words starting at p.
 * A short squared is at most 2^30, so each product fits comfortably
 * in the long long it is widened to before multiplying.
 */
static long long sumsq_scalar(const signed short *p,int n){
  long long acc = 0;

  for(int i = 0; i < n; i++)
    acc += (long long)p[i] * p[i];
  return acc;
}

/* Return the sum of the squares of the cnt words starting at in.
 *
 * in  - input samples; any alignment is accepted
 * cnt - number of samples; zero or negative yields 0
 *
 * The work is split in three parts: a scalar head up to the next
 * 16-byte boundary, the bulk handed to sumsq_sse2_assist(), and a
 * scalar tail of up to 7 words that the assist routine leaves over.
 */
long long sumsq_sse2(signed short *in,int cnt){
  long long sum = 0;
  int head = 0;

  /* A negative count would otherwise never reach zero in the head
   * loop and would turn into a bogus positive tail after masking.
   */
  if(cnt <= 0)
    return 0;

  /* Count the words before the next 16-byte boundary.  The address is
   * inspected through uintptr_t: casting a pointer to int truncates it
   * on 64-bit targets.
   */
  while(head < cnt && ((uintptr_t)(in + head) & SSE2_ALIGN_MASK) != 0)
    head++;

  sum += sumsq_scalar(in,head);
  in += head;
  cnt -= head;

  /* Aligned bulk; the full remaining count is passed, as before */
  sum += sumsq_sse2_assist(in,cnt);
  in += cnt & ~SSE2_GROUP_MASK;
  cnt &= SSE2_GROUP_MASK;

  /* Handle up to 7 trailing words */
  sum += sumsq_scalar(in,cnt);
  return sum;
}