/* Compute the sum of the squares of a vector of signed shorts

 * MMX-assisted version (also used on SSE)

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */
#include <stdint.h>

/* Assist routine defined elsewhere; per the note above it works on
 * whole groups of 8 words and expects an 8-byte-aligned pointer.
 */
long long sumsq_mmx_assist(signed short *,int);

/* Return the sum of in[i]*in[i] for i in [0,cnt).
 *
 * in  - pointer to the first sample
 * cnt - number of samples; a value <= 0 yields 0
 *
 * The work is split in three parts: a scalar loop up to the next
 * 8-byte boundary, a call to sumsq_mmx_assist() for the aligned
 * middle, and a scalar loop for whatever is left (at most 7 words).
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* Nothing to sum. Without this guard a negative count would walk
   * the pointer backwards below and read outside the buffer.
   */
  if(cnt <= 0){
    return 0;
  }

  /* Handle stuff before the next 8-byte boundary.
   * uintptr_t is used for the alignment test because casting a pointer
   * to int loses bits wherever pointers are wider than int.
   */
  while(((uintptr_t)in & 7) != 0 && cnt != 0){
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Aligned middle part; the assist routine is handed the full
   * remaining count and is expected to consume only whole groups
   * of 8 words, which is what the pointer/count update below assumes.
   */
  sum += sumsq_mmx_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}