# MMX assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)
#
# Syntax: GNU as, AT&T operand order (op src,dst).
# Target: 32-bit x86 with MMX; arguments are read from the stack
#         at 8(%ebp) and 12(%ebp).

        .text

#-----------------------------------------------------------------------
# long long sumsq_mmx_assist(signed short *in, int cnt);
#
# Evaluate the sum of squares of signed 16-bit input samples.
#
# In:    8(%ebp)  = in   pointer to the samples
#        12(%ebp) = cnt  number of samples
# Out:   %edx:%eax = 64-bit sum of squares
# Saves: %ebp, %esi, %ecx, %ebx (pushed on entry, popped on exit)
# Clobb: %mm0, %mm1, flags; emms is executed before returning
#
# Samples are consumed in groups of 8.  Any trailing cnt % 8 samples
# are NOT included in the result, and cnt < 8 returns 0.
# NOTE(review): presumably the caller sums the leftover samples
# itself -- confirm against the caller.
#-----------------------------------------------------------------------
        .global sumsq_mmx_assist
        .type   sumsq_mmx_assist,@function
        .align  16
sumsq_mmx_assist:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %ecx
        pushl   %ebx

        movl    8(%ebp),%esi            # esi = in
        movl    12(%ebp),%ecx           # ecx = cnt (samples remaining)
        xorl    %eax,%eax               # edx:eax = running 64-bit sum
        xorl    %edx,%edx

        # Each pmaddwd lane holds Sa^2+Sb^2 <= 2*32768^2 = 2^31, which
        # fits in 32 bits when read as unsigned.  Adding two such lanes
        # with paddd could reach 4*32768^2 = 2^32 and wrap to 0 when the
        # samples are -32768, so every lane is widened to 64 bits on its
        # own through the add/adc pair instead.
1:      subl    $8,%ecx                 # is a full group of 8 left?
        jl      2f                      # no: done
        movq    (%esi),%mm0             # S0 S1 S2 S3
        pmaddwd %mm0,%mm0               # (S0^2+S1^2) (S2^2+S3^2)
        movq    8(%esi),%mm1            # S4 S5 S6 S7
        pmaddwd %mm1,%mm1               # (S4^2+S5^2) (S6^2+S7^2)

        movd    %mm0,%ebx               # S0^2+S1^2
        addl    %ebx,%eax
        adcl    $0,%edx
        psrlq   $32,%mm0
        movd    %mm0,%ebx               # S2^2+S3^2
        addl    %ebx,%eax
        adcl    $0,%edx

        movd    %mm1,%ebx               # S4^2+S5^2
        addl    %ebx,%eax
        adcl    $0,%edx
        psrlq   $32,%mm1
        movd    %mm1,%ebx               # S6^2+S7^2
        addl    %ebx,%eax
        adcl    $0,%edx

        addl    $16,%esi                # advance past 8 samples
        jmp     1b

2:      emms                            # leave MMX state before returning
        popl    %ebx
        popl    %ecx
        popl    %esi
        popl    %ebp
        ret
        .size   sumsq_mmx_assist, .-sumsq_mmx_assist

#-----------------------------------------------------------------------
# long sumsq_wd_mmx_assist(signed short *in, int cnt);
#
# Evaluate the sum of squares of signed 16-bit input samples.
# Quick version, only safe for small numbers of small input values:
# the sums are kept in two 32-bit lanes that wrap silently, and the
# final result is a single 32-bit value.
#
# In:    8(%ebp)  = in   pointer to the samples
#        12(%ebp) = cnt  number of samples
# Out:   %eax = 32-bit sum of squares
# Saves: %ebp, %esi
# Clobb: %ecx, %edx, %mm0, %mm1, %mm2, flags; emms is executed
#        before returning
#
# Samples are consumed in groups of 8.  Any trailing cnt % 8 samples
# are NOT included in the result, and cnt < 8 returns 0.
#-----------------------------------------------------------------------
        .global sumsq_wd_mmx_assist
        .type   sumsq_wd_mmx_assist,@function
        .align  16
sumsq_wd_mmx_assist:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi

        movl    8(%ebp),%esi            # esi = in
        movl    12(%ebp),%ecx           # ecx = cnt (samples remaining)
        pxor    %mm2,%mm2               # zero both 32-bit partial sums

1:      subl    $8,%ecx                 # is a full group of 8 left?
        jl      2f                      # no: combine the partial sums
        movq    (%esi),%mm0             # S0 S1 S2 S3
        pmaddwd %mm0,%mm0               # (S0*S0+S1*S1) (S2*S2+S3*S3)
        movq    8(%esi),%mm1            # S4 S5 S6 S7
        pmaddwd %mm1,%mm1               # (S4*S4+S5*S5) (S6*S6+S7*S7)
        paddd   %mm1,%mm2
        paddd   %mm0,%mm2               # accumulate

        addl    $16,%esi                # advance past 8 samples
        jmp     1b

2:      movd    %mm2,%eax               # low-lane partial sum
        psrlq   $32,%mm2
        movd    %mm2,%edx               # high-lane partial sum
        addl    %edx,%eax               # 32-bit total
        emms                            # leave MMX state before returning
        popl    %esi
        popl    %ebp
        ret
        .size   sumsq_wd_mmx_assist, .-sumsq_wd_mmx_assist