# (source listing header removed — file: fec/sumsq_mmx_assist.s)
      1 # MMX assist routines for sumsq
      2 # Copyright 2001 Phil Karn, KA9Q
      3 # May be used under the terms of the GNU Public License (GPL)
      4 
      5 	.text
      6 
# Evaluate sum of squares of signed 16-bit input samples
#  long long sumsq_mmx_assist(signed short *in,int cnt);
#
# ABI:     i386 cdecl — args on the stack: 8(%ebp) = in, 12(%ebp) = cnt
# Returns: 64-bit sum in %edx:%eax
# Saves:   %esi, %ebx (callee-saved); %ecx is also pushed although it is
#          caller-saved under cdecl, so preserving it is merely extra caution.
# Note:    processes cnt in groups of 8 samples (16 bytes); any trailing
#          cnt%8 samples are NOT summed here — presumably the C wrapper
#          handles the remainder; verify against caller.
	.global sumsq_mmx_assist
	.type sumsq_mmx_assist,@function
	.align 16
sumsq_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %ecx
	pushl %ebx

	movl 8(%ebp),%esi	# %esi = in (current sample pointer)
	movl 12(%ebp),%ecx	# %ecx = cnt (samples remaining)
	xor %eax,%eax		# zero the 64-bit accumulator %edx:%eax
	xor %edx,%edx

	# Since 4 * 32767**2 < 2**32, we can accumulate two at a time
	# NOTE(review): pmaddwd wraps if both words of a pair are -32768
	# ((-32768)^2 * 2 overflows a signed dword) — inherent pmaddwd
	# limitation; results are exact for all other sample values.
1:	subl $8,%ecx		# need a full group of 8 samples
	jl 2f			# fewer than 8 left -> finish up
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0^2+S1^2) (S2^2+S3^2)
	movq 8(%esi),%mm6	# S4 S5 S6 S7
	pmaddwd %mm6,%mm6	# (S4^2+S5^2) (S6^2+S7^2)
	paddd %mm6,%mm0		# (S0^2+S1^2+S4^2+S5^2)(S2^2+S3^2+S6^2+S7^2)
	movd %mm0,%ebx		# fold low dword into 64-bit accumulator
	addl %ebx,%eax
	adcl $0,%edx		# propagate carry into the high half
	psrlq $32,%mm0		# bring high dword down to bits 31:0
	movd %mm0,%ebx		# fold high dword into accumulator too
	addl %ebx,%eax
	adcl $0,%edx
	addl $16,%esi		# advance past the 8 samples just consumed
	jmp 1b

2:	emms			# clear MMX state so the x87 FPU is usable again
	popl %ebx
	popl %ecx
	popl %esi
	popl %ebp
	ret
     48 
# Evaluate sum of squares of signed 16-bit input samples
#  long sumsq_wd_mmx_assist(signed short *in,int cnt);
#  Quick version, only safe for small numbers of small input values...
#
# ABI:     i386 cdecl — args on the stack: 8(%ebp) = in, 12(%ebp) = cnt
# Returns: 32-bit sum in %eax
# Clobb:   %ecx, %edx (caller-saved under cdecl), %mm0-%mm2, flags
# Note:    accumulates in two packed 32-bit lanes with no carry handling,
#          hence the "only safe for small ... values" caveat above.
#          Trailing cnt%8 samples are NOT summed here — presumably the
#          C wrapper handles the remainder; verify against caller.
	.global sumsq_wd_mmx_assist
	.type sumsq_wd_mmx_assist,@function
	.align 16
sumsq_wd_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi

	movl 8(%ebp),%esi	# %esi = in (current sample pointer)
	movl 12(%ebp),%ecx	# %ecx = cnt (samples remaining)
	pxor %mm2,%mm2		# zero sum (two packed dword accumulators)

1:	subl $8,%ecx		# need a full group of 8 samples
	jl 2f			# fewer than 8 left -> finish up
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0*S0+S1*S1) (S2*S2+S3*S3)
	movq 8(%esi),%mm1	# S4 S5 S6 S7
	pmaddwd %mm1,%mm1	# (S4*S4+S5*S5) (S6*S6+S7*S7)
	paddd %mm1,%mm2
	paddd %mm0,%mm2		# accumulate

	addl $16,%esi		# advance past the 8 samples just consumed
	jmp 1b

2:	movd %mm2,%eax		# even sum
	psrlq $32,%mm2
	movd %mm2,%edx		# odd sum
	addl %edx,%eax		# combine the two lanes into the return value
	emms			# clear MMX state so the x87 FPU is usable again
	popl %esi
	popl %ebp
	ret
     84