/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;

   Syntax:  GNU as, AT&T operand order (source first, destination last),
            32-bit x86.
   In:      arguments are read from the stack after the frame is set up:
              8(%ebp)  = vp
              12(%ebp) = syms  (two bytes are consumed per decoded bit)
              16(%ebp) = nbits
   Out:     %eax = 0 after the loop has run, or -1 when vp is NULL.
   Effects: for every bit, reads 64 bytes of old path metrics, writes
            64 bytes of new path metrics and 64 decision bytes, then
            swaps the two metric pointers.  On normal exit the metric
            pointers and the advanced decision pointer are stored back
            into *vp.  The stack slots holding syms and nbits are used
            as loop variables and are overwritten.
   Clobb:   %eax, flags, %mm0-%mm7.  %ebp, %esi, %edi, %edx and %ebx are
            saved and restored.  emms is executed on the normal exit path
            (the NULL-vp path never touches MMX state).
*/
	# MMX (64-bit SIMD) version
	# requires Pentium-MMX, Pentium-II or better

	# These are offsets into struct v27, defined in viterbi27_mmx.c
	.set DP,128
	.set OLDMETRICS,132
	.set NEWMETRICS,136

	/* butterfly GROUP
	   Each invocation of this macro does 8 butterflies in parallel.
	   Defining the macro emits no code; it is expanded four times
	   (GROUP = 0..3) inside the per-bit loop below.
	   In:    %eax = first symbol  * 32 (row offset into Mettab27_1)
	          %ebx = second symbol * 32 (row offset into Mettab27_2)
	          %esi -> old path metrics, %edi -> new path metrics
	          %edx -> decision output for the current bit
	   Out:   16 new path metrics at 16*GROUP(%edi) and
	          16 decision bytes (00 or ff) at 16*GROUP(%edx)
	   Clobb: %mm0-%mm7 */
	.MACRO butterfly GROUP
	# Compute branch metrics
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3  # emulate pavgb - this may not be necessary
	psrlq $1,%mm3		# 64-bit shift: bits leak across byte lanes ...
	pand %mm0,%mm3		# ... so keep only the low 4 bits of each byte

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor  %mm0,%mm3		 # invert branch metric
	paddb %mm3,%mm7		 # path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7;  mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0, reference for the signed compares
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: decision ff keeps mm7/mm2, decision 00 keeps mm6/mm1
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high halves -> decisions 8..15 of this group
	punpcklbw %mm5,%mm3	# low halves  -> decisions 0..7 of this group
	punpcklbw %mm2,%mm7	# low halves  -> new metrics 0..7 of this group
	punpckhbw %mm2,%mm0	# high halves -> new metrics 8..15 of this group
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

	.text
	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
	.type update_viterbi27_blk_mmx,@function
	.align 16

update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# vp == NULL: return -1.  The $ prefix makes -1 an immediate; without
	# it the instruction is a load from absolute address -1 and faults.
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Per-bit loop.  The remaining bit count and the symbol pointer are
	# kept in their argument slots on the stack between iterations.
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movw (%ebx),%ax		# ax = second symbol : first symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)

	movb %ah,%bl		# bl = second symbol
	andl $255,%eax		# eax = first symbol, zero-extended
	andl $255,%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot
	shll $5,%eax
	shll $5,%ebx

# invoke macro 4 times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer

	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
fifteens:
	.byte 15,15,15,15,15,15,15,15

	.align 8
ones:	.byte 1,1,1,1,1,1,1,1