/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 256-state (k=9) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);

   Syntax:  GNU as, AT&T operand order (source, destination), 32-bit x86.
   In:      arguments on the stack: vp = 8(%ebp), syms = 12(%ebp),
            nbits = 16(%ebp).  Two bytes of syms are consumed per bit.
   Out:     %eax = 0 after processing, or -1 if vp is NULL.
            NOTE: the prototype above says void; the status in %eax is
            simply ignored by callers that use that prototype.
   Updates: vp->dp, vp->old_metrics and vp->new_metrics pointers (the
            struct fields at offsets DP, OLDMETRICS, NEWMETRICS), the
            256-byte metric buffers they point to, and 256 decision
            bytes per processed bit starting at the incoming vp->dp.
   Clobb:   %eax, %ecx is untouched, mm0-mm7 (emms is executed before a
            normal return), flags.  %esi, %edi, %edx, %ebx, %ebp are
            saved and restored.  The stack slots holding syms and nbits
            are overwritten (they serve as the loop state).
*/

	# These are offsets into struct v29, defined in viterbi29.h
	.set DP,512
	.set OLDMETRICS,516
	.set NEWMETRICS,520

	# ------------------------------------------------------------------
	# butterfly GROUP
	#
	# Each invocation of this macro does 8 butterflies in parallel.
	#
	# In:    %eax = first symbol  * 128 (row offset into Mettab29_1)
	#        %ebx = second symbol * 128 (row offset into Mettab29_2)
	#        %esi -> old path metrics (256 bytes)
	#        %edi -> new path metrics (256 bytes)
	#        %edx -> decision bytes for the current bit (256 bytes)
	# Out:   16 new path metrics at (16*GROUP)(%edi) and 16 decision
	#        bytes (0xff or 0x00) at (16*GROUP)(%edx)
	# Clobb: mm0-mm7
	# ------------------------------------------------------------------
	.macro butterfly GROUP
	# Compute branch metrics: sum of the two table entries, plus one,
	# halved.  psrlq shifts the whole quadword, so a bit from the byte
	# above leaks into the top of each byte; the mask with fifteens
	# removes it and leaves a 4-bit metric per byte.
	movq (Mettab29_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0
	paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3  # emulate pavgb - this may not be necessary
	psrlq $1,%mm3
	pand %mm0,%mm3

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor  %mm0,%mm3		 # invert branch metric (metric ^ 15)
	paddb %mm3,%mm7		 # path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7;  mm1 and mm2
	# The comparison is done on the byte-wise difference against zero,
	# so it is a signed test of (a - b) rather than of a and b directly.
	pxor %mm3,%mm3
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: (decision & path1) | (~decision & path0)
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high halves -> decisions at offset 16*GROUP+8
	punpcklbw %mm5,%mm3	# low halves  -> decisions at offset 16*GROUP
	punpcklbw %mm2,%mm7	# low halves  -> new metrics at offset 16*GROUP
	punpckhbw %mm2,%mm0	# high halves -> new metrics at offset 16*GROUP+8
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

	.text
	.global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2
	.type update_viterbi29_blk_mmx,@function
	.align 16

	# MMX (64-bit SIMD) version
	# requires Pentium-MMX, Pentium-II or better

update_viterbi29_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	movl $-1,%eax		# NULL vp: return -1 (immediate, hence the $)
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Main loop, one iteration per bit.  The remaining bit count and
	# the symbol pointer live in the caller-provided argument slots.
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movzbl (%ebx),%eax	# eax = first symbol, zero-extended
	movzbl 1(%ebx),%ebx	# ebx = second symbol, zero-extended
	addl $2,12(%ebp)	# syms += 2

	# shift into first array index dimension slot
	# (symbol * 128 = byte offset of that symbol's 128-byte table row)
	shll $7,%eax
	shll $7,%ebx

	# invoke macro 16 times for a total of 128 butterflies
	.irp grp,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	butterfly GROUP=\grp
	.endr

	addl $256,%edx		# bump decision pointer

	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
	# Common exit; the NULL-vp path arrives here before any MMX
	# instruction has run, so it does not need emms.
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret
	.size update_viterbi29_blk_mmx,.-update_viterbi29_blk_mmx

	# Read-only byte constants used by the butterfly macro
	.section .rodata
	.align 8
fifteens:
	.byte 15,15,15,15,15,15,15,15

	.align 8
ones:	.byte 1,1,1,1,1,1,1,1
    162