Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # sha1_block for Thumb.
     11 #
     12 # January 2007.
     13 #
     14 # The code does not present direct interest to OpenSSL, because of low
     15 # performance. Its purpose is to establish _size_ benchmark. Pretty
     16 # useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
     18 # noted that in-lining of .Lcommon and .Lrotate improves performance
     19 # by over 40%, while code increases by only 10% or 32 bytes. But once
     20 # again, the goal was to establish _size_ benchmark, not performance.
     21 
     22 $output=shift;
     23 open STDOUT,">$output";
     24 
     25 $inline=0;
     26 #$cheat_on_binutils=1;
     27 
     28 $t0="r0";
     29 $t1="r1";
     30 $t2="r2";
     31 $a="r3";
     32 $b="r4";
     33 $c="r5";
     34 $d="r6";
     35 $e="r7";
     36 $K="r8";	# "upper" registers can be used in add/sub and mov insns
     37 $ctx="r9";
     38 $inp="r10";
     39 $len="r11";
     40 $Xi="r12";
     41 
     42 sub common {
     43 <<___;
     44 	sub	$t0,#4
     45 	ldr	$t1,[$t0]
     46 	add	$e,$K			@ E+=K_xx_xx
     47 	lsl	$t2,$a,#5
     48 	add	$t2,$e
     49 	lsr	$e,$a,#27
     50 	add	$t2,$e			@ E+=ROR(A,27)
     51 	add	$t2,$t1			@ E+=X[i]
     52 ___
     53 }
     54 sub rotate {
     55 <<___;
     56 	mov	$e,$d			@ E=D
     57 	mov	$d,$c			@ D=C
     58 	lsl	$c,$b,#30
     59 	lsr	$b,$b,#2
     60 	orr	$c,$b			@ C=ROR(B,2)
     61 	mov	$b,$a			@ B=A
     62 	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
     63 ___
     64 }
     65 
     66 sub BODY_00_19 {
     67 $code.=$inline?&common():"\tbl	.Lcommon\n";
     68 $code.=<<___;
     69 	mov	$t1,$c
     70 	eor	$t1,$d
     71 	and	$t1,$b
     72 	eor	$t1,$d			@ F_00_19(B,C,D)
     73 ___
     74 $code.=$inline?&rotate():"\tbl	.Lrotate\n";
     75 }
     76 
     77 sub BODY_20_39 {
     78 $code.=$inline?&common():"\tbl	.Lcommon\n";
     79 $code.=<<___;
     80 	mov	$t1,$b
     81 	eor	$t1,$c
     82 	eor	$t1,$d			@ F_20_39(B,C,D)
     83 ___
     84 $code.=$inline?&rotate():"\tbl	.Lrotate\n";
     85 }
     86 
     87 sub BODY_40_59 {
     88 $code.=$inline?&common():"\tbl	.Lcommon\n";
     89 $code.=<<___;
     90 	mov	$t1,$b
     91 	and	$t1,$c
     92 	mov	$e,$b
     93 	orr	$e,$c
     94 	and	$e,$d
     95 	orr	$t1,$e			@ F_40_59(B,C,D)
     96 ___
     97 $code.=$inline?&rotate():"\tbl	.Lrotate\n";
     98 }
     99 
    100 $code=<<___;
    101 .text
    102 .code	16
    103 
    104 .global	sha1_block_data_order
    105 .type	sha1_block_data_order,%function
    106 
    107 .align	2
    108 sha1_block_data_order:
    109 ___
    110 if ($cheat_on_binutils) {
    111 $code.=<<___;
    112 .code	32
    113 	add	r3,pc,#1
    114 	bx	r3			@ switch to Thumb ISA
    115 .code	16
    116 ___
    117 }
    118 $code.=<<___;
    119 	push	{r4-r7}
    120 	mov	r3,r8
    121 	mov	r4,r9
    122 	mov	r5,r10
    123 	mov	r6,r11
    124 	mov	r7,r12
    125 	push	{r3-r7,lr}
    126 	lsl	r2,#6
    127 	mov	$ctx,r0			@ save context
    128 	mov	$inp,r1			@ save inp
    129 	mov	$len,r2			@ save len
    130 	add	$len,$inp		@ $len to point at inp end
    131 
    132 .Lloop:
    133 	mov	$Xi,sp
    134 	mov	$t2,sp
    135 	sub	$t2,#16*4		@ [3]
    136 .LXload:
    137 	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
    138 	ldrb	$b,[$t1,#1]
    139 	ldrb	$c,[$t1,#2]
    140 	ldrb	$d,[$t1,#3]
    141 	lsl	$a,#24
    142 	lsl	$b,#16
    143 	lsl	$c,#8
    144 	orr	$a,$b
    145 	orr	$a,$c
    146 	orr	$a,$d
    147 	add	$t1,#4
    148 	push	{$a}
    149 	cmp	sp,$t2
    150 	bne	.LXload			@ [+14*16]
    151 
    152 	mov	$inp,$t1		@ update $inp
    153 	sub	$t2,#32*4
    154 	sub	$t2,#32*4
    155 	mov	$e,#31			@ [+4]
    156 .LXupdate:
    157 	ldr	$a,[sp,#15*4]
    158 	ldr	$b,[sp,#13*4]
    159 	ldr	$c,[sp,#7*4]
    160 	ldr	$d,[sp,#2*4]
    161 	eor	$a,$b
    162 	eor	$a,$c
    163 	eor	$a,$d
    164 	ror	$a,$e
    165 	push	{$a}
    166 	cmp	sp,$t2
    167 	bne	.LXupdate		@ [+(11+1)*64]
    168 
    169 	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
    170 	mov	$t0,$Xi
    171 
    172 	ldr	$t2,.LK_00_19
    173 	mov	$t1,$t0
    174 	sub	$t1,#20*4
    175 	mov	$Xi,$t1
    176 	mov	$K,$t2			@ [+7+4]
    177 .L_00_19:
    178 ___
    179 	&BODY_00_19();
    180 $code.=<<___;
    181 	cmp	$Xi,$t0
    182 	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]
    183 
    184 	ldr	$t2,.LK_20_39
    185 	mov	$t1,$t0
    186 	sub	$t1,#20*4
    187 	mov	$Xi,$t1
    188 	mov	$K,$t2			@ [+5]
    189 .L_20_39_or_60_79:
    190 ___
    191 	&BODY_20_39();
    192 $code.=<<___;
    193 	cmp	$Xi,$t0
    194 	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
    195 	cmp	sp,$t0
    196 	beq	.Ldone			@ [+2]
    197 
    198 	ldr	$t2,.LK_40_59
    199 	mov	$t1,$t0
    200 	sub	$t1,#20*4
    201 	mov	$Xi,$t1
    202 	mov	$K,$t2			@ [+5]
    203 .L_40_59:
    204 ___
    205 	&BODY_40_59();
    206 $code.=<<___;
    207 	cmp	$Xi,$t0
    208 	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]
    209 
    210 	ldr	$t2,.LK_60_79
    211 	mov	$Xi,sp
    212 	mov	$K,$t2
    213 	b	.L_20_39_or_60_79	@ [+4]
    214 .Ldone:
    215 	mov	$t0,$ctx
    216 	ldr	$t1,[$t0,#0]
    217 	ldr	$t2,[$t0,#4]
    218 	add	$a,$t1
    219 	ldr	$t1,[$t0,#8]
    220 	add	$b,$t2
    221 	ldr	$t2,[$t0,#12]
    222 	add	$c,$t1
    223 	ldr	$t1,[$t0,#16]
    224 	add	$d,$t2
    225 	add	$e,$t1
    226 	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]
    227 
    228 	add	sp,#80*4		@ deallocate stack frame
    229 	mov	$t0,$ctx		@ restore ctx
    230 	mov	$t1,$inp		@ restore inp
    231 	cmp	$t1,$len
    232 	beq	.Lexit
    233 	b	.Lloop			@ [+6] total 3212 cycles
    234 .Lexit:
    235 	pop	{r2-r7}
    236 	mov	r8,r2
    237 	mov	r9,r3
    238 	mov	r10,r4
    239 	mov	r11,r5
    240 	mov	r12,r6
    241 	mov	lr,r7
    242 	pop	{r4-r7}
    243 	bx	lr
    244 .align	2
    245 ___
    246 $code.=".Lcommon:\n".&common()."\tmov	pc,lr\n" if (!$inline);
    247 $code.=".Lrotate:\n".&rotate()."\tmov	pc,lr\n" if (!$inline);
    248 $code.=<<___;
    249 .align	2
    250 .LK_00_19:	.word	0x5a827999
    251 .LK_20_39:	.word	0x6ed9eba1
    252 .LK_40_59:	.word	0x8f1bbcdc
    253 .LK_60_79:	.word	0xca62c1d6
    254 .size	sha1_block_data_order,.-sha1_block_data_order
    255 .asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
    256 ___
    257 
    258 print $code;
    259 close STDOUT; # enforce flush
    260