Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro (at] fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # SHA256 block procedure for ARMv4. May 2007.
     11 
     12 # Performance is ~2x better than gcc 3.4 generated code and in "abso-
     13 # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
     14 # byte.
     15 
     # The first command-line argument names the file that receives the
     # generated assembly.  When no name is given, STDOUT is left alone and
     # the assembly goes wherever the caller pointed it.  The three-argument
     # form of open keeps characters such as a leading '>' or a trailing '|'
     # in the name from being interpreted as part of the open mode, and a
     # failure to open the file is fatal instead of being silently ignored.
     $output=shift;
     if (defined($output) && $output ne "") {
         open(STDOUT,">",$output) or die "can't open $output: $!";
     }
     18 
     # ARM register allocation for the generated code.
     #
     # r0 and r2 do double duty: they carry ctx and len on entry and are
     # reused as the scratch registers $t0/$t1 once the prologue has pushed
     # them onto the stack.
     ($ctx,$inp,$len)=("r0","r1","r2");
     ($t0,$t1,$t2)=($ctx,$len,"r12");
     $T1="r3";

     # The eight working variables live in r4-r11.  @V lists them in a..h
     # order; the generator loops rotate this list after every round.
     ($A,$B,$C,$D,$E,$F,$G,$H)=map { "r$_" } (4..11);
     @V=($A,$B,$C,$D,$E,$F,$G,$H);

     # r14 (lr, saved by the prologue) walks the K256 constant table.
     $Ktbl="r14";

     # Shift amounts for the SHA-256 mixing functions.  All three entries
     # of @Sigma0/@Sigma1 are used as rotate-right counts; for
     # @sigma0/@sigma1 the first two are rotate-right counts and the third
     # is a logical-shift-right count.
     @Sigma0=( 2,13,22);
     @Sigma1=( 6,11,25);
     @sigma0=( 7,18, 3);
     @sigma1=(17,19,10);
     39 
     # BODY_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h)
     #
     # Appends the ARM assembly for SHA-256 round $i to the global $code.
     #   $i     - round number; $i%16 selects the X[] slot on the stack
     #   $a..$h - names of the registers currently holding the working
     #            variables a..h
     # The sub is used for its side effect on $code only.
     #
     # The emitted round leaves T1+Sigma0(a)+Maj(a,b,c) in the register
     # passed as $h and adds T1 to the register passed as $d, which is why
     # the callers rotate the register list by one position between rounds
     # instead of emitting register moves.  Backquoted expressions in the
     # templates are evaluated later by the substitution that is run over
     # $code at the end of the script.
     40 sub BODY_00_15 {
     41 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
     42 
     # Rounds 0..15 only: fetch the next 32-bit message word one byte at a
     # time and assemble it big-endian in $T1 (the byte at offset 0 ends up
     # in bits 31..24), advancing $inp by 4.  In round 15 the advanced input
     # pointer is stored at [sp,#17*4] because BODY_16_XX reuses the $inp
     # register as scratch.
     43 $code.=<<___ if ($i<16);
     44 	ldrb	$T1,[$inp,#3]			@ $i
     45 	ldrb	$t2,[$inp,#2]
     46 	ldrb	$t1,[$inp,#1]
     47 	ldrb	$t0,[$inp],#4
     48 	orr	$T1,$T1,$t2,lsl#8
     49 	orr	$T1,$T1,$t1,lsl#16
     50 	orr	$T1,$T1,$t0,lsl#24
     51 	`"str	$inp,[sp,#17*4]"	if ($i==15)`
     52 ___
     # Common to every round: load the next K256 constant into $t2, save the
     # message word held in $T1 to its X[] slot, then compute
     #   T1 += Sigma1(e) + Ch(e,f,g) + h + K
     #   h   = Sigma0(a) + Maj(a,b,c) + T1
     #   d  += T1
     # $t2 is not written again after the constant is loaded, so it still
     # holds that constant when the round ends.
     53 $code.=<<___;
     54 	ldr	$t2,[$Ktbl],#4			@ *K256++
     55 	str	$T1,[sp,#`$i%16`*4]
     56 	mov	$t0,$e,ror#$Sigma1[0]
     57 	eor	$t0,$t0,$e,ror#$Sigma1[1]
     58 	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)
     59 	add	$T1,$T1,$t0
     60 	eor	$t1,$f,$g
     61 	and	$t1,$t1,$e
     62 	eor	$t1,$t1,$g			@ Ch(e,f,g)
     63 	add	$T1,$T1,$t1
     64 	add	$T1,$T1,$h
     65 	add	$T1,$T1,$t2
     66 	mov	$h,$a,ror#$Sigma0[0]
     67 	eor	$h,$h,$a,ror#$Sigma0[1]
     68 	eor	$h,$h,$a,ror#$Sigma0[2]		@ Sigma0(a)
     69 	orr	$t0,$a,$b
     70 	and	$t0,$t0,$c
     71 	and	$t1,$a,$b
     72 	orr	$t0,$t0,$t1			@ Maj(a,b,c)
     73 	add	$h,$h,$t0
     74 	add	$d,$d,$T1
     75 	add	$h,$h,$T1
     76 ___
     77 }
     78 
     # BODY_16_XX($i,$a,$b,$c,$d,$e,$f,$g,$h)
     #
     # Appends the assembly for a round with $i >= 16 to the global $code:
     # first the message-schedule update, then the common round body from
     # BODY_00_15.  Arguments are the same as for BODY_00_15 and are passed
     # through to it unchanged.
     79 sub BODY_16_XX {
     80 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
     81 
     # Message schedule on the 16-word circular buffer X[] kept on the stack:
     #   T1 = X[i%16] + sigma0(X[(i+1)%16]) + sigma1(X[(i+14)%16]) + X[(i+9)%16]
     # The $inp register serves as an extra scratch register here.
     82 $code.=<<___;
     83 	ldr	$t1,[sp,#`($i+1)%16`*4]	@ $i
     84 	ldr	$t2,[sp,#`($i+14)%16`*4]
     85 	ldr	$T1,[sp,#`($i+0)%16`*4]
     86 	ldr	$inp,[sp,#`($i+9)%16`*4]
     87 	mov	$t0,$t1,ror#$sigma0[0]
     88 	eor	$t0,$t0,$t1,ror#$sigma0[1]
     89 	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
     90 	mov	$t1,$t2,ror#$sigma1[0]
     91 	eor	$t1,$t1,$t2,ror#$sigma1[1]
     92 	eor	$t1,$t1,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
     93 	add	$T1,$T1,$t0
     94 	add	$T1,$T1,$t1
     95 	add	$T1,$T1,$inp
     96 ___
     # With $i >= 16 BODY_00_15 skips its input-loading part; it stores the
     # new word from $T1 back to X[i%16] and emits the round itself.
     97 	&BODY_00_15(@_);
     98 }
     99 
    # Start of the generated assembly: the K256 round-constant table,
    # followed directly by the entry point of sha256_block_data_order.
    #
    # The prologue takes its own address (pc-8) into r3 and subtracts 256
    # to reach K256, so the table (64 words = 256 bytes) has to stay
    # immediately in front of the function.  $len is turned into an
    # end-of-input pointer as inp + (len << 6).
    #
    # Stack frame once the prologue has run, in 32-bit words from sp (the
    # offsets are the ones the rest of the script uses):
    #    0..15  X[16] message schedule buffer
    #   16      ctx
    #   17      inp (rewritten in round 15)
    #   18      end-of-input pointer
    #   19..    saved r4-r12 and lr
    100 $code=<<___;
    101 .text
    102 .code	32
    103 
    104 .type	K256,%object
    105 .align	5
    106 K256:
    107 .word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
    108 .word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
    109 .word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
    110 .word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
    111 .word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
    112 .word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
    113 .word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
    114 .word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
    115 .word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
    116 .word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
    117 .word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
    118 .word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
    119 .word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
    120 .word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
    121 .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
    122 .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
    123 .size	K256,.-K256
    124 
    125 .global	sha256_block_data_order
    126 .type	sha256_block_data_order,%function
    127 sha256_block_data_order:
    128 	sub	r3,pc,#8		@ sha256_block_data_order
    129 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
    130 	stmdb	sp!,{$ctx,$inp,$len,r4-r12,lr}
    131 	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
    132 	sub	$Ktbl,r3,#256		@ K256
    133 	sub	sp,sp,#16*4		@ alloca(X[16])
    134 .Loop:
    135 ___
    # Emit rounds 0..15 fully unrolled.  After each round the register list
    # is rotated so that its last entry becomes the first: the register
    # that just received the new "a" value is handed to the next round as
    # $a, and every other working variable moves down one name.
    $i=0;
    while ($i<16) {
        BODY_00_15($i,@V);
        @V=($V[-1],@V[0..$#V-1]);
        $i++;
    }
    $code.=".Lrounds_16_xx:\n";
    # Emit one unrolled group of 16 schedule-extending rounds.  Only this
    # single group (generated with $i = 16..31) is emitted; the assembly
    # that follows branches back to .Lrounds_16_xx to run it again.
    # Sixteen rotations of the eight-entry list bring @V back to the order
    # it had at the label, so the loop body is register-consistent.
    while ($i<32) {
        BODY_16_XX($i,@V);
        @V=($V[-1],@V[0..$#V-1]);
        $i++;
    }
    # Tail of the generated code.
    #
    # The first three instructions close the .Lrounds_16_xx loop: $t2 still
    # holds the K256 word used by the round just finished, and the loop is
    # left once its low byte is 0xf2, the low byte of the final table entry
    # 0xc67178f2.
    #
    # After that the eight working variables are added to the hash state
    # words loaded from ctx and stored back, Ktbl is rewound by 256 bytes
    # to the start of K256, and .Loop repeats until inp equals the saved
    # end-of-input pointer.
    #
    # The return sequence executes "moveq pc,lr" when bit 0 of lr is clear
    # and falls through to "bx lr" otherwise; the post-processing at the
    # end of the script replaces that "bx lr" with a raw .word.
    139 $code.=<<___;
    140 	and	$t2,$t2,#0xff
    141 	cmp	$t2,#0xf2
    142 	bne	.Lrounds_16_xx
    143 
    144 	ldr	$T1,[sp,#16*4]		@ pull ctx
    145 	ldr	$t0,[$T1,#0]
    146 	ldr	$t1,[$T1,#4]
    147 	ldr	$t2,[$T1,#8]
    148 	add	$A,$A,$t0
    149 	ldr	$t0,[$T1,#12]
    150 	add	$B,$B,$t1
    151 	ldr	$t1,[$T1,#16]
    152 	add	$C,$C,$t2
    153 	ldr	$t2,[$T1,#20]
    154 	add	$D,$D,$t0
    155 	ldr	$t0,[$T1,#24]
    156 	add	$E,$E,$t1
    157 	ldr	$t1,[$T1,#28]
    158 	add	$F,$F,$t2
    159 	ldr	$inp,[sp,#17*4]		@ pull inp
    160 	ldr	$t2,[sp,#18*4]		@ pull inp+len
    161 	add	$G,$G,$t0
    162 	add	$H,$H,$t1
    163 	stmia	$T1,{$A,$B,$C,$D,$E,$F,$G,$H}
    164 	cmp	$inp,$t2
    165 	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
    166 	bne	.Loop
    167 
    168 	add	sp,sp,#`16+3`*4	@ destroy frame
    169 	ldmia	sp!,{r4-r12,lr}
    170 	tst	lr,#1
    171 	moveq	pc,lr			@ be binary compatible with V4, yet
    172 	bx	lr			@ interoperable with Thumb ISA:-)
    173 .size   sha256_block_data_order,.-sha256_block_data_order
    174 .asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
    175 .align	2
    176 ___
    177 
    # Post-process the accumulated assembly and write it out.
    #
    # 1. Every `...` span left in the templates is replaced by the result of
    #    evaluating its contents as Perl (stack offsets such as `16+3`, and
    #    the conditional "str" emitted for round 15).
    # 2. "bx lr" is replaced by a raw .word so the file also assembles when
    #    the assembler is restricted to -march=armv4.
    $code =~ s/\`([^\`]*)\`/eval $1/gem;
    $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
    print $code;
    # Closing flushes the buffered output; a write error (for example a full
    # disk) only shows up here, so it is checked rather than ignored, which
    # would leave a truncated assembly file behind without any diagnostic.
    close STDOUT or die "error closing STDOUT: $!";
    182