Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # SHA1 block procedure for Alpha.
     11 
     12 # On 21264 performance is 33% better than code generated by vendor
     13 # compiler, and 75% better than GCC [3.4], and in absolute terms is
     14 # 8.7 cycles per processed byte. Implementation features vectorized
     15 # byte swap, but not Xupdate.
     16 
     17 @X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
     18 	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
     19 $ctx="a0";	# $16
     20 $inp="a1";
     21 $num="a2";
     22 $A="a3";
     23 $B="a4";	# 20
     24 $C="a5";
     25 $D="t8";
     26 $E="t9";	@V=($A,$B,$C,$D,$E);
     27 $t0="t10";	# 24
     28 $t1="t11";
     29 $t2="ra";
     30 $t3="t12";
     31 $K="AT";	# 28
     32 
     33 sub BODY_00_19 {
     34 my ($i,$a,$b,$c,$d,$e)=@_;
     35 my $j=$i+1;
     36 $code.=<<___ if ($i==0);
     37 	ldq_u	@X[0],0+0($inp)
     38 	ldq_u	@X[1],0+7($inp)
     39 ___
     40 $code.=<<___ if (!($i&1) && $i<14);
     41 	ldq_u	@X[$i+2],($i+2)*4+0($inp)
     42 	ldq_u	@X[$i+3],($i+2)*4+7($inp)
     43 ___
     44 $code.=<<___ if (!($i&1) && $i<15);
     45 	extql	@X[$i],$inp,@X[$i]
     46 	extqh	@X[$i+1],$inp,@X[$i+1]
     47 
     48 	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched
     49 
     50 	srl	@X[$i],24,$t0		# vectorized byte swap
     51 	srl	@X[$i],8,$t2
     52 
     53 	sll	@X[$i],8,$t3
     54 	sll	@X[$i],24,@X[$i]
     55 	zapnot	$t0,0x11,$t0
     56 	zapnot	$t2,0x22,$t2
     57 
     58 	zapnot	@X[$i],0x88,@X[$i]
     59 	or	$t0,$t2,$t0
     60 	zapnot	$t3,0x44,$t3
     61 	sll	$a,5,$t1
     62 
     63 	or	@X[$i],$t0,@X[$i]
     64 	addl	$K,$e,$e
     65 	and	$b,$c,$t2
     66 	zapnot	$a,0xf,$a
     67 
     68 	or	@X[$i],$t3,@X[$i]
     69 	srl	$a,27,$t0
     70 	bic	$d,$b,$t3
     71 	sll	$b,30,$b
     72 
     73 	extll	@X[$i],4,@X[$i+1]	# extract upper half
     74 	or	$t2,$t3,$t2
     75 	addl	@X[$i],$e,$e
     76 
     77 	addl	$t1,$e,$e
     78 	srl	$b,32,$t3
     79 	zapnot	@X[$i],0xf,@X[$i]
     80 
     81 	addl	$t0,$e,$e
     82 	addl	$t2,$e,$e
     83 	or	$t3,$b,$b
     84 ___
     85 $code.=<<___ if (($i&1) && $i<15);
     86 	sll	$a,5,$t1
     87 	addl	$K,$e,$e
     88 	and	$b,$c,$t2
     89 	zapnot	$a,0xf,$a
     90 
     91 	srl	$a,27,$t0
     92 	addl	@X[$i%16],$e,$e
     93 	bic	$d,$b,$t3
     94 	sll	$b,30,$b
     95 
     96 	or	$t2,$t3,$t2
     97 	addl	$t1,$e,$e
     98 	srl	$b,32,$t3
     99 	zapnot	@X[$i],0xf,@X[$i]
    100 
    101 	addl	$t0,$e,$e
    102 	addl	$t2,$e,$e
    103 	or	$t3,$b,$b
    104 ___
    105 $code.=<<___ if ($i>=15);	# with forward Xupdate
    106 	sll	$a,5,$t1
    107 	addl	$K,$e,$e
    108 	and	$b,$c,$t2
    109 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
    110 
    111 	zapnot	$a,0xf,$a
    112 	addl	@X[$i%16],$e,$e
    113 	bic	$d,$b,$t3
    114 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
    115 
    116 	srl	$a,27,$t0
    117 	addl	$t1,$e,$e
    118 	or	$t2,$t3,$t2
    119 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
    120 
    121 	sll	$b,30,$b
    122 	addl	$t0,$e,$e
    123 	srl	@X[$j%16],31,$t1
    124 
    125 	addl	$t2,$e,$e
    126 	srl	$b,32,$t3
    127 	addl	@X[$j%16],@X[$j%16],@X[$j%16]
    128 
    129 	or	$t3,$b,$b
    130 	zapnot	@X[$i%16],0xf,@X[$i%16]
    131 	or	$t1,@X[$j%16],@X[$j%16]
    132 ___
    133 }
    134 
    135 sub BODY_20_39 {
    136 my ($i,$a,$b,$c,$d,$e)=@_;
    137 my $j=$i+1;
    138 $code.=<<___ if ($i<79);	# with forward Xupdate
    139 	sll	$a,5,$t1
    140 	addl	$K,$e,$e
    141 	zapnot	$a,0xf,$a
    142 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
    143 
    144 	sll	$b,30,$t3
    145 	addl	$t1,$e,$e
    146 	xor	$b,$c,$t2
    147 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
    148 
    149 	srl	$b,2,$b
    150 	addl	@X[$i%16],$e,$e
    151 	xor	$d,$t2,$t2
    152 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
    153 
    154 	srl	@X[$j%16],31,$t1
    155 	addl	$t2,$e,$e
    156 	srl	$a,27,$t0
    157 	addl	@X[$j%16],@X[$j%16],@X[$j%16]
    158 
    159 	or	$t3,$b,$b
    160 	addl	$t0,$e,$e
    161 	or	$t1,@X[$j%16],@X[$j%16]
    162 ___
    163 $code.=<<___ if ($i<77);
    164 	zapnot	@X[$i%16],0xf,@X[$i%16]
    165 ___
    166 $code.=<<___ if ($i==79);	# with context fetch
    167 	sll	$a,5,$t1
    168 	addl	$K,$e,$e
    169 	zapnot	$a,0xf,$a
    170 	ldl	@X[0],0($ctx)
    171 
    172 	sll	$b,30,$t3
    173 	addl	$t1,$e,$e
    174 	xor	$b,$c,$t2
    175 	ldl	@X[1],4($ctx)
    176 
    177 	srl	$b,2,$b
    178 	addl	@X[$i%16],$e,$e
    179 	xor	$d,$t2,$t2
    180 	ldl	@X[2],8($ctx)
    181 
    182 	srl	$a,27,$t0
    183 	addl	$t2,$e,$e
    184 	ldl	@X[3],12($ctx)
    185 
    186 	or	$t3,$b,$b
    187 	addl	$t0,$e,$e
    188 	ldl	@X[4],16($ctx)
    189 ___
    190 }
    191 
    192 sub BODY_40_59 {
    193 my ($i,$a,$b,$c,$d,$e)=@_;
    194 my $j=$i+1;
    195 $code.=<<___;	# with forward Xupdate
    196 	sll	$a,5,$t1
    197 	addl	$K,$e,$e
    198 	zapnot	$a,0xf,$a
    199 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
    200 
    201 	srl	$a,27,$t0
    202 	and	$b,$c,$t2
    203 	and	$b,$d,$t3
    204 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
    205 
    206 	sll	$b,30,$b
    207 	addl	$t1,$e,$e
    208 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
    209 
    210 	srl	@X[$j%16],31,$t1
    211 	addl	$t0,$e,$e
    212 	or	$t2,$t3,$t2
    213 	and	$c,$d,$t3
    214 
    215 	or	$t2,$t3,$t2
    216 	srl	$b,32,$t3
    217 	addl	@X[$i%16],$e,$e
    218 	addl	@X[$j%16],@X[$j%16],@X[$j%16]
    219 
    220 	or	$t3,$b,$b
    221 	addl	$t2,$e,$e
    222 	or	$t1,@X[$j%16],@X[$j%16]
    223 	zapnot	@X[$i%16],0xf,@X[$i%16]
    224 ___
    225 }
    226 
    227 $code=<<___;
    228 #ifdef __linux__
    229 #include <asm/regdef.h>
    230 #else
    231 #include <asm.h>
    232 #include <regdef.h>
    233 #endif
    234 
    235 .text
    236 
    237 .set	noat
    238 .set	noreorder
    239 .globl	sha1_block_data_order
    240 .align	5
    241 .ent	sha1_block_data_order
    242 sha1_block_data_order:
    243 	lda	sp,-64(sp)
    244 	stq	ra,0(sp)
    245 	stq	s0,8(sp)
    246 	stq	s1,16(sp)
    247 	stq	s2,24(sp)
    248 	stq	s3,32(sp)
    249 	stq	s4,40(sp)
    250 	stq	s5,48(sp)
    251 	stq	fp,56(sp)
    252 	.mask	0x0400fe00,-64
    253 	.frame	sp,64,ra
    254 	.prologue 0
    255 
    256 	ldl	$A,0($ctx)
    257 	ldl	$B,4($ctx)
    258 	sll	$num,6,$num
    259 	ldl	$C,8($ctx)
    260 	ldl	$D,12($ctx)
    261 	ldl	$E,16($ctx)
    262 	addq	$inp,$num,$num
    263 
    264 .Lloop:
    265 	.set	noreorder
    266 	ldah	$K,23170(zero)
    267 	zapnot	$B,0xf,$B
    268 	lda	$K,31129($K)	# K_00_19
    269 ___
    270 for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
    271 
    272 $code.=<<___;
    273 	ldah	$K,28378(zero)
    274 	lda	$K,-5215($K)	# K_20_39
    275 ___
    276 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    277 
    278 $code.=<<___;
    279 	ldah	$K,-28900(zero)
    280 	lda	$K,-17188($K)	# K_40_59
    281 ___
    282 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
    283 
    284 $code.=<<___;
    285 	ldah	$K,-13725(zero)
    286 	lda	$K,-15914($K)	# K_60_79
    287 ___
    288 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    289 
    290 $code.=<<___;
    291 	addl	@X[0],$A,$A
    292 	addl	@X[1],$B,$B
    293 	addl	@X[2],$C,$C
    294 	addl	@X[3],$D,$D
    295 	addl	@X[4],$E,$E
    296 	stl	$A,0($ctx)
    297 	stl	$B,4($ctx)
    298 	addq	$inp,64,$inp
    299 	stl	$C,8($ctx)
    300 	stl	$D,12($ctx)
    301 	stl	$E,16($ctx)
    302 	cmpult	$inp,$num,$t1
    303 	bne	$t1,.Lloop
    304 
    305 	.set	noreorder
    306 	ldq	ra,0(sp)
    307 	ldq	s0,8(sp)
    308 	ldq	s1,16(sp)
    309 	ldq	s2,24(sp)
    310 	ldq	s3,32(sp)
    311 	ldq	s4,40(sp)
    312 	ldq	s5,48(sp)
    313 	ldq	fp,56(sp)
    314 	lda	sp,64(sp)
    315 	ret	(ra)
    316 .end	sha1_block_data_order
    317 .ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
    318 .align	2
    319 ___
    320 $output=shift and open STDOUT,">$output";
    321 print $code;
    322 close STDOUT;
    323