Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro (at] fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # SHA1 block procedure for s390x.
     11 
     12 # April 2007.
     13 #
     14 # Performance is >30% better than gcc 3.3 generated code. But the real
     15 # twist is that SHA1 hardware support is detected and utilized. In
     16 # which case performance can reach further >4.5x for larger chunks.
     17 
     18 # January 2009.
     19 #
     20 # Optimize Xupdate for amount of memory references and reschedule
     21 # instructions to favour dual-issue z10 pipeline. On z10 hardware is
     22 # "only" ~2.3x faster than software.
     23 
     24 $kimdfunc=1;	# magic function code for kimd instruction
     25 
     26 $output=shift;
     27 open STDOUT,">$output";
     28 
     29 $K_00_39="%r0"; $K=$K_00_39;
     30 $K_40_79="%r1";
     31 $ctx="%r2";	$prefetch="%r2";
     32 $inp="%r3";
     33 $len="%r4";
     34 
     35 $A="%r5";
     36 $B="%r6";
     37 $C="%r7";
     38 $D="%r8";
     39 $E="%r9";	@V=($A,$B,$C,$D,$E);
     40 $t0="%r10";
     41 $t1="%r11";
     42 @X=("%r12","%r13","%r14");
     43 $sp="%r15";
     44 
     45 $frame=160+16*4;
     46 
     47 sub Xupdate {
     48 my $i=shift;
     49 
     50 $code.=<<___ if ($i==15);
     51 	lg	$prefetch,160($sp)	### Xupdate(16) warm-up
     52 	lr	$X[0],$X[2]
     53 ___
     54 return if ($i&1);	# Xupdate is vectorized and executed every 2nd cycle
     55 $code.=<<___ if ($i<16);
     56 	lg	$X[0],`$i*4`($inp)	### Xload($i)
     57 	rllg	$X[1],$X[0],32
     58 ___
     59 $code.=<<___ if ($i>=16);
     60 	xgr	$X[0],$prefetch		### Xupdate($i)
     61 	lg	$prefetch,`160+4*(($i+2)%16)`($sp)
     62 	xg	$X[0],`160+4*(($i+8)%16)`($sp)
     63 	xgr	$X[0],$prefetch
     64 	rll	$X[0],$X[0],1
     65 	rllg	$X[1],$X[0],32
     66 	rll	$X[1],$X[1],1
     67 	rllg	$X[0],$X[1],32
     68 	lr	$X[2],$X[1]		# feedback
     69 ___
     70 $code.=<<___ if ($i<=70);
     71 	stg	$X[0],`160+4*($i%16)`($sp)
     72 ___
     73 unshift(@X,pop(@X));
     74 }
     75 
     76 sub BODY_00_19 {
     77 my ($i,$a,$b,$c,$d,$e)=@_;
     78 my $xi=$X[1];
     79 
     80 	&Xupdate($i);
     81 $code.=<<___;
     82 	alr	$e,$K		### $i
     83 	rll	$t1,$a,5
     84 	lr	$t0,$d
     85 	xr	$t0,$c
     86 	alr	$e,$t1
     87 	nr	$t0,$b
     88 	alr	$e,$xi
     89 	xr	$t0,$d
     90 	rll	$b,$b,30
     91 	alr	$e,$t0
     92 ___
     93 }
     94 
     95 sub BODY_20_39 {
     96 my ($i,$a,$b,$c,$d,$e)=@_;
     97 my $xi=$X[1];
     98 
     99 	&Xupdate($i);
    100 $code.=<<___;
    101 	alr	$e,$K		### $i
    102 	rll	$t1,$a,5
    103 	lr	$t0,$b
    104 	alr	$e,$t1
    105 	xr	$t0,$c
    106 	alr	$e,$xi
    107 	xr	$t0,$d
    108 	rll	$b,$b,30
    109 	alr	$e,$t0
    110 ___
    111 }
    112 
    113 sub BODY_40_59 {
    114 my ($i,$a,$b,$c,$d,$e)=@_;
    115 my $xi=$X[1];
    116 
    117 	&Xupdate($i);
    118 $code.=<<___;
    119 	alr	$e,$K		### $i
    120 	rll	$t1,$a,5
    121 	lr	$t0,$b
    122 	alr	$e,$t1
    123 	or	$t0,$c
    124 	lr	$t1,$b
    125 	nr	$t0,$d
    126 	nr	$t1,$c
    127 	alr	$e,$xi
    128 	or	$t0,$t1
    129 	rll	$b,$b,30
    130 	alr	$e,$t0
    131 ___
    132 }
    133 
    134 $code.=<<___;
    135 .text
    136 .align	64
    137 .type	Ktable,\@object
    138 Ktable: .long	0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
    139 	.skip	48	#.long	0,0,0,0,0,0,0,0,0,0,0,0
    140 .size	Ktable,.-Ktable
    141 .globl	sha1_block_data_order
    142 .type	sha1_block_data_order,\@function
    143 sha1_block_data_order:
    144 ___
    145 $code.=<<___ if ($kimdfunc);
    146 	larl	%r1,OPENSSL_s390xcap_P
    147 	lg	%r0,0(%r1)
    148 	tmhl	%r0,0x4000	# check for message-security assist
    149 	jz	.Lsoftware
    150 	lghi	%r0,0
    151 	la	%r1,16($sp)
    152 	.long	0xb93e0002	# kimd %r0,%r2
    153 	lg	%r0,16($sp)
    154 	tmhh	%r0,`0x8000>>$kimdfunc`
    155 	jz	.Lsoftware
    156 	lghi	%r0,$kimdfunc
    157 	lgr	%r1,$ctx
    158 	lgr	%r2,$inp
    159 	sllg	%r3,$len,6
    160 	.long	0xb93e0002	# kimd %r0,%r2
    161 	brc	1,.-4		# pay attention to "partial completion"
    162 	br	%r14
    163 .align	16
    164 .Lsoftware:
    165 ___
    166 $code.=<<___;
    167 	lghi	%r1,-$frame
    168 	stg	$ctx,16($sp)
    169 	stmg	%r6,%r15,48($sp)
    170 	lgr	%r0,$sp
    171 	la	$sp,0(%r1,$sp)
    172 	stg	%r0,0($sp)
    173 
    174 	larl	$t0,Ktable
    175 	llgf	$A,0($ctx)
    176 	llgf	$B,4($ctx)
    177 	llgf	$C,8($ctx)
    178 	llgf	$D,12($ctx)
    179 	llgf	$E,16($ctx)
    180 
    181 	lg	$K_00_39,0($t0)
    182 	lg	$K_40_79,8($t0)
    183 
    184 .Lloop:
    185 	rllg	$K_00_39,$K_00_39,32
    186 ___
    187 for ($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
    188 $code.=<<___;
    189 	rllg	$K_00_39,$K_00_39,32
    190 ___
    191 for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    192 $code.=<<___;	$K=$K_40_79;
    193 	rllg	$K_40_79,$K_40_79,32
    194 ___
    195 for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
    196 $code.=<<___;
    197 	rllg	$K_40_79,$K_40_79,32
    198 ___
    199 for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    200 $code.=<<___;
    201 
    202 	lg	$ctx,`$frame+16`($sp)
    203 	la	$inp,64($inp)
    204 	al	$A,0($ctx)
    205 	al	$B,4($ctx)
    206 	al	$C,8($ctx)
    207 	al	$D,12($ctx)
    208 	al	$E,16($ctx)
    209 	st	$A,0($ctx)
    210 	st	$B,4($ctx)
    211 	st	$C,8($ctx)
    212 	st	$D,12($ctx)
    213 	st	$E,16($ctx)
    214 	brct	$len,.Lloop
    215 
    216 	lmg	%r6,%r15,`$frame+48`($sp)
    217 	br	%r14
    218 .size	sha1_block_data_order,.-sha1_block_data_order
    219 .string	"SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
    220 .comm	OPENSSL_s390xcap_P,8,8
    221 ___
    222 
    223 $code =~ s/\`([^\`]*)\`/eval $1/gem;
    224 
    225 print $code;
    226 close STDOUT;
    227