Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 #
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 #
     10 # February 2009
     11 #
     12 # Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
     13 # "cluster" Address Generation Interlocks, so that one pipeline stall
     14 # resolves several dependencies.
     15 
     16 $rp="%r14";
     17 $sp="%r15";
     18 $code=<<___;
     19 .text
     20 
     21 ___
     22 
     23 # void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
     24 {
     25 $acc="%r0";
     26 $cnt="%r1";
     27 $key="%r2";
     28 $len="%r3";
     29 $inp="%r4";
     30 $out="%r5";
     31 
     32 @XX=("%r6","%r7");
     33 @TX=("%r8","%r9");
     34 $YY="%r10";
     35 $TY="%r11";
     36 
     37 $code.=<<___;
     38 .globl	RC4
     39 .type	RC4,\@function
     40 .align	64
     41 RC4:
     42 	stmg	%r6,%r11,48($sp)
     43 	llgc	$XX[0],0($key)
     44 	llgc	$YY,1($key)
     45 	la	$XX[0],1($XX[0])
     46 	nill	$XX[0],0xff
     47 	srlg	$cnt,$len,3
     48 	ltgr	$cnt,$cnt
     49 	llgc	$TX[0],2($XX[0],$key)
     50 	jz	.Lshort
     51 	j	.Loop8
     52 
     53 .align	64
     54 .Loop8:
     55 ___
     56 for ($i=0;$i<8;$i++) {
     57 $code.=<<___;
     58 	la	$YY,0($YY,$TX[0])	# $i
     59 	nill	$YY,255
     60 	la	$XX[1],1($XX[0])
     61 	nill	$XX[1],255
     62 ___
     63 $code.=<<___ if ($i==1);
     64 	llgc	$acc,2($TY,$key)
     65 ___
     66 $code.=<<___ if ($i>1);
     67 	sllg	$acc,$acc,8
     68 	ic	$acc,2($TY,$key)
     69 ___
     70 $code.=<<___;
     71 	llgc	$TY,2($YY,$key)
     72 	stc	$TX[0],2($YY,$key)
     73 	llgc	$TX[1],2($XX[1],$key)
     74 	stc	$TY,2($XX[0],$key)
     75 	cr	$XX[1],$YY
     76 	jne	.Lcmov$i
     77 	la	$TX[1],0($TX[0])
     78 .Lcmov$i:
     79 	la	$TY,0($TY,$TX[0])
     80 	nill	$TY,255
     81 ___
     82 push(@TX,shift(@TX)); push(@XX,shift(@XX));     # "rotate" registers
     83 }
     84 
     85 $code.=<<___;
     86 	lg	$TX[1],0($inp)
     87 	sllg	$acc,$acc,8
     88 	la	$inp,8($inp)
     89 	ic	$acc,2($TY,$key)
     90 	xgr	$acc,$TX[1]
     91 	stg	$acc,0($out)
     92 	la	$out,8($out)
     93 	brct	$cnt,.Loop8
     94 
     95 .Lshort:
     96 	lghi	$acc,7
     97 	ngr	$len,$acc
     98 	jz	.Lexit
     99 	j	.Loop1
    100 
    101 .align	16
    102 .Loop1:
    103 	la	$YY,0($YY,$TX[0])
    104 	nill	$YY,255
    105 	llgc	$TY,2($YY,$key)
    106 	stc	$TX[0],2($YY,$key)
    107 	stc	$TY,2($XX[0],$key)
    108 	ar	$TY,$TX[0]
    109 	ahi	$XX[0],1
    110 	nill	$TY,255
    111 	nill	$XX[0],255
    112 	llgc	$acc,0($inp)
    113 	la	$inp,1($inp)
    114 	llgc	$TY,2($TY,$key)
    115 	llgc	$TX[0],2($XX[0],$key)
    116 	xr	$acc,$TY
    117 	stc	$acc,0($out)
    118 	la	$out,1($out)
    119 	brct	$len,.Loop1
    120 
    121 .Lexit:
    122 	ahi	$XX[0],-1
    123 	stc	$XX[0],0($key)
    124 	stc	$YY,1($key)
    125 	lmg	%r6,%r11,48($sp)
    126 	br	$rp
    127 .size	RC4,.-RC4
    128 .string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
    129 
    130 ___
    131 }
    132 
    133 # void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
    134 {
    135 $cnt="%r0";
    136 $idx="%r1";
    137 $key="%r2";
    138 $len="%r3";
    139 $inp="%r4";
    140 $acc="%r5";
    141 $dat="%r6";
    142 $ikey="%r7";
    143 $iinp="%r8";
    144 
    145 $code.=<<___;
    146 .globl	RC4_set_key
    147 .type	RC4_set_key,\@function
    148 .align	64
    149 RC4_set_key:
    150 	stmg	%r6,%r8,48($sp)
    151 	lhi	$cnt,256
    152 	la	$idx,0(%r0)
    153 	sth	$idx,0($key)
    154 .align	4
    155 .L1stloop:
    156 	stc	$idx,2($idx,$key)
    157 	la	$idx,1($idx)
    158 	brct	$cnt,.L1stloop
    159 
    160 	lghi	$ikey,-256
    161 	lr	$cnt,$len
    162 	la	$iinp,0(%r0)
    163 	la	$idx,0(%r0)
    164 .align	16
    165 .L2ndloop:
    166 	llgc	$acc,2+256($ikey,$key)
    167 	llgc	$dat,0($iinp,$inp)
    168 	la	$idx,0($idx,$acc)
    169 	la	$ikey,1($ikey)
    170 	la	$idx,0($idx,$dat)
    171 	nill	$idx,255
    172 	la	$iinp,1($iinp)
    173 	tml	$ikey,255
    174 	llgc	$dat,2($idx,$key)
    175 	stc	$dat,2+256-1($ikey,$key)
    176 	stc	$acc,2($idx,$key)
    177 	jz	.Ldone
    178 	brct	$cnt,.L2ndloop
    179 	lr	$cnt,$len
    180 	la	$iinp,0(%r0)
    181 	j	.L2ndloop
    182 .Ldone:
    183 	lmg	%r6,%r8,48($sp)
    184 	br	$rp
    185 .size	RC4_set_key,.-RC4_set_key
    186 
    187 ___
    188 }
    189 
    190 # const char *RC4_options()
    191 $code.=<<___;
    192 .globl	RC4_options
    193 .type	RC4_options,\@function
    194 .align	16
    195 RC4_options:
    196 	larl	%r2,.Loptions
    197 	br	%r14
    198 .size	RC4_options,.-RC4_options
    199 .section	.rodata
    200 .Loptions:
    201 .align	8
    202 .string	"rc4(8x,char)"
    203 ___
    204 
    205 print $code;
    206