#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.
#
# This script takes no arguments. It assembles s390x assembler source
# for RC4, RC4_set_key and RC4_options in $code and prints it to STDOUT.
#
# Key layout as accessed by the generated code: byte 0 holds x, byte 1
# holds y, and the 256-entry byte-wide S-box starts at offset 2.

use strict;
use warnings;

# Generated assembler text. Kept as a package variable, as it was in
# the original script, so it stays reachable as $main::code.
our $code;

my $rp="%r14";		# return address
my $sp="%r15";		# stack pointer

$code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Processes len>>3 eight-byte chunks in the unrolled .Loop8, then the
# remaining len&7 bytes one at a time in .Loop1. x and y are loaded from
# key bytes 0/1 on entry and written back on exit. %r6-%r11 are spilled
# at 48(%r15) on entry and reloaded before returning.
{
my $acc="%r0";		# keystream accumulator (8 bytes in .Loop8)
my $cnt="%r1";		# remaining 8-byte chunks, a 64-bit quantity
my $key="%r2";
my $len="%r3";
my $inp="%r4";
my $out="%r5";

# Two-element register pools that are swapped after every unrolled
# step: element 0 is the "current" X index / S[X] value, element 1
# receives the "next" one.
my @XX=("%r6","%r7");
my @TX=("%r8","%r9");
my $YY="%r10";
my $TY="%r11";

$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stmg	%r6,%r11,48($sp)
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___

# Eight unrolled RC4 steps. The keystream byte S[TY] of step N is only
# fetched during step N+1 (llgc for the first, shift+ic for the rest);
# the eighth byte is fetched after the loop, right before the XOR.
for my $i (0 .. 7) {
	$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
	$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
	$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
	# If the next X index equals Y, S[X+1] is the byte that was just
	# stored to S[Y], so it is taken from the register instead.
	$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
	# "rotate" registers: next becomes current
	push(@TX,shift(@TX));
	push(@XX,shift(@XX));
}

# $cnt was produced by a 64-bit shift of the size_t length and tested
# with ltgr, so the loop must be closed with the 64-bit BRCTG. The
# 32-bit BRCT only counts in the low word and would stop early for
# chunk counts that do not fit in 32 bits. The byte tail below keeps
# BRCT because $len has been masked down to 0..7 by then.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lmg	%r6,%r11,48($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# .L1stloop zeroes x/y (halfword store at offset 0) and fills the S-box
# with 0..255. .L2ndloop then performs the 256 key-schedule swaps;
# $ikey runs from -256 up to 0 so that reaching a zero low byte (tml)
# marks the end, and $iinp/$cnt wrap around to re-read the user key
# every len bytes.
# NOTE(review): len == 0 is not special-cased; the key byte counter is
# consumed with brct, so callers are presumably expected to pass len > 0.
{
my $cnt="%r0";		# loop counter (256, then bytes left in user key)
my $idx="%r1";		# fill index, then running j index
my $key="%r2";
my $len="%r3";
my $inp="%r4";
my $acc="%r5";		# S[i]
my $dat="%r6";		# user key byte, then S[j]
my $ikey="%r7";		# i - 256
my $iinp="%r8";		# offset into user key

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stmg	%r6,%r8,48($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
.Ldone:
	lmg	%r6,%r8,48($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
#
# Returns the address of the constant description string placed in
# .rodata.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

print $code;