#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by vendor
# compiler, and 75% better than GCC [3.4], and in absolute terms is
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.

# Usage: perl <this script> [output-file]
#
# The script builds the assembly text in $code and prints it. When an
# output file name is given, STDOUT is redirected to that file first;
# failure to open or to close the output is fatal.

# Sixteen registers holding the sliding 16-word message schedule window;
# the round generators index it modulo 16.
@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
$ctx="a0";	# $16
$inp="a1";
$num="a2";
$A="a3";
$B="a4";	# 20
$C="a5";
$D="t8";
$E="t9";	@V=($A,$B,$C,$D,$E);
$t0="t10";	# 24
$t1="t11";
$t2="ra";	# used as scratch; saved in the prologue, restored in the epilogue
$t3="t12";
$K="AT";	# 28

# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (0..19) to $code. $a..$e are the names
# of the registers currently playing the roles of the five working
# variables; the caller rotates them between rounds.
#
#   $i == 0            additionally issues the first pair of unaligned
#                      loads from $inp.
#   even $i < 14       additionally prefetches the next pair of input
#                      quadwords.
#   even $i < 15       merges the unaligned pair, byte-swaps two 32-bit
#                      words at once and splits them into @X[$i] and
#                      @X[$i+1], interleaved with the round itself.
#   odd  $i < 15       plain round using the already prepared @X[$i].
#   $i >= 15           round plus the schedule update for word $i+1
#                      ("forward Xupdate").
#
# The round function emitted here is ($b & $c) | ($d & ~$b).
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i==0);
	ldq_u	@X[0],0+0($inp)
	ldq_u	@X[1],0+7($inp)
___
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	@X[$i+2],($i+2)*4+0($inp)
	ldq_u	@X[$i+3],($i+2)*4+7($inp)
___
$code.=<<___ if (!($i&1) && $i<15);
	extql	@X[$i],$inp,@X[$i]
	extqh	@X[$i+1],$inp,@X[$i+1]

	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched

	srl	@X[$i],24,$t0		# vectorized byte swap
	srl	@X[$i],8,$t2

	sll	@X[$i],8,$t3
	sll	@X[$i],24,@X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	@X[$i],0x88,@X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	@X[$i],$t0,@X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	@X[$i],$t3,@X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	@X[$i],4,@X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	@X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	zapnot	$a,0xf,$a
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	@X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	zapnot	@X[$i%16],0xf,@X[$i%16]
	or	$t1,@X[$j%16],@X[$j%16]
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i to $code; the driver calls it for rounds
# 20..39 and again for rounds 60..79. Arguments are as for BODY_00_19.
# The round function emitted here is $b ^ $c ^ $d.
#
#   $i < 79    round plus the schedule update for word $i+1.
#   $i < 77    additionally clears the upper half of @X[$i%16].
#   $i == 79   last round; no schedule update, instead the five context
#              words are loaded into @X[0..4] for the final accumulation.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
___
$code.=<<___ if ($i<77);
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	@X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	@X[1],4($ctx)

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	@X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	@X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	@X[4],16($ctx)
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (40..59) to $code, always together with
# the schedule update for word $i+1. Arguments are as for BODY_00_19.
# The round function emitted here is ($b & $c) | ($b & $d) | ($c & $d).
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	@X[$i%16],$e,$e
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
}

# Function prologue: set up a 64-byte frame, save the registers listed
# below, load the five context words and turn $num (a block count) into
# the end-of-input pointer ($inp + $num*64).
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
# Unroll all 80 rounds. After every round the register roles in @V are
# rotated by one position, so no data has to be moved between registers.
# $i deliberately carries over from one loop to the next.
for ($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Add the working variables to the context words fetched during round
# 79, store the result, advance to the next 64-byte block and loop while
# $inp is below the end pointer; then restore registers and return.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

# Emit the generated assembly. The first command-line argument, when
# present (and true), names the output file; without it the text goes to
# the inherited STDOUT. A failed open must abort the script: otherwise
# the assembly would be written to the wrong place while the caller sees
# a successful exit.
$output=shift;
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
print $code;
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";