#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift;	# supported flavours are o32,n32,64,nubi32,nubi64

# Pick pointer-arithmetic and register save/restore mnemonics, plus the
# size of one saved-register slot, according to the requested flavour.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Ask the target compiler about endianness: if the token MIPSEL survives
# preprocessing, the MIPSEL macro is not defined and the target is taken
# to be big-endian. The probe is attempted only when CC is set; otherwise
# $big_endian stays undefined so that the host-byte-order fallback below
# is used instead of silently assuming little-endian.
$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# The last argument that looks like "name.ext" is taken as the output file.
for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}

# Redirect STDOUT only when an output file was actually named, and fail
# loudly rather than leaving an empty or missing assembly file behind.
if (defined($output)) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}

# Fallback when the compiler could not be consulted: use the byte order
# of the machine running this script.
if (!defined($big_endian))
	{   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11

# Register assignment for the generated routine. Note that these
# re-assign $t0..$t2 from the NUBI layout above.
$ctx=$a0;
$inp=$a1;
$num=$a2;
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# Emit round $i for 0 <= $i <= 14.
#   ($i,$a,$b,$c,$d,$e) - round number and the registers currently
#                         playing the roles a..e.
# On little-endian targets X[$i] is byte-swapped first. The next input
# word X[$i+1] is fetched with an lwl/lwr pair, interleaved with
#   e += K + (a<<5) + (a>>27) + (((c^d)&b)^d) + X[$i]
#   b  = (b<<30)|(b>>2)
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
___
$code.=<<___;
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# Emit round $i for 15 <= $i <= 19. Same round function as BODY_00_14,
# but instead of loading input the next schedule word is computed in
# place:
#   X[j] ^= X[j+2] ^ X[j+8] ^ X[j+13]; X[j] = (X[j]<<1)|(X[j]>>31)
# with j=$i+1 and all indices taken modulo 16. The byte swap is emitted
# only for $i==15, the last word that was loaded from the input.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# Emit a round that mixes b^c^d into e; the driver below uses it for
# rounds 20..39 and 60..79. For $i<79 the next schedule word is updated
# as in BODY_15_19. For $i==79 the schedule update is replaced by loads
# of the five state words from $ctx into X[0..4], which the loop tail
# then adds to A..E.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# Emit round $i for 40 <= $i <= 59. The round function is accumulated
# into e in two parts, (c&d) and ((c^d)&b); the schedule update is the
# same as in BODY_15_19. The "if ($i<79)" guard always holds for the
# rounds this sub is called with below.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Prologue: allocate the frame and save $ra, $fp and $s4..$s11.
$code=<<___;
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Turn the block count into an end-of-input pointer ($inp + $num*64) and
# park it at 0($sp), because the $num register doubles as $t1 inside the
# rounds. Then load the five state words and enter the loop; the load of
# E sits in the branch delay slot.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Unroll all 80 rounds. After each round the role list is rotated right
# by one, so the register that was e becomes the next round's a.
for ($i=0;$i<15;$i++)	{ BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Loop tail: advance the input pointer, reload the end pointer, add the
# previous state (loaded into X[0..4] during round 79) to A..E, store the
# result back to the context and loop until the end pointer is reached.
# The epilogue then restores the saved registers.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
# Buffered write errors only surface at close; report them instead of
# exiting successfully with a truncated output file.
close STDOUT or die "error closing STDOUT: $!";