#!/usr/bin/env perl
      2 
      3 # ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for MIPS
     11 
     12 # October 2010
     13 #
     14 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
     15 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
     16 # faster than gcc-generated code, which is not very impressive. But
     17 # recall that compressed S-box requires extra processing, namely
     18 # additional rotations. Rotations are implemented with lwl/lwr pairs,
     19 # which is normally used for loading unaligned data. Another cool
     20 # thing about this module is its endian neutrality, which means that
     21 # it processes data without ever changing byte order...
     22 
     23 ######################################################################
     24 # There is a number of MIPS ABI in use, O32 and N32/64 are most
     25 # widely used. Then there is a new contender: NUBI. It appears that if
     26 # one picks the latter, it's possible to arrange code in ABI neutral
     27 # manner. Therefore let's stick to NUBI register layout:
     28 #
     29 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
     30 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
     31 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
     32 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
     33 #
     34 # The return value is placed in $a0. Following coding rules facilitate
     35 # interoperability:
     36 #
     37 # - never ever touch $tp, "thread pointer", former $gp;
     38 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
     39 #   old code];
     40 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
     41 #
     42 # For reference here is register layout for N32/64 MIPS ABIs:
     43 #
     44 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
     45 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
     46 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
     47 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
     48 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
     49 #
     50 $flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
     51 
     52 if ($flavour =~ /64|n32/i) {
     53 	$PTR_ADD="dadd";	# incidentally works even on n32
     54 	$PTR_SUB="dsub";	# incidentally works even on n32
     55 	$REG_S="sd";
     56 	$REG_L="ld";
     57 	$PTR_SLL="dsll";	# incidentally works even on n32
     58 	$SZREG=8;
     59 } else {
     60 	$PTR_ADD="add";
     61 	$PTR_SUB="sub";
     62 	$REG_S="sw";
     63 	$REG_L="lw";
     64 	$PTR_SLL="sll";
     65 	$SZREG=4;
     66 }
     67 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
     68 #
# <appro@openssl.org>
     70 #
     71 ######################################################################
     72 
     73 $big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
     74 
     75 for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
     76 open STDOUT,">$output";
     77 
     78 if (!defined($big_endian))
     79 {    $big_endian=(unpack('L',pack('N',1))==1);   }
     80 
     81 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
     82 open STDOUT,">$output";
     83 
     84 my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
     85 
     86 $code.=<<___;
     87 .text
     88 #ifdef OPENSSL_FIPSCANISTER
     89 # include <openssl/fipssyms.h>
     90 #endif
     91 
     92 #if !defined(__vxworks) || defined(__pic__)
     93 .option	pic2
     94 #endif
     95 .set	noat
     96 ___
     97 
     99 {{{
# Stack frame of the public AES_encrypt/AES_decrypt wrappers: sixteen
# register-sized slots.
my $FRAMESIZE=16*$SZREG;
# Value for the .mask directive.  Bits 31/30 stand for $ra/$fp and bits
# 23..16 for registers 16..23; the NUBI variant adds bits 15..12 and
# bit 3 for the extra registers that flavour saves.
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Arguments (input, output, key schedule) and the table base live in
# $a0..$a3; the four state words live in $a4..$a7.  Note that $s0..$s3
# declared here shadow the global callee-saved names for the rest of
# this block.
my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
# Index/address scratch registers.  This line must stay ABOVE the next
# one: it reads the global $t0..$t2 before they are shadowed.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Twelve temporaries, mapped onto registers 12..23.
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
# Running round-key pointer and round counter.
my ($key0,$cnt)=($gp,$fp);
    107 
# instruction ordering is "stolen" from output from MIPSpro assembler
# invoked with -mips3 -O3 arguments...
# _mips_AES_encrypt: internal one-block encryption core, followed by the
# opening lines of the public AES_encrypt entry point.
#
# As the code below shows, the core takes the state in $s0..$s3, the key
# schedule in $key and the table base in $Tbl, and leaves the result in
# $s0..$s3.  The round count is read from offset 240 of the key schedule.
# It first XORs the state with round key 0.  Each pass of .Loop_enc then
# does one round: for every output word four table entries are fetched
# (three through lwl/lwr pairs at byte offsets 3/2, 2/1 and 1/0, one
# through a plain lw), XORed together and XORed with the next round key.
# The code after the loop is the last round: single bytes are loaded with
# lbu, moved into position with _ins and combined with the final round
# key.  _xtr and _ins are not real instructions and are not defined in
# this part of the file -- presumably they are rewritten into shifts in
# an endian-aware way before output (TODO confirm).
$code.=<<___;
.align	5
.ent	_mips_AES_encrypt
_mips_AES_encrypt:
	.frame	$sp,0,$ra
	.set	reorder
	lw	$t0,0($key)
	lw	$t1,4($key)
	lw	$t2,8($key)
	lw	$t3,12($key)
	lw	$cnt,240($key)
	$PTR_ADD $key0,$key,16

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	_xtr	$i0,$s1,16-2
.Loop_enc:
	_xtr	$i1,$s2,16-2
	_xtr	$i2,$s3,16-2
	_xtr	$i3,$s0,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t0,3($i0)		# Te1[s1>>16]
	lwl	$t1,3($i1)		# Te1[s2>>16]
	lwl	$t2,3($i2)		# Te1[s3>>16]
	lwl	$t3,3($i3)		# Te1[s0>>16]
	lwr	$t0,2($i0)		# Te1[s1>>16]
	lwr	$t1,2($i1)		# Te1[s2>>16]
	lwr	$t2,2($i2)		# Te1[s3>>16]
	lwr	$t3,2($i3)		# Te1[s0>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t4,2($i0)		# Te2[s2>>8]
	lwl	$t5,2($i1)		# Te2[s3>>8]
	lwl	$t6,2($i2)		# Te2[s0>>8]
	lwl	$t7,2($i3)		# Te2[s1>>8]
	lwr	$t4,1($i0)		# Te2[s2>>8]
	lwr	$t5,1($i1)		# Te2[s3>>8]
	lwr	$t6,1($i2)		# Te2[s0>>8]
	lwr	$t7,1($i3)		# Te2[s1>>8]

	_xtr	$i0,$s3,0-2
	_xtr	$i1,$s0,0-2
	_xtr	$i2,$s1,0-2
	_xtr	$i3,$s2,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t8,1($i0)		# Te3[s3]
	lwl	$t9,1($i1)		# Te3[s0]
	lwl	$t10,1($i2)		# Te3[s1]
	lwl	$t11,1($i3)		# Te3[s2]
	lwr	$t8,0($i0)		# Te3[s3]
	lwr	$t9,0($i1)		# Te3[s0]
	lwr	$t10,0($i2)		# Te3[s1]
	lwr	$t11,0($i3)		# Te3[s2]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7
	lw	$t4,0($i0)		# Te0[s0>>24]
	lw	$t5,0($i1)		# Te0[s1>>24]
	lw	$t6,0($i2)		# Te0[s2>>24]
	lw	$t7,0($i3)		# Te0[s3>>24]

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_enc
	_xtr	$i0,$s1,16-2

	.set	reorder
	_xtr	$i1,$s2,16-2
	_xtr	$i2,$s3,16-2
	_xtr	$i3,$s0,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t0,2($i0)		# Te4[s1>>16]
	lbu	$t1,2($i1)		# Te4[s2>>16]
	lbu	$t2,2($i2)		# Te4[s3>>16]
	lbu	$t3,2($i3)		# Te4[s0>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,2($i0)		# Te4[s2>>8]
	lbu	$t5,2($i1)		# Te4[s3>>8]
	lbu	$t6,2($i2)		# Te4[s0>>8]
	lbu	$t7,2($i3)		# Te4[s1>>8]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,2($i0)		# Te4[s0>>24]
	lbu	$t9,2($i1)		# Te4[s1>>24]
	lbu	$t10,2($i2)		# Te4[s2>>24]
	lbu	$t11,2($i3)		# Te4[s3>>24]

	_xtr	$i0,$s3,0-2
	_xtr	$i1,$s0,0-2
	_xtr	$i2,$s1,0-2
	_xtr	$i3,$s2,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc

	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins	$t4,8
	_ins	$t5,8
	_ins	$t6,8
	_ins	$t7,8

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,2($i0)		# Te4[s3]
	lbu	$t5,2($i1)		# Te4[s0]
	lbu	$t6,2($i2)		# Te4[s1]
	lbu	$t7,2($i3)		# Te4[s2]

	_ins	$t8,24
	_ins	$t9,24
	_ins	$t10,24
	_ins	$t11,24

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	_ins	$t4,0
	_ins	$t5,0
	_ins	$t6,0
	_ins	$t7,0

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	jr	$ra
.end	_mips_AES_encrypt

.align	5
.globl	AES_encrypt
.ent	AES_encrypt
AES_encrypt:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# Body of the public AES_encrypt entry point, assembled from several
# fragments so that flavour-specific parts can be left out.
#
# PIC set-up for o32 has to come first, before the frame is allocated.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Allocate the frame and save $ra, $fp and registers 16..23 ($s4..$s11)
# in slots 1..10 counted from the top of the frame.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI additionally saves registers 12..15 and $gp in slots 11..15.  The
# registers are spelled numerically because $s0..$s3 are shadowed by
# lexicals of the same name inside this block.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_encrypt
___
# Load the table address, read the 16-byte input block with lwl/lwr
# pairs (the instructions normally used for unaligned loads), run the
# core, write the result back with swr/swl pairs and restore the
# registers saved above from the same slots.
$code.=<<___;
	.set	reorder
	la	$Tbl,AES_Te		# PIC-ified 'load address'

	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
	lwl	$s3,12+$MSB($inp)
	lwr	$s0,0+$LSB($inp)
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)

	bal	_mips_AES_encrypt

	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
	swr	$s3,12+$LSB($out)
	swl	$s0,0+$MSB($out)
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; the frame is released in the branch delay slot (noreorder is
# still in effect here).
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_encrypt
___
    439 
# _mips_AES_decrypt: internal one-block decryption core, followed by the
# opening lines of the public AES_decrypt entry point.
#
# Register interface is the same as for the encryption core: state in
# $s0..$s3, key schedule in $key, table base in $Tbl, round count read
# from offset 240 of the key schedule, result left in $s0..$s3.  Each
# pass of .Loop_dec does one round with four table lookups per output
# word (three via lwl/lwr pairs, one via lw) and a round-key XOR.  After
# the loop eight words spaced 32 bytes apart are loaded from offset 1024
# of the table ("prefetch Td4"; the loaded values are overwritten before
# use), and the last round fetches single bytes from that 256-byte table
# with lbu, positions them with _ins and applies the final round key.
# _xtr and _ins are not real instructions and are not defined in this
# part of the file -- presumably rewritten before output (TODO confirm).
$code.=<<___;
.align	5
.ent	_mips_AES_decrypt
_mips_AES_decrypt:
	.frame	$sp,0,$ra
	.set	reorder
	lw	$t0,0($key)
	lw	$t1,4($key)
	lw	$t2,8($key)
	lw	$t3,12($key)
	lw	$cnt,240($key)
	$PTR_ADD $key0,$key,16

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	_xtr	$i0,$s3,16-2
.Loop_dec:
	_xtr	$i1,$s0,16-2
	_xtr	$i2,$s1,16-2
	_xtr	$i3,$s2,16-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t0,3($i0)		# Td1[s3>>16]
	lwl	$t1,3($i1)		# Td1[s0>>16]
	lwl	$t2,3($i2)		# Td1[s1>>16]
	lwl	$t3,3($i3)		# Td1[s2>>16]
	lwr	$t0,2($i0)		# Td1[s3>>16]
	lwr	$t1,2($i1)		# Td1[s0>>16]
	lwr	$t2,2($i2)		# Td1[s1>>16]
	lwr	$t3,2($i3)		# Td1[s2>>16]

	_xtr	$i0,$s2,8-2
	_xtr	$i1,$s3,8-2
	_xtr	$i2,$s0,8-2
	_xtr	$i3,$s1,8-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t4,2($i0)		# Td2[s2>>8]
	lwl	$t5,2($i1)		# Td2[s3>>8]
	lwl	$t6,2($i2)		# Td2[s0>>8]
	lwl	$t7,2($i3)		# Td2[s1>>8]
	lwr	$t4,1($i0)		# Td2[s2>>8]
	lwr	$t5,1($i1)		# Td2[s3>>8]
	lwr	$t6,1($i2)		# Td2[s0>>8]
	lwr	$t7,1($i3)		# Td2[s1>>8]

	_xtr	$i0,$s1,0-2
	_xtr	$i1,$s2,0-2
	_xtr	$i2,$s3,0-2
	_xtr	$i3,$s0,0-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lwl	$t8,1($i0)		# Td3[s1]
	lwl	$t9,1($i1)		# Td3[s2]
	lwl	$t10,1($i2)		# Td3[s3]
	lwl	$t11,1($i3)		# Td3[s0]
	lwr	$t8,0($i0)		# Td3[s1]
	lwr	$t9,0($i1)		# Td3[s2]
	lwr	$t10,0($i2)		# Td3[s3]
	lwr	$t11,0($i3)		# Td3[s0]

	_xtr	$i0,$s0,24-2
	_xtr	$i1,$s1,24-2
	_xtr	$i2,$s2,24-2
	_xtr	$i3,$s3,24-2
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7


	lw	$t4,0($i0)		# Td0[s0>>24]
	lw	$t5,0($i1)		# Td0[s1>>24]
	lw	$t6,0($i2)		# Td0[s2>>24]
	lw	$t7,0($i3)		# Td0[s3>>24]

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_dec
	_xtr	$i0,$s3,16-2

	.set	reorder
	lw	$t4,1024($Tbl)		# prefetch Td4
	lw	$t5,1024+32($Tbl)
	lw	$t6,1024+64($Tbl)
	lw	$t7,1024+96($Tbl)
	lw	$t8,1024+128($Tbl)
	lw	$t9,1024+160($Tbl)
	lw	$t10,1024+192($Tbl)
	lw	$t11,1024+224($Tbl)

	_xtr	$i0,$s3,16
	_xtr	$i1,$s0,16
	_xtr	$i2,$s1,16
	_xtr	$i3,$s2,16
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,0xff
	and	$i3,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t0,1024($i0)		# Td4[s3>>16]
	lbu	$t1,1024($i1)		# Td4[s0>>16]
	lbu	$t2,1024($i2)		# Td4[s1>>16]
	lbu	$t3,1024($i3)		# Td4[s2>>16]

	_xtr	$i0,$s2,8
	_xtr	$i1,$s3,8
	_xtr	$i2,$s0,8
	_xtr	$i3,$s1,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,0xff
	and	$i3,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,1024($i0)		# Td4[s2>>8]
	lbu	$t5,1024($i1)		# Td4[s3>>8]
	lbu	$t6,1024($i2)		# Td4[s0>>8]
	lbu	$t7,1024($i3)		# Td4[s1>>8]

	_xtr	$i0,$s0,24
	_xtr	$i1,$s1,24
	_xtr	$i2,$s2,24
	_xtr	$i3,$s3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,1024($i0)		# Td4[s0>>24]
	lbu	$t9,1024($i1)		# Td4[s1>>24]
	lbu	$t10,1024($i2)		# Td4[s2>>24]
	lbu	$t11,1024($i3)		# Td4[s3>>24]

	_xtr	$i0,$s1,0
	_xtr	$i1,$s2,0
	_xtr	$i2,$s3,0
	_xtr	$i3,$s0,0

	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins	$t4,8
	_ins	$t5,8
	_ins	$t6,8
	_ins	$t7,8

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t4,1024($i0)		# Td4[s1]
	lbu	$t5,1024($i1)		# Td4[s2]
	lbu	$t6,1024($i2)		# Td4[s3]
	lbu	$t7,1024($i3)		# Td4[s0]

	_ins	$t8,24
	_ins	$t9,24
	_ins	$t10,24
	_ins	$t11,24

	lw	$s0,0($key0)
	lw	$s1,4($key0)
	lw	$s2,8($key0)
	lw	$s3,12($key0)

	_ins	$t4,0
	_ins	$t5,0
	_ins	$t6,0
	_ins	$t7,0


	xor	$t0,$t8
	xor	$t1,$t9
	xor	$t2,$t10
	xor	$t3,$t11

	xor	$t0,$t4
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
	xor	$s3,$t3

	jr	$ra
.end	_mips_AES_decrypt

.align	5
.globl	AES_decrypt
.ent	AES_decrypt
AES_decrypt:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# Body of the public AES_decrypt entry point.  It mirrors AES_encrypt:
# same frame layout and save/restore slots, but the table loaded is
# AES_Td and the core called is _mips_AES_decrypt.
#
# PIC set-up for o32 has to come first, before the frame is allocated.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Allocate the frame and save $ra, $fp and registers 16..23 ($s4..$s11)
# in slots 1..10 counted from the top of the frame.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# NUBI additionally saves registers 12..15 and $gp in slots 11..15; the
# numeric spelling is needed because $s0..$s3 are shadowed in this block.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_decrypt
___
# Load the table address, read the 16-byte input block with lwl/lwr
# pairs, run the core, write the result back with swr/swl pairs and
# restore the registers saved above from the same slots.
$code.=<<___;
	.set	reorder
	la	$Tbl,AES_Td		# PIC-ified 'load address'

	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
	lwl	$s3,12+$MSB($inp)
	lwr	$s0,0+$LSB($inp)
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)

	bal	_mips_AES_decrypt

	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
	swr	$s3,12+$LSB($out)
	swl	$s0,0+$MSB($out)
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; the frame is released in the branch delay slot (noreorder is
# still in effect here).
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_decrypt
___
    776 }}}
    777 
    779 {{{
    780 my $FRAMESIZE=8*$SZREG;
    781 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
    782 
    783 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
    784 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
    785 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
    786 my ($rcon,$cnt)=($gp,$fp);
    787 
# _mips_AES_set_encrypt_key: internal key-expansion core, followed by the
# opening lines of the public AES_set_encrypt_key entry point.
#
# Inputs are the user key pointer ($inp), the key length in bits ($bits),
# the key schedule pointer ($key) and the table base ($Tbl).  The status
# is returned in $t0:
#   -1  $inp or $key is zero (the "li -1" sits in the delay slot of the
#       first test, so it is in place for both tests),
#   -2  $bits is none of 128, 192 or 256,
#    0  success.
# The user key is read with lwl/lwr pairs.  Round constants are read from
# offset 1024+256 of the table and S-box bytes with lbu from offset 1024.
# The three loops run 10, 8 and 7 times for 128-, 192- and 256-bit keys.
# On success the round count (10, 12 or 14) is stored at what works out
# to offset 240 from the start of the schedule, $key is wound back to the
# start of the schedule and $cnt is left holding the round count.  _bias
# is not a real instruction and is not defined in this part of the file
# -- presumably rewritten into a shift before output (TODO confirm).
$code.=<<___;
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$sp,0,$ra
	.set	noreorder
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,1024+256

	.set	reorder
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	li	$at,128
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	li	$at,192
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	li	$at,256
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7

	b	.Lekey_done
	li	$t0,-2

.align	4
.L128bits:
	.set	reorder
	srl	$i0,$rk3,16
	srl	$i1,$rk3,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk3,0xff
	srl	$i3,$rk3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sub	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2

	.set	noreorder
	bnez	$cnt,.L128bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,10
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,80($key)
	b	.Lekey_done
	$PTR_SUB $key,10*16

.align	4
.L192bits:
	.set	reorder
	srl	$i0,$rk5,16
	srl	$i1,$rk5,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk5,0xff
	srl	$i3,$rk5,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sub	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	xor	$rk4,$rk3
	xor	$rk5,$rk4

	.set	noreorder
	bnez	$cnt,.L192bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,12
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,48($key)
	b	.Lekey_done
	$PTR_SUB $key,12*16

.align	4
.L256bits:
	.set	reorder
	srl	$i0,$rk7,16
	srl	$i1,$rk7,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk7,0xff
	srl	$i3,$rk7,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	sub	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	beqz	$cnt,.L256bits_done

	srl	$i0,$rk3,24
	srl	$i1,$rk3,16
	srl	$i2,$rk3,8
	and	$i3,$rk3,0xff
	and	$i1,0xff
	and	$i2,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8

	xor	$rk4,$i0
	xor	$rk4,$i1
	xor	$rk4,$i2
	xor	$rk4,$i3

	xor	$rk5,$rk4
	xor	$rk6,$rk5
	xor	$rk7,$rk6

	$PTR_ADD $key,32
	.set	noreorder
	b	.L256bits
	$PTR_ADD $rcon,4

.L256bits_done:
	sw	$rk0,32($key)
	sw	$rk1,36($key)
	sw	$rk2,40($key)
	li	$cnt,14
	sw	$rk3,44($key)
	li	$t0,0
	sw	$cnt,48($key)
	$PTR_SUB $key,12*16

.Lekey_done:
	jr	$ra
	nop
.end	_mips_AES_set_encrypt_key

.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
   1049 $code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
   1050 	.cpload	$pf
   1051 ___
   1052 $code.=<<___;
   1053 	$PTR_SUB $sp,$FRAMESIZE
   1054 	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
   1055 	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
   1056 ___
   1057 $code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
   1058 	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
   1059 	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
   1060 	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
   1061 	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
   1062 	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
   1063 ___
   1064 $code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
   1065 	.cplocal	$Tbl
   1066 	.cpsetup	$pf,$zero,AES_set_encrypt_key
   1067 ___
   1068 $code.=<<___;
   1069 	.set	reorder
   1070 	la	$Tbl,AES_Te		# PIC-ified 'load address'
   1071 
   1072 	bal	_mips_AES_set_encrypt_key
   1073 
   1074 	.set	noreorder
   1075 	move	$a0,$t0
   1076 	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
   1077 	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
   1078 ___
   1079 $code.=<<___ if ($flavour =~ /nubi/i);
   1080 	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
   1081 	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
   1082 	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
   1083 	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
   1084 	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
   1085 ___
   1086 $code.=<<___;
   1087 	jr	$ra
   1088 	$PTR_ADD $sp,$FRAMESIZE
   1089 .end	AES_set_encrypt_key
   1090 ___
   1091 
   1093 my ($head,$tail)=($inp,$bits);	# cursors of the .Lswap loop; they reuse the registers named by $inp/$bits
   1094 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
        # tp1 is the key word being transformed in .Lmix, tp2/tp4/tp8 its
        # successive byte-wise doublings, tp9/tpb/tpd/tpe the XOR combinations
        # built from them. tp9..tpe live in $s0-$s3, which the NUBI prologue
        # below saves on the stack.
   1095 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
        # $m is a scratch mask; the other three hold the byte-replicated
        # constants that are loaded right before .Lmix.
        #
        # Emit AES_set_decrypt_key: function header and frame description.
   1096 $code.=<<___;
   1097 .align	5
   1098 .globl	AES_set_decrypt_key
   1099 .ent	AES_set_decrypt_key
   1100 AES_set_decrypt_key:
   1101 	.frame	$sp,$FRAMESIZE,$ra
   1102 	.mask	$SAVED_REGS_MASK,-$SZREG
   1103 	.set	noreorder
   1104 ___
   1105 $code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
   1106 	.cpload	$pf
   1107 ___
        # Allocate the frame and save $ra/$fp in its two topmost slots.
   1108 $code.=<<___;
   1109 	$PTR_SUB $sp,$FRAMESIZE
   1110 	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
   1111 	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
   1112 ___
        # NUBI flavours only: additionally save $s3..$s0 and $gp in slots 3..7
        # counted from the top of the frame.
   1113 $code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
   1114 	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
   1115 	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
   1116 	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
   1117 	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
   1118 	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
   1119 ___
   1120 $code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
   1121 	.cplocal	$Tbl
   1122 	.cpsetup	$pf,$zero,AES_set_decrypt_key
   1123 ___
        # Function body, in order:
        #  1. Load the AES_Te address into $Tbl and call
        #     _mips_AES_set_encrypt_key. A negative $t0 afterwards branches
        #     straight to .Ldkey_done, where $t0 becomes the return value.
        #  2. .Lswap: $head starts at $key and $tail at $key+16*$cnt; each pass
        #     exchanges the two 16-byte groups they point at, then moves $head
        #     up and $tail down by 16 until the pointers are equal.
        #     NOTE(review): the exit test is head==tail, so this relies on $cnt
        #     being even (presumably the round count) - confirm.
        #  3. Set-up for .Lmix: preload the word at 16($key), turn $cnt into
        #     ($cnt-1)*4 (the number of words to process), advance $key by 16
        #     and build 0x80808080, its complement 0x7f7f7f7f and 0x1b1b1b1b.
        #  4. .Lmix, once per 32-bit word: three byte-wise GF(2^8) doublings
        #     (clear the top bit of every byte, add the word to itself, then
        #     XOR 0x1b into every byte whose top bit was set) give tp2, tp4 and
        #     tp8. From these tp9=tp8^tp1, tpb=tp9^tp2, tpd=tp9^tp4 and
        #     tpe=tp8^tp4^tp2 are formed, and tpe is XORed with tpd, tp9 and tpb
        #     rotated by 16, 8 and 24 bits - the multiply-by-0e/0b/0d/09 mix of
        #     InvMixColumns. Each rotation is written as a pair of _ror
        #     pseudo-instructions with a positive and a negative count; the
        #     post-processing loop at the end of this file expands each pair
        #     into one srl and one sll. The next word is loaded before
        #     the current result is stored ("modulo-scheduled").
        #  5. On success $t0 is set to 0. .Ldkey_done copies $t0 to $a0 and
        #     reloads $ra and $fp.
   1124 $code.=<<___;
   1125 	.set	reorder
   1126 	la	$Tbl,AES_Te		# PIC-ified 'load address'
   1127 
   1128 	bal	_mips_AES_set_encrypt_key
   1129 
   1130 	bltz	$t0,.Ldkey_done
   1131 
   1132 	sll	$at,$cnt,4
   1133 	$PTR_ADD $head,$key,0
   1134 	$PTR_ADD $tail,$key,$at
   1135 .align	4
   1136 .Lswap:
   1137 	lw	$rk0,0($head)
   1138 	lw	$rk1,4($head)
   1139 	lw	$rk2,8($head)
   1140 	lw	$rk3,12($head)
   1141 	lw	$rk4,0($tail)
   1142 	lw	$rk5,4($tail)
   1143 	lw	$rk6,8($tail)
   1144 	lw	$rk7,12($tail)
   1145 	sw	$rk0,0($tail)
   1146 	sw	$rk1,4($tail)
   1147 	sw	$rk2,8($tail)
   1148 	sw	$rk3,12($tail)
   1149 	$PTR_ADD $head,16
   1150 	$PTR_SUB $tail,16
   1151 	sw	$rk4,-16($head)
   1152 	sw	$rk5,-12($head)
   1153 	sw	$rk6,-8($head)
   1154 	sw	$rk7,-4($head)
   1155 	bne	$head,$tail,.Lswap
   1156 
   1157 	lw	$tp1,16($key)		# modulo-scheduled
   1158 	lui	$x80808080,0x8080
   1159 	sub	$cnt,1
   1160 	or	$x80808080,0x8080
   1161 	sll	$cnt,2
   1162 	$PTR_ADD $key,16
   1163 	lui	$x1b1b1b1b,0x1b1b
   1164 	nor	$x7f7f7f7f,$zero,$x80808080
   1165 	or	$x1b1b1b1b,0x1b1b
   1166 .align	4
   1167 .Lmix:
   1168 	and	$m,$tp1,$x80808080
   1169 	and	$tp2,$tp1,$x7f7f7f7f
   1170 	srl	$tp4,$m,7
   1171 	addu	$tp2,$tp2		# tp2<<1
   1172 	subu	$m,$tp4
   1173 	and	$m,$x1b1b1b1b
   1174 	xor	$tp2,$m
   1175 
   1176 	and	$m,$tp2,$x80808080
   1177 	and	$tp4,$tp2,$x7f7f7f7f
   1178 	srl	$tp8,$m,7
   1179 	addu	$tp4,$tp4		# tp4<<1
   1180 	subu	$m,$tp8
   1181 	and	$m,$x1b1b1b1b
   1182 	xor	$tp4,$m
   1183 
   1184 	and	$m,$tp4,$x80808080
   1185 	and	$tp8,$tp4,$x7f7f7f7f
   1186 	srl	$tp9,$m,7
   1187 	addu	$tp8,$tp8		# tp8<<1
   1188 	subu	$m,$tp9
   1189 	and	$m,$x1b1b1b1b
   1190 	xor	$tp8,$m
   1191 
   1192 	xor	$tp9,$tp8,$tp1
   1193 	xor	$tpe,$tp8,$tp4
   1194 	xor	$tpb,$tp9,$tp2
   1195 	xor	$tpd,$tp9,$tp4
   1196 
   1197 	_ror	$tp1,$tpd,16
   1198 	 xor	$tpe,$tp2
   1199 	_ror	$tp2,$tpd,-16
   1200 	xor	$tpe,$tp1
   1201 	_ror	$tp1,$tp9,8
   1202 	xor	$tpe,$tp2
   1203 	_ror	$tp2,$tp9,-24
   1204 	xor	$tpe,$tp1
   1205 	_ror	$tp1,$tpb,24
   1206 	xor	$tpe,$tp2
   1207 	_ror	$tp2,$tpb,-8
   1208 	xor	$tpe,$tp1
   1209 	lw	$tp1,4($key)		# modulo-scheduled
   1210 	xor	$tpe,$tp2
   1211 	sub	$cnt,1
   1212 	sw	$tpe,0($key)
   1213 	$PTR_ADD $key,4
   1214 	bnez	$cnt,.Lmix
   1215 
   1216 	li	$t0,0
   1217 .Ldkey_done:
   1218 	.set	noreorder
   1219 	move	$a0,$t0
   1220 	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
   1221 	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
   1222 ___
   1223 $code.=<<___ if ($flavour =~ /nubi/i);	# NUBI only: reload what the prologue saved
        # The AES_set_decrypt_key prologue stores $s3,$s2,$s1,$s0,$gp in frame
        # slots 3..7 (counted from the top of the frame), so they must be
        # reloaded from exactly those slots. Slots 11..15 are never written by
        # this function, so loading from them would hand the caller garbage
        # in its callee-saved registers.
   1224 	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
   1225 	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
   1226 	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
   1227 	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
   1228 	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
   1229 ___
        # Return to the caller; the frame is released by the instruction that
        # follows the jr (emitted under .set noreorder).
   1230 $code.=<<___;
   1231 	jr	$ra
   1232 	$PTR_ADD $sp,$FRAMESIZE
   1233 .end	AES_set_decrypt_key
   1234 ___
   1235 }}}
   1236 
   1237 ######################################################################
   1238 # Tables are emitted with .byte directives, i.e. kept in endian-neutral manner
   1239 $code.=<<___;
   1240 .rdata
   1241 .align	6
   1242 AES_Te:
   1243 .byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
   1244 .byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
   1245 .byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
   1246 .byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
   1247 .byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
   1248 .byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
   1249 .byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
   1250 .byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
   1251 .byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
   1252 .byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
   1253 .byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
   1254 .byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
   1255 .byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
   1256 .byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
   1257 .byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
   1258 .byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
   1259 .byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
   1260 .byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
   1261 .byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
   1262 .byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
   1263 .byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
   1264 .byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
   1265 .byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
   1266 .byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
   1267 .byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
   1268 .byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
   1269 .byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
   1270 .byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
   1271 .byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
   1272 .byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
   1273 .byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
   1274 .byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
   1275 .byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
   1276 .byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
   1277 .byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
   1278 .byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
   1279 .byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
   1280 .byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
   1281 .byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
   1282 .byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
   1283 .byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
   1284 .byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
   1285 .byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
   1286 .byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
   1287 .byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
   1288 .byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
   1289 .byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
   1290 .byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
   1291 .byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
   1292 .byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
   1293 .byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
   1294 .byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
   1295 .byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
   1296 .byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
   1297 .byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
   1298 .byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
   1299 .byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
   1300 .byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
   1301 .byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
   1302 .byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
   1303 .byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
   1304 .byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
   1305 .byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
   1306 .byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
   1307 .byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
   1308 .byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
   1309 .byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
   1310 .byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
   1311 .byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
   1312 .byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
   1313 .byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
   1314 .byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
   1315 .byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
   1316 .byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
   1317 .byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
   1318 .byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
   1319 .byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
   1320 .byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
   1321 .byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
   1322 .byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
   1323 .byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
   1324 .byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
   1325 .byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
   1326 .byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
   1327 .byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
   1328 .byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
   1329 .byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
   1330 .byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
   1331 .byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
   1332 .byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
   1333 .byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
   1334 .byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
   1335 .byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
   1336 .byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
   1337 .byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
   1338 .byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
   1339 .byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
   1340 .byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
   1341 .byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
   1342 .byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
   1343 .byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
   1344 .byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
   1345 .byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
   1346 .byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
   1347 .byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
   1348 .byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
   1349 .byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
   1350 .byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
   1351 .byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
   1352 .byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
   1353 .byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
   1354 .byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
   1355 .byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
   1356 .byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
   1357 .byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
   1358 .byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
   1359 .byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
   1360 .byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
   1361 .byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
   1362 .byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
   1363 .byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
   1364 .byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
   1365 .byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
   1366 .byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
   1367 .byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
   1368 .byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
   1369 .byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
   1370 .byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
   1371 
   1372 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
   1373 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
   1374 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
   1375 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
   1376 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
   1377 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
   1378 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
   1379 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
   1380 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
   1381 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
   1382 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
   1383 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
   1384 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
   1385 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
   1386 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
   1387 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
   1388 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
   1389 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
   1390 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
   1391 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
   1392 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
   1393 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
   1394 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
   1395 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
   1396 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
   1397 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
   1398 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
   1399 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
   1400 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
   1401 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
   1402 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
   1403 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
   1404 
   1405 .byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
   1406 .byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
   1407 .byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
   1408 .byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
   1409 .byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
   1410 
   1411 .align	6
   1412 AES_Td:
   1413 .byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
   1414 .byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
   1415 .byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
   1416 .byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
   1417 .byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
   1418 .byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
   1419 .byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
   1420 .byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
   1421 .byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
   1422 .byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
   1423 .byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
   1424 .byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
   1425 .byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
   1426 .byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
   1427 .byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
   1428 .byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
   1429 .byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
   1430 .byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
   1431 .byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
   1432 .byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
   1433 .byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
   1434 .byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
   1435 .byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
   1436 .byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
   1437 .byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
   1438 .byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
   1439 .byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
   1440 .byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
   1441 .byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
   1442 .byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
   1443 .byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
   1444 .byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
   1445 .byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
   1446 .byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
   1447 .byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
   1448 .byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
   1449 .byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
   1450 .byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
   1451 .byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
   1452 .byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
   1453 .byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
   1454 .byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
   1455 .byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
   1456 .byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
   1457 .byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
   1458 .byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
   1459 .byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
   1460 .byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
   1461 .byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
   1462 .byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
   1463 .byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
   1464 .byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
   1465 .byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
   1466 .byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
   1467 .byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
   1468 .byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
   1469 .byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
   1470 .byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
   1471 .byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
   1472 .byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
   1473 .byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
   1474 .byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
   1475 .byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
   1476 .byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
   1477 .byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
   1478 .byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
   1479 .byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
   1480 .byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
   1481 .byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
   1482 .byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
   1483 .byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
   1484 .byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
   1485 .byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
   1486 .byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
   1487 .byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
   1488 .byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
   1489 .byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
   1490 .byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
   1491 .byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
   1492 .byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
   1493 .byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
   1494 .byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
   1495 .byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
   1496 .byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
   1497 .byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
   1498 .byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
   1499 .byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
   1500 .byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
   1501 .byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
   1502 .byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
   1503 .byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
   1504 .byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
   1505 .byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
   1506 .byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
   1507 .byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
   1508 .byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
   1509 .byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
   1510 .byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
   1511 .byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
   1512 .byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
   1513 .byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
   1514 .byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
   1515 .byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
   1516 .byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
   1517 .byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
   1518 .byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
   1519 .byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
   1520 .byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
   1521 .byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
   1522 .byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
   1523 .byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
   1524 .byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
   1525 .byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
   1526 .byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
   1527 .byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
   1528 .byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
   1529 .byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
   1530 .byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
   1531 .byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
   1532 .byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
   1533 .byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
   1534 .byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
   1535 .byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
   1536 .byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
   1537 .byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
   1538 .byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
   1539 .byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
   1540 .byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
   1541 
   1542 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
   1543 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
   1544 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
   1545 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
   1546 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
   1547 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
   1548 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
   1549 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
   1550 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
   1551 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
   1552 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
   1553 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
   1554 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
   1555 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
   1556 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
   1557 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
   1558 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
   1559 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
   1560 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
   1561 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
   1562 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
   1563 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
   1564 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
   1565 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
   1566 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
   1567 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
   1568 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
   1569 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
   1570 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
   1571 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
   1572 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
   1573 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
   1574 ___
   1575 
   1577 foreach (split("\n",$code)) {	# post-process the generated text line by line and print it
        	# Replace every `...` span with the result of evaluating its
        	# contents as Perl.
   1578 	s/\`([^\`]*)\`/eval $1/ge;
   1579 
   1580 	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
   1581 	# with byte order dependencies...
   1582 	if (/^\s+_/) {
        	    # Two-operand form "_op $reg,arg" becomes "_op $reg,$reg,arg".
   1583 	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
   1584 
        	    # Map the pseudo-instruction to a real shift; only the first
        	    # substitution that matches is applied. N is the count operand:
        	    #   _xtr  -> srl by N (big-endian) or 24-N (little-endian);
        	    #            N may be written as "K-2", which is evaluated
        	    #   _ins  -> sll by N (big-endian) or 24-N (little-endian)
        	    #   _ror  -> srl by N (big-endian) or -N (little-endian);
        	    #            negative counts are fixed up below
        	    #   _bias -> sll by N (big-endian) or (N-16)&31 (little-endian)
   1585 	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
   1586 		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
   1587 					:		eval("24-$3"))/e or
   1588 	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
   1589 		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
   1590 					:		eval("24-$3"))/e or
   1591 	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
   1592 		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
   1593 					:		eval("$3*-1"))/e or
   1594 	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
   1595 		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
   1596 					:		eval("($3-16)&31"))/e;
   1597 
        	    # Fix-ups on the result, again first match only:
        	    #   srl by -N -> sll by N
        	    #   srl by 0  -> and dst,src,0xff
        	    #   sll by 0  -> the line is commented out with a leading '#'
   1598 	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
   1599 		sprintf("sll\t$1,$2,$3")/e				or
   1600 	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
   1601 		sprintf("and\t$1,$2,0xff")/e				or
   1602 	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
   1603 	}
   1604 
   1605 	# convert lwl/lwr and swr/swl to little-endian order
        	# The greedy ".*" leaves only the final digit d of the displacement
        	# in $2, and '&' binds looser than '+'/'-', so the whole sum is
        	# masked with 3: for lwl/swl d=0,1,2,3 becomes 3,0,1,2 and for
        	# lwr/swr d=0,1,2,3 becomes 1,2,3,0. Any text before that digit
        	# (e.g. a "4+" prefix) is kept as is.
   1606 	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
   1607 	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
   1608 		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
   1609 	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
   1610 		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
   1611 	}
   1612 
   1613 	print $_,"\n";
   1614 }
   1615 
   1616 close STDOUT or die "error closing STDOUT: $!";	# buffered write errors surface only at close; fail rather than leave truncated assembly
   1617