Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # Needs more work: key setup, CBC routine...
     11 #
     12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
     13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
     14 # 4.0. But these are not the ones currently used! Their "compact"
     15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
     16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
     17 # at 1/3 of ppc_AES_decrypt.
     18 
     19 # February 2010
     20 #
     21 # Rescheduling instructions to favour Power6 pipeline gave 10%
     22 # performance improvement on the platform in question (and marginal
     23 # improvement even on others). It should be noted that Power6 fails
     24 # to process byte in 18 cycles, only in 23, because it fails to issue
     25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
     26 # block subroutines are 25% slower than one would expect. Compact
     27 # functions scale better, because they have pure computational part,
     28 # which scales perfectly with clock frequency. To be specific
     29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
     30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
     31 
     32 $flavour = shift;
     33 
     34 if ($flavour =~ /64/) {
     35 	$SIZE_T	=8;
     36 	$LRSAVE	=2*$SIZE_T;
     37 	$STU	="stdu";
     38 	$POP	="ld";
     39 	$PUSH	="std";
     40 } elsif ($flavour =~ /32/) {
     41 	$SIZE_T	=4;
     42 	$LRSAVE	=$SIZE_T;
     43 	$STU	="stwu";
     44 	$POP	="lwz";
     45 	$PUSH	="stw";
     46 } else { die "nonsense $flavour"; }
     47 
     48 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
     49 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
     50 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
     51 die "can't locate ppc-xlate.pl";
     52 
     53 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
     54 
     55 $FRAME=32*$SIZE_T;
     56 
     57 sub _data_word()
     58 { my $i;
     59     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
     60 }
     61 
     62 $sp="r1";
     63 $toc="r2";
     64 $inp="r3";
     65 $out="r4";
     66 $key="r5";
     67 
     68 $Tbl0="r3";
     69 $Tbl1="r6";
     70 $Tbl2="r7";
     71 $Tbl3="r2";
     72 
     73 $s0="r8";
     74 $s1="r9";
     75 $s2="r10";
     76 $s3="r11";
     77 
     78 $t0="r12";
     79 $t1="r13";
     80 $t2="r14";
     81 $t3="r15";
     82 
     83 $acc00="r16";
     84 $acc01="r17";
     85 $acc02="r18";
     86 $acc03="r19";
     87 
     88 $acc04="r20";
     89 $acc05="r21";
     90 $acc06="r22";
     91 $acc07="r23";
     92 
     93 $acc08="r24";
     94 $acc09="r25";
     95 $acc10="r26";
     96 $acc11="r27";
     97 
     98 $acc12="r28";
     99 $acc13="r29";
    100 $acc14="r30";
    101 $acc15="r31";
    102 
    103 # stay away from TLS pointer
    104 if ($SIZE_T==8)	{ die if ($t1 ne "r13");  $t1="r0";		}
    105 else		{ die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";	}
    106 $mask80=$Tbl2;
    107 $mask1b=$Tbl3;
    108 
    # LAES_Te / LAES_Td: two small position-independent helpers that leave
    # the address of a lookup table in $Tbl0.
    #
    # Each helper saves the caller's LR in r0, uses "bcl 20,31,$+4" (a
    # branch-and-link to the very next instruction) to obtain its own
    # address through LR, adds the distance from that instruction to the
    # first table entry, restores LR from r0 and returns.
    #
    # Layout, relative to the 128-byte aligned LAES_Te label:
    #   +0    LAES_Te stub: 9 words (six instructions plus the trailing
    #         .long/.byte words), padded by .space to 64 bytes
    #   +64   LAES_Td stub: same size, padded by .space to 128 bytes
    #   +128  first table emitted after this heredoc, hence `128-8` in
    #         LAES_Te (the second mflr sits 8 bytes into the stub)
    # LAES_Td additionally skips 2048+256 bytes, i.e. the 256 eight-byte
    # entries and the 256 single bytes that are emitted before its table.
    109 $code.=<<___;
    110 .machine	"any"
    111 .text
    112 
    113 .align	7
    114 LAES_Te:
    115 	mflr	r0
    116 	bcl	20,31,\$+4
    117 	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
    118 	addi	$Tbl0,$Tbl0,`128-8`
    119 	mtlr	r0
    120 	blr
    121 	.long	0
    122 	.byte	0,12,0x14,0,0,0,0,0
    123 	.space	`64-9*4`
    124 LAES_Td:
    125 	mflr	r0
    126 	bcl	20,31,\$+4
    127 	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
    128 	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
    129 	mtlr	r0
    130 	blr
    131 	.long	0
    132 	.byte	0,12,0x14,0,0,0,0,0
    133 	.space	`128-64-9*4`
    134 ___
    135 &_data_word(
    136 	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
    137 	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
    138 	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
    139 	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
    140 	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
    141 	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
    142 	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
    143 	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
    144 	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
    145 	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
    146 	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
    147 	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
    148 	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
    149 	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
    150 	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
    151 	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
    152 	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
    153 	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
    154 	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
    155 	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
    156 	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
    157 	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
    158 	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
    159 	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
    160 	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
    161 	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
    162 	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
    163 	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
    164 	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
    165 	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
    166 	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
    167 	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
    168 	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
    169 	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
    170 	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
    171 	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
    172 	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
    173 	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
    174 	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
    175 	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
    176 	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
    177 	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
    178 	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
    179 	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
    180 	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
    181 	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
    182 	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
    183 	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
    184 	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
    185 	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
    186 	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
    187 	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
    188 	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
    189 	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
    190 	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
    191 	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
    192 	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
    193 	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
    194 	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
    195 	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
    196 	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
    197 	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
    198 	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
    199 	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
    200 $code.=<<___;
    201 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    202 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    203 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    204 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    205 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    206 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    207 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    208 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    209 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    210 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    211 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    212 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    213 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    214 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    215 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    216 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    217 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    218 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    219 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    220 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    221 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    222 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    223 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    224 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    225 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    226 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    227 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    228 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    229 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    230 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    231 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    232 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    233 ___
    234 &_data_word(
    235 	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
    236 	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
    237 	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
    238 	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
    239 	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
    240 	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
    241 	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
    242 	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
    243 	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
    244 	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
    245 	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
    246 	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
    247 	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
    248 	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
    249 	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
    250 	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
    251 	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
    252 	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
    253 	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
    254 	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
    255 	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
    256 	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
    257 	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
    258 	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
    259 	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
    260 	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
    261 	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
    262 	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
    263 	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
    264 	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
    265 	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
    266 	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
    267 	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
    268 	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
    269 	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
    270 	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
    271 	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
    272 	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
    273 	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
    274 	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
    275 	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
    276 	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
    277 	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
    278 	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
    279 	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
    280 	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
    281 	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
    282 	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
    283 	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
    284 	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
    285 	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
    286 	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
    287 	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
    288 	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
    289 	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
    290 	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
    291 	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
    292 	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
    293 	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
    294 	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
    295 	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
    296 	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
    297 	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
    298 	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
    299 $code.=<<___;
    300 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    301 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    302 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    303 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    304 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    305 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    306 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    307 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    308 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    309 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    310 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    311 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    312 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    313 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    314 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    315 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    316 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    317 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    318 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    319 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    320 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    321 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    322 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    323 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    324 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    325 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    326 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    327 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    328 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    329 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    330 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    331 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    332 
    333 
    334 .globl	.AES_encrypt
    335 .align	7
    336 .AES_encrypt:
    337 	$STU	$sp,-$FRAME($sp)
    338 	mflr	r0
    339 
    340 	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
    341 	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
    342 	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
    343 	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
    344 	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
    345 	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
    346 	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
    347 	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
    348 	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
    349 	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
    350 	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
    351 	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
    352 	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
    353 	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
    354 	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
    355 	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
    356 	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
    357 	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
    358 	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
    359 	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
    360 	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
    361 
    362 	andi.	$t0,$inp,3
    363 	andi.	$t1,$out,3
    364 	or.	$t0,$t0,$t1
    365 	bne	Lenc_unaligned
    366 
    367 Lenc_unaligned_ok:
    368 	lwz	$s0,0($inp)
    369 	lwz	$s1,4($inp)
    370 	lwz	$s2,8($inp)
    371 	lwz	$s3,12($inp)
    372 	bl	LAES_Te
    373 	bl	Lppc_AES_encrypt_compact
    374 	stw	$s0,0($out)
    375 	stw	$s1,4($out)
    376 	stw	$s2,8($out)
    377 	stw	$s3,12($out)
    378 	b	Lenc_done
    379 
    380 Lenc_unaligned:
    381 	subfic	$t0,$inp,4096
    382 	subfic	$t1,$out,4096
    383 	andi.	$t0,$t0,4096-16
    384 	beq	Lenc_xpage
    385 	andi.	$t1,$t1,4096-16
    386 	bne	Lenc_unaligned_ok
    387 
    388 Lenc_xpage:
    389 	lbz	$acc00,0($inp)
    390 	lbz	$acc01,1($inp)
    391 	lbz	$acc02,2($inp)
    392 	lbz	$s0,3($inp)
    393 	lbz	$acc04,4($inp)
    394 	lbz	$acc05,5($inp)
    395 	lbz	$acc06,6($inp)
    396 	lbz	$s1,7($inp)
    397 	lbz	$acc08,8($inp)
    398 	lbz	$acc09,9($inp)
    399 	lbz	$acc10,10($inp)
    400 	insrwi	$s0,$acc00,8,0
    401 	lbz	$s2,11($inp)
    402 	insrwi	$s1,$acc04,8,0
    403 	lbz	$acc12,12($inp)
    404 	insrwi	$s0,$acc01,8,8
    405 	lbz	$acc13,13($inp)
    406 	insrwi	$s1,$acc05,8,8
    407 	lbz	$acc14,14($inp)
    408 	insrwi	$s0,$acc02,8,16
    409 	lbz	$s3,15($inp)
    410 	insrwi	$s1,$acc06,8,16
    411 	insrwi	$s2,$acc08,8,0
    412 	insrwi	$s3,$acc12,8,0
    413 	insrwi	$s2,$acc09,8,8
    414 	insrwi	$s3,$acc13,8,8
    415 	insrwi	$s2,$acc10,8,16
    416 	insrwi	$s3,$acc14,8,16
    417 
    418 	bl	LAES_Te
    419 	bl	Lppc_AES_encrypt_compact
    420 
    421 	extrwi	$acc00,$s0,8,0
    422 	extrwi	$acc01,$s0,8,8
    423 	stb	$acc00,0($out)
    424 	extrwi	$acc02,$s0,8,16
    425 	stb	$acc01,1($out)
    426 	stb	$acc02,2($out)
    427 	extrwi	$acc04,$s1,8,0
    428 	stb	$s0,3($out)
    429 	extrwi	$acc05,$s1,8,8
    430 	stb	$acc04,4($out)
    431 	extrwi	$acc06,$s1,8,16
    432 	stb	$acc05,5($out)
    433 	stb	$acc06,6($out)
    434 	extrwi	$acc08,$s2,8,0
    435 	stb	$s1,7($out)
    436 	extrwi	$acc09,$s2,8,8
    437 	stb	$acc08,8($out)
    438 	extrwi	$acc10,$s2,8,16
    439 	stb	$acc09,9($out)
    440 	stb	$acc10,10($out)
    441 	extrwi	$acc12,$s3,8,0
    442 	stb	$s2,11($out)
    443 	extrwi	$acc13,$s3,8,8
    444 	stb	$acc12,12($out)
    445 	extrwi	$acc14,$s3,8,16
    446 	stb	$acc13,13($out)
    447 	stb	$acc14,14($out)
    448 	stb	$s3,15($out)
    449 
    450 Lenc_done:
    451 	$POP	r0,`$FRAME+$LRSAVE`($sp)
    452 	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
    453 	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
    454 	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
    455 	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
    456 	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
    457 	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
    458 	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
    459 	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
    460 	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
    461 	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
    462 	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
    463 	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
    464 	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
    465 	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
    466 	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
    467 	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
    468 	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
    469 	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
    470 	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
    471 	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
    472 	mtlr	r0
    473 	addi	$sp,$sp,$FRAME
    474 	blr
    475 	.long	0
    476 	.byte	0,12,4,1,0x80,18,3,0
    477 	.long	0
    478 
    479 .align	5
    480 Lppc_AES_encrypt:
    481 	lwz	$acc00,240($key)
    482 	addi	$Tbl1,$Tbl0,3
    483 	lwz	$t0,0($key)
    484 	addi	$Tbl2,$Tbl0,2
    485 	lwz	$t1,4($key)
    486 	addi	$Tbl3,$Tbl0,1
    487 	lwz	$t2,8($key)
    488 	addi	$acc00,$acc00,-1
    489 	lwz	$t3,12($key)
    490 	addi	$key,$key,16
    491 	xor	$s0,$s0,$t0
    492 	xor	$s1,$s1,$t1
    493 	xor	$s2,$s2,$t2
    494 	xor	$s3,$s3,$t3
    495 	mtctr	$acc00
    496 .align	4
    497 Lenc_loop:
    498 	rlwinm	$acc00,$s0,`32-24+3`,21,28
    499 	rlwinm	$acc01,$s1,`32-24+3`,21,28
    500 	rlwinm	$acc02,$s2,`32-24+3`,21,28
    501 	rlwinm	$acc03,$s3,`32-24+3`,21,28
    502 	lwz	$t0,0($key)
    503 	rlwinm	$acc04,$s1,`32-16+3`,21,28
    504 	lwz	$t1,4($key)
    505 	rlwinm	$acc05,$s2,`32-16+3`,21,28
    506 	lwz	$t2,8($key)
    507 	rlwinm	$acc06,$s3,`32-16+3`,21,28
    508 	lwz	$t3,12($key)
    509 	rlwinm	$acc07,$s0,`32-16+3`,21,28
    510 	lwzx	$acc00,$Tbl0,$acc00
    511 	rlwinm	$acc08,$s2,`32-8+3`,21,28
    512 	lwzx	$acc01,$Tbl0,$acc01
    513 	rlwinm	$acc09,$s3,`32-8+3`,21,28
    514 	lwzx	$acc02,$Tbl0,$acc02
    515 	rlwinm	$acc10,$s0,`32-8+3`,21,28
    516 	lwzx	$acc03,$Tbl0,$acc03
    517 	rlwinm	$acc11,$s1,`32-8+3`,21,28
    518 	lwzx	$acc04,$Tbl1,$acc04
    519 	rlwinm	$acc12,$s3,`0+3`,21,28
    520 	lwzx	$acc05,$Tbl1,$acc05
    521 	rlwinm	$acc13,$s0,`0+3`,21,28
    522 	lwzx	$acc06,$Tbl1,$acc06
    523 	rlwinm	$acc14,$s1,`0+3`,21,28
    524 	lwzx	$acc07,$Tbl1,$acc07
    525 	rlwinm	$acc15,$s2,`0+3`,21,28
    526 	lwzx	$acc08,$Tbl2,$acc08
    527 	xor	$t0,$t0,$acc00
    528 	lwzx	$acc09,$Tbl2,$acc09
    529 	xor	$t1,$t1,$acc01
    530 	lwzx	$acc10,$Tbl2,$acc10
    531 	xor	$t2,$t2,$acc02
    532 	lwzx	$acc11,$Tbl2,$acc11
    533 	xor	$t3,$t3,$acc03
    534 	lwzx	$acc12,$Tbl3,$acc12
    535 	xor	$t0,$t0,$acc04
    536 	lwzx	$acc13,$Tbl3,$acc13
    537 	xor	$t1,$t1,$acc05
    538 	lwzx	$acc14,$Tbl3,$acc14
    539 	xor	$t2,$t2,$acc06
    540 	lwzx	$acc15,$Tbl3,$acc15
    541 	xor	$t3,$t3,$acc07
    542 	xor	$t0,$t0,$acc08
    543 	xor	$t1,$t1,$acc09
    544 	xor	$t2,$t2,$acc10
    545 	xor	$t3,$t3,$acc11
    546 	xor	$s0,$t0,$acc12
    547 	xor	$s1,$t1,$acc13
    548 	xor	$s2,$t2,$acc14
    549 	xor	$s3,$t3,$acc15
    550 	addi	$key,$key,16
    551 	bdnz-	Lenc_loop
    552 
    553 	addi	$Tbl2,$Tbl0,2048
    554 	nop
    555 	lwz	$t0,0($key)
    556 	rlwinm	$acc00,$s0,`32-24`,24,31
    557 	lwz	$t1,4($key)
    558 	rlwinm	$acc01,$s1,`32-24`,24,31
    559 	lwz	$t2,8($key)
    560 	rlwinm	$acc02,$s2,`32-24`,24,31
    561 	lwz	$t3,12($key)
    562 	rlwinm	$acc03,$s3,`32-24`,24,31
    563 	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
    564 	rlwinm	$acc04,$s1,`32-16`,24,31
    565 	lwz	$acc09,`2048+32`($Tbl0)
    566 	rlwinm	$acc05,$s2,`32-16`,24,31
    567 	lwz	$acc10,`2048+64`($Tbl0)
    568 	rlwinm	$acc06,$s3,`32-16`,24,31
    569 	lwz	$acc11,`2048+96`($Tbl0)
    570 	rlwinm	$acc07,$s0,`32-16`,24,31
    571 	lwz	$acc12,`2048+128`($Tbl0)
    572 	rlwinm	$acc08,$s2,`32-8`,24,31
    573 	lwz	$acc13,`2048+160`($Tbl0)
    574 	rlwinm	$acc09,$s3,`32-8`,24,31
    575 	lwz	$acc14,`2048+192`($Tbl0)
    576 	rlwinm	$acc10,$s0,`32-8`,24,31
    577 	lwz	$acc15,`2048+224`($Tbl0)
    578 	rlwinm	$acc11,$s1,`32-8`,24,31
    579 	lbzx	$acc00,$Tbl2,$acc00
    580 	rlwinm	$acc12,$s3,`0`,24,31
    581 	lbzx	$acc01,$Tbl2,$acc01
    582 	rlwinm	$acc13,$s0,`0`,24,31
    583 	lbzx	$acc02,$Tbl2,$acc02
    584 	rlwinm	$acc14,$s1,`0`,24,31
    585 	lbzx	$acc03,$Tbl2,$acc03
    586 	rlwinm	$acc15,$s2,`0`,24,31
    587 	lbzx	$acc04,$Tbl2,$acc04
    588 	rlwinm	$s0,$acc00,24,0,7
    589 	lbzx	$acc05,$Tbl2,$acc05
    590 	rlwinm	$s1,$acc01,24,0,7
    591 	lbzx	$acc06,$Tbl2,$acc06
    592 	rlwinm	$s2,$acc02,24,0,7
    593 	lbzx	$acc07,$Tbl2,$acc07
    594 	rlwinm	$s3,$acc03,24,0,7
    595 	lbzx	$acc08,$Tbl2,$acc08
    596 	rlwimi	$s0,$acc04,16,8,15
    597 	lbzx	$acc09,$Tbl2,$acc09
    598 	rlwimi	$s1,$acc05,16,8,15
    599 	lbzx	$acc10,$Tbl2,$acc10
    600 	rlwimi	$s2,$acc06,16,8,15
    601 	lbzx	$acc11,$Tbl2,$acc11
    602 	rlwimi	$s3,$acc07,16,8,15
    603 	lbzx	$acc12,$Tbl2,$acc12
    604 	rlwimi	$s0,$acc08,8,16,23
    605 	lbzx	$acc13,$Tbl2,$acc13
    606 	rlwimi	$s1,$acc09,8,16,23
    607 	lbzx	$acc14,$Tbl2,$acc14
    608 	rlwimi	$s2,$acc10,8,16,23
    609 	lbzx	$acc15,$Tbl2,$acc15
    610 	rlwimi	$s3,$acc11,8,16,23
    611 	or	$s0,$s0,$acc12
    612 	or	$s1,$s1,$acc13
    613 	or	$s2,$s2,$acc14
    614 	or	$s3,$s3,$acc15
    615 	xor	$s0,$s0,$t0
    616 	xor	$s1,$s1,$t1
    617 	xor	$s2,$s2,$t2
    618 	xor	$s3,$s3,$t3
    619 	blr
    620 	.long	0
    621 	.byte	0,12,0x14,0,0,0,0,0
    622 
    623 .align	4
    624 Lppc_AES_encrypt_compact:
    625 	lwz	$acc00,240($key)
    626 	addi	$Tbl1,$Tbl0,2048
    627 	lwz	$t0,0($key)
    628 	lis	$mask80,0x8080
    629 	lwz	$t1,4($key)
    630 	lis	$mask1b,0x1b1b
    631 	lwz	$t2,8($key)
    632 	ori	$mask80,$mask80,0x8080
    633 	lwz	$t3,12($key)
    634 	ori	$mask1b,$mask1b,0x1b1b
    635 	addi	$key,$key,16
    636 	mtctr	$acc00
    637 .align	4
    638 Lenc_compact_loop:
    639 	xor	$s0,$s0,$t0
    640 	xor	$s1,$s1,$t1
    641 	rlwinm	$acc00,$s0,`32-24`,24,31
    642 	xor	$s2,$s2,$t2
    643 	rlwinm	$acc01,$s1,`32-24`,24,31
    644 	xor	$s3,$s3,$t3
    645 	rlwinm	$acc02,$s2,`32-24`,24,31
    646 	rlwinm	$acc03,$s3,`32-24`,24,31
    647 	rlwinm	$acc04,$s1,`32-16`,24,31
    648 	rlwinm	$acc05,$s2,`32-16`,24,31
    649 	rlwinm	$acc06,$s3,`32-16`,24,31
    650 	rlwinm	$acc07,$s0,`32-16`,24,31
    651 	lbzx	$acc00,$Tbl1,$acc00
    652 	rlwinm	$acc08,$s2,`32-8`,24,31
    653 	lbzx	$acc01,$Tbl1,$acc01
    654 	rlwinm	$acc09,$s3,`32-8`,24,31
    655 	lbzx	$acc02,$Tbl1,$acc02
    656 	rlwinm	$acc10,$s0,`32-8`,24,31
    657 	lbzx	$acc03,$Tbl1,$acc03
    658 	rlwinm	$acc11,$s1,`32-8`,24,31
    659 	lbzx	$acc04,$Tbl1,$acc04
    660 	rlwinm	$acc12,$s3,`0`,24,31
    661 	lbzx	$acc05,$Tbl1,$acc05
    662 	rlwinm	$acc13,$s0,`0`,24,31
    663 	lbzx	$acc06,$Tbl1,$acc06
    664 	rlwinm	$acc14,$s1,`0`,24,31
    665 	lbzx	$acc07,$Tbl1,$acc07
    666 	rlwinm	$acc15,$s2,`0`,24,31
    667 	lbzx	$acc08,$Tbl1,$acc08
    668 	rlwinm	$s0,$acc00,24,0,7
    669 	lbzx	$acc09,$Tbl1,$acc09
    670 	rlwinm	$s1,$acc01,24,0,7
    671 	lbzx	$acc10,$Tbl1,$acc10
    672 	rlwinm	$s2,$acc02,24,0,7
    673 	lbzx	$acc11,$Tbl1,$acc11
    674 	rlwinm	$s3,$acc03,24,0,7
    675 	lbzx	$acc12,$Tbl1,$acc12
    676 	rlwimi	$s0,$acc04,16,8,15
    677 	lbzx	$acc13,$Tbl1,$acc13
    678 	rlwimi	$s1,$acc05,16,8,15
    679 	lbzx	$acc14,$Tbl1,$acc14
    680 	rlwimi	$s2,$acc06,16,8,15
    681 	lbzx	$acc15,$Tbl1,$acc15
    682 	rlwimi	$s3,$acc07,16,8,15
    683 	rlwimi	$s0,$acc08,8,16,23
    684 	rlwimi	$s1,$acc09,8,16,23
    685 	rlwimi	$s2,$acc10,8,16,23
    686 	rlwimi	$s3,$acc11,8,16,23
    687 	lwz	$t0,0($key)
    688 	or	$s0,$s0,$acc12
    689 	lwz	$t1,4($key)
    690 	or	$s1,$s1,$acc13
    691 	lwz	$t2,8($key)
    692 	or	$s2,$s2,$acc14
    693 	lwz	$t3,12($key)
    694 	or	$s3,$s3,$acc15
    695 
    696 	addi	$key,$key,16
    697 	bdz	Lenc_compact_done
    698 
    699 	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
    700 	and	$acc01,$s1,$mask80
    701 	and	$acc02,$s2,$mask80
    702 	and	$acc03,$s3,$mask80
    703 	srwi	$acc04,$acc00,7		# r1>>7
    704 	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
    705 	srwi	$acc05,$acc01,7
    706 	andc	$acc09,$s1,$mask80
    707 	srwi	$acc06,$acc02,7
    708 	andc	$acc10,$s2,$mask80
    709 	srwi	$acc07,$acc03,7
    710 	andc	$acc11,$s3,$mask80
    711 	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
    712 	sub	$acc01,$acc01,$acc05
    713 	sub	$acc02,$acc02,$acc06
    714 	sub	$acc03,$acc03,$acc07
    715 	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
    716 	add	$acc09,$acc09,$acc09
    717 	add	$acc10,$acc10,$acc10
    718 	add	$acc11,$acc11,$acc11
    719 	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
    720 	and	$acc01,$acc01,$mask1b
    721 	and	$acc02,$acc02,$mask1b
    722 	and	$acc03,$acc03,$mask1b
    723 	xor	$acc00,$acc00,$acc08	# r2
    724 	xor	$acc01,$acc01,$acc09
    725 	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
    726 	xor	$acc02,$acc02,$acc10
    727 	 rotlwi	$acc13,$s1,16
    728 	xor	$acc03,$acc03,$acc11
    729 	 rotlwi	$acc14,$s2,16
    730 
    731 	xor	$s0,$s0,$acc00		# r0^r2
    732 	rotlwi	$acc15,$s3,16
    733 	xor	$s1,$s1,$acc01
    734 	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
    735 	xor	$s2,$s2,$acc02
    736 	rotrwi	$s1,$s1,24
    737 	xor	$s3,$s3,$acc03
    738 	rotrwi	$s2,$s2,24
    739 	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
    740 	rotrwi	$s3,$s3,24
    741 	xor	$s1,$s1,$acc01
    742 	xor	$s2,$s2,$acc02
    743 	xor	$s3,$s3,$acc03
    744 	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
    745 	xor	$s0,$s0,$acc12		#
    746 	rotlwi	$acc09,$acc13,8
    747 	xor	$s1,$s1,$acc13
    748 	rotlwi	$acc10,$acc14,8
    749 	xor	$s2,$s2,$acc14
    750 	rotlwi	$acc11,$acc15,8
    751 	xor	$s3,$s3,$acc15
    752 	xor	$s0,$s0,$acc08		#
    753 	xor	$s1,$s1,$acc09
    754 	xor	$s2,$s2,$acc10
    755 	xor	$s3,$s3,$acc11
    756 
    757 	b	Lenc_compact_loop
    758 .align	4
    759 Lenc_compact_done:
    760 	xor	$s0,$s0,$t0
    761 	xor	$s1,$s1,$t1
    762 	xor	$s2,$s2,$t2
    763 	xor	$s3,$s3,$t3
    764 	blr
    765 	.long	0
    766 	.byte	0,12,0x14,0,0,0,0,0
    767 
    768 .globl	.AES_decrypt
    769 .align	7
    770 .AES_decrypt:
    771 	$STU	$sp,-$FRAME($sp)
    772 	mflr	r0
    773 
    774 	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
    775 	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
    776 	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
    777 	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
    778 	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
    779 	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
    780 	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
    781 	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
    782 	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
    783 	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
    784 	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
    785 	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
    786 	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
    787 	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
    788 	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
    789 	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
    790 	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
    791 	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
    792 	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
    793 	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
    794 	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
    795 
    796 	andi.	$t0,$inp,3
    797 	andi.	$t1,$out,3
    798 	or.	$t0,$t0,$t1
    799 	bne	Ldec_unaligned
    800 
    801 Ldec_unaligned_ok:
    802 	lwz	$s0,0($inp)
    803 	lwz	$s1,4($inp)
    804 	lwz	$s2,8($inp)
    805 	lwz	$s3,12($inp)
    806 	bl	LAES_Td
    807 	bl	Lppc_AES_decrypt_compact
    808 	stw	$s0,0($out)
    809 	stw	$s1,4($out)
    810 	stw	$s2,8($out)
    811 	stw	$s3,12($out)
    812 	b	Ldec_done
    813 
    814 Ldec_unaligned:
    815 	subfic	$t0,$inp,4096
    816 	subfic	$t1,$out,4096
    817 	andi.	$t0,$t0,4096-16
    818 	beq	Ldec_xpage
    819 	andi.	$t1,$t1,4096-16
    820 	bne	Ldec_unaligned_ok
    821 
    822 Ldec_xpage:
    823 	lbz	$acc00,0($inp)
    824 	lbz	$acc01,1($inp)
    825 	lbz	$acc02,2($inp)
    826 	lbz	$s0,3($inp)
    827 	lbz	$acc04,4($inp)
    828 	lbz	$acc05,5($inp)
    829 	lbz	$acc06,6($inp)
    830 	lbz	$s1,7($inp)
    831 	lbz	$acc08,8($inp)
    832 	lbz	$acc09,9($inp)
    833 	lbz	$acc10,10($inp)
    834 	insrwi	$s0,$acc00,8,0
    835 	lbz	$s2,11($inp)
    836 	insrwi	$s1,$acc04,8,0
    837 	lbz	$acc12,12($inp)
    838 	insrwi	$s0,$acc01,8,8
    839 	lbz	$acc13,13($inp)
    840 	insrwi	$s1,$acc05,8,8
    841 	lbz	$acc14,14($inp)
    842 	insrwi	$s0,$acc02,8,16
    843 	lbz	$s3,15($inp)
    844 	insrwi	$s1,$acc06,8,16
    845 	insrwi	$s2,$acc08,8,0
    846 	insrwi	$s3,$acc12,8,0
    847 	insrwi	$s2,$acc09,8,8
    848 	insrwi	$s3,$acc13,8,8
    849 	insrwi	$s2,$acc10,8,16
    850 	insrwi	$s3,$acc14,8,16
    851 
    852 	bl	LAES_Td
    853 	bl	Lppc_AES_decrypt_compact
    854 
    855 	extrwi	$acc00,$s0,8,0
    856 	extrwi	$acc01,$s0,8,8
    857 	stb	$acc00,0($out)
    858 	extrwi	$acc02,$s0,8,16
    859 	stb	$acc01,1($out)
    860 	stb	$acc02,2($out)
    861 	extrwi	$acc04,$s1,8,0
    862 	stb	$s0,3($out)
    863 	extrwi	$acc05,$s1,8,8
    864 	stb	$acc04,4($out)
    865 	extrwi	$acc06,$s1,8,16
    866 	stb	$acc05,5($out)
    867 	stb	$acc06,6($out)
    868 	extrwi	$acc08,$s2,8,0
    869 	stb	$s1,7($out)
    870 	extrwi	$acc09,$s2,8,8
    871 	stb	$acc08,8($out)
    872 	extrwi	$acc10,$s2,8,16
    873 	stb	$acc09,9($out)
    874 	stb	$acc10,10($out)
    875 	extrwi	$acc12,$s3,8,0
    876 	stb	$s2,11($out)
    877 	extrwi	$acc13,$s3,8,8
    878 	stb	$acc12,12($out)
    879 	extrwi	$acc14,$s3,8,16
    880 	stb	$acc13,13($out)
    881 	stb	$acc14,14($out)
    882 	stb	$s3,15($out)
    883 
    884 Ldec_done:
    885 	$POP	r0,`$FRAME+$LRSAVE`($sp)
    886 	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
    887 	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
    888 	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
    889 	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
    890 	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
    891 	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
    892 	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
    893 	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
    894 	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
    895 	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
    896 	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
    897 	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
    898 	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
    899 	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
    900 	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
    901 	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
    902 	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
    903 	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
    904 	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
    905 	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
    906 	mtlr	r0
    907 	addi	$sp,$sp,$FRAME
    908 	blr
    909 	.long	0
    910 	.byte	0,12,4,1,0x80,18,3,0
    911 	.long	0
    912 
    913 .align	5
    914 Lppc_AES_decrypt:
	# Table-driven decryption of the 16-byte state held in s0-s3.
	# Reads: key (pointer to key data, consumed 16 bytes per step, with
	# the step count stored as a word at offset 240) and Tbl0 (pointer
	# to the lookup tables). Leaves the result in s0-s3 and overwrites
	# t0-t3, acc00-acc15, Tbl1-Tbl3, key and CTR.
	# Tbl1/Tbl2/Tbl3 are Tbl0 plus 3/2/1 bytes, and every index used in
	# the loop is a byte value times 8, so entries sit 8 bytes apart.
	# NOTE(review): presumably each entry stores its word twice, making
	# the offset loads byte-rotated copies - confirm at the table.
    915 	lwz	$acc00,240($key)
    916 	addi	$Tbl1,$Tbl0,3
    917 	lwz	$t0,0($key)
    918 	addi	$Tbl2,$Tbl0,2
    919 	lwz	$t1,4($key)
    920 	addi	$Tbl3,$Tbl0,1
    921 	lwz	$t2,8($key)
    922 	addi	$acc00,$acc00,-1
    923 	lwz	$t3,12($key)
    924 	addi	$key,$key,16
	# fold the first 16 bytes of key data into the state
    925 	xor	$s0,$s0,$t0
    926 	xor	$s1,$s1,$t1
    927 	xor	$s2,$s2,$t2
    928 	xor	$s3,$s3,$t3
	# loop count is (word at offset 240) - 1, the last step is done
	# separately after the loop
    929 	mtctr	$acc00
    930 .align	4
    931 Ldec_loop:
	# For output word i: the top byte of state word i indexes Tbl0, the
	# next lower bytes of words i+3, i+2 and i+1 (mod 4) index Tbl1,
	# Tbl2 and Tbl3. The four loaded words are xored together with key
	# word i (t0-t3) to form the new state. Each rlwinm rotates the
	# selected byte into place and masks it, yielding (byte << 3).
    932 	rlwinm	$acc00,$s0,`32-24+3`,21,28
    933 	rlwinm	$acc01,$s1,`32-24+3`,21,28
    934 	rlwinm	$acc02,$s2,`32-24+3`,21,28
    935 	rlwinm	$acc03,$s3,`32-24+3`,21,28
    936 	lwz	$t0,0($key)
    937 	rlwinm	$acc04,$s3,`32-16+3`,21,28
    938 	lwz	$t1,4($key)
    939 	rlwinm	$acc05,$s0,`32-16+3`,21,28
    940 	lwz	$t2,8($key)
    941 	rlwinm	$acc06,$s1,`32-16+3`,21,28
    942 	lwz	$t3,12($key)
    943 	rlwinm	$acc07,$s2,`32-16+3`,21,28
    944 	lwzx	$acc00,$Tbl0,$acc00
    945 	rlwinm	$acc08,$s2,`32-8+3`,21,28
    946 	lwzx	$acc01,$Tbl0,$acc01
    947 	rlwinm	$acc09,$s3,`32-8+3`,21,28
    948 	lwzx	$acc02,$Tbl0,$acc02
    949 	rlwinm	$acc10,$s0,`32-8+3`,21,28
    950 	lwzx	$acc03,$Tbl0,$acc03
    951 	rlwinm	$acc11,$s1,`32-8+3`,21,28
    952 	lwzx	$acc04,$Tbl1,$acc04
    953 	rlwinm	$acc12,$s1,`0+3`,21,28
    954 	lwzx	$acc05,$Tbl1,$acc05
    955 	rlwinm	$acc13,$s2,`0+3`,21,28
    956 	lwzx	$acc06,$Tbl1,$acc06
    957 	rlwinm	$acc14,$s3,`0+3`,21,28
    958 	lwzx	$acc07,$Tbl1,$acc07
    959 	rlwinm	$acc15,$s0,`0+3`,21,28
    960 	lwzx	$acc08,$Tbl2,$acc08
    961 	xor	$t0,$t0,$acc00
    962 	lwzx	$acc09,$Tbl2,$acc09
    963 	xor	$t1,$t1,$acc01
    964 	lwzx	$acc10,$Tbl2,$acc10
    965 	xor	$t2,$t2,$acc02
    966 	lwzx	$acc11,$Tbl2,$acc11
    967 	xor	$t3,$t3,$acc03
    968 	lwzx	$acc12,$Tbl3,$acc12
    969 	xor	$t0,$t0,$acc04
    970 	lwzx	$acc13,$Tbl3,$acc13
    971 	xor	$t1,$t1,$acc05
    972 	lwzx	$acc14,$Tbl3,$acc14
    973 	xor	$t2,$t2,$acc06
    974 	lwzx	$acc15,$Tbl3,$acc15
    975 	xor	$t3,$t3,$acc07
    976 	xor	$t0,$t0,$acc08
    977 	xor	$t1,$t1,$acc09
    978 	xor	$t2,$t2,$acc10
    979 	xor	$t3,$t3,$acc11
    980 	xor	$s0,$t0,$acc12
    981 	xor	$s1,$t1,$acc13
    982 	xor	$s2,$t2,$acc14
    983 	xor	$s3,$t3,$acc15
    984 	addi	$key,$key,16
    985 	bdnz-	Ldec_loop
    986 
	# Final step: same byte selection as in the loop, but each byte goes
	# through the byte-wide table 2048 bytes past Tbl0 and the results
	# are packed back into words. The eight lwz loads at 32-byte stride
	# only touch that table early - their destination registers
	# (acc08-acc15) are overwritten before being read.
    987 	addi	$Tbl2,$Tbl0,2048
    988 	nop
    989 	lwz	$t0,0($key)
    990 	rlwinm	$acc00,$s0,`32-24`,24,31
    991 	lwz	$t1,4($key)
    992 	rlwinm	$acc01,$s1,`32-24`,24,31
    993 	lwz	$t2,8($key)
    994 	rlwinm	$acc02,$s2,`32-24`,24,31
    995 	lwz	$t3,12($key)
    996 	rlwinm	$acc03,$s3,`32-24`,24,31
    997 	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
    998 	rlwinm	$acc04,$s3,`32-16`,24,31
    999 	lwz	$acc09,`2048+32`($Tbl0)
   1000 	rlwinm	$acc05,$s0,`32-16`,24,31
   1001 	lwz	$acc10,`2048+64`($Tbl0)
   1002 	lbzx	$acc00,$Tbl2,$acc00
   1003 	lwz	$acc11,`2048+96`($Tbl0)
   1004 	lbzx	$acc01,$Tbl2,$acc01
   1005 	lwz	$acc12,`2048+128`($Tbl0)
   1006 	rlwinm	$acc06,$s1,`32-16`,24,31
   1007 	lwz	$acc13,`2048+160`($Tbl0)
   1008 	rlwinm	$acc07,$s2,`32-16`,24,31
   1009 	lwz	$acc14,`2048+192`($Tbl0)
   1010 	rlwinm	$acc08,$s2,`32-8`,24,31
   1011 	lwz	$acc15,`2048+224`($Tbl0)
   1012 	rlwinm	$acc09,$s3,`32-8`,24,31
   1013 	lbzx	$acc02,$Tbl2,$acc02
   1014 	rlwinm	$acc10,$s0,`32-8`,24,31
   1015 	lbzx	$acc03,$Tbl2,$acc03
   1016 	rlwinm	$acc11,$s1,`32-8`,24,31
   1017 	lbzx	$acc04,$Tbl2,$acc04
   1018 	rlwinm	$acc12,$s1,`0`,24,31
   1019 	lbzx	$acc05,$Tbl2,$acc05
   1020 	rlwinm	$acc13,$s2,`0`,24,31
   1021 	lbzx	$acc06,$Tbl2,$acc06
   1022 	rlwinm	$acc14,$s3,`0`,24,31
   1023 	lbzx	$acc07,$Tbl2,$acc07
   1024 	rlwinm	$acc15,$s0,`0`,24,31
   1025 	lbzx	$acc08,$Tbl2,$acc08
   1026 	rlwinm	$s0,$acc00,24,0,7
   1027 	lbzx	$acc09,$Tbl2,$acc09
   1028 	rlwinm	$s1,$acc01,24,0,7
   1029 	lbzx	$acc10,$Tbl2,$acc10
   1030 	rlwinm	$s2,$acc02,24,0,7
   1031 	lbzx	$acc11,$Tbl2,$acc11
   1032 	rlwinm	$s3,$acc03,24,0,7
   1033 	lbzx	$acc12,$Tbl2,$acc12
   1034 	rlwimi	$s0,$acc04,16,8,15
   1035 	lbzx	$acc13,$Tbl2,$acc13
   1036 	rlwimi	$s1,$acc05,16,8,15
   1037 	lbzx	$acc14,$Tbl2,$acc14
   1038 	rlwimi	$s2,$acc06,16,8,15
   1039 	lbzx	$acc15,$Tbl2,$acc15
   1040 	rlwimi	$s3,$acc07,16,8,15
   1041 	rlwimi	$s0,$acc08,8,16,23
   1042 	rlwimi	$s1,$acc09,8,16,23
   1043 	rlwimi	$s2,$acc10,8,16,23
   1044 	rlwimi	$s3,$acc11,8,16,23
	# the ors supply the lowest byte, completing
	# word = b3<<24 | b2<<16 | b1<<8 | b0
   1045 	or	$s0,$s0,$acc12
   1046 	or	$s1,$s1,$acc13
   1047 	or	$s2,$s2,$acc14
   1048 	or	$s3,$s3,$acc15
	# fold in the last 16 bytes of key data
   1049 	xor	$s0,$s0,$t0
   1050 	xor	$s1,$s1,$t1
   1051 	xor	$s2,$s2,$t2
   1052 	xor	$s3,$s3,$t3
   1053 	blr
	# NOTE(review): the data below looks like a traceback-table style
	# trailer - confirm against the ABI before changing it
   1054 	.long	0
   1055 	.byte	0,12,0x14,0,0,0,0,0
   1056 
   1057 .align	4
   1058 Lppc_AES_decrypt_compact:
	# Compact decryption of the 16-byte state held in s0-s3. The only
	# table read is the byte table 2048 bytes past Tbl0 (kept in Tbl1),
	# the rest of each pass is computed with the two byte masks built
	# here: mask80 = 0x80 in every byte, mask1b = 0x1b in every byte of
	# the low word. The word at offset 240 of the key data becomes the
	# loop count, and the first four key words are preloaded into t0-t3.
   1059 	lwz	$acc00,240($key)
   1060 	addi	$Tbl1,$Tbl0,2048
   1061 	lwz	$t0,0($key)
   1062 	lis	$mask80,0x8080
   1063 	lwz	$t1,4($key)
   1064 	lis	$mask1b,0x1b1b
   1065 	lwz	$t2,8($key)
   1066 	ori	$mask80,$mask80,0x8080
   1067 	lwz	$t3,12($key)
   1068 	ori	$mask1b,$mask1b,0x1b1b
   1069 	addi	$key,$key,16
   1070 ___
   1071 $code.=<<___ if ($SIZE_T==8);
	# 64-bit build only: copy the low 32 bits of each mask into its
	# upper 32 bits, so that one register covers two packed state words
   1072 	insrdi	$mask80,$mask80,32,0
   1073 	insrdi	$mask1b,$mask1b,32,0
   1074 ___
   1075 $code.=<<___;
   1076 	mtctr	$acc00
   1077 .align	4
   1078 Ldec_compact_loop:
	# Per pass: xor in the four key words fetched earlier (t0-t3), then
	# push all 16 state bytes through the byte table at Tbl1. Output
	# word i is built from the top byte of word i and the next lower
	# bytes of words i+3, i+2 and i+1 (mod 4). The next four key words
	# are loaded while the bytes are merged back into s0-s3.
   1079 	xor	$s0,$s0,$t0
   1080 	xor	$s1,$s1,$t1
   1081 	rlwinm	$acc00,$s0,`32-24`,24,31
   1082 	xor	$s2,$s2,$t2
   1083 	rlwinm	$acc01,$s1,`32-24`,24,31
   1084 	xor	$s3,$s3,$t3
   1085 	rlwinm	$acc02,$s2,`32-24`,24,31
   1086 	rlwinm	$acc03,$s3,`32-24`,24,31
   1087 	rlwinm	$acc04,$s3,`32-16`,24,31
   1088 	rlwinm	$acc05,$s0,`32-16`,24,31
   1089 	rlwinm	$acc06,$s1,`32-16`,24,31
   1090 	rlwinm	$acc07,$s2,`32-16`,24,31
   1091 	lbzx	$acc00,$Tbl1,$acc00
   1092 	rlwinm	$acc08,$s2,`32-8`,24,31
   1093 	lbzx	$acc01,$Tbl1,$acc01
   1094 	rlwinm	$acc09,$s3,`32-8`,24,31
   1095 	lbzx	$acc02,$Tbl1,$acc02
   1096 	rlwinm	$acc10,$s0,`32-8`,24,31
   1097 	lbzx	$acc03,$Tbl1,$acc03
   1098 	rlwinm	$acc11,$s1,`32-8`,24,31
   1099 	lbzx	$acc04,$Tbl1,$acc04
   1100 	rlwinm	$acc12,$s1,`0`,24,31
   1101 	lbzx	$acc05,$Tbl1,$acc05
   1102 	rlwinm	$acc13,$s2,`0`,24,31
   1103 	lbzx	$acc06,$Tbl1,$acc06
   1104 	rlwinm	$acc14,$s3,`0`,24,31
   1105 	lbzx	$acc07,$Tbl1,$acc07
   1106 	rlwinm	$acc15,$s0,`0`,24,31
   1107 	lbzx	$acc08,$Tbl1,$acc08
   1108 	rlwinm	$s0,$acc00,24,0,7
   1109 	lbzx	$acc09,$Tbl1,$acc09
   1110 	rlwinm	$s1,$acc01,24,0,7
   1111 	lbzx	$acc10,$Tbl1,$acc10
   1112 	rlwinm	$s2,$acc02,24,0,7
   1113 	lbzx	$acc11,$Tbl1,$acc11
   1114 	rlwinm	$s3,$acc03,24,0,7
   1115 	lbzx	$acc12,$Tbl1,$acc12
   1116 	rlwimi	$s0,$acc04,16,8,15
   1117 	lbzx	$acc13,$Tbl1,$acc13
   1118 	rlwimi	$s1,$acc05,16,8,15
   1119 	lbzx	$acc14,$Tbl1,$acc14
   1120 	rlwimi	$s2,$acc06,16,8,15
   1121 	lbzx	$acc15,$Tbl1,$acc15
   1122 	rlwimi	$s3,$acc07,16,8,15
   1123 	rlwimi	$s0,$acc08,8,16,23
   1124 	rlwimi	$s1,$acc09,8,16,23
   1125 	rlwimi	$s2,$acc10,8,16,23
   1126 	rlwimi	$s3,$acc11,8,16,23
   1127 	lwz	$t0,0($key)
   1128 	or	$s0,$s0,$acc12
   1129 	lwz	$t1,4($key)
   1130 	or	$s1,$s1,$acc13
   1131 	lwz	$t2,8($key)
   1132 	or	$s2,$s2,$acc14
   1133 	lwz	$t3,12($key)
   1134 	or	$s3,$s3,$acc15
   1135 
   1136 	addi	$key,$key,16
	# bdz decrements CTR and leaves the loop once it reaches zero, so
	# the last pass skips the mixing code that follows
   1137 	bdz	Ldec_compact_done
   1138 ___
   1139 $code.=<<___ if ($SIZE_T==8);
   1140 	# vectorized permutation improves decrypt performance by 10%
	# 64-bit build: s1 is packed into the upper half of s0 and s3 into
	# the upper half of s2, so every operation below handles two state
	# words. With r0 the packed state, r2, r4 and r8 are successive
	# per-byte doublings: each byte is shifted left by one and 0x1b is
	# xored in where its top bit was set.
   1141 	insrdi	$s0,$s1,32,0
   1142 	insrdi	$s2,$s3,32,0
   1143 
   1144 	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
   1145 	and	$acc02,$s2,$mask80
   1146 	srdi	$acc04,$acc00,7		# r1>>7
   1147 	srdi	$acc06,$acc02,7
   1148 	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
   1149 	andc	$acc10,$s2,$mask80
   1150 	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
   1151 	sub	$acc02,$acc02,$acc06
   1152 	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
   1153 	add	$acc10,$acc10,$acc10
   1154 	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1155 	and	$acc02,$acc02,$mask1b
   1156 	xor	$acc00,$acc00,$acc08	# r2
   1157 	xor	$acc02,$acc02,$acc10
   1158 
   1159 	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
   1160 	and	$acc06,$acc02,$mask80
   1161 	srdi	$acc08,$acc04,7		# r1>>7
   1162 	srdi	$acc10,$acc06,7
   1163 	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
   1164 	andc	$acc14,$acc02,$mask80
   1165 	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
   1166 	sub	$acc06,$acc06,$acc10
   1167 	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
   1168 	add	$acc14,$acc14,$acc14
   1169 	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1170 	and	$acc06,$acc06,$mask1b
   1171 	xor	$acc04,$acc04,$acc12	# r4
   1172 	xor	$acc06,$acc06,$acc14
   1173 
   1174 	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
   1175 	and	$acc10,$acc06,$mask80
   1176 	srdi	$acc12,$acc08,7		# r1>>7
   1177 	srdi	$acc14,$acc10,7
   1178 	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
   1179 	sub	$acc10,$acc10,$acc14
   1180 	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
   1181 	andc	$acc14,$acc06,$mask80
   1182 	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
   1183 	add	$acc14,$acc14,$acc14
   1184 	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1185 	and	$acc10,$acc10,$mask1b
   1186 	xor	$acc08,$acc08,$acc12	# r8
   1187 	xor	$acc10,$acc10,$acc14
   1188 
   1189 	xor	$acc00,$acc00,$s0	# r2^r0
   1190 	xor	$acc02,$acc02,$s2
   1191 	xor	$acc04,$acc04,$s0	# r4^r0
   1192 	xor	$acc06,$acc06,$s2
   1193 
	# unpack: the upper halves (the s1 and s3 lanes) go to the
	# odd-numbered registers, the even ones keep the low halves
   1194 	extrdi	$acc01,$acc00,32,0
   1195 	extrdi	$acc03,$acc02,32,0
   1196 	extrdi	$acc05,$acc04,32,0
   1197 	extrdi	$acc07,$acc06,32,0
   1198 	extrdi	$acc09,$acc08,32,0
   1199 	extrdi	$acc11,$acc10,32,0
   1200 ___
   1201 $code.=<<___ if ($SIZE_T==4);
	# 32-bit build: one register per state word. Starting from r0 (the
	# word in s0-s3), each of r2, r4 and r8 is the previous value with
	# every byte shifted left by one and 0x1b xored into the bytes whose
	# top bit was set. Results: acc00-03 = r2^r0, acc04-07 = r4^r0,
	# acc08-11 = r8.
   1202 	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
   1203 	and	$acc01,$s1,$mask80
   1204 	and	$acc02,$s2,$mask80
   1205 	and	$acc03,$s3,$mask80
   1206 	srwi	$acc04,$acc00,7		# r1>>7
   1207 	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
   1208 	srwi	$acc05,$acc01,7
   1209 	andc	$acc09,$s1,$mask80
   1210 	srwi	$acc06,$acc02,7
   1211 	andc	$acc10,$s2,$mask80
   1212 	srwi	$acc07,$acc03,7
   1213 	andc	$acc11,$s3,$mask80
   1214 	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
   1215 	sub	$acc01,$acc01,$acc05
   1216 	sub	$acc02,$acc02,$acc06
   1217 	sub	$acc03,$acc03,$acc07
   1218 	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
   1219 	add	$acc09,$acc09,$acc09
   1220 	add	$acc10,$acc10,$acc10
   1221 	add	$acc11,$acc11,$acc11
   1222 	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1223 	and	$acc01,$acc01,$mask1b
   1224 	and	$acc02,$acc02,$mask1b
   1225 	and	$acc03,$acc03,$mask1b
   1226 	xor	$acc00,$acc00,$acc08	# r2
   1227 	xor	$acc01,$acc01,$acc09
   1228 	xor	$acc02,$acc02,$acc10
   1229 	xor	$acc03,$acc03,$acc11
   1230 
   1231 	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
   1232 	and	$acc05,$acc01,$mask80
   1233 	and	$acc06,$acc02,$mask80
   1234 	and	$acc07,$acc03,$mask80
   1235 	srwi	$acc08,$acc04,7		# r1>>7
   1236 	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
   1237 	srwi	$acc09,$acc05,7
   1238 	andc	$acc13,$acc01,$mask80
   1239 	srwi	$acc10,$acc06,7
   1240 	andc	$acc14,$acc02,$mask80
   1241 	srwi	$acc11,$acc07,7
   1242 	andc	$acc15,$acc03,$mask80
   1243 	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
   1244 	sub	$acc05,$acc05,$acc09
   1245 	sub	$acc06,$acc06,$acc10
   1246 	sub	$acc07,$acc07,$acc11
   1247 	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
   1248 	add	$acc13,$acc13,$acc13
   1249 	add	$acc14,$acc14,$acc14
   1250 	add	$acc15,$acc15,$acc15
   1251 	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1252 	and	$acc05,$acc05,$mask1b
   1253 	and	$acc06,$acc06,$mask1b
   1254 	and	$acc07,$acc07,$mask1b
   1255 	xor	$acc04,$acc04,$acc12	# r4
   1256 	xor	$acc05,$acc05,$acc13
   1257 	xor	$acc06,$acc06,$acc14
   1258 	xor	$acc07,$acc07,$acc15
   1259 
   1260 	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
   1261 	and	$acc09,$acc05,$mask80
   1262 	srwi	$acc12,$acc08,7		# r1>>7
   1263 	and	$acc10,$acc06,$mask80
   1264 	srwi	$acc13,$acc09,7
   1265 	and	$acc11,$acc07,$mask80
   1266 	srwi	$acc14,$acc10,7
   1267 	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
   1268 	srwi	$acc15,$acc11,7
   1269 	sub	$acc09,$acc09,$acc13
   1270 	sub	$acc10,$acc10,$acc14
   1271 	sub	$acc11,$acc11,$acc15
   1272 	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
   1273 	andc	$acc13,$acc05,$mask80
   1274 	andc	$acc14,$acc06,$mask80
   1275 	andc	$acc15,$acc07,$mask80
   1276 	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
   1277 	add	$acc13,$acc13,$acc13
   1278 	add	$acc14,$acc14,$acc14
   1279 	add	$acc15,$acc15,$acc15
   1280 	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
   1281 	and	$acc09,$acc09,$mask1b
   1282 	and	$acc10,$acc10,$mask1b
   1283 	and	$acc11,$acc11,$mask1b
   1284 	xor	$acc08,$acc08,$acc12	# r8
   1285 	xor	$acc09,$acc09,$acc13
   1286 	xor	$acc10,$acc10,$acc14
   1287 	xor	$acc11,$acc11,$acc15
   1288 
   1289 	xor	$acc00,$acc00,$s0	# r2^r0
   1290 	xor	$acc01,$acc01,$s1
   1291 	xor	$acc02,$acc02,$s2
   1292 	xor	$acc03,$acc03,$s3
   1293 	xor	$acc04,$acc04,$s0	# r4^r0
   1294 	xor	$acc05,$acc05,$s1
   1295 	xor	$acc06,$acc06,$s2
   1296 	xor	$acc07,$acc07,$s3
   1297 ___
   1298 $code.=<<___;
	# Shared by both word sizes. On entry acc00-03 hold r2^r0, acc04-07
	# hold r4^r0 and acc08-11 hold r8, one register per state word,
	# where r0 is the word still in s0-s3. The new state word is
	#   rotr(r0,8) ^ r2 ^ r4 ^ r8 ^ rotr(r8^r2^r0,24)
	#              ^ rotr(r8^r4^r0,16) ^ rotr(r8,8)
	# (the two r0 terms of r2^r0 and r4^r0 cancel each other).
   1299 	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
   1300 	rotrwi	$s1,$s1,8
   1301 	xor	$s0,$s0,$acc00		# ^= r2^r0
   1302 	rotrwi	$s2,$s2,8
   1303 	xor	$s1,$s1,$acc01
   1304 	rotrwi	$s3,$s3,8
   1305 	xor	$s2,$s2,$acc02
   1306 	xor	$s3,$s3,$acc03
   1307 	xor	$acc00,$acc00,$acc08
   1308 	xor	$acc01,$acc01,$acc09
   1309 	xor	$acc02,$acc02,$acc10
   1310 	xor	$acc03,$acc03,$acc11
   1311 	xor	$s0,$s0,$acc04		# ^= r4^r0
   1312 	rotrwi	$acc00,$acc00,24
   1313 	xor	$s1,$s1,$acc05
   1314 	rotrwi	$acc01,$acc01,24
   1315 	xor	$s2,$s2,$acc06
   1316 	rotrwi	$acc02,$acc02,24
   1317 	xor	$s3,$s3,$acc07
   1318 	rotrwi	$acc03,$acc03,24
   1319 	xor	$acc04,$acc04,$acc08
   1320 	xor	$acc05,$acc05,$acc09
   1321 	xor	$acc06,$acc06,$acc10
   1322 	xor	$acc07,$acc07,$acc11
   1323 	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
   1324 	rotrwi	$acc04,$acc04,16
   1325 	xor	$s1,$s1,$acc09
   1326 	rotrwi	$acc05,$acc05,16
   1327 	xor	$s2,$s2,$acc10
   1328 	rotrwi	$acc06,$acc06,16
   1329 	xor	$s3,$s3,$acc11
   1330 	rotrwi	$acc07,$acc07,16
   1331 	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
   1332 	rotrwi	$acc08,$acc08,8
   1333 	xor	$s1,$s1,$acc01
   1334 	rotrwi	$acc09,$acc09,8
   1335 	xor	$s2,$s2,$acc02
   1336 	rotrwi	$acc10,$acc10,8
   1337 	xor	$s3,$s3,$acc03
   1338 	rotrwi	$acc11,$acc11,8
   1339 	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
   1340 	xor	$s1,$s1,$acc05
   1341 	xor	$s2,$s2,$acc06
   1342 	xor	$s3,$s3,$acc07
   1343 	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)	
   1344 	xor	$s1,$s1,$acc09	
   1345 	xor	$s2,$s2,$acc10	
   1346 	xor	$s3,$s3,$acc11	
   1347 
   1348 	b	Ldec_compact_loop
   1349 .align	4
   1350 Ldec_compact_done:
	# loop exit: fold in the four key words loaded during the last pass
	# and return with the result in s0-s3
   1351 	xor	$s0,$s0,$t0
   1352 	xor	$s1,$s1,$t1
   1353 	xor	$s2,$s2,$t2
   1354 	xor	$s3,$s3,$t3
   1355 	blr
   1356 	.long	0
   1357 	.byte	0,12,0x14,0,0,0,0,0
   1358 
	# identification string embedded in the generated output
   1359 .asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
   1360 .align	7
   1361 ___
   1362 
   1363 $code =~ s/\`([^\`]*)\`/eval $1/gem;	# replace every backquoted span with the value of the Perl expression inside it
   1364 print $code;
        # Buffered write errors only surface when the handle is closed, so a
        # failed close must abort the run instead of silently leaving
        # truncated assembler output behind.
   1365 close STDOUT or die "error closing STDOUT: $!";
   1366