Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for ARMv4
     11 
     12 # January 2007.
     13 #
     14 # Code uses a single 1K S-box and is >2 times faster than code generated
     15 # by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
     16 # allows merging a logical or arithmetic operation with a shift or rotate
     17 # in one instruction and emitting the combined result every cycle. The module
     18 # is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
     19 # key [on a single-issue Xscale PXA250 core].
     20 
     21 # May 2007.
     22 #
     23 # AES_set_[en|de]crypt_key is added.
     24 
     25 # July 2010.
     26 #
     27 # Rescheduling for dual-issue pipeline resulted in 12% improvement on
     28 # Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
     29 
     30 # February 2011.
     31 #
     32 # Profiler-assisted and platform-specific optimization resulted in 16%
     33 # improvement on Cortex A8 core and ~21.5 cycles per byte.
     34 
     35 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like "name.ext"; that one is the output file
     36 open STDOUT,'>',$output or die "can't open $output: $!" if (defined($output) && length($output));	# no file argument: keep the inherited STDOUT; a named file that cannot be created is now fatal
     37 
     38 $s0="r0";	# Symbolic names for the ARM registers used by the assembly below.
     39 $s1="r1";	# $s0-$s3: the four 32-bit words of the block being processed
     40 $s2="r2";	# (the key-setup code also keeps the most recent key words here).
     41 $s3="r3";
     42 $t1="r4";	# $t1-$t3: scratch registers for loaded bytes, table entries
     43 $t2="r5";	# and round-key words.
     44 $t3="r6";
     45 $i1="r7";	# $i1-$i3: byte values extracted from a state word and used as
     46 $i2="r8";	# lookup-table indices; also reused as general scratch.
     47 $i3="r9";
     48 
     49 $tbl="r10";	# base address of the lookup table (AES_Te or AES_Td)
     50 $key="r11";	# pointer into the key schedule
     51 $rounds="r12";	# round/loop counter; on function entry it first holds the input pointer
     52 
     53 $code=<<___;
     54 #include "arm_arch.h"
     55 .text
     56 .code	32
     57 
     58 .type	AES_Te,%object
     59 .align	5
     60 AES_Te:
     61 .word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     62 .word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     63 .word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     64 .word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     65 .word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     66 .word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     67 .word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     68 .word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     69 .word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     70 .word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     71 .word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     72 .word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     73 .word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     74 .word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     75 .word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     76 .word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     77 .word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     78 .word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     79 .word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     80 .word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     81 .word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     82 .word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     83 .word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     84 .word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
     85 .word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
     86 .word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
     87 .word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
     88 .word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
     89 .word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
     90 .word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
     91 .word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
     92 .word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
     93 .word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
     94 .word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
     95 .word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
     96 .word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
     97 .word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
     98 .word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
     99 .word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
    100 .word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
    101 .word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
    102 .word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
    103 .word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
    104 .word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
    105 .word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
    106 .word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
    107 .word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
    108 .word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
    109 .word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
    110 .word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
    111 .word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
    112 .word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
    113 .word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    114 .word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    115 .word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    116 .word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    117 .word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    118 .word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    119 .word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    120 .word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    121 .word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    122 .word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    123 .word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    124 .word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    125 @ Te4[256]
    126 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    127 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    128 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    129 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    130 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    131 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    132 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    133 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    134 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    135 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    136 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    137 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    138 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    139 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    140 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    141 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    142 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    143 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    144 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    145 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    146 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    147 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    148 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    149 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    150 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    151 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    152 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    153 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    154 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    155 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    156 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    157 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    158 @ rcon[]
    159 .word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    160 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    161 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    162 .size	AES_Te,.-AES_Te
    163 
    164 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
    165 @ 		 const AES_KEY *key) {
    166 .global AES_encrypt
    167 .type   AES_encrypt,%function
    168 .align	5
    169 AES_encrypt:
    170 	sub	r3,pc,#8		@ AES_encrypt
    171 	stmdb   sp!,{r1,r4-r12,lr}
    172 	mov	$rounds,r0		@ inp
    173 	mov	$key,r2
    174 	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
    175 #if __ARM_ARCH__<7
    176 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    177 	ldrb	$t1,[$rounds,#2]	@ manner...
    178 	ldrb	$t2,[$rounds,#1]
    179 	ldrb	$t3,[$rounds,#0]
    180 	orr	$s0,$s0,$t1,lsl#8
    181 	ldrb	$s1,[$rounds,#7]
    182 	orr	$s0,$s0,$t2,lsl#16
    183 	ldrb	$t1,[$rounds,#6]
    184 	orr	$s0,$s0,$t3,lsl#24
    185 	ldrb	$t2,[$rounds,#5]
    186 	ldrb	$t3,[$rounds,#4]
    187 	orr	$s1,$s1,$t1,lsl#8
    188 	ldrb	$s2,[$rounds,#11]
    189 	orr	$s1,$s1,$t2,lsl#16
    190 	ldrb	$t1,[$rounds,#10]
    191 	orr	$s1,$s1,$t3,lsl#24
    192 	ldrb	$t2,[$rounds,#9]
    193 	ldrb	$t3,[$rounds,#8]
    194 	orr	$s2,$s2,$t1,lsl#8
    195 	ldrb	$s3,[$rounds,#15]
    196 	orr	$s2,$s2,$t2,lsl#16
    197 	ldrb	$t1,[$rounds,#14]
    198 	orr	$s2,$s2,$t3,lsl#24
    199 	ldrb	$t2,[$rounds,#13]
    200 	ldrb	$t3,[$rounds,#12]
    201 	orr	$s3,$s3,$t1,lsl#8
    202 	orr	$s3,$s3,$t2,lsl#16
    203 	orr	$s3,$s3,$t3,lsl#24
    204 #else
    205 	ldr	$s0,[$rounds,#0]
    206 	ldr	$s1,[$rounds,#4]
    207 	ldr	$s2,[$rounds,#8]
    208 	ldr	$s3,[$rounds,#12]
    209 #ifdef __ARMEL__
    210 	rev	$s0,$s0
    211 	rev	$s1,$s1
    212 	rev	$s2,$s2
    213 	rev	$s3,$s3
    214 #endif
    215 #endif
    216 	bl	_armv4_AES_encrypt
    217 
    218 	ldr	$rounds,[sp],#4		@ pop out
    219 #if __ARM_ARCH__>=7
    220 #ifdef __ARMEL__
    221 	rev	$s0,$s0
    222 	rev	$s1,$s1
    223 	rev	$s2,$s2
    224 	rev	$s3,$s3
    225 #endif
    226 	str	$s0,[$rounds,#0]
    227 	str	$s1,[$rounds,#4]
    228 	str	$s2,[$rounds,#8]
    229 	str	$s3,[$rounds,#12]
    230 #else
    231 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    232 	mov	$t2,$s0,lsr#16		@ manner...
    233 	mov	$t3,$s0,lsr#8
    234 	strb	$t1,[$rounds,#0]
    235 	strb	$t2,[$rounds,#1]
    236 	mov	$t1,$s1,lsr#24
    237 	strb	$t3,[$rounds,#2]
    238 	mov	$t2,$s1,lsr#16
    239 	strb	$s0,[$rounds,#3]
    240 	mov	$t3,$s1,lsr#8
    241 	strb	$t1,[$rounds,#4]
    242 	strb	$t2,[$rounds,#5]
    243 	mov	$t1,$s2,lsr#24
    244 	strb	$t3,[$rounds,#6]
    245 	mov	$t2,$s2,lsr#16
    246 	strb	$s1,[$rounds,#7]
    247 	mov	$t3,$s2,lsr#8
    248 	strb	$t1,[$rounds,#8]
    249 	strb	$t2,[$rounds,#9]
    250 	mov	$t1,$s3,lsr#24
    251 	strb	$t3,[$rounds,#10]
    252 	mov	$t2,$s3,lsr#16
    253 	strb	$s2,[$rounds,#11]
    254 	mov	$t3,$s3,lsr#8
    255 	strb	$t1,[$rounds,#12]
    256 	strb	$t2,[$rounds,#13]
    257 	strb	$t3,[$rounds,#14]
    258 	strb	$s3,[$rounds,#15]
    259 #endif
    260 #if __ARM_ARCH__>=5
    261 	ldmia	sp!,{r4-r12,pc}
    262 #else
    263 	ldmia   sp!,{r4-r12,lr}
    264 	tst	lr,#1
    265 	moveq	pc,lr			@ be binary compatible with V4, yet
    266 	bx	lr			@ interoperable with Thumb ISA:-)
    267 #endif
    268 .size	AES_encrypt,.-AES_encrypt
    269 
    270 .type   _armv4_AES_encrypt,%function
    271 .align	2
    272 _armv4_AES_encrypt:
    273 	str	lr,[sp,#-4]!		@ push lr
    274 	ldmia	$key!,{$t1-$i1}
    275 	eor	$s0,$s0,$t1
    276 	ldr	$rounds,[$key,#240-16]
    277 	eor	$s1,$s1,$t2
    278 	eor	$s2,$s2,$t3
    279 	eor	$s3,$s3,$i1
    280 	sub	$rounds,$rounds,#1
    281 	mov	lr,#255
    282 
    283 	and	$i1,lr,$s0
    284 	and	$i2,lr,$s0,lsr#8
    285 	and	$i3,lr,$s0,lsr#16
    286 	mov	$s0,$s0,lsr#24
    287 .Lenc_loop:
    288 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    289 	and	$i1,lr,$s1,lsr#16	@ i0
    290 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    291 	and	$i2,lr,$s1
    292 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    293 	and	$i3,lr,$s1,lsr#8
    294 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    295 	mov	$s1,$s1,lsr#24
    296 
    297 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    298 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    299 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    300 	eor	$s0,$s0,$i1,ror#8
    301 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    302 	and	$i1,lr,$s2,lsr#8	@ i0
    303 	eor	$t2,$t2,$i2,ror#8
    304 	and	$i2,lr,$s2,lsr#16	@ i1
    305 	eor	$t3,$t3,$i3,ror#8
    306 	and	$i3,lr,$s2
    307 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    308 	eor	$s1,$s1,$t1,ror#24
    309 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    310 	mov	$s2,$s2,lsr#24
    311 
    312 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    313 	eor	$s0,$s0,$i1,ror#16
    314 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    315 	and	$i1,lr,$s3		@ i0
    316 	eor	$s1,$s1,$i2,ror#8
    317 	and	$i2,lr,$s3,lsr#8	@ i1
    318 	eor	$t3,$t3,$i3,ror#16
    319 	and	$i3,lr,$s3,lsr#16	@ i2
    320 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    321 	eor	$s2,$s2,$t2,ror#16
    322 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    323 	mov	$s3,$s3,lsr#24
    324 
    325 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    326 	eor	$s0,$s0,$i1,ror#24
    327 	ldr	$i1,[$key],#16
    328 	eor	$s1,$s1,$i2,ror#16
    329 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    330 	eor	$s2,$s2,$i3,ror#8
    331 	ldr	$t1,[$key,#-12]
    332 	eor	$s3,$s3,$t3,ror#8
    333 
    334 	ldr	$t2,[$key,#-8]
    335 	eor	$s0,$s0,$i1
    336 	ldr	$t3,[$key,#-4]
    337 	and	$i1,lr,$s0
    338 	eor	$s1,$s1,$t1
    339 	and	$i2,lr,$s0,lsr#8
    340 	eor	$s2,$s2,$t2
    341 	and	$i3,lr,$s0,lsr#16
    342 	eor	$s3,$s3,$t3
    343 	mov	$s0,$s0,lsr#24
    344 
    345 	subs	$rounds,$rounds,#1
    346 	bne	.Lenc_loop
    347 
    348 	add	$tbl,$tbl,#2
    349 
    350 	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    351 	and	$i1,lr,$s1,lsr#16	@ i0
    352 	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    353 	and	$i2,lr,$s1
    354 	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    355 	and	$i3,lr,$s1,lsr#8
    356 	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    357 	mov	$s1,$s1,lsr#24
    358 
    359 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    360 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    361 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    362 	eor	$s0,$i1,$s0,lsl#8
    363 	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    364 	and	$i1,lr,$s2,lsr#8	@ i0
    365 	eor	$t2,$i2,$t2,lsl#8
    366 	and	$i2,lr,$s2,lsr#16	@ i1
    367 	eor	$t3,$i3,$t3,lsl#8
    368 	and	$i3,lr,$s2
    369 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    370 	eor	$s1,$t1,$s1,lsl#24
    371 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    372 	mov	$s2,$s2,lsr#24
    373 
    374 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    375 	eor	$s0,$i1,$s0,lsl#8
    376 	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    377 	and	$i1,lr,$s3		@ i0
    378 	eor	$s1,$s1,$i2,lsl#16
    379 	and	$i2,lr,$s3,lsr#8	@ i1
    380 	eor	$t3,$i3,$t3,lsl#8
    381 	and	$i3,lr,$s3,lsr#16	@ i2
    382 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    383 	eor	$s2,$t2,$s2,lsl#24
    384 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    385 	mov	$s3,$s3,lsr#24
    386 
    387 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    388 	eor	$s0,$i1,$s0,lsl#8
    389 	ldr	$i1,[$key,#0]
    390 	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    391 	eor	$s1,$s1,$i2,lsl#8
    392 	ldr	$t1,[$key,#4]
    393 	eor	$s2,$s2,$i3,lsl#16
    394 	ldr	$t2,[$key,#8]
    395 	eor	$s3,$t3,$s3,lsl#24
    396 	ldr	$t3,[$key,#12]
    397 
    398 	eor	$s0,$s0,$i1
    399 	eor	$s1,$s1,$t1
    400 	eor	$s2,$s2,$t2
    401 	eor	$s3,$s3,$t3
    402 
    403 	sub	$tbl,$tbl,#2
    404 	ldr	pc,[sp],#4		@ pop and return
    405 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    406 
    407 .global private_AES_set_encrypt_key
    408 .type   private_AES_set_encrypt_key,%function
    409 .align	5
    410 private_AES_set_encrypt_key:
    411 _armv4_AES_set_encrypt_key:
    412 	sub	r3,pc,#8		@ AES_set_encrypt_key
    413 	teq	r0,#0
    414 	moveq	r0,#-1
    415 	beq	.Labrt
    416 	teq	r2,#0
    417 	moveq	r0,#-1
    418 	beq	.Labrt
    419 
    420 	teq	r1,#128
    421 	beq	.Lok
    422 	teq	r1,#192
    423 	beq	.Lok
    424 	teq	r1,#256
    425 	movne	r0,#-1
    426 	bne	.Labrt
    427 
    428 .Lok:	stmdb   sp!,{r4-r12,lr}
    429 	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
    430 
    431 	mov	$rounds,r0		@ inp
    432 	mov	lr,r1			@ bits
    433 	mov	$key,r2			@ key
    434 
    435 #if __ARM_ARCH__<7
    436 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    437 	ldrb	$t1,[$rounds,#2]	@ manner...
    438 	ldrb	$t2,[$rounds,#1]
    439 	ldrb	$t3,[$rounds,#0]
    440 	orr	$s0,$s0,$t1,lsl#8
    441 	ldrb	$s1,[$rounds,#7]
    442 	orr	$s0,$s0,$t2,lsl#16
    443 	ldrb	$t1,[$rounds,#6]
    444 	orr	$s0,$s0,$t3,lsl#24
    445 	ldrb	$t2,[$rounds,#5]
    446 	ldrb	$t3,[$rounds,#4]
    447 	orr	$s1,$s1,$t1,lsl#8
    448 	ldrb	$s2,[$rounds,#11]
    449 	orr	$s1,$s1,$t2,lsl#16
    450 	ldrb	$t1,[$rounds,#10]
    451 	orr	$s1,$s1,$t3,lsl#24
    452 	ldrb	$t2,[$rounds,#9]
    453 	ldrb	$t3,[$rounds,#8]
    454 	orr	$s2,$s2,$t1,lsl#8
    455 	ldrb	$s3,[$rounds,#15]
    456 	orr	$s2,$s2,$t2,lsl#16
    457 	ldrb	$t1,[$rounds,#14]
    458 	orr	$s2,$s2,$t3,lsl#24
    459 	ldrb	$t2,[$rounds,#13]
    460 	ldrb	$t3,[$rounds,#12]
    461 	orr	$s3,$s3,$t1,lsl#8
    462 	str	$s0,[$key],#16
    463 	orr	$s3,$s3,$t2,lsl#16
    464 	str	$s1,[$key,#-12]
    465 	orr	$s3,$s3,$t3,lsl#24
    466 	str	$s2,[$key,#-8]
    467 	str	$s3,[$key,#-4]
    468 #else
    469 	ldr	$s0,[$rounds,#0]
    470 	ldr	$s1,[$rounds,#4]
    471 	ldr	$s2,[$rounds,#8]
    472 	ldr	$s3,[$rounds,#12]
    473 #ifdef __ARMEL__
    474 	rev	$s0,$s0
    475 	rev	$s1,$s1
    476 	rev	$s2,$s2
    477 	rev	$s3,$s3
    478 #endif
    479 	str	$s0,[$key],#16
    480 	str	$s1,[$key,#-12]
    481 	str	$s2,[$key,#-8]
    482 	str	$s3,[$key,#-4]
    483 #endif
    484 
    485 	teq	lr,#128
    486 	bne	.Lnot128
    487 	mov	$rounds,#10
    488 	str	$rounds,[$key,#240-16]
    489 	add	$t3,$tbl,#256			@ rcon
    490 	mov	lr,#255
    491 
    492 .L128_loop:
    493 	and	$t2,lr,$s3,lsr#24
    494 	and	$i1,lr,$s3,lsr#16
    495 	ldrb	$t2,[$tbl,$t2]
    496 	and	$i2,lr,$s3,lsr#8
    497 	ldrb	$i1,[$tbl,$i1]
    498 	and	$i3,lr,$s3
    499 	ldrb	$i2,[$tbl,$i2]
    500 	orr	$t2,$t2,$i1,lsl#24
    501 	ldrb	$i3,[$tbl,$i3]
    502 	orr	$t2,$t2,$i2,lsl#16
    503 	ldr	$t1,[$t3],#4			@ rcon[i++]
    504 	orr	$t2,$t2,$i3,lsl#8
    505 	eor	$t2,$t2,$t1
    506 	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    507 	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    508 	str	$s0,[$key],#16
    509 	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    510 	str	$s1,[$key,#-12]
    511 	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    512 	str	$s2,[$key,#-8]
    513 	subs	$rounds,$rounds,#1
    514 	str	$s3,[$key,#-4]
    515 	bne	.L128_loop
    516 	sub	r2,$key,#176
    517 	b	.Ldone
    518 
    519 .Lnot128:
    520 #if __ARM_ARCH__<7
    521 	ldrb	$i2,[$rounds,#19]
    522 	ldrb	$t1,[$rounds,#18]
    523 	ldrb	$t2,[$rounds,#17]
    524 	ldrb	$t3,[$rounds,#16]
    525 	orr	$i2,$i2,$t1,lsl#8
    526 	ldrb	$i3,[$rounds,#23]
    527 	orr	$i2,$i2,$t2,lsl#16
    528 	ldrb	$t1,[$rounds,#22]
    529 	orr	$i2,$i2,$t3,lsl#24
    530 	ldrb	$t2,[$rounds,#21]
    531 	ldrb	$t3,[$rounds,#20]
    532 	orr	$i3,$i3,$t1,lsl#8
    533 	orr	$i3,$i3,$t2,lsl#16
    534 	str	$i2,[$key],#8
    535 	orr	$i3,$i3,$t3,lsl#24
    536 	str	$i3,[$key,#-4]
    537 #else
    538 	ldr	$i2,[$rounds,#16]
    539 	ldr	$i3,[$rounds,#20]
    540 #ifdef __ARMEL__
    541 	rev	$i2,$i2
    542 	rev	$i3,$i3
    543 #endif
    544 	str	$i2,[$key],#8
    545 	str	$i3,[$key,#-4]
    546 #endif
    547 
    548 	teq	lr,#192
    549 	bne	.Lnot192
    550 	mov	$rounds,#12
    551 	str	$rounds,[$key,#240-24]
    552 	add	$t3,$tbl,#256			@ rcon
    553 	mov	lr,#255
    554 	mov	$rounds,#8
    555 
    556 .L192_loop:
    557 	and	$t2,lr,$i3,lsr#24
    558 	and	$i1,lr,$i3,lsr#16
    559 	ldrb	$t2,[$tbl,$t2]
    560 	and	$i2,lr,$i3,lsr#8
    561 	ldrb	$i1,[$tbl,$i1]
    562 	and	$i3,lr,$i3
    563 	ldrb	$i2,[$tbl,$i2]
    564 	orr	$t2,$t2,$i1,lsl#24
    565 	ldrb	$i3,[$tbl,$i3]
    566 	orr	$t2,$t2,$i2,lsl#16
    567 	ldr	$t1,[$t3],#4			@ rcon[i++]
    568 	orr	$t2,$t2,$i3,lsl#8
    569 	eor	$i3,$t2,$t1
    570 	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    571 	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    572 	str	$s0,[$key],#24
    573 	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    574 	str	$s1,[$key,#-20]
    575 	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    576 	str	$s2,[$key,#-16]
    577 	subs	$rounds,$rounds,#1
    578 	str	$s3,[$key,#-12]
    579 	subeq	r2,$key,#216
    580 	beq	.Ldone
    581 
    582 	ldr	$i1,[$key,#-32]
    583 	ldr	$i2,[$key,#-28]
    584 	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    585 	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    586 	str	$i1,[$key,#-8]
    587 	str	$i3,[$key,#-4]
    588 	b	.L192_loop
    589 
    590 .Lnot192:
    591 #if __ARM_ARCH__<7
    592 	ldrb	$i2,[$rounds,#27]
    593 	ldrb	$t1,[$rounds,#26]
    594 	ldrb	$t2,[$rounds,#25]
    595 	ldrb	$t3,[$rounds,#24]
    596 	orr	$i2,$i2,$t1,lsl#8
    597 	ldrb	$i3,[$rounds,#31]
    598 	orr	$i2,$i2,$t2,lsl#16
    599 	ldrb	$t1,[$rounds,#30]
    600 	orr	$i2,$i2,$t3,lsl#24
    601 	ldrb	$t2,[$rounds,#29]
    602 	ldrb	$t3,[$rounds,#28]
    603 	orr	$i3,$i3,$t1,lsl#8
    604 	orr	$i3,$i3,$t2,lsl#16
    605 	str	$i2,[$key],#8
    606 	orr	$i3,$i3,$t3,lsl#24
    607 	str	$i3,[$key,#-4]
    608 #else
    609 	ldr	$i2,[$rounds,#24]
    610 	ldr	$i3,[$rounds,#28]
    611 #ifdef __ARMEL__
    612 	rev	$i2,$i2
    613 	rev	$i3,$i3
    614 #endif
    615 	str	$i2,[$key],#8
    616 	str	$i3,[$key,#-4]
    617 #endif
    618 
    619 	mov	$rounds,#14
    620 	str	$rounds,[$key,#240-32]
    621 	add	$t3,$tbl,#256			@ rcon
    622 	mov	lr,#255
    623 	mov	$rounds,#7
    624 
    625 .L256_loop:
    626 	and	$t2,lr,$i3,lsr#24
    627 	and	$i1,lr,$i3,lsr#16
    628 	ldrb	$t2,[$tbl,$t2]
    629 	and	$i2,lr,$i3,lsr#8
    630 	ldrb	$i1,[$tbl,$i1]
    631 	and	$i3,lr,$i3
    632 	ldrb	$i2,[$tbl,$i2]
    633 	orr	$t2,$t2,$i1,lsl#24
    634 	ldrb	$i3,[$tbl,$i3]
    635 	orr	$t2,$t2,$i2,lsl#16
    636 	ldr	$t1,[$t3],#4			@ rcon[i++]
    637 	orr	$t2,$t2,$i3,lsl#8
    638 	eor	$i3,$t2,$t1
    639 	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    640 	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    641 	str	$s0,[$key],#32
    642 	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    643 	str	$s1,[$key,#-28]
    644 	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    645 	str	$s2,[$key,#-24]
    646 	subs	$rounds,$rounds,#1
    647 	str	$s3,[$key,#-20]
    648 	subeq	r2,$key,#256
    649 	beq	.Ldone
    650 
    651 	and	$t2,lr,$s3
    652 	and	$i1,lr,$s3,lsr#8
    653 	ldrb	$t2,[$tbl,$t2]
    654 	and	$i2,lr,$s3,lsr#16
    655 	ldrb	$i1,[$tbl,$i1]
    656 	and	$i3,lr,$s3,lsr#24
    657 	ldrb	$i2,[$tbl,$i2]
    658 	orr	$t2,$t2,$i1,lsl#8
    659 	ldrb	$i3,[$tbl,$i3]
    660 	orr	$t2,$t2,$i2,lsl#16
    661 	ldr	$t1,[$key,#-48]
    662 	orr	$t2,$t2,$i3,lsl#24
    663 
    664 	ldr	$i1,[$key,#-44]
    665 	ldr	$i2,[$key,#-40]
    666 	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    667 	ldr	$i3,[$key,#-36]
    668 	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    669 	str	$t1,[$key,#-16]
    670 	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    671 	str	$i1,[$key,#-12]
    672 	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    673 	str	$i2,[$key,#-8]
    674 	str	$i3,[$key,#-4]
    675 	b	.L256_loop
    676 
    677 .Ldone:	mov	r0,#0
    678 	ldmia   sp!,{r4-r12,lr}
    679 .Labrt:	tst	lr,#1
    680 	moveq	pc,lr			@ be binary compatible with V4, yet
    681 	bx	lr			@ interoperable with Thumb ISA:-)
    682 .size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
    683 
    684 .global private_AES_set_decrypt_key
    685 .type   private_AES_set_decrypt_key,%function
    686 .align	5
    687 private_AES_set_decrypt_key:
    688 	str	lr,[sp,#-4]!            @ push lr
    689 	bl	_armv4_AES_set_encrypt_key
    690 	teq	r0,#0
    691 	ldrne	lr,[sp],#4              @ pop lr
    692 	bne	.Labrt
    693 
    694 	stmdb   sp!,{r4-r12}
    695 
    696 	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
    697 	mov	$key,r2			@ which is AES_KEY *key
    698 	mov	$i1,r2
    699 	add	$i2,r2,$rounds,lsl#4
    700 
    701 .Linv:	ldr	$s0,[$i1]
    702 	ldr	$s1,[$i1,#4]
    703 	ldr	$s2,[$i1,#8]
    704 	ldr	$s3,[$i1,#12]
    705 	ldr	$t1,[$i2]
    706 	ldr	$t2,[$i2,#4]
    707 	ldr	$t3,[$i2,#8]
    708 	ldr	$i3,[$i2,#12]
    709 	str	$s0,[$i2],#-16
    710 	str	$s1,[$i2,#16+4]
    711 	str	$s2,[$i2,#16+8]
    712 	str	$s3,[$i2,#16+12]
    713 	str	$t1,[$i1],#16
    714 	str	$t2,[$i1,#-12]
    715 	str	$t3,[$i1,#-8]
    716 	str	$i3,[$i1,#-4]
    717 	teq	$i1,$i2
    718 	bne	.Linv
    719 ___
    720 $mask80=$i1;	# r7 is reused in the .Lmix loop to hold the constant 0x80808080
    721 $mask1b=$i2;	# r8 is reused to hold 0x1b1b1b1b
    722 $mask7f=$i3;	# r9 is reused to hold the complement of $mask80 (0x7f7f7f7f)
    723 $code.=<<___;
    724 	ldr	$s0,[$key,#16]!		@ prefetch tp1
    725 	mov	$mask80,#0x80
    726 	mov	$mask1b,#0x1b
    727 	orr	$mask80,$mask80,#0x8000
    728 	orr	$mask1b,$mask1b,#0x1b00
    729 	orr	$mask80,$mask80,$mask80,lsl#16
    730 	orr	$mask1b,$mask1b,$mask1b,lsl#16
    731 	sub	$rounds,$rounds,#1
    732 	mvn	$mask7f,$mask80
    733 	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
    734 
    735 .Lmix:	and	$t1,$s0,$mask80
    736 	and	$s1,$s0,$mask7f
    737 	sub	$t1,$t1,$t1,lsr#7
    738 	and	$t1,$t1,$mask1b
    739 	eor	$s1,$t1,$s1,lsl#1	@ tp2
    740 
    741 	and	$t1,$s1,$mask80
    742 	and	$s2,$s1,$mask7f
    743 	sub	$t1,$t1,$t1,lsr#7
    744 	and	$t1,$t1,$mask1b
    745 	eor	$s2,$t1,$s2,lsl#1	@ tp4
    746 
    747 	and	$t1,$s2,$mask80
    748 	and	$s3,$s2,$mask7f
    749 	sub	$t1,$t1,$t1,lsr#7
    750 	and	$t1,$t1,$mask1b
    751 	eor	$s3,$t1,$s3,lsl#1	@ tp8
    752 
    753 	eor	$t1,$s1,$s2
    754 	eor	$t2,$s0,$s3		@ tp9
    755 	eor	$t1,$t1,$s3		@ tpe
    756 	eor	$t1,$t1,$s1,ror#24
    757 	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    758 	eor	$t1,$t1,$s2,ror#16
    759 	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    760 	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    761 
    762 	ldr	$s0,[$key,#4]		@ prefetch tp1
    763 	str	$t1,[$key],#4
    764 	subs	$rounds,$rounds,#1
    765 	bne	.Lmix
    766 
    767 	mov	r0,#0
    768 #if __ARM_ARCH__>=5
    769 	ldmia	sp!,{r4-r12,pc}
    770 #else
    771 	ldmia   sp!,{r4-r12,lr}
    772 	tst	lr,#1
    773 	moveq	pc,lr			@ be binary compatible with V4, yet
    774 	bx	lr			@ interoperable with Thumb ISA:-)
    775 #endif
    776 .size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
    777 
    778 .type	AES_Td,%object
    779 .align	5
    780 AES_Td:
    781 .word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    782 .word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    783 .word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    784 .word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    785 .word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    786 .word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    787 .word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    788 .word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    789 .word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    790 .word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    791 .word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    792 .word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    793 .word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    794 .word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    795 .word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    796 .word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    797 .word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    798 .word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    799 .word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    800 .word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    801 .word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    802 .word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    803 .word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    804 .word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    805 .word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    806 .word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    807 .word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    808 .word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    809 .word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    810 .word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    811 .word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    812 .word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    813 .word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    814 .word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    815 .word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    816 .word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    817 .word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    818 .word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    819 .word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    820 .word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    821 .word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    822 .word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    823 .word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    824 .word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    825 .word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    826 .word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    827 .word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    828 .word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    829 .word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    830 .word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    831 .word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    832 .word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    833 .word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    834 .word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    835 .word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    836 .word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    837 .word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    838 .word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    839 .word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    840 .word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    841 .word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    842 .word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    843 .word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    844 .word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    845 @ Td4[256]
    846 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    847 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    848 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    849 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    850 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    851 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    852 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    853 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    854 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    855 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    856 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    857 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    858 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    859 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    860 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    861 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    862 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    863 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    864 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    865 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    866 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    867 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    868 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    869 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    870 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    871 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    872 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    873 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    874 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    875 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    876 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    877 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    878 .size	AES_Td,.-AES_Td
    879 
    880 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
    881 @ 		 const AES_KEY *key) {
        @
        @ Exported wrapper that decrypts one 16-byte block.
        @   r0 = in, r1 = out, r2 = key
        @ It stacks r1, r4-r12 and lr, loads the input into r0-r3 as four
        @ words with input byte 0 in the most significant position, calls
        @ _armv4_AES_decrypt, then writes r0-r3 to out in the same byte order.
    882 .global AES_decrypt
    883 .type   AES_decrypt,%function
    884 .align	5
    885 AES_decrypt:
        	@ pc reads as the address of this instruction plus 8, so r3 ends
        	@ up holding the address of the AES_decrypt label.
    886 	sub	r3,pc,#8		@ AES_decrypt
        	@ r1 (out) is the lowest register in the list, so it is stored at
        	@ the lowest address and can be popped on its own after the call.
    887 	stmdb   sp!,{r1,r4-r12,lr}
        	@ r0 and r2 are about to be reused as state words, so the input
        	@ pointer moves to r12 and the key pointer to r11.
    888 	mov	$rounds,r0		@ inp
    889 	mov	$key,r2
        	@ Table address derived from the label address in r3 and the
        	@ assemble-time distance between AES_decrypt and AES_Td.
    890 	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
        	@ Before ARMv7: build each state word from four single-byte loads,
        	@ which gives the same result on either endianness.
    891 #if __ARM_ARCH__<7
    892 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    893 	ldrb	$t1,[$rounds,#2]	@ manner...
    894 	ldrb	$t2,[$rounds,#1]
    895 	ldrb	$t3,[$rounds,#0]
    896 	orr	$s0,$s0,$t1,lsl#8
    897 	ldrb	$s1,[$rounds,#7]
    898 	orr	$s0,$s0,$t2,lsl#16
    899 	ldrb	$t1,[$rounds,#6]
    900 	orr	$s0,$s0,$t3,lsl#24
    901 	ldrb	$t2,[$rounds,#5]
    902 	ldrb	$t3,[$rounds,#4]
    903 	orr	$s1,$s1,$t1,lsl#8
    904 	ldrb	$s2,[$rounds,#11]
    905 	orr	$s1,$s1,$t2,lsl#16
    906 	ldrb	$t1,[$rounds,#10]
    907 	orr	$s1,$s1,$t3,lsl#24
    908 	ldrb	$t2,[$rounds,#9]
    909 	ldrb	$t3,[$rounds,#8]
    910 	orr	$s2,$s2,$t1,lsl#8
    911 	ldrb	$s3,[$rounds,#15]
    912 	orr	$s2,$s2,$t2,lsl#16
    913 	ldrb	$t1,[$rounds,#14]
    914 	orr	$s2,$s2,$t3,lsl#24
    915 	ldrb	$t2,[$rounds,#13]
    916 	ldrb	$t3,[$rounds,#12]
    917 	orr	$s3,$s3,$t1,lsl#8
    918 	orr	$s3,$s3,$t2,lsl#16
    919 	orr	$s3,$s3,$t3,lsl#24
    920 #else
        	@ ARMv7 and later: plain word loads.
        	@ NOTE(review): this path issues word loads on the caller-supplied
        	@ pointer as is; presumably unaligned access is tolerated on the
        	@ targeted cores - confirm.
    921 	ldr	$s0,[$rounds,#0]
    922 	ldr	$s1,[$rounds,#4]
    923 	ldr	$s2,[$rounds,#8]
    924 	ldr	$s3,[$rounds,#12]
    925 #ifdef __ARMEL__
        	@ Little-endian: byte-swap so that input byte 0 becomes the most
        	@ significant byte, matching the byte-wise path above.
    926 	rev	$s0,$s0
    927 	rev	$s1,$s1
    928 	rev	$s2,$s2
    929 	rev	$s3,$s3
    930 #endif
    931 #endif
    932 	bl	_armv4_AES_decrypt
    933 
        	@ Only the saved r1 is popped here; r12 now addresses the output.
    934 	ldr	$rounds,[sp],#4		@ pop out
    935 #if __ARM_ARCH__>=7
    936 #ifdef __ARMEL__
        	@ Undo the byte swap applied on input before storing whole words.
    937 	rev	$s0,$s0
    938 	rev	$s1,$s1
    939 	rev	$s2,$s2
    940 	rev	$s3,$s3
    941 #endif
    942 	str	$s0,[$rounds,#0]
    943 	str	$s1,[$rounds,#4]
    944 	str	$s2,[$rounds,#8]
    945 	str	$s3,[$rounds,#12]
    946 #else
        	@ Byte-wise store: the most significant byte of each word goes to
        	@ the lowest address; strb writes only the low 8 bits of a register,
        	@ so the shifted copies need no masking.
    947 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    948 	mov	$t2,$s0,lsr#16		@ manner...
    949 	mov	$t3,$s0,lsr#8
    950 	strb	$t1,[$rounds,#0]
    951 	strb	$t2,[$rounds,#1]
    952 	mov	$t1,$s1,lsr#24
    953 	strb	$t3,[$rounds,#2]
    954 	mov	$t2,$s1,lsr#16
    955 	strb	$s0,[$rounds,#3]
    956 	mov	$t3,$s1,lsr#8
    957 	strb	$t1,[$rounds,#4]
    958 	strb	$t2,[$rounds,#5]
    959 	mov	$t1,$s2,lsr#24
    960 	strb	$t3,[$rounds,#6]
    961 	mov	$t2,$s2,lsr#16
    962 	strb	$s1,[$rounds,#7]
    963 	mov	$t3,$s2,lsr#8
    964 	strb	$t1,[$rounds,#8]
    965 	strb	$t2,[$rounds,#9]
    966 	mov	$t1,$s3,lsr#24
    967 	strb	$t3,[$rounds,#10]
    968 	mov	$t2,$s3,lsr#16
    969 	strb	$s2,[$rounds,#11]
    970 	mov	$t3,$s3,lsr#8
    971 	strb	$t1,[$rounds,#12]
    972 	strb	$t2,[$rounds,#13]
    973 	strb	$t3,[$rounds,#14]
    974 	strb	$s3,[$rounds,#15]
    975 #endif
    976 #if __ARM_ARCH__>=5
        	@ Restore r4-r12 and return by loading the saved lr into pc.
    977 	ldmia	sp!,{r4-r12,pc}
    978 #else
        	@ The generator script rewrites the bx instruction below into a
        	@ literal .word 0xe12fff1e so that this assembles for plain ARMv4.
        	@ NOTE(review): bit 0 of lr presumably flags a Thumb-state caller;
        	@ when it is clear the moveq returns and the bx is never reached.
    979 	ldmia   sp!,{r4-r12,lr}
    980 	tst	lr,#1
    981 	moveq	pc,lr			@ be binary compatible with V4, yet
    982 	bx	lr			@ interoperable with Thumb ISA:-)
    983 #endif
    984 .size	AES_decrypt,.-AES_decrypt
    985 
    986 .type   _armv4_AES_decrypt,%function
    987 .align	2
        @ Register-level decryption core, called by AES_decrypt; no .global
        @ directive is emitted for it in this part of the file.
        @   in:  r0-r3 = state words, r10 = address of AES_Td,
        @        r11 = key pointer (first round key at offset 0)
        @   out: r0-r3 = processed state words
        @ r4-r9 and r12 are overwritten, r11 is left pointing at the last
        @ round key, r10 is restored before returning, and lr is pushed on
        @ entry and popped directly into pc on exit.
    988 _armv4_AES_decrypt:
    989 	str	lr,[sp,#-4]!		@ push lr
        	@ Fetch the first four key words and advance the key pointer by 16;
        	@ they are XORed into the state below, interleaved with other work.
    990 	ldmia	$key!,{$t1-$i1}
    991 	eor	$s0,$s0,$t1
        	@ The key pointer already moved by 16, so this reads the word at
        	@ byte offset 240 from the original key pointer.
        	@ NOTE(review): presumably the rounds member of AES_KEY - confirm
        	@ against the struct layout.
    992 	ldr	$rounds,[$key,#240-16]
    993 	eor	$s1,$s1,$t2
    994 	eor	$s2,$s2,$t3
    995 	eor	$s3,$s3,$i1
        	@ The loop runs one time fewer than the loaded count; the last
        	@ round is handled separately after the loop.
    996 	sub	$rounds,$rounds,#1
        	@ lr is free after the push and serves as the 0xff byte mask.
    997 	mov	lr,#255
    998 
        	@ Split the first state word into three masked byte indices plus
        	@ its top byte, ready for the first pass through the loop.
    999 	and	$i1,lr,$s0,lsr#16
   1000 	and	$i2,lr,$s0,lsr#8
   1001 	and	$i3,lr,$s0
   1002 	mov	$s0,$s0,lsr#24
        	@ Each iteration processes all four state words and mixes in four
        	@ key words.  Every lookup indexes the same word table at r10; the
        	@ ror applied in the eor that consumes a value takes the place of
        	@ the separate Td1, Td2 and Td3 tables named in the comments.
   1003 .Ldec_loop:
   1004 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
   1005 	and	$i1,lr,$s1		@ i0
   1006 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
   1007 	and	$i2,lr,$s1,lsr#16
   1008 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
   1009 	and	$i3,lr,$s1,lsr#8
   1010 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
   1011 	mov	$s1,$s1,lsr#24
   1012 
   1013 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
   1014 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
   1015 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
   1016 	eor	$s0,$s0,$i1,ror#24
   1017 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
   1018 	and	$i1,lr,$s2,lsr#8	@ i0
   1019 	eor	$t2,$i2,$t2,ror#8
   1020 	and	$i2,lr,$s2		@ i1
   1021 	eor	$t3,$i3,$t3,ror#8
   1022 	and	$i3,lr,$s2,lsr#16
   1023 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
   1024 	eor	$s1,$s1,$t1,ror#8
   1025 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
   1026 	mov	$s2,$s2,lsr#24
   1027 
   1028 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
   1029 	eor	$s0,$s0,$i1,ror#16
   1030 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
   1031 	and	$i1,lr,$s3,lsr#16	@ i0
   1032 	eor	$s1,$s1,$i2,ror#24
   1033 	and	$i2,lr,$s3,lsr#8	@ i1
   1034 	eor	$t3,$i3,$t3,ror#8
   1035 	and	$i3,lr,$s3		@ i2
   1036 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
   1037 	eor	$s2,$s2,$t2,ror#8
   1038 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
   1039 	mov	$s3,$s3,lsr#24
   1040 
   1041 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
   1042 	eor	$s0,$s0,$i1,ror#8
        	@ Key word 0 is fetched with post-increment, which advances the
        	@ key pointer by 16; words 1-3 are then read at negative offsets.
   1043 	ldr	$i1,[$key],#16
   1044 	eor	$s1,$s1,$i2,ror#16
   1045 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
   1046 	eor	$s2,$s2,$i3,ror#24
   1047 
   1048 	ldr	$t1,[$key,#-12]
   1049 	eor	$s0,$s0,$i1
   1050 	ldr	$t2,[$key,#-8]
   1051 	eor	$s3,$s3,$t3,ror#8
   1052 	ldr	$t3,[$key,#-4]
        	@ Byte indices of the new first state word are prepared here so
        	@ that they are ready for the next iteration or for the last round.
   1053 	and	$i1,lr,$s0,lsr#16
   1054 	eor	$s1,$s1,$t1
   1055 	and	$i2,lr,$s0,lsr#8
   1056 	eor	$s2,$s2,$t2
   1057 	and	$i3,lr,$s0
   1058 	eor	$s3,$s3,$t3
   1059 	mov	$s0,$s0,lsr#24
   1060 
   1061 	subs	$rounds,$rounds,#1
   1062 	bne	.Ldec_loop
   1063 
        	@ Step over the 1024 bytes of word entries to the byte table
        	@ labelled Td4.
   1064 	add	$tbl,$tbl,#1024
   1065 
        	@ One load every 32 bytes across the 256-byte table.  The loaded
        	@ values are never used: r4-r6 are overwritten by the byte loads
        	@ that follow.
        	@ NOTE(review): presumably this warms the cache ahead of the
        	@ data-dependent byte lookups - confirm intent.
   1066 	ldr	$t2,[$tbl,#0]		@ prefetch Td4
   1067 	ldr	$t3,[$tbl,#32]
   1068 	ldr	$t1,[$tbl,#64]
   1069 	ldr	$t2,[$tbl,#96]
   1070 	ldr	$t3,[$tbl,#128]
   1071 	ldr	$t1,[$tbl,#160]
   1072 	ldr	$t2,[$tbl,#192]
   1073 	ldr	$t3,[$tbl,#224]
   1074 
        	@ Last round: single-byte lookups in Td4, reassembled into words
        	@ with left shifts instead of the rotations used in the loop.
   1075 	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
   1076 	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
   1077 	and	$i1,lr,$s1		@ i0
   1078 	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
   1079 	and	$i2,lr,$s1,lsr#16
   1080 	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
   1081 	and	$i3,lr,$s1,lsr#8
   1082 
   1083 	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
   1084 	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
   1085 	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
   1086 	eor	$s0,$i1,$s0,lsl#24
   1087 	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
   1088 	eor	$s1,$t1,$s1,lsl#8
   1089 	and	$i1,lr,$s2,lsr#8	@ i0
   1090 	eor	$t2,$t2,$i2,lsl#8
   1091 	and	$i2,lr,$s2		@ i1
   1092 	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
   1093 	eor	$t3,$t3,$i3,lsl#8
   1094 	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
   1095 	and	$i3,lr,$s2,lsr#16
   1096 
   1097 	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
   1098 	eor	$s0,$s0,$i1,lsl#8
   1099 	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
   1100 	eor	$s1,$i2,$s1,lsl#16
   1101 	and	$i1,lr,$s3,lsr#16	@ i0
   1102 	eor	$s2,$t2,$s2,lsl#16
   1103 	and	$i2,lr,$s3,lsr#8	@ i1
   1104 	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
   1105 	eor	$t3,$t3,$i3,lsl#16
   1106 	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
   1107 	and	$i3,lr,$s3		@ i2
   1108 
   1109 	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
   1110 	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
   1111 	eor	$s0,$s0,$i1,lsl#16
        	@ The last four key words are read in place; the key pointer is
        	@ not advanced any further.
   1112 	ldr	$i1,[$key,#0]
   1113 	eor	$s1,$s1,$i2,lsl#8
   1114 	ldr	$t1,[$key,#4]
   1115 	eor	$s2,$i3,$s2,lsl#8
   1116 	ldr	$t2,[$key,#8]
   1117 	eor	$s3,$t3,$s3,lsl#24
   1118 	ldr	$t3,[$key,#12]
   1119 
   1120 	eor	$s0,$s0,$i1
   1121 	eor	$s1,$s1,$t1
   1122 	eor	$s2,$s2,$t2
   1123 	eor	$s3,$s3,$t3
   1124 
        	@ Undo the earlier add so r10 addresses the start of the table
        	@ again when control returns to the caller.
   1125 	sub	$tbl,$tbl,#1024
   1126 	ldr	pc,[sp],#4		@ pop and return
   1127 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1128 .asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
        @ The string above is an identification banner emitted into the
        @ output; the directive below restores alignment after it.
   1129 .align	2
   1130 ___
   1131 
   # Final generator step: patch the accumulated assembly text in $code and
   # write it to STDOUT, which the top of the script re-opens onto the output
   # file.
   #
   # Every "bx lr" is replaced by its raw instruction word so the result also
   # assembles with -march=armv4, where the mnemonic is not accepted.
   #
   # Both print and close are checked: buffered write errors (full disk, I/O
   # failure) typically only surface when the handle is flushed at close, and
   # ignoring them would leave a silently truncated assembly file behind while
   # the script still exits with status 0.
   $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   print($code) or die "error writing to STDOUT: $!";
   close STDOUT or die "error closing STDOUT: $!";	# enforce flush
   1135