Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for ARMv4
     11 
     12 # January 2007.
     13 #
     14 # Code uses single 1K S-box and is >2 times faster than code generated
     15 # by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
     16 # allows merging a logical or arithmetic operation with a shift or rotate
     17 # in one instruction, emitting the combined result every cycle. The module
     18 # is endian-neutral. The performance is ~42 cycles/byte for 128-bit
     19 # key [on single-issue Xscale PXA250 core].
     20 
     21 # May 2007.
     22 #
     23 # AES_set_[en|de]crypt_key is added.
     24 
     25 # July 2010.
     26 #
     27 # Rescheduling for dual-issue pipeline resulted in 12% improvement on
     28 # Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
     29 
     30 # February 2011.
     31 #
     32 # Profiler-assisted and platform-specific optimization resulted in 16%
     33 # improvement on Cortex A8 core and ~21.5 cycles per byte.
     34 
     35 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}	# output file = first "name.ext" argument, if any
     36 if ($output) { open(STDOUT,">",$output) or die "can't open $output: $!"; }	# none given: keep writing to stdout
     37 
     38 # Symbolic names for the ARM registers referenced by the code templates below.
     39 # Roles are given as used by the block-encryption code; the key-setup
     40 # routines reuse several of these registers for other purposes.
     41 #
     42 # s0-s3: the four 32-bit AES state words
     43 ($s0,$s1,$s2,$s3)=map("r$_",(0..3));
     44 # t1-t3: scratch for table look-ups and round-key words
     45 ($t1,$t2,$t3)=map("r$_",(4..6));
     46 # i1-i3: byte indices into the table, then the values fetched through them
     47 ($i1,$i2,$i3)=map("r$_",(7..9));
     48 
     49 # tbl: look-up table base; key: key schedule pointer; rounds: round
     50 # counter, which the entry point also uses for the in/out pointer
     51 ($tbl,$key,$rounds)=map("r$_",(10..12));
     52 
     53 $code=<<___;
     54 #ifndef __KERNEL__
     55 # include "arm_arch.h"
     56 #else
     57 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
     58 #endif
     59 
     60 .text
     61 #if __ARM_ARCH__<7
     62 .code	32
     63 #else
     64 .syntax	unified
     65 # ifdef __thumb2__
     66 .thumb
     67 # else
     68 .code	32
     69 # endif
     70 #endif
     71 
     72 .type	AES_Te,%object
     73 .align	5
     74 AES_Te:
     75 .word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     76 .word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     77 .word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     78 .word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     79 .word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     80 .word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     81 .word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     82 .word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     83 .word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     84 .word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     85 .word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     86 .word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     87 .word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     88 .word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     89 .word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     90 .word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     91 .word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     92 .word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     93 .word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     94 .word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     95 .word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     96 .word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     97 .word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     98 .word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
     99 .word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
    100 .word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
    101 .word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
    102 .word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
    103 .word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
    104 .word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
    105 .word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
    106 .word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
    107 .word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
    108 .word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
    109 .word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
    110 .word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
    111 .word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
    112 .word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
    113 .word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
    114 .word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
    115 .word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
    116 .word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
    117 .word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
    118 .word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
    119 .word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
    120 .word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
    121 .word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
    122 .word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
    123 .word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
    124 .word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
    125 .word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
    126 .word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
    127 .word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    128 .word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    129 .word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    130 .word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    131 .word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    132 .word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    133 .word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    134 .word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    135 .word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    136 .word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    137 .word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    138 .word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    139 @ Te4[256]: plain S-box bytes, 1024 bytes past AES_Te; indexed by the key setup code
    140 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    141 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    142 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    143 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    144 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    145 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    146 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    147 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    148 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    149 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    150 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    151 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    152 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    153 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    154 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    155 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    156 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    157 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    158 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    159 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    160 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    161 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    162 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    163 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    164 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    165 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    166 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    167 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    168 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    169 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    170 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    171 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    172 @ rcon[]: key-schedule round constants, 256 bytes past Te4
    173 .word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    174 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    175 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    176 .size	AES_Te,.-AES_Te
    177 
    178 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
    179 @ 		 const AES_KEY *key) {
    180 .global AES_encrypt		@ encrypts one 16-byte block: r0=in, r1=out, r2=key
    181 .type   AES_encrypt,%function
    182 .align	5
    183 AES_encrypt:
    184 #if __ARM_ARCH__<7
    185 	sub	r3,pc,#8		@ r3 = AES_encrypt (pc reads as this instruction + 8)
    186 #else
    187 	adr	r3,AES_encrypt		@ r3 = AES_encrypt
    188 #endif
    189 	stmdb   sp!,{r1,r4-r12,lr}	@ save out pointer (r1), r4-r12 and lr
    190 	mov	$rounds,r0		@ inp
    191 	mov	$key,r2			@ key schedule
    192 	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
    193 #if __ARM_ARCH__<7
    194 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    195 	ldrb	$t1,[$rounds,#2]	@ manner...
    196 	ldrb	$t2,[$rounds,#1]
    197 	ldrb	$t3,[$rounds,#0]
    198 	orr	$s0,$s0,$t1,lsl#8
    199 	ldrb	$s1,[$rounds,#7]
    200 	orr	$s0,$s0,$t2,lsl#16
    201 	ldrb	$t1,[$rounds,#6]
    202 	orr	$s0,$s0,$t3,lsl#24	@ in[0] ends up in the top byte
    203 	ldrb	$t2,[$rounds,#5]
    204 	ldrb	$t3,[$rounds,#4]
    205 	orr	$s1,$s1,$t1,lsl#8
    206 	ldrb	$s2,[$rounds,#11]
    207 	orr	$s1,$s1,$t2,lsl#16
    208 	ldrb	$t1,[$rounds,#10]
    209 	orr	$s1,$s1,$t3,lsl#24
    210 	ldrb	$t2,[$rounds,#9]
    211 	ldrb	$t3,[$rounds,#8]
    212 	orr	$s2,$s2,$t1,lsl#8
    213 	ldrb	$s3,[$rounds,#15]
    214 	orr	$s2,$s2,$t2,lsl#16
    215 	ldrb	$t1,[$rounds,#14]
    216 	orr	$s2,$s2,$t3,lsl#24
    217 	ldrb	$t2,[$rounds,#13]
    218 	ldrb	$t3,[$rounds,#12]
    219 	orr	$s3,$s3,$t1,lsl#8
    220 	orr	$s3,$s3,$t2,lsl#16
    221 	orr	$s3,$s3,$t3,lsl#24
    222 #else
    223 	ldr	$s0,[$rounds,#0]	@ ARMv7 and later: whole-word loads
    224 	ldr	$s1,[$rounds,#4]
    225 	ldr	$s2,[$rounds,#8]
    226 	ldr	$s3,[$rounds,#12]
    227 #ifdef __ARMEL__
    228 	rev	$s0,$s0			@ little-endian: swap so in[0] is the top byte, as above
    229 	rev	$s1,$s1
    230 	rev	$s2,$s2
    231 	rev	$s3,$s3
    232 #endif
    233 #endif
    234 	bl	_armv4_AES_encrypt	@ transforms the state in r0-r3 in place
    235 
    236 	ldr	$rounds,[sp],#4		@ pop out (the r1 saved on entry)
    237 #if __ARM_ARCH__>=7
    238 #ifdef __ARMEL__
    239 	rev	$s0,$s0			@ back to memory byte order
    240 	rev	$s1,$s1
    241 	rev	$s2,$s2
    242 	rev	$s3,$s3
    243 #endif
    244 	str	$s0,[$rounds,#0]
    245 	str	$s1,[$rounds,#4]
    246 	str	$s2,[$rounds,#8]
    247 	str	$s3,[$rounds,#12]
    248 #else
    249 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    250 	mov	$t2,$s0,lsr#16		@ manner...
    251 	mov	$t3,$s0,lsr#8
    252 	strb	$t1,[$rounds,#0]
    253 	strb	$t2,[$rounds,#1]
    254 	mov	$t1,$s1,lsr#24
    255 	strb	$t3,[$rounds,#2]
    256 	mov	$t2,$s1,lsr#16
    257 	strb	$s0,[$rounds,#3]
    258 	mov	$t3,$s1,lsr#8
    259 	strb	$t1,[$rounds,#4]
    260 	strb	$t2,[$rounds,#5]
    261 	mov	$t1,$s2,lsr#24
    262 	strb	$t3,[$rounds,#6]
    263 	mov	$t2,$s2,lsr#16
    264 	strb	$s1,[$rounds,#7]
    265 	mov	$t3,$s2,lsr#8
    266 	strb	$t1,[$rounds,#8]
    267 	strb	$t2,[$rounds,#9]
    268 	mov	$t1,$s3,lsr#24
    269 	strb	$t3,[$rounds,#10]
    270 	mov	$t2,$s3,lsr#16
    271 	strb	$s2,[$rounds,#11]
    272 	mov	$t3,$s3,lsr#8
    273 	strb	$t1,[$rounds,#12]
    274 	strb	$t2,[$rounds,#13]
    275 	strb	$t3,[$rounds,#14]
    276 	strb	$s3,[$rounds,#15]
    277 #endif
    278 #if __ARM_ARCH__>=5
    279 	ldmia	sp!,{r4-r12,pc}		@ restore registers and return
    280 #else
    281 	ldmia   sp!,{r4-r12,lr}
    282 	tst	lr,#1
    283 	moveq	pc,lr			@ be binary compatible with V4, yet
    284 	bx	lr			@ interoperable with Thumb ISA:-)
    285 #endif
    286 .size	AES_encrypt,.-AES_encrypt
    287 
    288 .type   _armv4_AES_encrypt,%function
    289 .align	2
    290 _armv4_AES_encrypt:		@ in: r0-r3 state, r10 = AES_Te, r11 = key schedule; out: r0-r3
    291 	str	lr,[sp,#-4]!		@ push lr
    292 	ldmia	$key!,{$t1-$i1}		@ first four key words; key pointer moves on by 16
    293 	eor	$s0,$s0,$t1		@ state ^= first round key
    294 	ldr	$rounds,[$key,#240-16]	@ round count, kept at offset 240 of the schedule
    295 	eor	$s1,$s1,$t2
    296 	eor	$s2,$s2,$t3
    297 	eor	$s3,$s3,$i1
    298 	sub	$rounds,$rounds,#1	@ the last round is handled after the loop
    299 	mov	lr,#255			@ lr doubles as the byte mask from here on
    300 
    301 	and	$i1,lr,$s0		@ split s0 into byte indices for the first pass
    302 	and	$i2,lr,$s0,lsr#8
    303 	and	$i3,lr,$s0,lsr#16
    304 	mov	$s0,$s0,lsr#24
    305 .Lenc_loop:
    306 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    307 	and	$i1,lr,$s1,lsr#16	@ i0
    308 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    309 	and	$i2,lr,$s1
    310 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    311 	and	$i3,lr,$s1,lsr#8
    312 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    313 	mov	$s1,$s1,lsr#24
    314 
    315 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    316 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    317 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    318 	eor	$s0,$s0,$i1,ror#8	@ one table serves as Te0-Te3: the ror supplies the rest
    319 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    320 	and	$i1,lr,$s2,lsr#8	@ i0
    321 	eor	$t2,$t2,$i2,ror#8
    322 	and	$i2,lr,$s2,lsr#16	@ i1
    323 	eor	$t3,$t3,$i3,ror#8
    324 	and	$i3,lr,$s2
    325 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    326 	eor	$s1,$s1,$t1,ror#24
    327 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    328 	mov	$s2,$s2,lsr#24
    329 
    330 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    331 	eor	$s0,$s0,$i1,ror#16
    332 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    333 	and	$i1,lr,$s3		@ i0
    334 	eor	$s1,$s1,$i2,ror#8
    335 	and	$i2,lr,$s3,lsr#8	@ i1
    336 	eor	$t3,$t3,$i3,ror#16
    337 	and	$i3,lr,$s3,lsr#16	@ i2
    338 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    339 	eor	$s2,$s2,$t2,ror#16
    340 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    341 	mov	$s3,$s3,lsr#24
    342 
    343 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    344 	eor	$s0,$s0,$i1,ror#24
    345 	ldr	$i1,[$key],#16		@ next round key, word 0; key pointer moves on by 16
    346 	eor	$s1,$s1,$i2,ror#16
    347 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    348 	eor	$s2,$s2,$i3,ror#8
    349 	ldr	$t1,[$key,#-12]		@ round key word 1
    350 	eor	$s3,$s3,$t3,ror#8
    351 
    352 	ldr	$t2,[$key,#-8]		@ round key word 2
    353 	eor	$s0,$s0,$i1
    354 	ldr	$t3,[$key,#-4]		@ round key word 3
    355 	and	$i1,lr,$s0		@ start splitting the new s0 for the next pass
    356 	eor	$s1,$s1,$t1
    357 	and	$i2,lr,$s0,lsr#8
    358 	eor	$s2,$s2,$t2
    359 	and	$i3,lr,$s0,lsr#16
    360 	eor	$s3,$s3,$t3
    361 	mov	$s0,$s0,lsr#24
    362 
    363 	subs	$rounds,$rounds,#1
    364 	bne	.Lenc_loop
    365 	@ last round: single-byte look-ups, reassembled with shifts
    366 	add	$tbl,$tbl,#2		@ byte 2 of every Te word is the plain S-box value
    367 
    368 	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    369 	and	$i1,lr,$s1,lsr#16	@ i0
    370 	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    371 	and	$i2,lr,$s1
    372 	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    373 	and	$i3,lr,$s1,lsr#8
    374 	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    375 	mov	$s1,$s1,lsr#24
    376 
    377 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    378 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    379 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    380 	eor	$s0,$i1,$s0,lsl#8
    381 	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    382 	and	$i1,lr,$s2,lsr#8	@ i0
    383 	eor	$t2,$i2,$t2,lsl#8
    384 	and	$i2,lr,$s2,lsr#16	@ i1
    385 	eor	$t3,$i3,$t3,lsl#8
    386 	and	$i3,lr,$s2
    387 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    388 	eor	$s1,$t1,$s1,lsl#24
    389 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    390 	mov	$s2,$s2,lsr#24
    391 
    392 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    393 	eor	$s0,$i1,$s0,lsl#8
    394 	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    395 	and	$i1,lr,$s3		@ i0
    396 	eor	$s1,$s1,$i2,lsl#16
    397 	and	$i2,lr,$s3,lsr#8	@ i1
    398 	eor	$t3,$i3,$t3,lsl#8
    399 	and	$i3,lr,$s3,lsr#16	@ i2
    400 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    401 	eor	$s2,$t2,$s2,lsl#24
    402 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    403 	mov	$s3,$s3,lsr#24
    404 
    405 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    406 	eor	$s0,$i1,$s0,lsl#8
    407 	ldr	$i1,[$key,#0]		@ final round key, words 0-3 loaded below
    408 	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    409 	eor	$s1,$s1,$i2,lsl#8
    410 	ldr	$t1,[$key,#4]
    411 	eor	$s2,$s2,$i3,lsl#16
    412 	ldr	$t2,[$key,#8]
    413 	eor	$s3,$t3,$s3,lsl#24
    414 	ldr	$t3,[$key,#12]
    415 
    416 	eor	$s0,$s0,$i1
    417 	eor	$s1,$s1,$t1
    418 	eor	$s2,$s2,$t2
    419 	eor	$s3,$s3,$t3
    420 
    421 	sub	$tbl,$tbl,#2		@ undo the +2 so r10 is AES_Te again
    422 	ldr	pc,[sp],#4		@ pop and return
    423 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    424 
    425 .global private_AES_set_encrypt_key	@ r0 = user key, r1 = bits, r2 = AES_KEY; returns 0 or -1 in r0
    426 .type   private_AES_set_encrypt_key,%function
    427 .align	5
    428 private_AES_set_encrypt_key:
    429 _armv4_AES_set_encrypt_key:
    430 #if __ARM_ARCH__<7
    431 	sub	r3,pc,#8		@ AES_set_encrypt_key
    432 #else
    433 	adr	r3,private_AES_set_encrypt_key
    434 #endif
    435 	teq	r0,#0			@ NULL user key?
    436 #if __ARM_ARCH__>=7
    437 	itt	eq			@ Thumb2 thing, sanity check in ARM
    438 #endif
    439 	moveq	r0,#-1
    440 	beq	.Labrt
    441 	teq	r2,#0			@ NULL AES_KEY?
    442 #if __ARM_ARCH__>=7
    443 	itt	eq			@ Thumb2 thing, sanity check in ARM
    444 #endif
    445 	moveq	r0,#-1
    446 	beq	.Labrt
    447 
    448 	teq	r1,#128			@ only 128, 192 and 256 bits are accepted
    449 	beq	.Lok
    450 	teq	r1,#192
    451 	beq	.Lok
    452 	teq	r1,#256
    453 #if __ARM_ARCH__>=7
    454 	itt	ne			@ Thumb2 thing, sanity check in ARM
    455 #endif
    456 	movne	r0,#-1
    457 	bne	.Labrt
    458 
    459 .Lok:	stmdb   sp!,{r4-r12,lr}
    460 	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
    461 
    462 	mov	$rounds,r0		@ inp
    463 	mov	lr,r1			@ bits
    464 	mov	$key,r2			@ key
    465 
    466 #if __ARM_ARCH__<7
    467 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    468 	ldrb	$t1,[$rounds,#2]	@ manner...
    469 	ldrb	$t2,[$rounds,#1]
    470 	ldrb	$t3,[$rounds,#0]
    471 	orr	$s0,$s0,$t1,lsl#8
    472 	ldrb	$s1,[$rounds,#7]
    473 	orr	$s0,$s0,$t2,lsl#16
    474 	ldrb	$t1,[$rounds,#6]
    475 	orr	$s0,$s0,$t3,lsl#24
    476 	ldrb	$t2,[$rounds,#5]
    477 	ldrb	$t3,[$rounds,#4]
    478 	orr	$s1,$s1,$t1,lsl#8
    479 	ldrb	$s2,[$rounds,#11]
    480 	orr	$s1,$s1,$t2,lsl#16
    481 	ldrb	$t1,[$rounds,#10]
    482 	orr	$s1,$s1,$t3,lsl#24
    483 	ldrb	$t2,[$rounds,#9]
    484 	ldrb	$t3,[$rounds,#8]
    485 	orr	$s2,$s2,$t1,lsl#8
    486 	ldrb	$s3,[$rounds,#15]
    487 	orr	$s2,$s2,$t2,lsl#16
    488 	ldrb	$t1,[$rounds,#14]
    489 	orr	$s2,$s2,$t3,lsl#24
    490 	ldrb	$t2,[$rounds,#13]
    491 	ldrb	$t3,[$rounds,#12]
    492 	orr	$s3,$s3,$t1,lsl#8
    493 	str	$s0,[$key],#16		@ rk[0..3] = key bytes 0-15; key pointer moves on by 16
    494 	orr	$s3,$s3,$t2,lsl#16
    495 	str	$s1,[$key,#-12]
    496 	orr	$s3,$s3,$t3,lsl#24
    497 	str	$s2,[$key,#-8]
    498 	str	$s3,[$key,#-4]
    499 #else
    500 	ldr	$s0,[$rounds,#0]
    501 	ldr	$s1,[$rounds,#4]
    502 	ldr	$s2,[$rounds,#8]
    503 	ldr	$s3,[$rounds,#12]
    504 #ifdef __ARMEL__
    505 	rev	$s0,$s0
    506 	rev	$s1,$s1
    507 	rev	$s2,$s2
    508 	rev	$s3,$s3
    509 #endif
    510 	str	$s0,[$key],#16		@ rk[0..3] = key bytes 0-15; key pointer moves on by 16
    511 	str	$s1,[$key,#-12]
    512 	str	$s2,[$key,#-8]
    513 	str	$s3,[$key,#-4]
    514 #endif
    515 
    516 	teq	lr,#128
    517 	bne	.Lnot128
    518 	mov	$rounds,#10		@ 128-bit key: 10 rounds, also the loop count
    519 	str	$rounds,[$key,#240-16]	@ round count goes to offset 240 of the schedule
    520 	add	$t3,$tbl,#256			@ rcon
    521 	mov	lr,#255			@ byte mask
    522 
    523 .L128_loop:
    524 	and	$t2,lr,$s3,lsr#24	@ run the four bytes of the last word through Te4...
    525 	and	$i1,lr,$s3,lsr#16
    526 	ldrb	$t2,[$tbl,$t2]
    527 	and	$i2,lr,$s3,lsr#8
    528 	ldrb	$i1,[$tbl,$i1]
    529 	and	$i3,lr,$s3
    530 	ldrb	$i2,[$tbl,$i2]
    531 	orr	$t2,$t2,$i1,lsl#24	@ ...and put them back rotated up by one byte
    532 	ldrb	$i3,[$tbl,$i3]
    533 	orr	$t2,$t2,$i2,lsl#16
    534 	ldr	$t1,[$t3],#4			@ rcon[i++]
    535 	orr	$t2,$t2,$i3,lsl#8
    536 	eor	$t2,$t2,$t1
    537 	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    538 	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    539 	str	$s0,[$key],#16
    540 	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    541 	str	$s1,[$key,#-12]
    542 	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    543 	str	$s2,[$key,#-8]
    544 	subs	$rounds,$rounds,#1
    545 	str	$s3,[$key,#-4]
    546 	bne	.L128_loop
    547 	sub	r2,$key,#176		@ r2 = start of schedule again (11 round keys = 176 bytes)
    548 	b	.Ldone
    549 
    550 .Lnot128:
    551 #if __ARM_ARCH__<7
    552 	ldrb	$i2,[$rounds,#19]
    553 	ldrb	$t1,[$rounds,#18]
    554 	ldrb	$t2,[$rounds,#17]
    555 	ldrb	$t3,[$rounds,#16]
    556 	orr	$i2,$i2,$t1,lsl#8
    557 	ldrb	$i3,[$rounds,#23]
    558 	orr	$i2,$i2,$t2,lsl#16
    559 	ldrb	$t1,[$rounds,#22]
    560 	orr	$i2,$i2,$t3,lsl#24
    561 	ldrb	$t2,[$rounds,#21]
    562 	ldrb	$t3,[$rounds,#20]
    563 	orr	$i3,$i3,$t1,lsl#8
    564 	orr	$i3,$i3,$t2,lsl#16
    565 	str	$i2,[$key],#8		@ rk[4..5] = key bytes 16-23
    566 	orr	$i3,$i3,$t3,lsl#24
    567 	str	$i3,[$key,#-4]
    568 #else
    569 	ldr	$i2,[$rounds,#16]
    570 	ldr	$i3,[$rounds,#20]
    571 #ifdef __ARMEL__
    572 	rev	$i2,$i2
    573 	rev	$i3,$i3
    574 #endif
    575 	str	$i2,[$key],#8		@ rk[4..5] = key bytes 16-23
    576 	str	$i3,[$key,#-4]
    577 #endif
    578 
    579 	teq	lr,#192
    580 	bne	.Lnot192
    581 	mov	$rounds,#12		@ 192-bit key: 12 rounds
    582 	str	$rounds,[$key,#240-24]	@ key pointer is 24 past the start here
    583 	add	$t3,$tbl,#256			@ rcon
    584 	mov	lr,#255
    585 	mov	$rounds,#8		@ 8 passes of six words each (four on the last)
    586 
    587 .L192_loop:
    588 	and	$t2,lr,$i3,lsr#24	@ same byte substitution and rotation as the 128-bit loop
    589 	and	$i1,lr,$i3,lsr#16
    590 	ldrb	$t2,[$tbl,$t2]
    591 	and	$i2,lr,$i3,lsr#8
    592 	ldrb	$i1,[$tbl,$i1]
    593 	and	$i3,lr,$i3
    594 	ldrb	$i2,[$tbl,$i2]
    595 	orr	$t2,$t2,$i1,lsl#24
    596 	ldrb	$i3,[$tbl,$i3]
    597 	orr	$t2,$t2,$i2,lsl#16
    598 	ldr	$t1,[$t3],#4			@ rcon[i++]
    599 	orr	$t2,$t2,$i3,lsl#8
    600 	eor	$i3,$t2,$t1
    601 	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    602 	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    603 	str	$s0,[$key],#24
    604 	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    605 	str	$s1,[$key,#-20]
    606 	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    607 	str	$s2,[$key,#-16]
    608 	subs	$rounds,$rounds,#1
    609 	str	$s3,[$key,#-12]
    610 #if __ARM_ARCH__>=7
    611 	itt	eq				@ Thumb2 thing, sanity check in ARM
    612 #endif
    613 	subeq	r2,$key,#216		@ last pass: r2 = start of schedule again
    614 	beq	.Ldone
    615 
    616 	ldr	$i1,[$key,#-32]
    617 	ldr	$i2,[$key,#-28]
    618 	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    619 	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    620 	str	$i1,[$key,#-8]
    621 	str	$i3,[$key,#-4]
    622 	b	.L192_loop
    623 
    624 .Lnot192:
    625 #if __ARM_ARCH__<7
    626 	ldrb	$i2,[$rounds,#27]
    627 	ldrb	$t1,[$rounds,#26]
    628 	ldrb	$t2,[$rounds,#25]
    629 	ldrb	$t3,[$rounds,#24]
    630 	orr	$i2,$i2,$t1,lsl#8
    631 	ldrb	$i3,[$rounds,#31]
    632 	orr	$i2,$i2,$t2,lsl#16
    633 	ldrb	$t1,[$rounds,#30]
    634 	orr	$i2,$i2,$t3,lsl#24
    635 	ldrb	$t2,[$rounds,#29]
    636 	ldrb	$t3,[$rounds,#28]
    637 	orr	$i3,$i3,$t1,lsl#8
    638 	orr	$i3,$i3,$t2,lsl#16
    639 	str	$i2,[$key],#8		@ rk[6..7] = key bytes 24-31
    640 	orr	$i3,$i3,$t3,lsl#24
    641 	str	$i3,[$key,#-4]
    642 #else
    643 	ldr	$i2,[$rounds,#24]
    644 	ldr	$i3,[$rounds,#28]
    645 #ifdef __ARMEL__
    646 	rev	$i2,$i2
    647 	rev	$i3,$i3
    648 #endif
    649 	str	$i2,[$key],#8		@ rk[6..7] = key bytes 24-31
    650 	str	$i3,[$key,#-4]
    651 #endif
    652 
    653 	mov	$rounds,#14		@ 256-bit key: 14 rounds
    654 	str	$rounds,[$key,#240-32]	@ key pointer is 32 past the start here
    655 	add	$t3,$tbl,#256			@ rcon
    656 	mov	lr,#255
    657 	mov	$rounds,#7		@ 7 passes of eight words each (four on the last)
    658 
    659 .L256_loop:
    660 	and	$t2,lr,$i3,lsr#24	@ first half: substitute, rotate and add rcon as above
    661 	and	$i1,lr,$i3,lsr#16
    662 	ldrb	$t2,[$tbl,$t2]
    663 	and	$i2,lr,$i3,lsr#8
    664 	ldrb	$i1,[$tbl,$i1]
    665 	and	$i3,lr,$i3
    666 	ldrb	$i2,[$tbl,$i2]
    667 	orr	$t2,$t2,$i1,lsl#24
    668 	ldrb	$i3,[$tbl,$i3]
    669 	orr	$t2,$t2,$i2,lsl#16
    670 	ldr	$t1,[$t3],#4			@ rcon[i++]
    671 	orr	$t2,$t2,$i3,lsl#8
    672 	eor	$i3,$t2,$t1
    673 	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    674 	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    675 	str	$s0,[$key],#32
    676 	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    677 	str	$s1,[$key,#-28]
    678 	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    679 	str	$s2,[$key,#-24]
    680 	subs	$rounds,$rounds,#1
    681 	str	$s3,[$key,#-20]
    682 #if __ARM_ARCH__>=7
    683 	itt	eq				@ Thumb2 thing, sanity check in ARM
    684 #endif
    685 	subeq	r2,$key,#256		@ last pass: r2 = start of schedule again
    686 	beq	.Ldone
    687 
    688 	and	$t2,lr,$s3		@ second half: substitute the bytes of the word just
    689 	and	$i1,lr,$s3,lsr#8	@ stored, keeping byte positions and adding no rcon
    690 	ldrb	$t2,[$tbl,$t2]
    691 	and	$i2,lr,$s3,lsr#16
    692 	ldrb	$i1,[$tbl,$i1]
    693 	and	$i3,lr,$s3,lsr#24
    694 	ldrb	$i2,[$tbl,$i2]
    695 	orr	$t2,$t2,$i1,lsl#8
    696 	ldrb	$i3,[$tbl,$i3]
    697 	orr	$t2,$t2,$i2,lsl#16
    698 	ldr	$t1,[$key,#-48]
    699 	orr	$t2,$t2,$i3,lsl#24
    700 
    701 	ldr	$i1,[$key,#-44]
    702 	ldr	$i2,[$key,#-40]
    703 	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    704 	ldr	$i3,[$key,#-36]
    705 	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    706 	str	$t1,[$key,#-16]
    707 	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    708 	str	$i1,[$key,#-12]
    709 	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    710 	str	$i2,[$key,#-8]
    711 	str	$i3,[$key,#-4]
    712 	b	.L256_loop
    713 
    714 .align	2
    715 .Ldone:	mov	r0,#0			@ success
    716 	ldmia   sp!,{r4-r12,lr}
    717 .Labrt:				@ error exits arrive here with the code already in r0
    718 #if __ARM_ARCH__>=5
    719 	ret				@ bx lr
    720 #else
    721 	tst	lr,#1
    722 	moveq	pc,lr			@ be binary compatible with V4, yet
    723 	bx	lr			@ interoperable with Thumb ISA:-)
    724 #endif
    725 .size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
    726 
    727 .global private_AES_set_decrypt_key	@ same arguments as private_AES_set_encrypt_key
    728 .type   private_AES_set_decrypt_key,%function
    729 .align	5
    730 private_AES_set_decrypt_key:
    731 	str	lr,[sp,#-4]!            @ push lr
    732 	bl	_armv4_AES_set_encrypt_key	@ build the encryption schedule first
    733 	teq	r0,#0			@ non-zero r0 means it failed
    734 	ldr	lr,[sp],#4              @ pop lr
    735 	bne	.Labrt			@ hand the error code in r0 back to the caller
    736 
    737 	mov	r0,r2			@ AES_set_encrypt_key preserves r2,
    738 	mov	r1,r2			@ which is AES_KEY *key
    739 	b	_armv4_AES_set_enc2dec_key	@ tail call: input and output are the same schedule
    740 .size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
    741 
    742 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
    743 .global	AES_set_enc2dec_key	@ r0 = inp, r1 = out (may be the same schedule); leaves 0 in r0
    744 .type	AES_set_enc2dec_key,%function
    745 .align	5
    746 AES_set_enc2dec_key:
    747 _armv4_AES_set_enc2dec_key:
    748 	stmdb   sp!,{r4-r12,lr}
    749 
    750 	ldr	$rounds,[r0,#240]	@ round count of the input schedule
    751 	mov	$i1,r0			@ input
    752 	add	$i2,r0,$rounds,lsl#4	@ last round key of the input
    753 	mov	$key,r1			@ output
    754 	add	$tbl,r1,$rounds,lsl#4	@ last round key of the output
    755 	str	$rounds,[r1,#240]	@ copy the round count across
    756 
    757 .Linv:	ldr	$s0,[$i1],#16		@ one round key from the front...
    758 	ldr	$s1,[$i1,#-12]
    759 	ldr	$s2,[$i1,#-8]
    760 	ldr	$s3,[$i1,#-4]
    761 	ldr	$t1,[$i2],#-16		@ ...and one from the back, both read before any store
    762 	ldr	$t2,[$i2,#16+4]
    763 	ldr	$t3,[$i2,#16+8]
    764 	ldr	$i3,[$i2,#16+12]
    765 	str	$s0,[$tbl],#-16		@ front key goes to the back of the output
    766 	str	$s1,[$tbl,#16+4]
    767 	str	$s2,[$tbl,#16+8]
    768 	str	$s3,[$tbl,#16+12]
    769 	str	$t1,[$key],#16		@ back key goes to the front of the output
    770 	str	$t2,[$key,#-12]
    771 	str	$t3,[$key,#-8]
    772 	str	$i3,[$key,#-4]
    773 	teq	$i1,$i2			@ have the two input pointers met in the middle?
    774 	bne	.Linv
    775 
    776 	ldr	$s0,[$i1]		@ the middle round key is copied straight across
    777 	ldr	$s1,[$i1,#4]
    778 	ldr	$s2,[$i1,#8]
    779 	ldr	$s3,[$i1,#12]
    780 	str	$s0,[$key]
    781 	str	$s1,[$key,#4]
    782 	str	$s2,[$key,#8]
    783 	str	$s3,[$key,#12]
    784 	sub	$key,$key,$rounds,lsl#3	@ back to the start of the output schedule
    785 ___
    786 $mask80=$i1;	# r7, loaded with 0x80808080 below
    787 $mask1b=$i2;	# r8, loaded with 0x1b1b1b1b below
    788 $mask7f=$i3;	# r9, loaded with 0x7f7f7f7f below
    789 $code.=<<___;
    790 	ldr	$s0,[$key,#16]!		@ prefetch tp1; first and last round keys stay as they are
    791 	mov	$mask80,#0x80
    792 	mov	$mask1b,#0x1b
    793 	orr	$mask80,$mask80,#0x8000
    794 	orr	$mask1b,$mask1b,#0x1b00
    795 	orr	$mask80,$mask80,$mask80,lsl#16	@ 0x80808080
    796 	orr	$mask1b,$mask1b,$mask1b,lsl#16	@ 0x1b1b1b1b
    797 	sub	$rounds,$rounds,#1
    798 	mvn	$mask7f,$mask80		@ 0x7f7f7f7f
    799 	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4 words to transform
    800 
    801 .Lmix:	and	$t1,$s0,$mask80		@ top bit of each byte of tp1
    802 	and	$s1,$s0,$mask7f
    803 	sub	$t1,$t1,$t1,lsr#7	@ 0x7f in every byte whose top bit was set
    804 	and	$t1,$t1,$mask1b		@ 0x1b in those bytes, zero elsewhere
    805 	eor	$s1,$t1,$s1,lsl#1	@ tp2
    806 
    807 	and	$t1,$s1,$mask80		@ same doubling step applied to tp2
    808 	and	$s2,$s1,$mask7f
    809 	sub	$t1,$t1,$t1,lsr#7
    810 	and	$t1,$t1,$mask1b
    811 	eor	$s2,$t1,$s2,lsl#1	@ tp4
    812 
    813 	and	$t1,$s2,$mask80		@ and once more applied to tp4
    814 	and	$s3,$s2,$mask7f
    815 	sub	$t1,$t1,$t1,lsr#7
    816 	and	$t1,$t1,$mask1b
    817 	eor	$s3,$t1,$s3,lsl#1	@ tp8
    818 
    819 	eor	$t1,$s1,$s2
    820 	eor	$t2,$s0,$s3		@ tp9
    821 	eor	$t1,$t1,$s3		@ tpe
    822 	eor	$t1,$t1,$s1,ror#24
    823 	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    824 	eor	$t1,$t1,$s2,ror#16
    825 	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    826 	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    827 
    828 	ldr	$s0,[$key,#4]		@ prefetch tp1
    829 	str	$t1,[$key],#4		@ replace the word in place and step to the next one
    830 	subs	$rounds,$rounds,#1
    831 	bne	.Lmix
    832 
    833 	mov	r0,#0			@ r0 is always 0 on exit
    834 #if __ARM_ARCH__>=5
    835 	ldmia	sp!,{r4-r12,pc}
    836 #else
    837 	ldmia   sp!,{r4-r12,lr}
    838 	tst	lr,#1
    839 	moveq	pc,lr			@ be binary compatible with V4, yet
    840 	bx	lr			@ interoperable with Thumb ISA:-)
    841 #endif
    842 .size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
    843 
    844 .type	AES_Td,%object
    845 .align	5
    846 AES_Td:
    847 .word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    848 .word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    849 .word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    850 .word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    851 .word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    852 .word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    853 .word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    854 .word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    855 .word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    856 .word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    857 .word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    858 .word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    859 .word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    860 .word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    861 .word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    862 .word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    863 .word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    864 .word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    865 .word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    866 .word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    867 .word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    868 .word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    869 .word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    870 .word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    871 .word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    872 .word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    873 .word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    874 .word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    875 .word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    876 .word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    877 .word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    878 .word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    879 .word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    880 .word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    881 .word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    882 .word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    883 .word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    884 .word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    885 .word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    886 .word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    887 .word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    888 .word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    889 .word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    890 .word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    891 .word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    892 .word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    893 .word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    894 .word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    895 .word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    896 .word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    897 .word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    898 .word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    899 .word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    900 .word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    901 .word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    902 .word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    903 .word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    904 .word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    905 .word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    906 .word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    907 .word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    908 .word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    909 .word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    910 .word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    911 @ Td4[256]
    912 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    913 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    914 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    915 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    916 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    917 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    918 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    919 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    920 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    921 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    922 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    923 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    924 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    925 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    926 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    927 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    928 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    929 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    930 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    931 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    932 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    933 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    934 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    935 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    936 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    937 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    938 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    939 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    940 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    941 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    942 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    943 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    944 .size	AES_Td,.-AES_Td
    945 
     946 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
     947 @ 		 const AES_KEY *key) {
         @
         @ Decrypts a single 16-byte block: r0 = in, r1 = out, r2 = key.
         @ The block is loaded as four big-endian words into s0-s3, run
         @ through _armv4_AES_decrypt, and written back in the same byte
         @ order. r4-r12 and lr are saved on entry and restored on return.
     948 .global AES_decrypt
     949 .type   AES_decrypt,%function
     950 .align	5
     951 AES_decrypt:
     952 #if __ARM_ARCH__<7
     953 	sub	r3,pc,#8		@ AES_decrypt
     954 #else
     955 	adr	r3,AES_decrypt
     956 #endif
     957 	stmdb   sp!,{r1,r4-r12,lr}	@ out pointer (r1) lands at the lowest address, popped first below
     958 	mov	$rounds,r0		@ inp
     959 	mov	$key,r2			@ key schedule pointer
     960 	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
     961 #if __ARM_ARCH__<7
     962 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
     963 	ldrb	$t1,[$rounds,#2]	@ manner...
     964 	ldrb	$t2,[$rounds,#1]
     965 	ldrb	$t3,[$rounds,#0]
     966 	orr	$s0,$s0,$t1,lsl#8
     967 	ldrb	$s1,[$rounds,#7]
     968 	orr	$s0,$s0,$t2,lsl#16
     969 	ldrb	$t1,[$rounds,#6]
     970 	orr	$s0,$s0,$t3,lsl#24	@ s0 = in[0]<<24 | in[1]<<16 | in[2]<<8 | in[3]
     971 	ldrb	$t2,[$rounds,#5]
     972 	ldrb	$t3,[$rounds,#4]
     973 	orr	$s1,$s1,$t1,lsl#8
     974 	ldrb	$s2,[$rounds,#11]
     975 	orr	$s1,$s1,$t2,lsl#16
     976 	ldrb	$t1,[$rounds,#10]
     977 	orr	$s1,$s1,$t3,lsl#24	@ s1 built the same way from in[4..7]
     978 	ldrb	$t2,[$rounds,#9]
     979 	ldrb	$t3,[$rounds,#8]
     980 	orr	$s2,$s2,$t1,lsl#8
     981 	ldrb	$s3,[$rounds,#15]
     982 	orr	$s2,$s2,$t2,lsl#16
     983 	ldrb	$t1,[$rounds,#14]
     984 	orr	$s2,$s2,$t3,lsl#24	@ s2 from in[8..11]
     985 	ldrb	$t2,[$rounds,#13]
     986 	ldrb	$t3,[$rounds,#12]
     987 	orr	$s3,$s3,$t1,lsl#8
     988 	orr	$s3,$s3,$t2,lsl#16
     989 	orr	$s3,$s3,$t3,lsl#24	@ s3 from in[12..15]
     990 #else
     991 	ldr	$s0,[$rounds,#0]	@ whole-word loads; byte-swapped below on little-endian builds
     992 	ldr	$s1,[$rounds,#4]
     993 	ldr	$s2,[$rounds,#8]
     994 	ldr	$s3,[$rounds,#12]
     995 #ifdef __ARMEL__
     996 	rev	$s0,$s0
     997 	rev	$s1,$s1
     998 	rev	$s2,$s2
     999 	rev	$s3,$s3
    1000 #endif
    1001 #endif
    1002 	bl	_armv4_AES_decrypt	@ state in s0-s3, key and tbl set up above
    1003 
    1004 	ldr	$rounds,[sp],#4		@ pop out
    1005 #if __ARM_ARCH__>=7
    1006 #ifdef __ARMEL__
    1007 	rev	$s0,$s0
    1008 	rev	$s1,$s1
    1009 	rev	$s2,$s2
    1010 	rev	$s3,$s3
    1011 #endif
    1012 	str	$s0,[$rounds,#0]
    1013 	str	$s1,[$rounds,#4]
    1014 	str	$s2,[$rounds,#8]
    1015 	str	$s3,[$rounds,#12]
    1016 #else
    1017 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    1018 	mov	$t2,$s0,lsr#16		@ manner...
    1019 	mov	$t3,$s0,lsr#8
    1020 	strb	$t1,[$rounds,#0]	@ most significant byte of s0 goes to out[0]
    1021 	strb	$t2,[$rounds,#1]
    1022 	mov	$t1,$s1,lsr#24
    1023 	strb	$t3,[$rounds,#2]
    1024 	mov	$t2,$s1,lsr#16
    1025 	strb	$s0,[$rounds,#3]
    1026 	mov	$t3,$s1,lsr#8
    1027 	strb	$t1,[$rounds,#4]
    1028 	strb	$t2,[$rounds,#5]
    1029 	mov	$t1,$s2,lsr#24
    1030 	strb	$t3,[$rounds,#6]
    1031 	mov	$t2,$s2,lsr#16
    1032 	strb	$s1,[$rounds,#7]
    1033 	mov	$t3,$s2,lsr#8
    1034 	strb	$t1,[$rounds,#8]
    1035 	strb	$t2,[$rounds,#9]
    1036 	mov	$t1,$s3,lsr#24
    1037 	strb	$t3,[$rounds,#10]
    1038 	mov	$t2,$s3,lsr#16
    1039 	strb	$s2,[$rounds,#11]
    1040 	mov	$t3,$s3,lsr#8
    1041 	strb	$t1,[$rounds,#12]
    1042 	strb	$t2,[$rounds,#13]
    1043 	strb	$t3,[$rounds,#14]
    1044 	strb	$s3,[$rounds,#15]
    1045 #endif
    1046 #if __ARM_ARCH__>=5
    1047 	ldmia	sp!,{r4-r12,pc}		@ restore registers and return in one instruction
    1048 #else
    1049 	ldmia   sp!,{r4-r12,lr}
    1050 	tst	lr,#1			@ bit 0 of the return address selects the path below
    1051 	moveq	pc,lr			@ be binary compatible with V4, yet
    1052 	bx	lr			@ interoperable with Thumb ISA:-)
    1053 #endif
    1054 .size	AES_decrypt,.-AES_decrypt
   1055 
    1056 .type   _armv4_AES_decrypt,%function
    1057 .align	2
         @ Internal decryption core, called from AES_decrypt with
         @   s0-s3 (r0-r3)  state words, result returned in the same registers
         @   key   (r11)    key schedule pointer, advanced as round keys are read
         @   tbl   (r10)    address of AES_Td, restored before returning
         @ r4-r9, r12 and lr are used as scratch. The return address is kept
         @ on the stack because lr doubles as the 0xff byte mask.
    1058 _armv4_AES_decrypt:
    1059 	str	lr,[sp,#-4]!		@ push lr
    1060 	ldmia	$key!,{$t1-$i1}		@ first round key, four words; key advances by 16
    1061 	eor	$s0,$s0,$t1
    1062 	ldr	$rounds,[$key,#240-16]	@ round count: byte offset 240 from the original key pointer
    1063 	eor	$s1,$s1,$t2
    1064 	eor	$s2,$s2,$t3
    1065 	eor	$s3,$s3,$i1
    1066 	sub	$rounds,$rounds,#1	@ the last round is done separately after the loop
    1067 	mov	lr,#255			@ byte mask for index extraction
    1068 
    1069 	and	$i1,lr,$s0,lsr#16	@ pre-extract the four bytes of s0 for the first iteration
    1070 	and	$i2,lr,$s0,lsr#8
    1071 	and	$i3,lr,$s0
    1072 	mov	$s0,$s0,lsr#24
    1073 .Ldec_loop:				@ one round per pass; a single word table plus rotates serves as Td0-Td3
    1074 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
    1075 	and	$i1,lr,$s1		@ i0
    1076 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
    1077 	and	$i2,lr,$s1,lsr#16
    1078 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
    1079 	and	$i3,lr,$s1,lsr#8
    1080 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
    1081 	mov	$s1,$s1,lsr#24
    1082 
    1083 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
    1084 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
    1085 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
    1086 	eor	$s0,$s0,$i1,ror#24
    1087 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
    1088 	and	$i1,lr,$s2,lsr#8	@ i0
    1089 	eor	$t2,$i2,$t2,ror#8
    1090 	and	$i2,lr,$s2		@ i1
    1091 	eor	$t3,$i3,$t3,ror#8
    1092 	and	$i3,lr,$s2,lsr#16
    1093 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
    1094 	eor	$s1,$s1,$t1,ror#8
    1095 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
    1096 	mov	$s2,$s2,lsr#24
    1097 
    1098 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
    1099 	eor	$s0,$s0,$i1,ror#16
    1100 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
    1101 	and	$i1,lr,$s3,lsr#16	@ i0
    1102 	eor	$s1,$s1,$i2,ror#24
    1103 	and	$i2,lr,$s3,lsr#8	@ i1
    1104 	eor	$t3,$i3,$t3,ror#8
    1105 	and	$i3,lr,$s3		@ i2
    1106 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
    1107 	eor	$s2,$s2,$t2,ror#8
    1108 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
    1109 	mov	$s3,$s3,lsr#24
    1110 
    1111 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
    1112 	eor	$s0,$s0,$i1,ror#8
    1113 	ldr	$i1,[$key],#16		@ round key word 0; key post-incremented by 16
    1114 	eor	$s1,$s1,$i2,ror#16
    1115 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
    1116 	eor	$s2,$s2,$i3,ror#24
    1117 
    1118 	ldr	$t1,[$key,#-12]		@ round key words 1-3, addressed back from the advanced pointer
    1119 	eor	$s0,$s0,$i1
    1120 	ldr	$t2,[$key,#-8]
    1121 	eor	$s3,$s3,$t3,ror#8
    1122 	ldr	$t3,[$key,#-4]
    1123 	and	$i1,lr,$s0,lsr#16	@ byte indices of the new s0, ready for the next pass or the final round
    1124 	eor	$s1,$s1,$t1
    1125 	and	$i2,lr,$s0,lsr#8
    1126 	eor	$s2,$s2,$t2
    1127 	and	$i3,lr,$s0
    1128 	eor	$s3,$s3,$t3
    1129 	mov	$s0,$s0,lsr#24
    1130 
    1131 	subs	$rounds,$rounds,#1
    1132 	bne	.Ldec_loop
    1133 
    1134 	add	$tbl,$tbl,#1024		@ step over the 1K word table to the Td4 byte table
    1135 
    1136 	ldr	$t2,[$tbl,#0]		@ prefetch Td4
    1137 	ldr	$t3,[$tbl,#32]		@ one load every 32 bytes across the 256-byte table;
    1138 	ldr	$t1,[$tbl,#64]		@ the loaded values are overwritten below without being used
    1139 	ldr	$t2,[$tbl,#96]
    1140 	ldr	$t3,[$tbl,#128]
    1141 	ldr	$t1,[$tbl,#160]
    1142 	ldr	$t2,[$tbl,#192]
    1143 	ldr	$t3,[$tbl,#224]
    1144 
    1145 	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
    1146 	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
    1147 	and	$i1,lr,$s1		@ i0
    1148 	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
    1149 	and	$i2,lr,$s1,lsr#16
    1150 	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
    1151 	and	$i3,lr,$s1,lsr#8
    1152 
    1153 	add	$s1,$tbl,$s1,lsr#24
    1154 	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
    1155 	ldrb	$s1,[$s1]		@ Td4[s1>>24]
    1156 	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
    1157 	eor	$s0,$i1,$s0,lsl#24
    1158 	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
    1159 	eor	$s1,$t1,$s1,lsl#8
    1160 	and	$i1,lr,$s2,lsr#8	@ i0
    1161 	eor	$t2,$t2,$i2,lsl#8
    1162 	and	$i2,lr,$s2		@ i1
    1163 	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
    1164 	eor	$t3,$t3,$i3,lsl#8
    1165 	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
    1166 	and	$i3,lr,$s2,lsr#16
    1167 
    1168 	add	$s2,$tbl,$s2,lsr#24
    1169 	ldrb	$s2,[$s2]		@ Td4[s2>>24]
    1170 	eor	$s0,$s0,$i1,lsl#8
    1171 	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
    1172 	eor	$s1,$i2,$s1,lsl#16
    1173 	and	$i1,lr,$s3,lsr#16	@ i0
    1174 	eor	$s2,$t2,$s2,lsl#16
    1175 	and	$i2,lr,$s3,lsr#8	@ i1
    1176 	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
    1177 	eor	$t3,$t3,$i3,lsl#16
    1178 	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
    1179 	and	$i3,lr,$s3		@ i2
    1180 
    1181 	add	$s3,$tbl,$s3,lsr#24
    1182 	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
    1183 	ldrb	$s3,[$s3]		@ Td4[s3>>24]
    1184 	eor	$s0,$s0,$i1,lsl#16
    1185 	ldr	$i1,[$key,#0]		@ last round key, four words, key not advanced
    1186 	eor	$s1,$s1,$i2,lsl#8
    1187 	ldr	$t1,[$key,#4]
    1188 	eor	$s2,$i3,$s2,lsl#8
    1189 	ldr	$t2,[$key,#8]
    1190 	eor	$s3,$t3,$s3,lsl#24
    1191 	ldr	$t3,[$key,#12]
    1192 
    1193 	eor	$s0,$s0,$i1
    1194 	eor	$s1,$s1,$t1
    1195 	eor	$s2,$s2,$t2
    1196 	eor	$s3,$s3,$t3
    1197 
    1198 	sub	$tbl,$tbl,#1024		@ undo the Td4 offset so tbl points at AES_Td again
    1199 	ldr	pc,[sp],#4		@ pop and return
    1200 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1201 .asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   1202 .align	2
   1203 ___
   1204 
   1205 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   1206 $code =~ s/\bret\b/bx\tlr/gm;
   1207 
   1208 open SELF,$0;
   1209 while(<SELF>) {
   1210 	next if (/^#!/);
   1211 	last if (!s/^#/@/ and !/^$/);
   1212 	print;
   1213 }
   1214 close SELF;
   1215 
   1216 print $code;
   1217 close STDOUT;	# enforce flush
   1218