Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for ARMv4
     11 
     12 # January 2007.
     13 #
     14 # Code uses a single 1K S-box and is >2 times faster than code generated
     15 # by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
     16 # allows merging a logical or arithmetic operation with a shift or rotate
     17 # in one instruction and emitting the combined result every cycle. The
     18 # module is endian-neutral. The performance is ~42 cycles/byte for a
     19 # 128-bit key [on a single-issue Xscale PXA250 core].
     20 
     21 # May 2007.
     22 #
     23 # AES_set_[en|de]crypt_key is added.
     24 
     25 # July 2010.
     26 #
     27 # Rescheduling for dual-issue pipeline resulted in 12% improvement on
     28 # Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
     29 
     30 # February 2011.
     31 #
     32 # Profiler-assisted and platform-specific optimization resulted in 16%
     33 # improvement on Cortex A8 core and ~21.5 cycles per byte.
     34 
     35 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like "name.ext"; that one is the output file
     36 if (defined($output) && $output ne "") { open(STDOUT,">",$output) or die "can't open $output: $!"; }	# checked 3-arg open; with no file name the inherited STDOUT is kept
     37 
     38 # Symbolic names for the ARM registers used by the assembly template.
     39 #
     40 # s0-s3: the four 32-bit words of the AES block being processed.
     41 ($s0,$s1,$s2,$s3)=map("r$_",(0..3));
     42 # t1-t3: scratch registers.
     43 ($t1,$t2,$t3)=map("r$_",(4..6));
     44 # i1-i3: byte indices into the lookup table inside the round code;
     45 # other routines borrow them as ordinary temporaries.
     46 ($i1,$i2,$i3)=map("r$_",(7..9));
     47 
     48 # tbl:    base address of the lookup table
     49 # key:    pointer into the key schedule
     50 # rounds: round counter; entry points also use it for the in/out pointer
     51 ($tbl,$key,$rounds)=map("r$_",(10..12));
     52 # $code holds the generated ARM assembly text (including C-preprocessor directives); the unquoted heredoc interpolates the $s0..$rounds register names assigned above.
     53 $code=<<___;
     54 #if defined(__arm__)
     55 #ifndef __KERNEL__
     56 # include "arm_arch.h"
     57 #else
     58 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
     59 #endif
     60 
     61 .text
     62 #if __ARM_ARCH__<7
     63 .code	32
     64 #else
     65 .syntax	unified
     66 # ifdef __thumb2__
     67 .thumb
     68 # else
     69 .code	32
     70 # endif
     71 #endif
     72 
     73 .type	AES_Te,%object
     74 .align	5
     75 AES_Te:
     76 .word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     77 .word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     78 .word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     79 .word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     80 .word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     81 .word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     82 .word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     83 .word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     84 .word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     85 .word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     86 .word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     87 .word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     88 .word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     89 .word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     90 .word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     91 .word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     92 .word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     93 .word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     94 .word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     95 .word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     96 .word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     97 .word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     98 .word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     99 .word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
    100 .word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
    101 .word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
    102 .word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
    103 .word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
    104 .word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
    105 .word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
    106 .word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
    107 .word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
    108 .word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
    109 .word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
    110 .word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
    111 .word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
    112 .word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
    113 .word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
    114 .word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
    115 .word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
    116 .word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
    117 .word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
    118 .word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
    119 .word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
    120 .word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
    121 .word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
    122 .word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
    123 .word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
    124 .word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
    125 .word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
    126 .word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
    127 .word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
    128 .word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    129 .word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    130 .word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    131 .word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    132 .word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    133 .word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    134 .word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    135 .word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    136 .word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    137 .word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    138 .word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    139 .word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    140 @ Te4[256]
    141 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    142 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    143 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    144 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    145 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    146 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    147 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    148 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    149 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    150 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    151 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    152 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    153 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    154 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    155 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    156 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    157 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    158 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    159 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    160 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    161 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    162 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    163 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    164 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    165 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    166 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    167 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    168 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    169 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    170 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    171 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    172 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    173 @ rcon[]
    174 .word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    175 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    176 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    177 .size	AES_Te,.-AES_Te
    178 
    179 @ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
    180 @ 		       const AES_KEY *key) {
    181 .global asm_AES_encrypt
    182 .hidden asm_AES_encrypt
    183 .type   asm_AES_encrypt,%function
    184 .align	5
    185 asm_AES_encrypt:
    186 #if __ARM_ARCH__<7
    187 	sub	r3,pc,#8		@ asm_AES_encrypt
    188 #else
    189 	adr	r3,asm_AES_encrypt
    190 #endif
    191 	stmdb   sp!,{r1,r4-r12,lr}
    192 	mov	$rounds,r0		@ inp
    193 	mov	$key,r2
    194 	sub	$tbl,r3,#asm_AES_encrypt-AES_Te	@ Te
    195 #if __ARM_ARCH__<7
    196 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    197 	ldrb	$t1,[$rounds,#2]	@ manner...
    198 	ldrb	$t2,[$rounds,#1]
    199 	ldrb	$t3,[$rounds,#0]
    200 	orr	$s0,$s0,$t1,lsl#8
    201 	ldrb	$s1,[$rounds,#7]
    202 	orr	$s0,$s0,$t2,lsl#16
    203 	ldrb	$t1,[$rounds,#6]
    204 	orr	$s0,$s0,$t3,lsl#24
    205 	ldrb	$t2,[$rounds,#5]
    206 	ldrb	$t3,[$rounds,#4]
    207 	orr	$s1,$s1,$t1,lsl#8
    208 	ldrb	$s2,[$rounds,#11]
    209 	orr	$s1,$s1,$t2,lsl#16
    210 	ldrb	$t1,[$rounds,#10]
    211 	orr	$s1,$s1,$t3,lsl#24
    212 	ldrb	$t2,[$rounds,#9]
    213 	ldrb	$t3,[$rounds,#8]
    214 	orr	$s2,$s2,$t1,lsl#8
    215 	ldrb	$s3,[$rounds,#15]
    216 	orr	$s2,$s2,$t2,lsl#16
    217 	ldrb	$t1,[$rounds,#14]
    218 	orr	$s2,$s2,$t3,lsl#24
    219 	ldrb	$t2,[$rounds,#13]
    220 	ldrb	$t3,[$rounds,#12]
    221 	orr	$s3,$s3,$t1,lsl#8
    222 	orr	$s3,$s3,$t2,lsl#16
    223 	orr	$s3,$s3,$t3,lsl#24
    224 #else
    225 	ldr	$s0,[$rounds,#0]
    226 	ldr	$s1,[$rounds,#4]
    227 	ldr	$s2,[$rounds,#8]
    228 	ldr	$s3,[$rounds,#12]
    229 #ifdef __ARMEL__
    230 	rev	$s0,$s0
    231 	rev	$s1,$s1
    232 	rev	$s2,$s2
    233 	rev	$s3,$s3
    234 #endif
    235 #endif
    236 	bl	_armv4_AES_encrypt
    237 
    238 	ldr	$rounds,[sp],#4		@ pop out
    239 #if __ARM_ARCH__>=7
    240 #ifdef __ARMEL__
    241 	rev	$s0,$s0
    242 	rev	$s1,$s1
    243 	rev	$s2,$s2
    244 	rev	$s3,$s3
    245 #endif
    246 	str	$s0,[$rounds,#0]
    247 	str	$s1,[$rounds,#4]
    248 	str	$s2,[$rounds,#8]
    249 	str	$s3,[$rounds,#12]
    250 #else
    251 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    252 	mov	$t2,$s0,lsr#16		@ manner...
    253 	mov	$t3,$s0,lsr#8
    254 	strb	$t1,[$rounds,#0]
    255 	strb	$t2,[$rounds,#1]
    256 	mov	$t1,$s1,lsr#24
    257 	strb	$t3,[$rounds,#2]
    258 	mov	$t2,$s1,lsr#16
    259 	strb	$s0,[$rounds,#3]
    260 	mov	$t3,$s1,lsr#8
    261 	strb	$t1,[$rounds,#4]
    262 	strb	$t2,[$rounds,#5]
    263 	mov	$t1,$s2,lsr#24
    264 	strb	$t3,[$rounds,#6]
    265 	mov	$t2,$s2,lsr#16
    266 	strb	$s1,[$rounds,#7]
    267 	mov	$t3,$s2,lsr#8
    268 	strb	$t1,[$rounds,#8]
    269 	strb	$t2,[$rounds,#9]
    270 	mov	$t1,$s3,lsr#24
    271 	strb	$t3,[$rounds,#10]
    272 	mov	$t2,$s3,lsr#16
    273 	strb	$s2,[$rounds,#11]
    274 	mov	$t3,$s3,lsr#8
    275 	strb	$t1,[$rounds,#12]
    276 	strb	$t2,[$rounds,#13]
    277 	strb	$t3,[$rounds,#14]
    278 	strb	$s3,[$rounds,#15]
    279 #endif
    280 #if __ARM_ARCH__>=5
    281 	ldmia	sp!,{r4-r12,pc}
    282 #else
    283 	ldmia   sp!,{r4-r12,lr}
    284 	tst	lr,#1
    285 	moveq	pc,lr			@ be binary compatible with V4, yet
    286 	bx	lr			@ interoperable with Thumb ISA:-)
    287 #endif
    288 .size	asm_AES_encrypt,.-asm_AES_encrypt
    289 
    290 .type   _armv4_AES_encrypt,%function
    291 .align	2
    292 _armv4_AES_encrypt:
    293 	str	lr,[sp,#-4]!		@ push lr
    294 	ldmia	$key!,{$t1-$i1}
    295 	eor	$s0,$s0,$t1
    296 	ldr	$rounds,[$key,#240-16]
    297 	eor	$s1,$s1,$t2
    298 	eor	$s2,$s2,$t3
    299 	eor	$s3,$s3,$i1
    300 	sub	$rounds,$rounds,#1
    301 	mov	lr,#255
    302 
    303 	and	$i1,lr,$s0
    304 	and	$i2,lr,$s0,lsr#8
    305 	and	$i3,lr,$s0,lsr#16
    306 	mov	$s0,$s0,lsr#24
    307 .Lenc_loop:
    308 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    309 	and	$i1,lr,$s1,lsr#16	@ i0
    310 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    311 	and	$i2,lr,$s1
    312 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    313 	and	$i3,lr,$s1,lsr#8
    314 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    315 	mov	$s1,$s1,lsr#24
    316 
    317 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    318 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    319 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    320 	eor	$s0,$s0,$i1,ror#8
    321 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    322 	and	$i1,lr,$s2,lsr#8	@ i0
    323 	eor	$t2,$t2,$i2,ror#8
    324 	and	$i2,lr,$s2,lsr#16	@ i1
    325 	eor	$t3,$t3,$i3,ror#8
    326 	and	$i3,lr,$s2
    327 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    328 	eor	$s1,$s1,$t1,ror#24
    329 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    330 	mov	$s2,$s2,lsr#24
    331 
    332 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    333 	eor	$s0,$s0,$i1,ror#16
    334 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    335 	and	$i1,lr,$s3		@ i0
    336 	eor	$s1,$s1,$i2,ror#8
    337 	and	$i2,lr,$s3,lsr#8	@ i1
    338 	eor	$t3,$t3,$i3,ror#16
    339 	and	$i3,lr,$s3,lsr#16	@ i2
    340 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    341 	eor	$s2,$s2,$t2,ror#16
    342 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    343 	mov	$s3,$s3,lsr#24
    344 
    345 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    346 	eor	$s0,$s0,$i1,ror#24
    347 	ldr	$i1,[$key],#16
    348 	eor	$s1,$s1,$i2,ror#16
    349 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    350 	eor	$s2,$s2,$i3,ror#8
    351 	ldr	$t1,[$key,#-12]
    352 	eor	$s3,$s3,$t3,ror#8
    353 
    354 	ldr	$t2,[$key,#-8]
    355 	eor	$s0,$s0,$i1
    356 	ldr	$t3,[$key,#-4]
    357 	and	$i1,lr,$s0
    358 	eor	$s1,$s1,$t1
    359 	and	$i2,lr,$s0,lsr#8
    360 	eor	$s2,$s2,$t2
    361 	and	$i3,lr,$s0,lsr#16
    362 	eor	$s3,$s3,$t3
    363 	mov	$s0,$s0,lsr#24
    364 
    365 	subs	$rounds,$rounds,#1
    366 	bne	.Lenc_loop
    367 
    368 	add	$tbl,$tbl,#2
    369 
    370 	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    371 	and	$i1,lr,$s1,lsr#16	@ i0
    372 	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    373 	and	$i2,lr,$s1
    374 	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    375 	and	$i3,lr,$s1,lsr#8
    376 	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    377 	mov	$s1,$s1,lsr#24
    378 
    379 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    380 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    381 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    382 	eor	$s0,$i1,$s0,lsl#8
    383 	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    384 	and	$i1,lr,$s2,lsr#8	@ i0
    385 	eor	$t2,$i2,$t2,lsl#8
    386 	and	$i2,lr,$s2,lsr#16	@ i1
    387 	eor	$t3,$i3,$t3,lsl#8
    388 	and	$i3,lr,$s2
    389 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    390 	eor	$s1,$t1,$s1,lsl#24
    391 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    392 	mov	$s2,$s2,lsr#24
    393 
    394 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    395 	eor	$s0,$i1,$s0,lsl#8
    396 	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    397 	and	$i1,lr,$s3		@ i0
    398 	eor	$s1,$s1,$i2,lsl#16
    399 	and	$i2,lr,$s3,lsr#8	@ i1
    400 	eor	$t3,$i3,$t3,lsl#8
    401 	and	$i3,lr,$s3,lsr#16	@ i2
    402 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    403 	eor	$s2,$t2,$s2,lsl#24
    404 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    405 	mov	$s3,$s3,lsr#24
    406 
    407 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    408 	eor	$s0,$i1,$s0,lsl#8
    409 	ldr	$i1,[$key,#0]
    410 	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    411 	eor	$s1,$s1,$i2,lsl#8
    412 	ldr	$t1,[$key,#4]
    413 	eor	$s2,$s2,$i3,lsl#16
    414 	ldr	$t2,[$key,#8]
    415 	eor	$s3,$t3,$s3,lsl#24
    416 	ldr	$t3,[$key,#12]
    417 
    418 	eor	$s0,$s0,$i1
    419 	eor	$s1,$s1,$t1
    420 	eor	$s2,$s2,$t2
    421 	eor	$s3,$s3,$t3
    422 
    423 	sub	$tbl,$tbl,#2
    424 	ldr	pc,[sp],#4		@ pop and return
    425 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    426 
    427 .global asm_AES_set_encrypt_key
    428 .hidden asm_AES_set_encrypt_key
    429 .type   asm_AES_set_encrypt_key,%function
    430 .align	5
    431 asm_AES_set_encrypt_key:
    432 _armv4_AES_set_encrypt_key:
    433 #if __ARM_ARCH__<7
    434 	sub	r3,pc,#8		@ asm_AES_set_encrypt_key
    435 #else
    436 	adr	r3,asm_AES_set_encrypt_key
    437 #endif
    438 	teq	r0,#0
    439 #if __ARM_ARCH__>=7
    440 	itt	eq			@ Thumb2 thing, sanity check in ARM
    441 #endif
    442 	moveq	r0,#-1
    443 	beq	.Labrt
    444 	teq	r2,#0
    445 #if __ARM_ARCH__>=7
    446 	itt	eq			@ Thumb2 thing, sanity check in ARM
    447 #endif
    448 	moveq	r0,#-1
    449 	beq	.Labrt
    450 
    451 	teq	r1,#128
    452 	beq	.Lok
    453 	teq	r1,#192
    454 	beq	.Lok
    455 	teq	r1,#256
    456 #if __ARM_ARCH__>=7
    457 	itt	ne			@ Thumb2 thing, sanity check in ARM
    458 #endif
    459 	movne	r0,#-1
    460 	bne	.Labrt
    461 
    462 .Lok:	stmdb   sp!,{r4-r12,lr}
    463 	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
    464 
    465 	mov	$rounds,r0		@ inp
    466 	mov	lr,r1			@ bits
    467 	mov	$key,r2			@ key
    468 
    469 #if __ARM_ARCH__<7
    470 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    471 	ldrb	$t1,[$rounds,#2]	@ manner...
    472 	ldrb	$t2,[$rounds,#1]
    473 	ldrb	$t3,[$rounds,#0]
    474 	orr	$s0,$s0,$t1,lsl#8
    475 	ldrb	$s1,[$rounds,#7]
    476 	orr	$s0,$s0,$t2,lsl#16
    477 	ldrb	$t1,[$rounds,#6]
    478 	orr	$s0,$s0,$t3,lsl#24
    479 	ldrb	$t2,[$rounds,#5]
    480 	ldrb	$t3,[$rounds,#4]
    481 	orr	$s1,$s1,$t1,lsl#8
    482 	ldrb	$s2,[$rounds,#11]
    483 	orr	$s1,$s1,$t2,lsl#16
    484 	ldrb	$t1,[$rounds,#10]
    485 	orr	$s1,$s1,$t3,lsl#24
    486 	ldrb	$t2,[$rounds,#9]
    487 	ldrb	$t3,[$rounds,#8]
    488 	orr	$s2,$s2,$t1,lsl#8
    489 	ldrb	$s3,[$rounds,#15]
    490 	orr	$s2,$s2,$t2,lsl#16
    491 	ldrb	$t1,[$rounds,#14]
    492 	orr	$s2,$s2,$t3,lsl#24
    493 	ldrb	$t2,[$rounds,#13]
    494 	ldrb	$t3,[$rounds,#12]
    495 	orr	$s3,$s3,$t1,lsl#8
    496 	str	$s0,[$key],#16
    497 	orr	$s3,$s3,$t2,lsl#16
    498 	str	$s1,[$key,#-12]
    499 	orr	$s3,$s3,$t3,lsl#24
    500 	str	$s2,[$key,#-8]
    501 	str	$s3,[$key,#-4]
    502 #else
    503 	ldr	$s0,[$rounds,#0]
    504 	ldr	$s1,[$rounds,#4]
    505 	ldr	$s2,[$rounds,#8]
    506 	ldr	$s3,[$rounds,#12]
    507 #ifdef __ARMEL__
    508 	rev	$s0,$s0
    509 	rev	$s1,$s1
    510 	rev	$s2,$s2
    511 	rev	$s3,$s3
    512 #endif
    513 	str	$s0,[$key],#16
    514 	str	$s1,[$key,#-12]
    515 	str	$s2,[$key,#-8]
    516 	str	$s3,[$key,#-4]
    517 #endif
    518 
    519 	teq	lr,#128
    520 	bne	.Lnot128
    521 	mov	$rounds,#10
    522 	str	$rounds,[$key,#240-16]
    523 	add	$t3,$tbl,#256			@ rcon
    524 	mov	lr,#255
    525 
    526 .L128_loop:
    527 	and	$t2,lr,$s3,lsr#24
    528 	and	$i1,lr,$s3,lsr#16
    529 	ldrb	$t2,[$tbl,$t2]
    530 	and	$i2,lr,$s3,lsr#8
    531 	ldrb	$i1,[$tbl,$i1]
    532 	and	$i3,lr,$s3
    533 	ldrb	$i2,[$tbl,$i2]
    534 	orr	$t2,$t2,$i1,lsl#24
    535 	ldrb	$i3,[$tbl,$i3]
    536 	orr	$t2,$t2,$i2,lsl#16
    537 	ldr	$t1,[$t3],#4			@ rcon[i++]
    538 	orr	$t2,$t2,$i3,lsl#8
    539 	eor	$t2,$t2,$t1
    540 	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    541 	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    542 	str	$s0,[$key],#16
    543 	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    544 	str	$s1,[$key,#-12]
    545 	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    546 	str	$s2,[$key,#-8]
    547 	subs	$rounds,$rounds,#1
    548 	str	$s3,[$key,#-4]
    549 	bne	.L128_loop
    550 	sub	r2,$key,#176
    551 	b	.Ldone
    552 
    553 .Lnot128:
    554 #if __ARM_ARCH__<7
    555 	ldrb	$i2,[$rounds,#19]
    556 	ldrb	$t1,[$rounds,#18]
    557 	ldrb	$t2,[$rounds,#17]
    558 	ldrb	$t3,[$rounds,#16]
    559 	orr	$i2,$i2,$t1,lsl#8
    560 	ldrb	$i3,[$rounds,#23]
    561 	orr	$i2,$i2,$t2,lsl#16
    562 	ldrb	$t1,[$rounds,#22]
    563 	orr	$i2,$i2,$t3,lsl#24
    564 	ldrb	$t2,[$rounds,#21]
    565 	ldrb	$t3,[$rounds,#20]
    566 	orr	$i3,$i3,$t1,lsl#8
    567 	orr	$i3,$i3,$t2,lsl#16
    568 	str	$i2,[$key],#8
    569 	orr	$i3,$i3,$t3,lsl#24
    570 	str	$i3,[$key,#-4]
    571 #else
    572 	ldr	$i2,[$rounds,#16]
    573 	ldr	$i3,[$rounds,#20]
    574 #ifdef __ARMEL__
    575 	rev	$i2,$i2
    576 	rev	$i3,$i3
    577 #endif
    578 	str	$i2,[$key],#8
    579 	str	$i3,[$key,#-4]
    580 #endif
    581 
    582 	teq	lr,#192
    583 	bne	.Lnot192
    584 	mov	$rounds,#12
    585 	str	$rounds,[$key,#240-24]
    586 	add	$t3,$tbl,#256			@ rcon
    587 	mov	lr,#255
    588 	mov	$rounds,#8
    589 
    590 .L192_loop:
    591 	and	$t2,lr,$i3,lsr#24
    592 	and	$i1,lr,$i3,lsr#16
    593 	ldrb	$t2,[$tbl,$t2]
    594 	and	$i2,lr,$i3,lsr#8
    595 	ldrb	$i1,[$tbl,$i1]
    596 	and	$i3,lr,$i3
    597 	ldrb	$i2,[$tbl,$i2]
    598 	orr	$t2,$t2,$i1,lsl#24
    599 	ldrb	$i3,[$tbl,$i3]
    600 	orr	$t2,$t2,$i2,lsl#16
    601 	ldr	$t1,[$t3],#4			@ rcon[i++]
    602 	orr	$t2,$t2,$i3,lsl#8
    603 	eor	$i3,$t2,$t1
    604 	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    605 	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    606 	str	$s0,[$key],#24
    607 	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    608 	str	$s1,[$key,#-20]
    609 	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    610 	str	$s2,[$key,#-16]
    611 	subs	$rounds,$rounds,#1
    612 	str	$s3,[$key,#-12]
    613 #if __ARM_ARCH__>=7
    614 	itt	eq				@ Thumb2 thing, sanity check in ARM
    615 #endif
    616 	subeq	r2,$key,#216
    617 	beq	.Ldone
    618 
    619 	ldr	$i1,[$key,#-32]
    620 	ldr	$i2,[$key,#-28]
    621 	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    622 	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    623 	str	$i1,[$key,#-8]
    624 	str	$i3,[$key,#-4]
    625 	b	.L192_loop
    626 
    627 .Lnot192:
    628 #if __ARM_ARCH__<7
    629 	ldrb	$i2,[$rounds,#27]
    630 	ldrb	$t1,[$rounds,#26]
    631 	ldrb	$t2,[$rounds,#25]
    632 	ldrb	$t3,[$rounds,#24]
    633 	orr	$i2,$i2,$t1,lsl#8
    634 	ldrb	$i3,[$rounds,#31]
    635 	orr	$i2,$i2,$t2,lsl#16
    636 	ldrb	$t1,[$rounds,#30]
    637 	orr	$i2,$i2,$t3,lsl#24
    638 	ldrb	$t2,[$rounds,#29]
    639 	ldrb	$t3,[$rounds,#28]
    640 	orr	$i3,$i3,$t1,lsl#8
    641 	orr	$i3,$i3,$t2,lsl#16
    642 	str	$i2,[$key],#8
    643 	orr	$i3,$i3,$t3,lsl#24
    644 	str	$i3,[$key,#-4]
    645 #else
    646 	ldr	$i2,[$rounds,#24]
    647 	ldr	$i3,[$rounds,#28]
    648 #ifdef __ARMEL__
    649 	rev	$i2,$i2
    650 	rev	$i3,$i3
    651 #endif
    652 	str	$i2,[$key],#8
    653 	str	$i3,[$key,#-4]
    654 #endif
    655 
    656 	mov	$rounds,#14
    657 	str	$rounds,[$key,#240-32]
    658 	add	$t3,$tbl,#256			@ rcon
    659 	mov	lr,#255
    660 	mov	$rounds,#7
    661 
    662 .L256_loop:
    663 	and	$t2,lr,$i3,lsr#24
    664 	and	$i1,lr,$i3,lsr#16
    665 	ldrb	$t2,[$tbl,$t2]
    666 	and	$i2,lr,$i3,lsr#8
    667 	ldrb	$i1,[$tbl,$i1]
    668 	and	$i3,lr,$i3
    669 	ldrb	$i2,[$tbl,$i2]
    670 	orr	$t2,$t2,$i1,lsl#24
    671 	ldrb	$i3,[$tbl,$i3]
    672 	orr	$t2,$t2,$i2,lsl#16
    673 	ldr	$t1,[$t3],#4			@ rcon[i++]
    674 	orr	$t2,$t2,$i3,lsl#8
    675 	eor	$i3,$t2,$t1
    676 	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    677 	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    678 	str	$s0,[$key],#32
    679 	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    680 	str	$s1,[$key,#-28]
    681 	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    682 	str	$s2,[$key,#-24]
    683 	subs	$rounds,$rounds,#1
    684 	str	$s3,[$key,#-20]
    685 #if __ARM_ARCH__>=7
    686 	itt	eq				@ Thumb2 thing, sanity check in ARM
    687 #endif
    688 	subeq	r2,$key,#256
    689 	beq	.Ldone
    690 
    691 	and	$t2,lr,$s3
    692 	and	$i1,lr,$s3,lsr#8
    693 	ldrb	$t2,[$tbl,$t2]
    694 	and	$i2,lr,$s3,lsr#16
    695 	ldrb	$i1,[$tbl,$i1]
    696 	and	$i3,lr,$s3,lsr#24
    697 	ldrb	$i2,[$tbl,$i2]
    698 	orr	$t2,$t2,$i1,lsl#8
    699 	ldrb	$i3,[$tbl,$i3]
    700 	orr	$t2,$t2,$i2,lsl#16
    701 	ldr	$t1,[$key,#-48]
    702 	orr	$t2,$t2,$i3,lsl#24
    703 
    704 	ldr	$i1,[$key,#-44]
    705 	ldr	$i2,[$key,#-40]
    706 	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    707 	ldr	$i3,[$key,#-36]
    708 	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    709 	str	$t1,[$key,#-16]
    710 	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    711 	str	$i1,[$key,#-12]
    712 	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    713 	str	$i2,[$key,#-8]
    714 	str	$i3,[$key,#-4]
    715 	b	.L256_loop
    716 
    717 .align	2
    718 .Ldone:	mov	r0,#0
    719 	ldmia   sp!,{r4-r12,lr}
    720 .Labrt:
    721 #if defined(__thumb2__) && __ARM_ARCH__>=7
    722 	.short	0x4770			@ bx lr in Thumb2 encoding
    723 #else
    724 	tst	lr,#1
    725 	moveq	pc,lr			@ be binary compatible with V4, yet
    726 	bx	lr			@ interoperable with Thumb ISA:-)
    727 #endif
    728 .size	asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
    729 
    730 .global asm_AES_set_decrypt_key
    731 .hidden asm_AES_set_decrypt_key
    732 .type   asm_AES_set_decrypt_key,%function
    733 .align	5
    734 asm_AES_set_decrypt_key:
    735 	str	lr,[sp,#-4]!            @ push lr
    736 	bl	_armv4_AES_set_encrypt_key
    737 	teq	r0,#0
    738 	ldr	lr,[sp],#4              @ pop lr
    739 	bne	.Labrt
    740 
    741 	mov	r0,r2			@ asm_AES_set_encrypt_key preserves r2,
    742 	mov	r1,r2			@ which is AES_KEY *key
    743 	b	_armv4_AES_set_enc2dec_key
    744 .size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
    745 
    746 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
    747 .global	AES_set_enc2dec_key
    748 .hidden	AES_set_enc2dec_key
    749 .type	AES_set_enc2dec_key,%function
    750 .align	5
    751 AES_set_enc2dec_key:
    752 _armv4_AES_set_enc2dec_key:
    753 	stmdb   sp!,{r4-r12,lr}
    754 
    755 	ldr	$rounds,[r0,#240]
    756 	mov	$i1,r0			@ input
    757 	add	$i2,r0,$rounds,lsl#4
    758 	mov	$key,r1			@ ouput
    759 	add	$tbl,r1,$rounds,lsl#4
    760 	str	$rounds,[r1,#240]
    761 
    762 .Linv:	ldr	$s0,[$i1],#16
    763 	ldr	$s1,[$i1,#-12]
    764 	ldr	$s2,[$i1,#-8]
    765 	ldr	$s3,[$i1,#-4]
    766 	ldr	$t1,[$i2],#-16
    767 	ldr	$t2,[$i2,#16+4]
    768 	ldr	$t3,[$i2,#16+8]
    769 	ldr	$i3,[$i2,#16+12]
    770 	str	$s0,[$tbl],#-16
    771 	str	$s1,[$tbl,#16+4]
    772 	str	$s2,[$tbl,#16+8]
    773 	str	$s3,[$tbl,#16+12]
    774 	str	$t1,[$key],#16
    775 	str	$t2,[$key,#-12]
    776 	str	$t3,[$key,#-8]
    777 	str	$i3,[$key,#-4]
    778 	teq	$i1,$i2
    779 	bne	.Linv
    780 
    781 	ldr	$s0,[$i1]
    782 	ldr	$s1,[$i1,#4]
    783 	ldr	$s2,[$i1,#8]
    784 	ldr	$s3,[$i1,#12]
    785 	str	$s0,[$key]
    786 	str	$s1,[$key,#4]
    787 	str	$s2,[$key,#8]
    788 	str	$s3,[$key,#12]
    789 	sub	$key,$key,$rounds,lsl#3
    790 ___
    791 # The .Lmix loop keeps its three byte-lane constants (0x80808080,
    792 # 0x1b1b1b1b, 0x7f7f7f7f) in the registers named by $i1-$i3.
    793 ($mask80,$mask1b,$mask7f)=($i1,$i2,$i3);
    794 $code.=<<___;
    795 	ldr	$s0,[$key,#16]!		@ prefetch tp1
    796 	mov	$mask80,#0x80
    797 	mov	$mask1b,#0x1b
    798 	orr	$mask80,$mask80,#0x8000
    799 	orr	$mask1b,$mask1b,#0x1b00
    800 	orr	$mask80,$mask80,$mask80,lsl#16
    801 	orr	$mask1b,$mask1b,$mask1b,lsl#16
    802 	sub	$rounds,$rounds,#1
    803 	mvn	$mask7f,$mask80
    804 	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
    805 
    806 .Lmix:	and	$t1,$s0,$mask80
    807 	and	$s1,$s0,$mask7f
    808 	sub	$t1,$t1,$t1,lsr#7
    809 	and	$t1,$t1,$mask1b
    810 	eor	$s1,$t1,$s1,lsl#1	@ tp2
    811 
    812 	and	$t1,$s1,$mask80
    813 	and	$s2,$s1,$mask7f
    814 	sub	$t1,$t1,$t1,lsr#7
    815 	and	$t1,$t1,$mask1b
    816 	eor	$s2,$t1,$s2,lsl#1	@ tp4
    817 
    818 	and	$t1,$s2,$mask80
    819 	and	$s3,$s2,$mask7f
    820 	sub	$t1,$t1,$t1,lsr#7
    821 	and	$t1,$t1,$mask1b
    822 	eor	$s3,$t1,$s3,lsl#1	@ tp8
    823 
    824 	eor	$t1,$s1,$s2
    825 	eor	$t2,$s0,$s3		@ tp9
    826 	eor	$t1,$t1,$s3		@ tpe
    827 	eor	$t1,$t1,$s1,ror#24
    828 	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    829 	eor	$t1,$t1,$s2,ror#16
    830 	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    831 	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    832 
    833 	ldr	$s0,[$key,#4]		@ prefetch tp1
    834 	str	$t1,[$key],#4
    835 	subs	$rounds,$rounds,#1
    836 	bne	.Lmix
    837 
    838 	mov	r0,#0
    839 #if __ARM_ARCH__>=5
    840 	ldmia	sp!,{r4-r12,pc}
    841 #else
    842 	ldmia   sp!,{r4-r12,lr}
    843 	tst	lr,#1
    844 	moveq	pc,lr			@ be binary compatible with V4, yet
    845 	bx	lr			@ interoperable with Thumb ISA:-)
    846 #endif
    847 .size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
    848 
    849 .type	AES_Td,%object
    850 .align	5
    851 AES_Td:
    852 .word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    853 .word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    854 .word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    855 .word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    856 .word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    857 .word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    858 .word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    859 .word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    860 .word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    861 .word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    862 .word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    863 .word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    864 .word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    865 .word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    866 .word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    867 .word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    868 .word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    869 .word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    870 .word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    871 .word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    872 .word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    873 .word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    874 .word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    875 .word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    876 .word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    877 .word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    878 .word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    879 .word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    880 .word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    881 .word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    882 .word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    883 .word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    884 .word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    885 .word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    886 .word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    887 .word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    888 .word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    889 .word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    890 .word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    891 .word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    892 .word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    893 .word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    894 .word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    895 .word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    896 .word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    897 .word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    898 .word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    899 .word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    900 .word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    901 .word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    902 .word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    903 .word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    904 .word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    905 .word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    906 .word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    907 .word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    908 .word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    909 .word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    910 .word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    911 .word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    912 .word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    913 .word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    914 .word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    915 .word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    916 @ Td4[256]
    917 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    918 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    919 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    920 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    921 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    922 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    923 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    924 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    925 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    926 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    927 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    928 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    929 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    930 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    931 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    932 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    933 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    934 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    935 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    936 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    937 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    938 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    939 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    940 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    941 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    942 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    943 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    944 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    945 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    946 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    947 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    948 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    949 .size	AES_Td,.-AES_Td
    950 
     951 @ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
     952 @ 		       const AES_KEY *key) {
         @
         @ Public entry point. Decrypts the 16 bytes at in and writes the
         @ 16-byte result to out, using the key schedule at key. On entry
         @ r0=in, r1=out, r2=key. The state is held in r0-r3 as four words
         @ with the first byte of each group of four in the most significant
         @ position, whatever the memory byte order; the rounds themselves
         @ are done by _armv4_AES_decrypt. r4-r12 and lr are saved on entry
         @ and restored on return.
     953 .global asm_AES_decrypt
     954 .hidden asm_AES_decrypt
     955 .type   asm_AES_decrypt,%function
     956 .align	5
     957 asm_AES_decrypt:
     958 #if __ARM_ARCH__<7
     959 	sub	r3,pc,#8		@ asm_AES_decrypt (pc reads as this instruction + 8)
     960 #else
     961 	adr	r3,asm_AES_decrypt
     962 #endif
     963 	stmdb   sp!,{r1,r4-r12,lr}	@ out (r1) is stored at the new top of stack
     964 	mov	$rounds,r0		@ inp
     965 	mov	$key,r2			@ key schedule
     966 	sub	$tbl,r3,#asm_AES_decrypt-AES_Td		@ Td
     967 #if __ARM_ARCH__<7
     968 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
     969 	ldrb	$t1,[$rounds,#2]	@ manner...
     970 	ldrb	$t2,[$rounds,#1]
     971 	ldrb	$t3,[$rounds,#0]
     972 	orr	$s0,$s0,$t1,lsl#8
     973 	ldrb	$s1,[$rounds,#7]
     974 	orr	$s0,$s0,$t2,lsl#16
     975 	ldrb	$t1,[$rounds,#6]
     976 	orr	$s0,$s0,$t3,lsl#24
     977 	ldrb	$t2,[$rounds,#5]
     978 	ldrb	$t3,[$rounds,#4]
     979 	orr	$s1,$s1,$t1,lsl#8
     980 	ldrb	$s2,[$rounds,#11]
     981 	orr	$s1,$s1,$t2,lsl#16
     982 	ldrb	$t1,[$rounds,#10]
     983 	orr	$s1,$s1,$t3,lsl#24
     984 	ldrb	$t2,[$rounds,#9]
     985 	ldrb	$t3,[$rounds,#8]
     986 	orr	$s2,$s2,$t1,lsl#8
     987 	ldrb	$s3,[$rounds,#15]
     988 	orr	$s2,$s2,$t2,lsl#16
     989 	ldrb	$t1,[$rounds,#14]
     990 	orr	$s2,$s2,$t3,lsl#24
     991 	ldrb	$t2,[$rounds,#13]
     992 	ldrb	$t3,[$rounds,#12]
     993 	orr	$s3,$s3,$t1,lsl#8
     994 	orr	$s3,$s3,$t2,lsl#16
     995 	orr	$s3,$s3,$t3,lsl#24
     996 #else
     997 	ldr	$s0,[$rounds,#0]	@ word loads, byte-swapped below on
     998 	ldr	$s1,[$rounds,#4]	@ little-endian builds
     999 	ldr	$s2,[$rounds,#8]
    1000 	ldr	$s3,[$rounds,#12]
    1001 #ifdef __ARMEL__
    1002 	rev	$s0,$s0
    1003 	rev	$s1,$s1
    1004 	rev	$s2,$s2
    1005 	rev	$s3,$s3
    1006 #endif
    1007 #endif
    1008 	bl	_armv4_AES_decrypt	@ transforms the state in r0-r3 in place
    1009 
    1010 	ldr	$rounds,[sp],#4		@ pop out
    1011 #if __ARM_ARCH__>=7
    1012 #ifdef __ARMEL__
    1013 	rev	$s0,$s0
    1014 	rev	$s1,$s1
    1015 	rev	$s2,$s2
    1016 	rev	$s3,$s3
    1017 #endif
    1018 	str	$s0,[$rounds,#0]
    1019 	str	$s1,[$rounds,#4]
    1020 	str	$s2,[$rounds,#8]
    1021 	str	$s3,[$rounds,#12]
    1022 #else
    1023 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    1024 	mov	$t2,$s0,lsr#16		@ manner...
    1025 	mov	$t3,$s0,lsr#8
    1026 	strb	$t1,[$rounds,#0]
    1027 	strb	$t2,[$rounds,#1]
    1028 	mov	$t1,$s1,lsr#24
    1029 	strb	$t3,[$rounds,#2]
    1030 	mov	$t2,$s1,lsr#16
    1031 	strb	$s0,[$rounds,#3]
    1032 	mov	$t3,$s1,lsr#8
    1033 	strb	$t1,[$rounds,#4]
    1034 	strb	$t2,[$rounds,#5]
    1035 	mov	$t1,$s2,lsr#24
    1036 	strb	$t3,[$rounds,#6]
    1037 	mov	$t2,$s2,lsr#16
    1038 	strb	$s1,[$rounds,#7]
    1039 	mov	$t3,$s2,lsr#8
    1040 	strb	$t1,[$rounds,#8]
    1041 	strb	$t2,[$rounds,#9]
    1042 	mov	$t1,$s3,lsr#24
    1043 	strb	$t3,[$rounds,#10]
    1044 	mov	$t2,$s3,lsr#16
    1045 	strb	$s2,[$rounds,#11]
    1046 	mov	$t3,$s3,lsr#8
    1047 	strb	$t1,[$rounds,#12]
    1048 	strb	$t2,[$rounds,#13]
    1049 	strb	$t3,[$rounds,#14]
    1050 	strb	$s3,[$rounds,#15]
    1051 #endif
    1052 #if __ARM_ARCH__>=5
    1053 	ldmia	sp!,{r4-r12,pc}		@ restore registers and return
    1054 #else
    1055 	ldmia   sp!,{r4-r12,lr}
    1056 	tst	lr,#1			@ bit 0 set in the return address: Thumb caller
    1057 	moveq	pc,lr			@ be binary compatible with V4, yet
    1058 	bx	lr			@ interoperable with Thumb ISA:-)
    1059 #endif
    1060 .size	asm_AES_decrypt,.-asm_AES_decrypt
   1061 
    1062 .type   _armv4_AES_decrypt,%function
    1063 .align	2
         @
         @ Internal round function, reached with bl (see asm_AES_decrypt).
         @ In:  r0-r3 = state words, r11 = key schedule, r10 = AES_Td.
         @ Out: r0-r3 = transformed state; r10 again points at AES_Td.
         @ r4-r9 are scratch, r12 counts rounds and lr holds the byte mask
         @ 255, so the return address is kept on the stack instead. r11 is
         @ left pointing at the last round key.
         @
         @ All four tables named in the comments below (Td0..Td3) are the
         @ same word table; the ror amounts on the eor instructions supply
         @ the per-table rotation.
         @
    1064 _armv4_AES_decrypt:
    1065 	str	lr,[sp,#-4]!		@ push lr
    1066 	ldmia	$key!,{$t1-$i1}		@ first round key (4 words), key advances by 16
    1067 	eor	$s0,$s0,$t1
    1068 	ldr	$rounds,[$key,#240-16]	@ round count, 240 bytes from the start of the key
    1069 	eor	$s1,$s1,$t2
    1070 	eor	$s2,$s2,$t3
    1071 	eor	$s3,$s3,$i1
    1072 	sub	$rounds,$rounds,#1	@ the last round is done after the loop
    1073 	mov	lr,#255			@ byte mask
    1074 
    1075 	and	$i1,lr,$s0,lsr#16
    1076 	and	$i2,lr,$s0,lsr#8
    1077 	and	$i3,lr,$s0
    1078 	mov	$s0,$s0,lsr#24
    1079 .Ldec_loop:				@ one table-lookup round per iteration
    1080 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
    1081 	and	$i1,lr,$s1		@ i0
    1082 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
    1083 	and	$i2,lr,$s1,lsr#16
    1084 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
    1085 	and	$i3,lr,$s1,lsr#8
    1086 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
    1087 	mov	$s1,$s1,lsr#24
    1088 
    1089 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
    1090 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
    1091 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
    1092 	eor	$s0,$s0,$i1,ror#24
    1093 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
    1094 	and	$i1,lr,$s2,lsr#8	@ i0
    1095 	eor	$t2,$i2,$t2,ror#8
    1096 	and	$i2,lr,$s2		@ i1
    1097 	eor	$t3,$i3,$t3,ror#8
    1098 	and	$i3,lr,$s2,lsr#16
    1099 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
    1100 	eor	$s1,$s1,$t1,ror#8
    1101 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
    1102 	mov	$s2,$s2,lsr#24
    1103 
    1104 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
    1105 	eor	$s0,$s0,$i1,ror#16
    1106 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
    1107 	and	$i1,lr,$s3,lsr#16	@ i0
    1108 	eor	$s1,$s1,$i2,ror#24
    1109 	and	$i2,lr,$s3,lsr#8	@ i1
    1110 	eor	$t3,$i3,$t3,ror#8
    1111 	and	$i3,lr,$s3		@ i2
    1112 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
    1113 	eor	$s2,$s2,$t2,ror#8
    1114 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
    1115 	mov	$s3,$s3,lsr#24
    1116 
    1117 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
    1118 	eor	$s0,$s0,$i1,ror#8
    1119 	ldr	$i1,[$key],#16		@ round key word 0, key advances by 16
    1120 	eor	$s1,$s1,$i2,ror#16
    1121 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
    1122 	eor	$s2,$s2,$i3,ror#24
    1123 
    1124 	ldr	$t1,[$key,#-12]		@ round key word 1
    1125 	eor	$s0,$s0,$i1
    1126 	ldr	$t2,[$key,#-8]		@ round key word 2
    1127 	eor	$s3,$s3,$t3,ror#8
    1128 	ldr	$t3,[$key,#-4]		@ round key word 3
    1129 	and	$i1,lr,$s0,lsr#16	@ byte indices of s0 for the next round
    1130 	eor	$s1,$s1,$t1
    1131 	and	$i2,lr,$s0,lsr#8
    1132 	eor	$s2,$s2,$t2
    1133 	and	$i3,lr,$s0
    1134 	eor	$s3,$s3,$t3
    1135 	mov	$s0,$s0,lsr#24
    1136 
    1137 	subs	$rounds,$rounds,#1
    1138 	bne	.Ldec_loop
    1139 
    1140 	add	$tbl,$tbl,#1024		@ Td4 byte table sits 1024 bytes past the word table
    1141 
    1142 	ldr	$t2,[$tbl,#0]		@ prefetch Td4
    1143 	ldr	$t3,[$tbl,#32]
    1144 	ldr	$t1,[$tbl,#64]
    1145 	ldr	$t2,[$tbl,#96]
    1146 	ldr	$t3,[$tbl,#128]
    1147 	ldr	$t1,[$tbl,#160]
    1148 	ldr	$t2,[$tbl,#192]
    1149 	ldr	$t3,[$tbl,#224]		@ one load per 32 bytes; the loaded values are not used
    1150 
         @ Last round: single-byte lookups in Td4, with the four result
         @ bytes of each word put back together by shifts.
    1151 	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
    1152 	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
    1153 	and	$i1,lr,$s1		@ i0
    1154 	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
    1155 	and	$i2,lr,$s1,lsr#16
    1156 	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
    1157 	and	$i3,lr,$s1,lsr#8
    1158 
    1159 	add	$s1,$tbl,$s1,lsr#24
    1160 	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
    1161 	ldrb	$s1,[$s1]		@ Td4[s1>>24]
    1162 	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
    1163 	eor	$s0,$i1,$s0,lsl#24
    1164 	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
    1165 	eor	$s1,$t1,$s1,lsl#8
    1166 	and	$i1,lr,$s2,lsr#8	@ i0
    1167 	eor	$t2,$t2,$i2,lsl#8
    1168 	and	$i2,lr,$s2		@ i1
    1169 	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
    1170 	eor	$t3,$t3,$i3,lsl#8
    1171 	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
    1172 	and	$i3,lr,$s2,lsr#16
    1173 
    1174 	add	$s2,$tbl,$s2,lsr#24
    1175 	ldrb	$s2,[$s2]		@ Td4[s2>>24]
    1176 	eor	$s0,$s0,$i1,lsl#8
    1177 	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
    1178 	eor	$s1,$i2,$s1,lsl#16
    1179 	and	$i1,lr,$s3,lsr#16	@ i0
    1180 	eor	$s2,$t2,$s2,lsl#16
    1181 	and	$i2,lr,$s3,lsr#8	@ i1
    1182 	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
    1183 	eor	$t3,$t3,$i3,lsl#16
    1184 	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
    1185 	and	$i3,lr,$s3		@ i2
    1186 
    1187 	add	$s3,$tbl,$s3,lsr#24
    1188 	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
    1189 	ldrb	$s3,[$s3]		@ Td4[s3>>24]
    1190 	eor	$s0,$s0,$i1,lsl#16
    1191 	ldr	$i1,[$key,#0]		@ last round key, words 0-3
    1192 	eor	$s1,$s1,$i2,lsl#8
    1193 	ldr	$t1,[$key,#4]
    1194 	eor	$s2,$i3,$s2,lsl#8
    1195 	ldr	$t2,[$key,#8]
    1196 	eor	$s3,$t3,$s3,lsl#24
    1197 	ldr	$t3,[$key,#12]
    1198 
    1199 	eor	$s0,$s0,$i1
    1200 	eor	$s1,$s1,$t1
    1201 	eor	$s2,$s2,$t2
    1202 	eor	$s3,$s3,$t3
    1203 
    1204 	sub	$tbl,$tbl,#1024		@ undo the Td4 offset added above
    1205 	ldr	pc,[sp],#4		@ pop and return
    1206 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1207 .asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   1208 .align	2
   1209 
   1210 #endif
   1211 ___
   1212 
   1213 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   1214 
   1215 open SELF,$0;
   1216 while(<SELF>) {
   1217 	next if (/^#!/);
   1218 	last if (!s/^#/@/ and !/^$/);
   1219 	print;
   1220 }
   1221 close SELF;
   1222 
   1223 print $code;
   1224 close STDOUT;	# enforce flush
   1225