Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for ARMv4
     11 
     12 # January 2007.
     13 #
     14 # The code uses a single 1K S-box and is >2 times faster than code
     15 # generated by gcc-3.4.1. This is thanks to a unique feature of the
     16 # ARMv4 ISA, which allows merging a logical or arithmetic operation
     17 # with a shift or rotate in one instruction and emitting the combined
     18 # result every cycle. The module is endian-neutral. The performance is
     19 # ~42 cycles/byte for 128-bit key [on single-issue Xscale PXA250 core].
     20 
     21 # May 2007.
     22 #
     23 # AES_set_[en|de]crypt_key is added.
     24 
     25 # July 2010.
     26 #
     27 # Rescheduling for dual-issue pipeline resulted in 12% improvement on
     28 # Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
     29 
     30 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
     31 open STDOUT,">$output";
     32 
     33 $s0="r0";
     34 $s1="r1";
     35 $s2="r2";
     36 $s3="r3";
     37 $t1="r4";
     38 $t2="r5";
     39 $t3="r6";
     40 $i1="r7";
     41 $i2="r8";
     42 $i3="r9";
     43 
     44 $tbl="r10";
     45 $key="r11";
     46 $rounds="r12";
     47 
     48 $code=<<___;
     49 .text
     50 .code	32
     51 
        @ AES_Te: encryption lookup data. Layout, as addressed by the code
        @ in this file:
        @   +0     256 32-bit words. _armv4_AES_encrypt indexes them with a
        @          4-byte stride and rotates the loaded words (ror#8/16/24)
        @          to obtain what its comments call Te1..Te3; its last round
        @          reads single bytes at offset 2 of each word instead.
        @   +1024  Te4[256], one byte per entry, read by AES_set_encrypt_key.
        @   +1280  rcon[], the round constants read by AES_set_encrypt_key
        @          (10 non-zero words followed by zero padding).
     52 .type	AES_Te,%object
     53 .align	5
     54 AES_Te:
     55 .word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     56 .word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     57 .word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     58 .word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     59 .word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     60 .word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     61 .word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     62 .word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     63 .word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     64 .word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     65 .word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     66 .word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     67 .word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     68 .word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     69 .word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     70 .word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     71 .word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     72 .word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     73 .word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     74 .word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     75 .word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     76 .word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     77 .word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     78 .word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
     79 .word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
     80 .word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
     81 .word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
     82 .word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
     83 .word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
     84 .word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
     85 .word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
     86 .word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
     87 .word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
     88 .word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
     89 .word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
     90 .word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
     91 .word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
     92 .word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
     93 .word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
     94 .word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
     95 .word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
     96 .word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
     97 .word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
     98 .word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
     99 .word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
    100 .word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
    101 .word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
    102 .word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
    103 .word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
    104 .word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
    105 .word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
    106 .word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
    107 .word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    108 .word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    109 .word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    110 .word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    111 .word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    112 .word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    113 .word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    114 .word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    115 .word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    116 .word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    117 .word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    118 .word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    119 @ Te4[256]
    120 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    121 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    122 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    123 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    124 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    125 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    126 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    127 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    128 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    129 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    130 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    131 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    132 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    133 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    134 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    135 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    136 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    137 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    138 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    139 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    140 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    141 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    142 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    143 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    144 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    145 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    146 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    147 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    148 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    149 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    150 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    151 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    152 @ rcon[]
    153 .word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    154 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    155 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    156 .size	AES_Te,.-AES_Te
    157 
    158 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
    159 @ 		 const AES_KEY *key) {
        @
        @ Encrypts one 16-byte block. On entry r0 = in, r1 = out, r2 = key.
        @ The out pointer is pushed together with r4-r12 and lr and popped
        @ again once _armv4_AES_encrypt has produced the result.
    160 .global AES_encrypt
    161 .type   AES_encrypt,%function
    162 .align	5
    163 AES_encrypt:
        @ pc reads as the address of this instruction plus 8, so r3 ends up
        @ holding the address of the AES_encrypt label; the table address is
        @ then formed relative to it, without needing a relocation.
    164 	sub	r3,pc,#8		@ AES_encrypt
    165 	stmdb   sp!,{r1,r4-r12,lr}
    166 	mov	$rounds,r0		@ inp
    167 	mov	$key,r2
    168 	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
    169 
        @ Assemble the four state words one byte at a time, first input
        @ byte into bits 31..24 of each word; the loads are interleaved with
        @ the orr instructions.
    170 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    171 	ldrb	$t1,[$rounds,#2]	@ manner...
    172 	ldrb	$t2,[$rounds,#1]
    173 	ldrb	$t3,[$rounds,#0]
    174 	orr	$s0,$s0,$t1,lsl#8
    175 	ldrb	$s1,[$rounds,#7]
    176 	orr	$s0,$s0,$t2,lsl#16
    177 	ldrb	$t1,[$rounds,#6]
    178 	orr	$s0,$s0,$t3,lsl#24
    179 	ldrb	$t2,[$rounds,#5]
    180 	ldrb	$t3,[$rounds,#4]
    181 	orr	$s1,$s1,$t1,lsl#8
    182 	ldrb	$s2,[$rounds,#11]
    183 	orr	$s1,$s1,$t2,lsl#16
    184 	ldrb	$t1,[$rounds,#10]
    185 	orr	$s1,$s1,$t3,lsl#24
    186 	ldrb	$t2,[$rounds,#9]
    187 	ldrb	$t3,[$rounds,#8]
    188 	orr	$s2,$s2,$t1,lsl#8
    189 	ldrb	$s3,[$rounds,#15]
    190 	orr	$s2,$s2,$t2,lsl#16
    191 	ldrb	$t1,[$rounds,#14]
    192 	orr	$s2,$s2,$t3,lsl#24
    193 	ldrb	$t2,[$rounds,#13]
    194 	ldrb	$t3,[$rounds,#12]
    195 	orr	$s3,$s3,$t1,lsl#8
    196 	orr	$s3,$s3,$t2,lsl#16
    197 	orr	$s3,$s3,$t3,lsl#24
    198 
        @ The core routine takes the state, key pointer and table pointer in
        @ the registers set up above and returns the result in the same four
        @ state registers.
    199 	bl	_armv4_AES_encrypt
    200 
        @ The saved r1 sits at the lowest stack address, so this single-word
        @ pop retrieves the out pointer; the remaining registers are
        @ restored by the ldmia further down.
    201 	ldr	$rounds,[sp],#4		@ pop out
    202 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    203 	mov	$t2,$s0,lsr#16		@ manner...
    204 	mov	$t3,$s0,lsr#8
    205 	strb	$t1,[$rounds,#0]
    206 	strb	$t2,[$rounds,#1]
    207 	mov	$t1,$s1,lsr#24
    208 	strb	$t3,[$rounds,#2]
    209 	mov	$t2,$s1,lsr#16
    210 	strb	$s0,[$rounds,#3]
    211 	mov	$t3,$s1,lsr#8
    212 	strb	$t1,[$rounds,#4]
    213 	strb	$t2,[$rounds,#5]
    214 	mov	$t1,$s2,lsr#24
    215 	strb	$t3,[$rounds,#6]
    216 	mov	$t2,$s2,lsr#16
    217 	strb	$s1,[$rounds,#7]
    218 	mov	$t3,$s2,lsr#8
    219 	strb	$t1,[$rounds,#8]
    220 	strb	$t2,[$rounds,#9]
    221 	mov	$t1,$s3,lsr#24
    222 	strb	$t3,[$rounds,#10]
    223 	mov	$t2,$s3,lsr#16
    224 	strb	$s2,[$rounds,#11]
    225 	mov	$t3,$s3,lsr#8
    226 	strb	$t1,[$rounds,#12]
    227 	strb	$t2,[$rounds,#13]
    228 	strb	$t3,[$rounds,#14]
    229 	strb	$s3,[$rounds,#15]
    230 
        @ Return: a plain move to pc when bit 0 of lr is clear, bx lr
        @ otherwise.
    231 	ldmia   sp!,{r4-r12,lr}
    232 	tst	lr,#1
    233 	moveq	pc,lr			@ be binary compatible with V4, yet
    234 	bx	lr			@ interoperable with Thumb ISA:-)
    235 .size	AES_encrypt,.-AES_encrypt
    236 
        @ _armv4_AES_encrypt: block-encryption core called by AES_encrypt.
        @ In:  the four state words in the s0..s3 registers, the key
        @      schedule pointer in the key register and the table pointer
        @      set to AES_Te.
        @ Out: the encrypted state in s0..s3. The table pointer has its
        @      entry value again; the key pointer has been advanced and is
        @      not restored.
        @ The t1-t3 and i1-i3 registers, the rounds register and lr are
        @ used as scratch (lr is saved on the stack first).
    237 .type   _armv4_AES_encrypt,%function
    238 .align	2
    239 _armv4_AES_encrypt:
    240 	str	lr,[sp,#-4]!		@ push lr
        @ Load the first round key (four words) and XOR it into the state.
        @ The round count lives at byte offset 240 of the key structure;
        @ the key pointer has already moved on by 16 when it is read.
    241 	ldmia	$key!,{$t1-$i1}
    242 	eor	$s0,$s0,$t1
    243 	ldr	$rounds,[$key,#240-16]
    244 	eor	$s1,$s1,$t2
    245 	eor	$s2,$s2,$t3
    246 	eor	$s3,$s3,$i1
        @ The loop below runs rounds-1 times; the last round is handled
        @ separately after it. lr serves as the 0xff byte mask from here on.
    247 	sub	$rounds,$rounds,#1
    248 	mov	lr,#255
    249 
        @ Split s0 into its four byte indices ahead of the first iteration;
        @ each iteration does the same for the next one at its end.
    250 	and	$i1,lr,$s0
    251 	and	$i2,lr,$s0,lsr#8
    252 	and	$i3,lr,$s0,lsr#16
    253 	mov	$s0,$s0,lsr#24
    254 .Lenc_loop:
        @ Every lookup reads the same 256-word table; the rotations applied
        @ in the eor instructions stand in for the separate tables named in
        @ the comments (Te1, Te2, Te3).
    255 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    256 	and	$i1,lr,$s1,lsr#16	@ i0
    257 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    258 	and	$i2,lr,$s1
    259 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    260 	and	$i3,lr,$s1,lsr#8
    261 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    262 	mov	$s1,$s1,lsr#24
    263 
    264 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    265 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    266 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    267 	eor	$s0,$s0,$i1,ror#8
    268 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    269 	and	$i1,lr,$s2,lsr#8	@ i0
    270 	eor	$t2,$t2,$i2,ror#8
    271 	and	$i2,lr,$s2,lsr#16	@ i1
    272 	eor	$t3,$t3,$i3,ror#8
    273 	and	$i3,lr,$s2
    274 	eor	$s1,$s1,$t1,ror#24
    275 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    276 	mov	$s2,$s2,lsr#24
    277 
    278 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    279 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    280 	eor	$s0,$s0,$i1,ror#16
    281 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    282 	and	$i1,lr,$s3		@ i0
    283 	eor	$s1,$s1,$i2,ror#8
    284 	and	$i2,lr,$s3,lsr#8	@ i1
    285 	eor	$t3,$t3,$i3,ror#16
    286 	and	$i3,lr,$s3,lsr#16	@ i2
    287 	eor	$s2,$s2,$t2,ror#16
    288 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    289 	mov	$s3,$s3,lsr#24
    290 
        @ The loads of the next round key (post-incrementing the key
        @ pointer by 16) are interleaved with the last lookups.
    291 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    292 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    293 	eor	$s0,$s0,$i1,ror#24
    294 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    295 	eor	$s1,$s1,$i2,ror#16
    296 	ldr	$i1,[$key],#16
    297 	eor	$s2,$s2,$i3,ror#8
    298 	ldr	$t1,[$key,#-12]
    299 	eor	$s3,$s3,$t3,ror#8
    300 
        @ XOR in the round key and pre-split the new s0 for the next pass
        @ (or for the final round).
    301 	ldr	$t2,[$key,#-8]
    302 	eor	$s0,$s0,$i1
    303 	ldr	$t3,[$key,#-4]
    304 	and	$i1,lr,$s0
    305 	eor	$s1,$s1,$t1
    306 	and	$i2,lr,$s0,lsr#8
    307 	eor	$s2,$s2,$t2
    308 	and	$i3,lr,$s0,lsr#16
    309 	eor	$s3,$s3,$t3
    310 	mov	$s0,$s0,lsr#24
    311 
    312 	subs	$rounds,$rounds,#1
    313 	bne	.Lenc_loop
    314 
        @ Final round. The table pointer is stepped by 2 so that the byte
        @ loads below, still using a 4-byte stride, fetch the byte at
        @ offset 2 of each table word (labelled Te4 in the comments). The
        @ result words are assembled with shifts instead of rotations.
    315 	add	$tbl,$tbl,#2
    316 
    317 	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    318 	and	$i1,lr,$s1,lsr#16	@ i0
    319 	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    320 	and	$i2,lr,$s1
    321 	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    322 	and	$i3,lr,$s1,lsr#8
    323 	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    324 	mov	$s1,$s1,lsr#24
    325 
    326 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    327 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    328 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    329 	eor	$s0,$i1,$s0,lsl#8
    330 	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    331 	and	$i1,lr,$s2,lsr#8	@ i0
    332 	eor	$t2,$i2,$t2,lsl#8
    333 	and	$i2,lr,$s2,lsr#16	@ i1
    334 	eor	$t3,$i3,$t3,lsl#8
    335 	and	$i3,lr,$s2
    336 	eor	$s1,$t1,$s1,lsl#24
    337 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    338 	mov	$s2,$s2,lsr#24
    339 
    340 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    341 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    342 	eor	$s0,$i1,$s0,lsl#8
    343 	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    344 	and	$i1,lr,$s3		@ i0
    345 	eor	$s1,$s1,$i2,lsl#16
    346 	and	$i2,lr,$s3,lsr#8	@ i1
    347 	eor	$t3,$i3,$t3,lsl#8
    348 	and	$i3,lr,$s3,lsr#16	@ i2
    349 	eor	$s2,$t2,$s2,lsl#24
    350 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    351 	mov	$s3,$s3,lsr#24
    352 
        @ The last round key is read in place (no post-increment).
    353 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    354 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    355 	eor	$s0,$i1,$s0,lsl#8
    356 	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    357 	ldr	$i1,[$key,#0]
    358 	eor	$s1,$s1,$i2,lsl#8
    359 	ldr	$t1,[$key,#4]
    360 	eor	$s2,$s2,$i3,lsl#16
    361 	ldr	$t2,[$key,#8]
    362 	eor	$s3,$t3,$s3,lsl#24
    363 	ldr	$t3,[$key,#12]
    364 
    365 	eor	$s0,$s0,$i1
    366 	eor	$s1,$s1,$t1
    367 	eor	$s2,$s2,$t2
    368 	eor	$s3,$s3,$t3
    369 
        @ Undo the +2 adjustment of the table pointer before returning.
    370 	sub	$tbl,$tbl,#2
    371 	ldr	pc,[sp],#4		@ pop and return
    372 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    373 
        @ AES_set_encrypt_key: expands a user key into the encryption key
        @ schedule.
        @ In:  r0 = user key bytes (inp), r1 = key length in bits,
        @      r2 = destination key structure.
        @ Out: r0 = 0 on success; r0 = -1 if r0 or r2 is zero or if the bit
        @      count is not 128, 192 or 256. On success r2 points at the
        @      start of the key structure again (AES_set_decrypt_key relies
        @      on that). The round count (10, 12 or 14) is stored at byte
        @      offset 240 of the key structure.
    374 .global AES_set_encrypt_key
    375 .type   AES_set_encrypt_key,%function
    376 .align	5
    377 AES_set_encrypt_key:
        @ All argument checks run before anything is pushed, so the error
        @ exits can branch straight to .Labrt and return through the
        @ untouched lr.
    378 	sub	r3,pc,#8		@ AES_set_encrypt_key
    379 	teq	r0,#0
    380 	moveq	r0,#-1
    381 	beq	.Labrt
    382 	teq	r2,#0
    383 	moveq	r0,#-1
    384 	beq	.Labrt
    385 
    386 	teq	r1,#128
    387 	beq	.Lok
    388 	teq	r1,#192
    389 	beq	.Lok
    390 	teq	r1,#256
    391 	movne	r0,#-1
    392 	bne	.Labrt
    393 
        @ The table pointer is set 1024 bytes past AES_Te, i.e. at the Te4
        @ byte table; rcon[] follows 256 bytes after that.
    394 .Lok:	stmdb   sp!,{r4-r12,lr}
    395 	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
    396 
    397 	mov	$rounds,r0		@ inp
    398 	mov	lr,r1			@ bits
    399 	mov	$key,r2			@ key
    400 
        @ The first 16 key bytes become rk[0..3]; they are kept in the
        @ s0..s3 registers and also stored to the schedule.
    401 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    402 	ldrb	$t1,[$rounds,#2]	@ manner...
    403 	ldrb	$t2,[$rounds,#1]
    404 	ldrb	$t3,[$rounds,#0]
    405 	orr	$s0,$s0,$t1,lsl#8
    406 	ldrb	$s1,[$rounds,#7]
    407 	orr	$s0,$s0,$t2,lsl#16
    408 	ldrb	$t1,[$rounds,#6]
    409 	orr	$s0,$s0,$t3,lsl#24
    410 	ldrb	$t2,[$rounds,#5]
    411 	ldrb	$t3,[$rounds,#4]
    412 	orr	$s1,$s1,$t1,lsl#8
    413 	ldrb	$s2,[$rounds,#11]
    414 	orr	$s1,$s1,$t2,lsl#16
    415 	ldrb	$t1,[$rounds,#10]
    416 	orr	$s1,$s1,$t3,lsl#24
    417 	ldrb	$t2,[$rounds,#9]
    418 	ldrb	$t3,[$rounds,#8]
    419 	orr	$s2,$s2,$t1,lsl#8
    420 	ldrb	$s3,[$rounds,#15]
    421 	orr	$s2,$s2,$t2,lsl#16
    422 	ldrb	$t1,[$rounds,#14]
    423 	orr	$s2,$s2,$t3,lsl#24
    424 	ldrb	$t2,[$rounds,#13]
    425 	ldrb	$t3,[$rounds,#12]
    426 	orr	$s3,$s3,$t1,lsl#8
    427 	str	$s0,[$key],#16
    428 	orr	$s3,$s3,$t2,lsl#16
    429 	str	$s1,[$key,#-12]
    430 	orr	$s3,$s3,$t3,lsl#24
    431 	str	$s2,[$key,#-8]
    432 	str	$s3,[$key,#-4]
    433 
        @ 128-bit keys: 10 rounds; the rounds register doubles as the loop
        @ counter. The input pointer is no longer needed on this path.
    434 	teq	lr,#128
    435 	bne	.Lnot128
    436 	mov	$rounds,#10
    437 	str	$rounds,[$key,#240-16]
    438 	add	$t3,$tbl,#256			@ rcon
    439 	mov	lr,#255
    440 
    441 .L128_loop:
        @ Build the key-schedule core word: each byte of the previous word
        @ goes through Te4 and the results are placed one byte position to
        @ the left (top byte wrapping to the bottom), then rcon is XORed in.
    442 	and	$t2,lr,$s3,lsr#24
    443 	and	$i1,lr,$s3,lsr#16
    444 	ldrb	$t2,[$tbl,$t2]
    445 	and	$i2,lr,$s3,lsr#8
    446 	ldrb	$i1,[$tbl,$i1]
    447 	and	$i3,lr,$s3
    448 	ldrb	$i2,[$tbl,$i2]
    449 	orr	$t2,$t2,$i1,lsl#24
    450 	ldrb	$i3,[$tbl,$i3]
    451 	orr	$t2,$t2,$i2,lsl#16
    452 	ldr	$t1,[$t3],#4			@ rcon[i++]
    453 	orr	$t2,$t2,$i3,lsl#8
    454 	eor	$t2,$t2,$t1
    455 	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    456 	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    457 	str	$s0,[$key],#16
    458 	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    459 	str	$s1,[$key,#-12]
    460 	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    461 	str	$s2,[$key,#-8]
    462 	subs	$rounds,$rounds,#1
    463 	str	$s3,[$key,#-4]
    464 	bne	.L128_loop
        @ Rewind to the start of the schedule: 16 bytes for the user key
        @ plus 10 iterations of 16 bytes were written.
    465 	sub	r2,$key,#176
    466 	b	.Ldone
    467 
    468 .Lnot128:
        @ 192- and 256-bit keys: key bytes 16..23 become the next two words
        @ of the schedule.
    469 	ldrb	$i2,[$rounds,#19]
    470 	ldrb	$t1,[$rounds,#18]
    471 	ldrb	$t2,[$rounds,#17]
    472 	ldrb	$t3,[$rounds,#16]
    473 	orr	$i2,$i2,$t1,lsl#8
    474 	ldrb	$i3,[$rounds,#23]
    475 	orr	$i2,$i2,$t2,lsl#16
    476 	ldrb	$t1,[$rounds,#22]
    477 	orr	$i2,$i2,$t3,lsl#24
    478 	ldrb	$t2,[$rounds,#21]
    479 	ldrb	$t3,[$rounds,#20]
    480 	orr	$i3,$i3,$t1,lsl#8
    481 	orr	$i3,$i3,$t2,lsl#16
    482 	str	$i2,[$key],#8
    483 	orr	$i3,$i3,$t3,lsl#24
    484 	str	$i3,[$key,#-4]
    485 
        @ 192-bit keys: 12 rounds are recorded, while the expansion loop
        @ itself is counted separately (8 passes).
    486 	teq	lr,#192
    487 	bne	.Lnot192
    488 	mov	$rounds,#12
    489 	str	$rounds,[$key,#240-24]
    490 	add	$t3,$tbl,#256			@ rcon
    491 	mov	lr,#255
    492 	mov	$rounds,#8
    493 
    494 .L192_loop:
        @ Same core-word construction as in .L128_loop, applied to the most
        @ recent schedule word (held in the i3 register on entry).
    495 	and	$t2,lr,$i3,lsr#24
    496 	and	$i1,lr,$i3,lsr#16
    497 	ldrb	$t2,[$tbl,$t2]
    498 	and	$i2,lr,$i3,lsr#8
    499 	ldrb	$i1,[$tbl,$i1]
    500 	and	$i3,lr,$i3
    501 	ldrb	$i2,[$tbl,$i2]
    502 	orr	$t2,$t2,$i1,lsl#24
    503 	ldrb	$i3,[$tbl,$i3]
    504 	orr	$t2,$t2,$i2,lsl#16
    505 	ldr	$t1,[$t3],#4			@ rcon[i++]
    506 	orr	$t2,$t2,$i3,lsl#8
    507 	eor	$i3,$t2,$t1
    508 	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    509 	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    510 	str	$s0,[$key],#24
    511 	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    512 	str	$s1,[$key,#-20]
    513 	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    514 	str	$s2,[$key,#-16]
    515 	subs	$rounds,$rounds,#1
    516 	str	$s3,[$key,#-12]
        @ The last pass stops after four words. Rewind by the 24 bytes of
        @ user key plus 8 passes of 24 bytes.
    517 	subeq	r2,$key,#216
    518 	beq	.Ldone
    519 
        @ Remaining two words of this pass, derived from the words stored
        @ 24 bytes earlier.
    520 	ldr	$i1,[$key,#-32]
    521 	ldr	$i2,[$key,#-28]
    522 	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    523 	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    524 	str	$i1,[$key,#-8]
    525 	str	$i3,[$key,#-4]
    526 	b	.L192_loop
    527 
    528 .Lnot192:
        @ 256-bit keys: key bytes 24..31 become two more schedule words.
    529 	ldrb	$i2,[$rounds,#27]
    530 	ldrb	$t1,[$rounds,#26]
    531 	ldrb	$t2,[$rounds,#25]
    532 	ldrb	$t3,[$rounds,#24]
    533 	orr	$i2,$i2,$t1,lsl#8
    534 	ldrb	$i3,[$rounds,#31]
    535 	orr	$i2,$i2,$t2,lsl#16
    536 	ldrb	$t1,[$rounds,#30]
    537 	orr	$i2,$i2,$t3,lsl#24
    538 	ldrb	$t2,[$rounds,#29]
    539 	ldrb	$t3,[$rounds,#28]
    540 	orr	$i3,$i3,$t1,lsl#8
    541 	orr	$i3,$i3,$t2,lsl#16
    542 	str	$i2,[$key],#8
    543 	orr	$i3,$i3,$t3,lsl#24
    544 	str	$i3,[$key,#-4]
    545 
        @ 14 rounds are recorded; the expansion loop makes 7 passes.
    546 	mov	$rounds,#14
    547 	str	$rounds,[$key,#240-32]
    548 	add	$t3,$tbl,#256			@ rcon
    549 	mov	lr,#255
    550 	mov	$rounds,#7
    551 
    552 .L256_loop:
        @ First half of a pass: same core-word construction as in
        @ .L128_loop, applied to the most recent schedule word.
    553 	and	$t2,lr,$i3,lsr#24
    554 	and	$i1,lr,$i3,lsr#16
    555 	ldrb	$t2,[$tbl,$t2]
    556 	and	$i2,lr,$i3,lsr#8
    557 	ldrb	$i1,[$tbl,$i1]
    558 	and	$i3,lr,$i3
    559 	ldrb	$i2,[$tbl,$i2]
    560 	orr	$t2,$t2,$i1,lsl#24
    561 	ldrb	$i3,[$tbl,$i3]
    562 	orr	$t2,$t2,$i2,lsl#16
    563 	ldr	$t1,[$t3],#4			@ rcon[i++]
    564 	orr	$t2,$t2,$i3,lsl#8
    565 	eor	$i3,$t2,$t1
    566 	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    567 	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    568 	str	$s0,[$key],#32
    569 	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    570 	str	$s1,[$key,#-28]
    571 	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    572 	str	$s2,[$key,#-24]
    573 	subs	$rounds,$rounds,#1
    574 	str	$s3,[$key,#-20]
        @ The last pass stops after four words. Rewind by the 32 bytes of
        @ user key plus 7 passes of 32 bytes.
    575 	subeq	r2,$key,#256
    576 	beq	.Ldone
    577 
        @ Second half of a pass: every byte of the word just produced goes
        @ through Te4 and stays in its own byte position (no rotation, no
        @ rcon).
    578 	and	$t2,lr,$s3
    579 	and	$i1,lr,$s3,lsr#8
    580 	ldrb	$t2,[$tbl,$t2]
    581 	and	$i2,lr,$s3,lsr#16
    582 	ldrb	$i1,[$tbl,$i1]
    583 	and	$i3,lr,$s3,lsr#24
    584 	ldrb	$i2,[$tbl,$i2]
    585 	orr	$t2,$t2,$i1,lsl#8
    586 	ldrb	$i3,[$tbl,$i3]
    587 	orr	$t2,$t2,$i2,lsl#16
    588 	ldr	$t1,[$key,#-48]
    589 	orr	$t2,$t2,$i3,lsl#24
    590 
    591 	ldr	$i1,[$key,#-44]
    592 	ldr	$i2,[$key,#-40]
    593 	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    594 	ldr	$i3,[$key,#-36]
    595 	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    596 	str	$t1,[$key,#-16]
    597 	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    598 	str	$i1,[$key,#-12]
    599 	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    600 	str	$i2,[$key,#-8]
    601 	str	$i3,[$key,#-4]
    602 	b	.L256_loop
    603 
        @ Success exit restores the saved registers; .Labrt is the common
        @ return sequence, also used by the error paths above and by
        @ AES_set_decrypt_key.
    604 .Ldone:	mov	r0,#0
    605 	ldmia   sp!,{r4-r12,lr}
    606 .Labrt:	tst	lr,#1
    607 	moveq	pc,lr			@ be binary compatible with V4, yet
    608 	bx	lr			@ interoperable with Thumb ISA:-)
    609 .size	AES_set_encrypt_key,.-AES_set_encrypt_key
    610 
        @ AES_set_decrypt_key: builds the decryption key schedule. Takes the
        @ same register arguments as AES_set_encrypt_key, which it calls
        @ first; a non-zero result from that call is returned unchanged.
        @ Otherwise the round keys are reversed in place (.Linv), every
        @ word of the round keys between the first and the last one is
        @ transformed (.Lmix), and r0 = 0 is returned.
    611 .global AES_set_decrypt_key
    612 .type   AES_set_decrypt_key,%function
    613 .align	5
    614 AES_set_decrypt_key:
    615 	str	lr,[sp,#-4]!            @ push lr
    616 	bl	AES_set_encrypt_key
    617 	teq	r0,#0
    618 	ldrne	lr,[sp],#4              @ pop lr
    619 	bne	.Labrt
    620 
        @ lr is already on the stack, directly above the registers pushed
        @ here, which is why the final ldmia can restore r4-r12 and lr in
        @ one go.
    621 	stmdb   sp!,{r4-r12}
    622 
    623 	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
    624 	mov	$key,r2			@ which is AES_KEY *key
    625 	mov	$i1,r2
    626 	add	$i2,r2,$rounds,lsl#4
    627 
        @ Swap 16-byte round keys from both ends, moving inwards, until the
        @ two pointers meet on the middle round key.
    628 .Linv:	ldr	$s0,[$i1]
    629 	ldr	$s1,[$i1,#4]
    630 	ldr	$s2,[$i1,#8]
    631 	ldr	$s3,[$i1,#12]
    632 	ldr	$t1,[$i2]
    633 	ldr	$t2,[$i2,#4]
    634 	ldr	$t3,[$i2,#8]
    635 	ldr	$i3,[$i2,#12]
    636 	str	$s0,[$i2],#-16
    637 	str	$s1,[$i2,#16+4]
    638 	str	$s2,[$i2,#16+8]
    639 	str	$s3,[$i2,#16+12]
    640 	str	$t1,[$i1],#16
    641 	str	$t2,[$i1,#-12]
    642 	str	$t3,[$i1,#-8]
    643 	str	$i3,[$i1,#-4]
    644 	teq	$i1,$i2
    645 	bne	.Linv
    646 ___
    647 $mask80=$i1;
    648 $mask1b=$i2;
    649 $mask7f=$i3;
    650 $code.=<<___;
        @ The three mask names reuse the i1/i2/i3 registers, which are free
        @ once the .Linv loop is done. The masks are built up to 0x80808080
        @ and 0x1b1b1b1b, plus the complement of the former, so that four
        @ bytes can be processed per word. The loop counter is set to
        @ (rounds-1)*4 words, starting at the second round key.
    651 	ldr	$s0,[$key,#16]!		@ prefetch tp1
    652 	mov	$mask80,#0x80
    653 	mov	$mask1b,#0x1b
    654 	orr	$mask80,$mask80,#0x8000
    655 	orr	$mask1b,$mask1b,#0x1b00
    656 	orr	$mask80,$mask80,$mask80,lsl#16
    657 	orr	$mask1b,$mask1b,$mask1b,lsl#16
    658 	sub	$rounds,$rounds,#1
    659 	mvn	$mask7f,$mask80
    660 	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
    661 
        @ Each of the next three groups doubles all four bytes of a word in
        @ the AES field: the bytes with their top bit set are turned into a
        @ 0x1b correction (0x80 - 0x01 = 0x7f, masked with 0x1b), which is
        @ XORed with the remaining seven bits shifted left by one.
    662 .Lmix:	and	$t1,$s0,$mask80
    663 	and	$s1,$s0,$mask7f
    664 	sub	$t1,$t1,$t1,lsr#7
    665 	and	$t1,$t1,$mask1b
    666 	eor	$s1,$t1,$s1,lsl#1	@ tp2
    667 
    668 	and	$t1,$s1,$mask80
    669 	and	$s2,$s1,$mask7f
    670 	sub	$t1,$t1,$t1,lsr#7
    671 	and	$t1,$t1,$mask1b
    672 	eor	$s2,$t1,$s2,lsl#1	@ tp4
    673 
    674 	and	$t1,$s2,$mask80
    675 	and	$s3,$s2,$mask7f
    676 	sub	$t1,$t1,$t1,lsr#7
    677 	and	$t1,$t1,$mask1b
    678 	eor	$s3,$t1,$s3,lsl#1	@ tp8
    679 
        @ Combine the multiples: tpe ^ ROTATE(tpb,8) ^ ROTATE(tpd,16) ^
        @ ROTATE(tp9,24), where a left rotation by n bits is written as
        @ ror by 32-n. This matches the AES inverse MixColumns operation
        @ applied to one word.
    680 	eor	$t1,$s1,$s2
    681 	eor	$t2,$s0,$s3		@ tp9
    682 	eor	$t1,$t1,$s3		@ tpe
    683 	eor	$t1,$t1,$s1,ror#24
    684 	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    685 	eor	$t1,$t1,$s2,ror#16
    686 	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    687 	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    688 
        @ The next input word is fetched before the result overwrites the
        @ current one.
    689 	ldr	$s0,[$key,#4]		@ prefetch tp1
    690 	str	$t1,[$key],#4
    691 	subs	$rounds,$rounds,#1
    692 	bne	.Lmix
    693 
    694 	mov	r0,#0
    695 	ldmia   sp!,{r4-r12,lr}
    696 	tst	lr,#1
    697 	moveq	pc,lr			@ be binary compatible with V4, yet
    698 	bx	lr			@ interoperable with Thumb ISA:-)
    699 .size	AES_set_decrypt_key,.-AES_set_decrypt_key
    700 
        @ AES_Td: decryption lookup data. 256 32-bit words, followed at
        @ offset 1024 by the Td4[256] byte table. AES_decrypt points the
        @ table register at the start of this object; _armv4_AES_decrypt
        @ indexes the word part with a 4-byte stride and rotates the loaded
        @ words to obtain what its comments call Td1..Td3.
    701 .type	AES_Td,%object
    702 .align	5
    703 AES_Td:
    704 .word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    705 .word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    706 .word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    707 .word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    708 .word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    709 .word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    710 .word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    711 .word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    712 .word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    713 .word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    714 .word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    715 .word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    716 .word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    717 .word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    718 .word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    719 .word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    720 .word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    721 .word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    722 .word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    723 .word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    724 .word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    725 .word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    726 .word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    727 .word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    728 .word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    729 .word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    730 .word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    731 .word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    732 .word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    733 .word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    734 .word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    735 .word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    736 .word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    737 .word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    738 .word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    739 .word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    740 .word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    741 .word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    742 .word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    743 .word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    744 .word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    745 .word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    746 .word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    747 .word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    748 .word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    749 .word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    750 .word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    751 .word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    752 .word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    753 .word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    754 .word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    755 .word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    756 .word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    757 .word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    758 .word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    759 .word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    760 .word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    761 .word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    762 .word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    763 .word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    764 .word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    765 .word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    766 .word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    767 .word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    768 @ Td4[256]
    769 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    770 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    771 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    772 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    773 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    774 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    775 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    776 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    777 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    778 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    779 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    780 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    781 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    782 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    783 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    784 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    785 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    786 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    787 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    788 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    789 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    790 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    791 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    792 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    793 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    794 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    795 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    796 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    797 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    798 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    799 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    800 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    801 .size	AES_Td,.-AES_Td
    802 
    803 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
    804 @ 		 const AES_KEY *key) {
        @
        @ Decrypts one 16-byte block. On entry r0 = in, r1 = out, r2 = key.
        @ Same structure as AES_encrypt: the out pointer is pushed together
        @ with r4-r12 and lr, the input is gathered into four words, the
        @ core routine is called, and the result is written out byte by
        @ byte.
    805 .global AES_decrypt
    806 .type   AES_decrypt,%function
    807 .align	5
    808 AES_decrypt:
        @ r3 becomes the address of the AES_decrypt label (pc reads as this
        @ instruction plus 8); the AES_Td address is formed relative to it.
    809 	sub	r3,pc,#8		@ AES_decrypt
    810 	stmdb   sp!,{r1,r4-r12,lr}
    811 	mov	$rounds,r0		@ inp
    812 	mov	$key,r2
    813 	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
    814 
        @ Assemble the four state words one byte at a time, first input
        @ byte into bits 31..24 of each word.
    815 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    816 	ldrb	$t1,[$rounds,#2]	@ manner...
    817 	ldrb	$t2,[$rounds,#1]
    818 	ldrb	$t3,[$rounds,#0]
    819 	orr	$s0,$s0,$t1,lsl#8
    820 	ldrb	$s1,[$rounds,#7]
    821 	orr	$s0,$s0,$t2,lsl#16
    822 	ldrb	$t1,[$rounds,#6]
    823 	orr	$s0,$s0,$t3,lsl#24
    824 	ldrb	$t2,[$rounds,#5]
    825 	ldrb	$t3,[$rounds,#4]
    826 	orr	$s1,$s1,$t1,lsl#8
    827 	ldrb	$s2,[$rounds,#11]
    828 	orr	$s1,$s1,$t2,lsl#16
    829 	ldrb	$t1,[$rounds,#10]
    830 	orr	$s1,$s1,$t3,lsl#24
    831 	ldrb	$t2,[$rounds,#9]
    832 	ldrb	$t3,[$rounds,#8]
    833 	orr	$s2,$s2,$t1,lsl#8
    834 	ldrb	$s3,[$rounds,#15]
    835 	orr	$s2,$s2,$t2,lsl#16
    836 	ldrb	$t1,[$rounds,#14]
    837 	orr	$s2,$s2,$t3,lsl#24
    838 	ldrb	$t2,[$rounds,#13]
    839 	ldrb	$t3,[$rounds,#12]
    840 	orr	$s3,$s3,$t1,lsl#8
    841 	orr	$s3,$s3,$t2,lsl#16
    842 	orr	$s3,$s3,$t3,lsl#24
    843 
    844 	bl	_armv4_AES_decrypt
    845 
        @ The saved r1 sits at the lowest stack address, so this single-word
        @ pop retrieves the out pointer.
    846 	ldr	$rounds,[sp],#4		@ pop out
    847 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    848 	mov	$t2,$s0,lsr#16		@ manner...
    849 	mov	$t3,$s0,lsr#8
    850 	strb	$t1,[$rounds,#0]
    851 	strb	$t2,[$rounds,#1]
    852 	mov	$t1,$s1,lsr#24
    853 	strb	$t3,[$rounds,#2]
    854 	mov	$t2,$s1,lsr#16
    855 	strb	$s0,[$rounds,#3]
    856 	mov	$t3,$s1,lsr#8
    857 	strb	$t1,[$rounds,#4]
    858 	strb	$t2,[$rounds,#5]
    859 	mov	$t1,$s2,lsr#24
    860 	strb	$t3,[$rounds,#6]
    861 	mov	$t2,$s2,lsr#16
    862 	strb	$s1,[$rounds,#7]
    863 	mov	$t3,$s2,lsr#8
    864 	strb	$t1,[$rounds,#8]
    865 	strb	$t2,[$rounds,#9]
    866 	mov	$t1,$s3,lsr#24
    867 	strb	$t3,[$rounds,#10]
    868 	mov	$t2,$s3,lsr#16
    869 	strb	$s2,[$rounds,#11]
    870 	mov	$t3,$s3,lsr#8
    871 	strb	$t1,[$rounds,#12]
    872 	strb	$t2,[$rounds,#13]
    873 	strb	$t3,[$rounds,#14]
    874 	strb	$s3,[$rounds,#15]
    875 
        @ Return: a plain move to pc when bit 0 of lr is clear, bx lr
        @ otherwise.
    876 	ldmia   sp!,{r4-r12,lr}
    877 	tst	lr,#1
    878 	moveq	pc,lr			@ be binary compatible with V4, yet
    879 	bx	lr			@ interoperable with Thumb ISA:-)
    880 .size	AES_decrypt,.-AES_decrypt
    881 
    882 .type   _armv4_AES_decrypt,%function
    883 .align	2
        @ Internal core of AES_decrypt; there is no .global for it in this part
        @ of the file.
        @ On entry:  r0-r3 (s0-s3) = the four state words,
        @            r10 (tbl)     = address of AES_Td,
        @            r11 (key)     = address of the key structure.
        @ On return: r0-r3 hold the result words, r10 is back at its entry
        @            value, and r11 has been advanced by 16 bytes for the
        @            initial key and for every loop iteration, so it points
        @            at the last round key.
        @ r4-r9 and r12 are used as scratch. lr is pushed first because it is
        @ reused below to hold the constant byte mask 255.
        @ The instruction order throughout is deliberate (see the July 2010
        @ rescheduling note at the top of the file); do not reorder casually.
    884 _armv4_AES_decrypt:
    885 	str	lr,[sp,#-4]!		@ push lr
        	@ Initial round-key addition: fetch four key words (r11 += 16)
        	@ and XOR them into the state.
    886 	ldmia	$key!,{$t1-$i1}
    887 	eor	$s0,$s0,$t1
        	@ r11 has already moved by 16, hence 240-16: this reads the word
        	@ at offset 240 of the key structure (presumably the round count
        	@ field of AES_KEY - confirm against the C declaration).
    888 	ldr	$rounds,[$key,#240-16]
    889 	eor	$s1,$s1,$t2
    890 	eor	$s2,$s2,$t3
    891 	eor	$s3,$s3,$i1
        	@ The loop handles all rounds but the last one, which uses the
        	@ byte table after the loop.
    892 	sub	$rounds,$rounds,#1
    893 	mov	lr,#255
    894 
        	@ Split s0 into its four byte indices ahead of the first iteration;
        	@ later iterations get them from the tail of the loop body.
    895 	and	$i1,lr,$s0,lsr#16
    896 	and	$i2,lr,$s0,lsr#8
    897 	and	$i3,lr,$s0
    898 	mov	$s0,$s0,lsr#24
        	@ One iteration = one full round. Every lookup goes to the same
        	@ 256-word table at r10; the Td1/Td2/Td3 named in the comments are
        	@ obtained from it through the ror operands on the eor
        	@ instructions. Index extraction for the next state word is
        	@ interleaved with the lookups and XORs of the current one.
    899 .Ldec_loop:
    900 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
    901 	and	$i1,lr,$s1		@ i0
    902 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
    903 	and	$i2,lr,$s1,lsr#16
    904 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
    905 	and	$i3,lr,$s1,lsr#8
    906 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
    907 	mov	$s1,$s1,lsr#24
    908 
    909 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
    910 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
    911 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
    912 	eor	$s0,$s0,$i1,ror#24
    913 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
    914 	and	$i1,lr,$s2,lsr#8	@ i0
    915 	eor	$t2,$i2,$t2,ror#8
    916 	and	$i2,lr,$s2		@ i1
    917 	eor	$t3,$i3,$t3,ror#8
    918 	and	$i3,lr,$s2,lsr#16
    919 	eor	$s1,$s1,$t1,ror#8
    920 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
    921 	mov	$s2,$s2,lsr#24
    922 
    923 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
    924 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
    925 	eor	$s0,$s0,$i1,ror#16
    926 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
    927 	and	$i1,lr,$s3,lsr#16	@ i0
    928 	eor	$s1,$s1,$i2,ror#24
    929 	and	$i2,lr,$s3,lsr#8	@ i1
    930 	eor	$t3,$i3,$t3,ror#8
    931 	and	$i3,lr,$s3		@ i2
    932 	eor	$s2,$s2,$t2,ror#8
    933 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
    934 	mov	$s3,$s3,lsr#24
    935 
    936 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
    937 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
    938 	eor	$s0,$s0,$i1,ror#8
    939 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
    940 	eor	$s1,$s1,$i2,ror#16
    941 	eor	$s2,$s2,$i3,ror#24
        	@ Round-key addition: the first key word is fetched with a
        	@ post-increment of 16, so the remaining three are addressed at
        	@ negative offsets from the updated r11.
    942 	ldr	$i1,[$key],#16
    943 	eor	$s3,$s3,$t3,ror#8
    944 
    945 	ldr	$t1,[$key,#-12]
    946 	ldr	$t2,[$key,#-8]
    947 	eor	$s0,$s0,$i1
    948 	ldr	$t3,[$key,#-4]
        	@ Byte indices of the new s0 are extracted here, both for the next
        	@ iteration and for the final round after the loop.
    949 	and	$i1,lr,$s0,lsr#16
    950 	eor	$s1,$s1,$t1
    951 	and	$i2,lr,$s0,lsr#8
    952 	eor	$s2,$s2,$t2
    953 	and	$i3,lr,$s0
    954 	eor	$s3,$s3,$t3
    955 	mov	$s0,$s0,lsr#24
    956 
    957 	subs	$rounds,$rounds,#1
    958 	bne	.Ldec_loop
    959 
        	@ Step over the 256-word table (1024 bytes) to the byte table
        	@ that follows it inside AES_Td (Td4).
    960 	add	$tbl,$tbl,#1024
    961 
        	@ Eight word loads 32 bytes apart span the 256 bytes of Td4. The
        	@ loaded values are never read (r4-r6 are overwritten below);
        	@ presumably this pulls the table into the data cache ahead of
        	@ the data-dependent byte lookups.
    962 	ldr	$t2,[$tbl,#0]		@ prefetch Td4
    963 	ldr	$t3,[$tbl,#32]
    964 	ldr	$t1,[$tbl,#64]
    965 	ldr	$t2,[$tbl,#96]
    966 	ldr	$t3,[$tbl,#128]
    967 	ldr	$t1,[$tbl,#160]
    968 	ldr	$t2,[$tbl,#192]
    969 	ldr	$t3,[$tbl,#224]
    970 
        	@ Last round: single-byte lookups in Td4, moved into their byte
        	@ lanes with lsl and combined with eor. The indices for s0 were
        	@ prepared at the tail of the loop (or before it).
    971 	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
    972 	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
    973 	and	$i1,lr,$s1		@ i0
    974 	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
    975 	and	$i2,lr,$s1,lsr#16
    976 	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
    977 	and	$i3,lr,$s1,lsr#8
    978 
    979 	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
    980 	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
    981 	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
    982 	eor	$s0,$i1,$s0,lsl#24
    983 	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
    984 	eor	$s1,$t1,$s1,lsl#8
    985 	and	$i1,lr,$s2,lsr#8	@ i0
    986 	eor	$t2,$t2,$i2,lsl#8
    987 	and	$i2,lr,$s2		@ i1
    988 	eor	$t3,$t3,$i3,lsl#8
    989 	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
    990 	and	$i3,lr,$s2,lsr#16
    991 
    992 	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
    993 	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
    994 	eor	$s0,$s0,$i1,lsl#8
    995 	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
    996 	eor	$s1,$i2,$s1,lsl#16
    997 	and	$i1,lr,$s3,lsr#16	@ i0
    998 	eor	$s2,$t2,$s2,lsl#16
    999 	and	$i2,lr,$s3,lsr#8	@ i1
   1000 	eor	$t3,$t3,$i3,lsl#16
   1001 	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
   1002 	and	$i3,lr,$s3		@ i2
   1003 
   1004 	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
   1005 	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
   1006 	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
        	@ Final round-key words are read at offsets 0..12 from r11
        	@ without write-back, interleaved with the last merges.
   1007 	eor	$s0,$s0,$i1,lsl#16
   1008 	ldr	$i1,[$key,#0]
   1009 	eor	$s1,$s1,$i2,lsl#8
   1010 	ldr	$t1,[$key,#4]
   1011 	eor	$s2,$i3,$s2,lsl#8
   1012 	ldr	$t2,[$key,#8]
   1013 	eor	$s3,$t3,$s3,lsl#24
   1014 	ldr	$t3,[$key,#12]
   1015 
   1016 	eor	$s0,$s0,$i1
   1017 	eor	$s1,$s1,$t1
   1018 	eor	$s2,$s2,$t2
   1019 	eor	$s3,$s3,$t3
   1020 
        	@ Undo the +1024 above so the caller sees r10 unchanged, then
        	@ return by popping the saved lr straight into pc.
   1021 	sub	$tbl,$tbl,#1024
   1022 	ldr	pc,[sp],#4		@ pop and return
   1023 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1024 .asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   1025 .align	2
   1026 ___
   1027 
   1028 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
        # (every occurrence of the interworking return in the generated text is
        # replaced by a .word carrying the literal 0xe12fff1e)
        #
        # Emit the generated assembly.  STDOUT was redirected to the output
        # file at the top of the script and is buffered, so a failed write
        # (full disk, closed pipe, ...) may only be reported by close.  Both
        # calls are therefore checked: silently ignoring a failure would leave
        # a truncated file behind while the script still exits successfully.
   1029 print $code or die "error writing to STDOUT: $!";
   1030 close STDOUT or die "error closing STDOUT: $!";	# enforce flush
   1031