Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # AES for ARMv4
     11 
     12 # January 2007.
     13 #
     14 # Code uses a single 1K S-box and is >2 times faster than code generated
     15 # by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
     16 # allows merging a logical or arithmetic operation with a shift or rotate
     17 # in one instruction and emitting the combined result every cycle. The
     18 # module is endian-neutral. The performance is ~42 cycles/byte for a
     19 # 128-bit key.
     20 
     21 # May 2007.
     22 #
     23 # AES_set_[en|de]crypt_key is added.
     24 
     25 $s0="r0";	# AES state word 0 (block bytes 0..3, byte 0 in bits 31..24)
     26 $s1="r1";	# AES state word 1
     27 $s2="r2";	# AES state word 2
     28 $s3="r3";	# AES state word 3
     29 $t1="r4";	# scratch
     30 $t2="r5";	# scratch
     31 $t3="r6";	# scratch
     32 $i1="r7";	# table index / scratch
     33 $i2="r8";	# table index / scratch
     34 $i3="r9";	# table index / scratch
     35 
     36 $tbl="r10";	# base address of the lookup table in use
     37 $key="r11";	# pointer into the key schedule
     38 $rounds="r12";	# round counter; also reused as the input/output byte pointer
     39 
     40 $code=<<___;
     41 .text
     42 .code	32
     43 
        @ AES_Te -- encryption tables, laid out back to back:
        @   +0     256 words: forward T-table. Read from the most to the least
        @          significant byte, entry x holds 2*S[x], S[x], S[x], 3*S[x]
        @          (products in GF(2^8)). The round code obtains the Te1..Te3
        @          variants by rotating the loaded word (ror#8/16/24), and the
        @          last round reads S[x] as the byte at offset 2 of an entry.
        @   +1024  Te4: the plain 256-byte S-box, used by AES_set_encrypt_key.
        @   +1280  rcon: round constants for the key schedule.
        @ The tables live in .text and the functions below locate them relative
        @ to their own address (pc).
     44 .type	AES_Te,%object
     45 .align	5
     46 AES_Te:
     47 .word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     48 .word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     49 .word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     50 .word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     51 .word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     52 .word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     53 .word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     54 .word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     55 .word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     56 .word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     57 .word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     58 .word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     59 .word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     60 .word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     61 .word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     62 .word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     63 .word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     64 .word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     65 .word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     66 .word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     67 .word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     68 .word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     69 .word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     70 .word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
     71 .word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
     72 .word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
     73 .word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
     74 .word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
     75 .word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
     76 .word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
     77 .word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
     78 .word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
     79 .word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
     80 .word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
     81 .word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
     82 .word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
     83 .word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
     84 .word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
     85 .word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
     86 .word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
     87 .word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
     88 .word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
     89 .word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
     90 .word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
     91 .word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
     92 .word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
     93 .word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
     94 .word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
     95 .word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
     96 .word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
     97 .word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
     98 .word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
     99 .word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    100 .word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    101 .word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    102 .word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    103 .word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    104 .word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    105 .word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    106 .word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    107 .word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    108 .word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    109 .word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    110 .word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    111 @ Te4[256]
        @ Plain S-box, one byte per entry; AES_set_encrypt_key points r10 at
        @ this spot (AES_Te+1024) and indexes it with ldrb.
    112 .byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    113 .byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    114 .byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    115 .byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    116 .byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    117 .byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    118 .byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    119 .byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    120 .byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    121 .byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    122 .byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    123 .byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    124 .byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    125 .byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    126 .byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    127 .byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    128 .byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    129 .byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    130 .byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    131 .byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    132 .byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    133 .byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    134 .byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    135 .byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    136 .byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    137 .byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    138 .byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    139 .byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    140 .byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    141 .byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    142 .byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    143 .byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    144 @ rcon[]
        @ Key-schedule round constants, reached as Te4+256 and consumed one
        @ word per expansion step; the list is padded with zero words.
    145 .word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    146 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    147 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    148 .size	AES_Te,.-AES_Te
    149 
    150 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
    151 @ 		 const AES_KEY *key) {
        @
        @ Encrypts one 16-byte block.
        @ In:    r0 = in, r1 = out, r2 = key
        @ Out:   16 bytes stored at out; no return value is produced
        @ Saves: r4-r12 and lr are pushed on entry and restored on exit. r1 is
        @        pushed together with them so that out survives the core call.
        @ r0-r3 double as the state registers s0-s3, which is why the three
        @ arguments are moved away (or saved) before the block is loaded.
    152 .global AES_encrypt
    153 .type   AES_encrypt,%function
    154 .align	5
    155 AES_encrypt:
    156 	sub	r3,pc,#8		@ AES_encrypt
    157 	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) lands at the lowest address, i.e. [sp]
    158 	mov	$rounds,r0		@ inp
    159 	mov	$key,r2
    160 	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
    161 
        	@ Assemble each state word from four single-byte loads, first
        	@ input byte in bits 31..24, so the result does not depend on
        	@ the CPU byte order or on the alignment of the input.
    162 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    163 	ldrb	$t1,[$rounds,#2]	@ manner...
    164 	ldrb	$t2,[$rounds,#1]
    165 	ldrb	$t3,[$rounds,#0]
    166 	orr	$s0,$s0,$t1,lsl#8
    167 	orr	$s0,$s0,$t2,lsl#16
    168 	orr	$s0,$s0,$t3,lsl#24
    169 	ldrb	$s1,[$rounds,#7]
    170 	ldrb	$t1,[$rounds,#6]
    171 	ldrb	$t2,[$rounds,#5]
    172 	ldrb	$t3,[$rounds,#4]
    173 	orr	$s1,$s1,$t1,lsl#8
    174 	orr	$s1,$s1,$t2,lsl#16
    175 	orr	$s1,$s1,$t3,lsl#24
    176 	ldrb	$s2,[$rounds,#11]
    177 	ldrb	$t1,[$rounds,#10]
    178 	ldrb	$t2,[$rounds,#9]
    179 	ldrb	$t3,[$rounds,#8]
    180 	orr	$s2,$s2,$t1,lsl#8
    181 	orr	$s2,$s2,$t2,lsl#16
    182 	orr	$s2,$s2,$t3,lsl#24
    183 	ldrb	$s3,[$rounds,#15]
    184 	ldrb	$t1,[$rounds,#14]
    185 	ldrb	$t2,[$rounds,#13]
    186 	ldrb	$t3,[$rounds,#12]
    187 	orr	$s3,$s3,$t1,lsl#8
    188 	orr	$s3,$s3,$t2,lsl#16
    189 	orr	$s3,$s3,$t3,lsl#24
    190 
    191 	bl	_armv4_AES_encrypt
    192 
        	@ Store the four result words back byte by byte, most
        	@ significant byte first (strb writes the low 8 bits).
    193 	ldr	$rounds,[sp],#4		@ pop out
    194 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    195 	mov	$t2,$s0,lsr#16		@ manner...
    196 	mov	$t3,$s0,lsr#8
    197 	strb	$t1,[$rounds,#0]
    198 	strb	$t2,[$rounds,#1]
    199 	strb	$t3,[$rounds,#2]
    200 	strb	$s0,[$rounds,#3]
    201 	mov	$t1,$s1,lsr#24
    202 	mov	$t2,$s1,lsr#16
    203 	mov	$t3,$s1,lsr#8
    204 	strb	$t1,[$rounds,#4]
    205 	strb	$t2,[$rounds,#5]
    206 	strb	$t3,[$rounds,#6]
    207 	strb	$s1,[$rounds,#7]
    208 	mov	$t1,$s2,lsr#24
    209 	mov	$t2,$s2,lsr#16
    210 	mov	$t3,$s2,lsr#8
    211 	strb	$t1,[$rounds,#8]
    212 	strb	$t2,[$rounds,#9]
    213 	strb	$t3,[$rounds,#10]
    214 	strb	$s2,[$rounds,#11]
    215 	mov	$t1,$s3,lsr#24
    216 	mov	$t2,$s3,lsr#16
    217 	mov	$t3,$s3,lsr#8
    218 	strb	$t1,[$rounds,#12]
    219 	strb	$t2,[$rounds,#13]
    220 	strb	$t3,[$rounds,#14]
    221 	strb	$s3,[$rounds,#15]
    222 
        	@ Return: a plain mov when bit 0 of lr is clear, bx otherwise.
    223 	ldmia   sp!,{r4-r12,lr}
    224 	tst	lr,#1
    225 	moveq	pc,lr			@ be binary compatible with V4, yet
    226 	bx	lr			@ interoperable with Thumb ISA:-)
    227 .size	AES_encrypt,.-AES_encrypt
    228 
    229 .type   _armv4_AES_encrypt,%function
    230 .align	2
        @ _armv4_AES_encrypt -- core block encryption (no .global directive, so
        @ the symbol stays local to this file).
        @ In:    r0-r3 = state words s0-s3, r10 = AES_Te, r11 = key schedule
        @ Out:   r0-r3 = encrypted state words
        @ Uses:  r4-r9 and r12 as scratch. lr is saved on the stack and holds the
        @        byte mask 0xff while the rounds run. r11 is left pointing at
        @        the last round key; r10 is restored before returning.
        @ The round count is read from byte offset 240 of the key schedule.
    231 _armv4_AES_encrypt:
    232 	str	lr,[sp,#-4]!		@ push lr
    233 	ldr	$t1,[$key],#16
    234 	ldr	$t2,[$key,#-12]
    235 	ldr	$t3,[$key,#-8]
    236 	ldr	$i1,[$key,#-4]
    237 	ldr	$rounds,[$key,#240-16]	@ key was already advanced by 16, hence 240-16
    238 	eor	$s0,$s0,$t1
    239 	eor	$s1,$s1,$t2
    240 	eor	$s2,$s2,$t3
    241 	eor	$s3,$s3,$i1
    242 	sub	$rounds,$rounds,#1	@ the loop does rounds-1 rounds, the last one follows it
    243 	mov	lr,#255			@ byte mask for extracting table indices
    244 
        	@ One full round per iteration. Only one table is stored, so the
        	@ Te1/Te2/Te3 lookups named in the comments are the same entry
        	@ rotated; the rotation is folded into the eor that merges each
        	@ term. Partial sums for the new s1, s2, s3 are parked in
        	@ t1, t2, t3 until the old word has been consumed.
    245 .Lenc_loop:
        	@ bytes of s0 feed new s0 (Te0), s3 (Te1), s2 (Te2), s1 (Te3)
    246 	and	$i2,lr,$s0,lsr#8
    247 	and	$i3,lr,$s0,lsr#16
    248 	and	$i1,lr,$s0
    249 	mov	$s0,$s0,lsr#24
    250 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    251 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    252 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    253 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    254 
        	@ bytes of s1 feed new s1 (Te0), s0 (Te1), s3 (Te2), s2 (Te3)
    255 	and	$i1,lr,$s1,lsr#16	@ i0
    256 	and	$i2,lr,$s1
    257 	and	$i3,lr,$s1,lsr#8
    258 	mov	$s1,$s1,lsr#24
    259 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    260 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    261 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    262 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    263 	eor	$s0,$s0,$i1,ror#8
    264 	eor	$s1,$s1,$t1,ror#24
    265 	eor	$t2,$t2,$i2,ror#8
    266 	eor	$t3,$t3,$i3,ror#8
    267 
        	@ bytes of s2 feed new s2 (Te0), s1 (Te1), s0 (Te2), s3 (Te3)
    268 	and	$i1,lr,$s2,lsr#8	@ i0
    269 	and	$i2,lr,$s2,lsr#16	@ i1
    270 	and	$i3,lr,$s2
    271 	mov	$s2,$s2,lsr#24
    272 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    273 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    274 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    275 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    276 	eor	$s0,$s0,$i1,ror#16
    277 	eor	$s1,$s1,$i2,ror#8
    278 	eor	$s2,$s2,$t2,ror#16
    279 	eor	$t3,$t3,$i3,ror#16
    280 
        	@ bytes of s3 feed new s3 (Te0), s2 (Te1), s1 (Te2), s0 (Te3)
    281 	and	$i1,lr,$s3		@ i0
    282 	and	$i2,lr,$s3,lsr#8	@ i1
    283 	and	$i3,lr,$s3,lsr#16	@ i2
    284 	mov	$s3,$s3,lsr#24
    285 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    286 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    287 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    288 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    289 	eor	$s0,$s0,$i1,ror#24
    290 	eor	$s1,$s1,$i2,ror#16
    291 	eor	$s2,$s2,$i3,ror#8
    292 	eor	$s3,$s3,$t3,ror#8
    293 
        	@ xor in the next round key and step key forward by 16 bytes
    294 	ldr	$t1,[$key],#16
    295 	ldr	$t2,[$key,#-12]
    296 	ldr	$t3,[$key,#-8]
    297 	ldr	$i1,[$key,#-4]
    298 	eor	$s0,$s0,$t1
    299 	eor	$s1,$s1,$t2
    300 	eor	$s2,$s2,$t3
    301 	eor	$s3,$s3,$i1
    302 
    303 	subs	$rounds,$rounds,#1
    304 	bne	.Lenc_loop
    305 
        	@ Last round: only S-box bytes are needed. With the table base
        	@ moved up by 2, ldrb with an index scaled by 4 fetches byte 2
        	@ of each 4-byte entry; bytes 1 and 2 of an entry are equal, so
        	@ the value read is the same for either byte order. The output
        	@ words are built up with shifts instead of rotations.
    306 	add	$tbl,$tbl,#2
    307 
    308 	and	$i1,lr,$s0
    309 	and	$i2,lr,$s0,lsr#8
    310 	and	$i3,lr,$s0,lsr#16
    311 	mov	$s0,$s0,lsr#24
    312 	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    313 	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    314 	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    315 	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    316 
    317 	and	$i1,lr,$s1,lsr#16	@ i0
    318 	and	$i2,lr,$s1
    319 	and	$i3,lr,$s1,lsr#8
    320 	mov	$s1,$s1,lsr#24
    321 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    322 	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    323 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    324 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    325 	eor	$s0,$i1,$s0,lsl#8
    326 	eor	$s1,$t1,$s1,lsl#24
    327 	eor	$t2,$i2,$t2,lsl#8
    328 	eor	$t3,$i3,$t3,lsl#8
    329 
    330 	and	$i1,lr,$s2,lsr#8	@ i0
    331 	and	$i2,lr,$s2,lsr#16	@ i1
    332 	and	$i3,lr,$s2
    333 	mov	$s2,$s2,lsr#24
    334 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    335 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    336 	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    337 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    338 	eor	$s0,$i1,$s0,lsl#8
    339 	eor	$s1,$s1,$i2,lsl#16
    340 	eor	$s2,$t2,$s2,lsl#24
    341 	eor	$t3,$i3,$t3,lsl#8
    342 
    343 	and	$i1,lr,$s3		@ i0
    344 	and	$i2,lr,$s3,lsr#8	@ i1
    345 	and	$i3,lr,$s3,lsr#16	@ i2
    346 	mov	$s3,$s3,lsr#24
    347 	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    348 	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    349 	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    350 	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    351 	eor	$s0,$i1,$s0,lsl#8
    352 	eor	$s1,$s1,$i2,lsl#8
    353 	eor	$s2,$s2,$i3,lsl#16
    354 	eor	$s3,$t3,$s3,lsl#24
    355 
        	@ xor in the final round key; key is not advanced this time
    356 	ldr	lr,[sp],#4		@ pop lr
    357 	ldr	$t1,[$key,#0]
    358 	ldr	$t2,[$key,#4]
    359 	ldr	$t3,[$key,#8]
    360 	ldr	$i1,[$key,#12]
    361 	eor	$s0,$s0,$t1
    362 	eor	$s1,$s1,$t2
    363 	eor	$s2,$s2,$t3
    364 	eor	$s3,$s3,$i1
    365 
    366 	sub	$tbl,$tbl,#2		@ undo the +2 applied for the last round
    367 	mov	pc,lr			@ return
    368 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    369 
    370 .global AES_set_encrypt_key
    371 .type   AES_set_encrypt_key,%function
    372 .align	5
        @ AES_set_encrypt_key -- expand a user key into the encryption schedule.
        @ In:    r0 = user key bytes (inp), r1 = key length in bits,
        @        r2 = key schedule to fill
        @ Out:   r0 = 0 on success; r0 = -1 when r0 or r2 is zero or when bits is
        @        not 128, 192 or 256 (nothing has been written in that case).
        @        r2 = start of the key schedule again; AES_set_decrypt_key
        @        relies on that.
        @ Saves: r4-r12 and lr are pushed once the arguments have been checked.
        @ The round count (10, 12 or 14) is stored at byte offset 240 of the
        @ schedule. Key words are held with the first key byte in bits 31..24.
    373 AES_set_encrypt_key:
    374 	sub	r3,pc,#8		@ AES_set_encrypt_key
    375 	teq	r0,#0
    376 	moveq	r0,#-1
    377 	beq	.Labrt
    378 	teq	r2,#0
    379 	moveq	r0,#-1
    380 	beq	.Labrt
    381 
    382 	teq	r1,#128
    383 	beq	.Lok
    384 	teq	r1,#192
    385 	beq	.Lok
    386 	teq	r1,#256
    387 	movne	r0,#-1
    388 	bne	.Labrt
    389 
    390 .Lok:	stmdb   sp!,{r4-r12,lr}
    391 	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
    392 
    393 	mov	$rounds,r0		@ inp
    394 	mov	lr,r1			@ bits
    395 	mov	$key,r2			@ key
    396 
        	@ The first 16 key bytes become rk[0..3] for every key size.
    397 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    398 	ldrb	$t1,[$rounds,#2]	@ manner...
    399 	ldrb	$t2,[$rounds,#1]
    400 	ldrb	$t3,[$rounds,#0]
    401 	orr	$s0,$s0,$t1,lsl#8
    402 	orr	$s0,$s0,$t2,lsl#16
    403 	orr	$s0,$s0,$t3,lsl#24
    404 	ldrb	$s1,[$rounds,#7]
    405 	ldrb	$t1,[$rounds,#6]
    406 	ldrb	$t2,[$rounds,#5]
    407 	ldrb	$t3,[$rounds,#4]
    408 	orr	$s1,$s1,$t1,lsl#8
    409 	orr	$s1,$s1,$t2,lsl#16
    410 	orr	$s1,$s1,$t3,lsl#24
    411 	ldrb	$s2,[$rounds,#11]
    412 	ldrb	$t1,[$rounds,#10]
    413 	ldrb	$t2,[$rounds,#9]
    414 	ldrb	$t3,[$rounds,#8]
    415 	orr	$s2,$s2,$t1,lsl#8
    416 	orr	$s2,$s2,$t2,lsl#16
    417 	orr	$s2,$s2,$t3,lsl#24
    418 	ldrb	$s3,[$rounds,#15]
    419 	ldrb	$t1,[$rounds,#14]
    420 	ldrb	$t2,[$rounds,#13]
    421 	ldrb	$t3,[$rounds,#12]
    422 	orr	$s3,$s3,$t1,lsl#8
    423 	orr	$s3,$s3,$t2,lsl#16
    424 	orr	$s3,$s3,$t3,lsl#24
    425 	str	$s0,[$key],#16
    426 	str	$s1,[$key,#-12]
    427 	str	$s2,[$key,#-8]
    428 	str	$s3,[$key,#-4]
    429 
        	@ ---- 128-bit key: 10 expansion steps of 4 words each ----
    430 	teq	lr,#128
    431 	bne	.Lnot128
    432 	mov	$rounds,#10
    433 	str	$rounds,[$key,#240-16]	@ round count; key is 16 bytes into the schedule
    434 	add	$t3,$tbl,#256			@ rcon
    435 	mov	lr,#255			@ bits are no longer needed; lr becomes the byte mask
    436 
    437 .L128_loop:
        	@ t2 = previous word rotated left by one byte with every byte
        	@ replaced through Te4, then xored with the round constant
    438 	and	$t2,lr,$s3,lsr#24
    439 	and	$i1,lr,$s3,lsr#16
    440 	and	$i2,lr,$s3,lsr#8
    441 	and	$i3,lr,$s3
    442 	ldrb	$t2,[$tbl,$t2]
    443 	ldrb	$i1,[$tbl,$i1]
    444 	ldrb	$i2,[$tbl,$i2]
    445 	ldrb	$i3,[$tbl,$i3]
    446 	ldr	$t1,[$t3],#4			@ rcon[i++]
    447 	orr	$t2,$t2,$i1,lsl#24
    448 	orr	$t2,$t2,$i2,lsl#16
    449 	orr	$t2,$t2,$i3,lsl#8
    450 	eor	$t2,$t2,$t1
    451 	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    452 	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    453 	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    454 	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    455 	str	$s0,[$key],#16
    456 	str	$s1,[$key,#-12]
    457 	str	$s2,[$key,#-8]
    458 	str	$s3,[$key,#-4]
    459 
    460 	subs	$rounds,$rounds,#1
    461 	bne	.L128_loop
    462 	sub	r2,$key,#176			@ 176 = 44 words written; r2 = schedule start
    463 	b	.Ldone
    464 
    465 .Lnot128:
        	@ key bytes 16..23 become rk[4] (i2) and rk[5] (i3)
    466 	ldrb	$i2,[$rounds,#19]
    467 	ldrb	$t1,[$rounds,#18]
    468 	ldrb	$t2,[$rounds,#17]
    469 	ldrb	$t3,[$rounds,#16]
    470 	orr	$i2,$i2,$t1,lsl#8
    471 	orr	$i2,$i2,$t2,lsl#16
    472 	orr	$i2,$i2,$t3,lsl#24
    473 	ldrb	$i3,[$rounds,#23]
    474 	ldrb	$t1,[$rounds,#22]
    475 	ldrb	$t2,[$rounds,#21]
    476 	ldrb	$t3,[$rounds,#20]
    477 	orr	$i3,$i3,$t1,lsl#8
    478 	orr	$i3,$i3,$t2,lsl#16
    479 	orr	$i3,$i3,$t3,lsl#24
    480 	str	$i2,[$key],#8
    481 	str	$i3,[$key,#-4]
    482 
        	@ ---- 192-bit key: 8 passes of 6 words; the last pass stops
        	@ after 4 words ----
    483 	teq	lr,#192
    484 	bne	.Lnot192
    485 	mov	$rounds,#12
    486 	str	$rounds,[$key,#240-24]	@ round count; key is 24 bytes into the schedule
    487 	add	$t3,$tbl,#256			@ rcon
    488 	mov	lr,#255
    489 	mov	$rounds,#8			@ pass counter from here on
    490 
    491 .L192_loop:
        	@ i3 holds the most recent word; it is overwritten by the last
        	@ of the four and instructions, after the other indices are out
    492 	and	$t2,lr,$i3,lsr#24
    493 	and	$i1,lr,$i3,lsr#16
    494 	and	$i2,lr,$i3,lsr#8
    495 	and	$i3,lr,$i3
    496 	ldrb	$t2,[$tbl,$t2]
    497 	ldrb	$i1,[$tbl,$i1]
    498 	ldrb	$i2,[$tbl,$i2]
    499 	ldrb	$i3,[$tbl,$i3]
    500 	ldr	$t1,[$t3],#4			@ rcon[i++]
    501 	orr	$t2,$t2,$i1,lsl#24
    502 	orr	$t2,$t2,$i2,lsl#16
    503 	orr	$t2,$t2,$i3,lsl#8
    504 	eor	$i3,$t2,$t1
    505 	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    506 	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    507 	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    508 	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    509 	str	$s0,[$key],#24
    510 	str	$s1,[$key,#-20]
    511 	str	$s2,[$key,#-16]
    512 	str	$s3,[$key,#-12]
    513 
    514 	subs	$rounds,$rounds,#1
    515 	subeq	r2,$key,#216			@ done: rewind r2 to the schedule start
    516 	beq	.Ldone
    517 
        	@ remaining two words of this pass, reading the two words
        	@ stored six positions earlier
    518 	ldr	$i1,[$key,#-32]
    519 	ldr	$i2,[$key,#-28]
    520 	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    521 	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    522 	str	$i1,[$key,#-8]
    523 	str	$i3,[$key,#-4]
    524 	b	.L192_loop
    525 
    526 .Lnot192:
        	@ key bytes 24..31 become rk[6] (i2) and rk[7] (i3)
    527 	ldrb	$i2,[$rounds,#27]
    528 	ldrb	$t1,[$rounds,#26]
    529 	ldrb	$t2,[$rounds,#25]
    530 	ldrb	$t3,[$rounds,#24]
    531 	orr	$i2,$i2,$t1,lsl#8
    532 	orr	$i2,$i2,$t2,lsl#16
    533 	orr	$i2,$i2,$t3,lsl#24
    534 	ldrb	$i3,[$rounds,#31]
    535 	ldrb	$t1,[$rounds,#30]
    536 	ldrb	$t2,[$rounds,#29]
    537 	ldrb	$t3,[$rounds,#28]
    538 	orr	$i3,$i3,$t1,lsl#8
    539 	orr	$i3,$i3,$t2,lsl#16
    540 	orr	$i3,$i3,$t3,lsl#24
    541 	str	$i2,[$key],#8
    542 	str	$i3,[$key,#-4]
    543 
        	@ ---- 256-bit key: 7 passes of 8 words; the last pass stops
        	@ after 4 words ----
    544 	mov	$rounds,#14
    545 	str	$rounds,[$key,#240-32]	@ round count; key is 32 bytes into the schedule
    546 	add	$t3,$tbl,#256			@ rcon
    547 	mov	lr,#255
    548 	mov	$rounds,#7			@ pass counter from here on
    549 
    550 .L256_loop:
    551 	and	$t2,lr,$i3,lsr#24
    552 	and	$i1,lr,$i3,lsr#16
    553 	and	$i2,lr,$i3,lsr#8
    554 	and	$i3,lr,$i3
    555 	ldrb	$t2,[$tbl,$t2]
    556 	ldrb	$i1,[$tbl,$i1]
    557 	ldrb	$i2,[$tbl,$i2]
    558 	ldrb	$i3,[$tbl,$i3]
    559 	ldr	$t1,[$t3],#4			@ rcon[i++]
    560 	orr	$t2,$t2,$i1,lsl#24
    561 	orr	$t2,$t2,$i2,lsl#16
    562 	orr	$t2,$t2,$i3,lsl#8
    563 	eor	$i3,$t2,$t1
    564 	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    565 	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    566 	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    567 	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    568 	str	$s0,[$key],#32
    569 	str	$s1,[$key,#-28]
    570 	str	$s2,[$key,#-24]
    571 	str	$s3,[$key,#-20]
    572 
    573 	subs	$rounds,$rounds,#1
    574 	subeq	r2,$key,#256			@ done: rewind r2 to the schedule start
    575 	beq	.Ldone
    576 
        	@ second half of the pass: the bytes of s3 go through Te4 in
        	@ place (no byte rotation) and no round constant is applied
    577 	and	$t2,lr,$s3
    578 	and	$i1,lr,$s3,lsr#8
    579 	and	$i2,lr,$s3,lsr#16
    580 	and	$i3,lr,$s3,lsr#24
    581 	ldrb	$t2,[$tbl,$t2]
    582 	ldrb	$i1,[$tbl,$i1]
    583 	ldrb	$i2,[$tbl,$i2]
    584 	ldrb	$i3,[$tbl,$i3]
    585 	orr	$t2,$t2,$i1,lsl#8
    586 	orr	$t2,$t2,$i2,lsl#16
    587 	orr	$t2,$t2,$i3,lsl#24
    588 
    589 	ldr	$t1,[$key,#-48]
    590 	ldr	$i1,[$key,#-44]
    591 	ldr	$i2,[$key,#-40]
    592 	ldr	$i3,[$key,#-36]
    593 	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    594 	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    595 	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    596 	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    597 	str	$t1,[$key,#-16]
    598 	str	$i1,[$key,#-12]
    599 	str	$i2,[$key,#-8]
    600 	str	$i3,[$key,#-4]
    601 	b	.L256_loop
    602 
        	@ .Ldone: success exit. .Labrt: return sequence alone; it is
        	@ entered with nothing on the stack, both from the argument
        	@ checks above and from AES_set_decrypt_key.
    603 .Ldone:	mov	r0,#0
    604 	ldmia   sp!,{r4-r12,lr}
    605 .Labrt:	tst	lr,#1
    606 	moveq	pc,lr			@ be binary compatible with V4, yet
    607 	bx	lr			@ interoperable with Thumb ISA:-)
    608 .size	AES_set_encrypt_key,.-AES_set_encrypt_key
    609 
    610 .global AES_set_decrypt_key
    611 .type   AES_set_decrypt_key,%function
    612 .align	5
        @ AES_set_decrypt_key -- build the decryption key schedule.
        @ In:    r0, r1, r2 are handed unchanged to AES_set_encrypt_key
        @ Out:   r0 = 0 on success, otherwise the non-zero result of
        @        AES_set_encrypt_key
        @ Saves: lr, then r4-r12 once the expansion has succeeded
        @ Steps: 1. expand the key with AES_set_encrypt_key
        @        2. .Linv: reverse the order of the 16-byte round keys in place
        @        3. .Lmix: run every round key except the first and the last
        @           through the InvMixColumns transform, one word at a time
    613 AES_set_decrypt_key:
    614 	str	lr,[sp,#-4]!            @ push lr
    615 	bl	AES_set_encrypt_key
    616 	teq	r0,#0
    617 	ldrne	lr,[sp],#4              @ pop lr
    618 	bne	.Labrt			@ shared return sequence inside AES_set_encrypt_key
    619 
    620 	stmdb   sp!,{r4-r12}
    621 
    622 	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
    623 	mov	$key,r2			@ which is AES_KEY *key
    624 	mov	$i1,r2
    625 	add	$i2,r2,$rounds,lsl#4	@ address of the last round key (16 bytes each)
    626 
        	@ Swap round keys pairwise from both ends; r7 walks up and r8
        	@ walks down until the two pointers meet.
    627 .Linv:	ldr	$s0,[$i1]
    628 	ldr	$s1,[$i1,#4]
    629 	ldr	$s2,[$i1,#8]
    630 	ldr	$s3,[$i1,#12]
    631 	ldr	$t1,[$i2]
    632 	ldr	$t2,[$i2,#4]
    633 	ldr	$t3,[$i2,#8]
    634 	ldr	$i3,[$i2,#12]
    635 	str	$s0,[$i2],#-16
    636 	str	$s1,[$i2,#16+4]
    637 	str	$s2,[$i2,#16+8]
    638 	str	$s3,[$i2,#16+12]
    639 	str	$t1,[$i1],#16
    640 	str	$t2,[$i1,#-12]
    641 	str	$t3,[$i1,#-8]
    642 	str	$i3,[$i1,#-4]
    643 	teq	$i1,$i2
    644 	bne	.Linv
    645 ___
    646 $mask80=$i1;
    647 $mask1b=$i2;
    648 $mask7f=$i3;
    649 $code.=<<___;
        	@ From here on r7, r8 and r9 (the i1, i2, i3 registers above)
        	@ carry the constants mask80 = 0x80808080, mask1b = 0x1b1b1b1b
        	@ and mask7f = 0x7f7f7f7f.
    650 	ldr	$s0,[$key,#16]!		@ prefetch tp1
    651 	mov	$mask80,#0x80
    652 	mov	$mask1b,#0x1b
    653 	orr	$mask80,$mask80,#0x8000
    654 	orr	$mask1b,$mask1b,#0x1b00
    655 	orr	$mask80,$mask80,$mask80,lsl#16
    656 	orr	$mask1b,$mask1b,$mask1b,lsl#16
    657 	sub	$rounds,$rounds,#1
    658 	mvn	$mask7f,$mask80
    659 	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
    660 
        	@ tp2, tp4 and tp8 are successive doublings in GF(2^8) applied
        	@ to all four bytes of the word at once: the low 7 bits of each
        	@ byte are shifted left, and every byte whose top bit was set
        	@ gets 0x1b xored in (0x80 - 0x01 = 0x7f, masked down to 0x1b).
    661 .Lmix:	and	$t1,$s0,$mask80
    662 	and	$s1,$s0,$mask7f
    663 	sub	$t1,$t1,$t1,lsr#7
    664 	and	$t1,$t1,$mask1b
    665 	eor	$s1,$t1,$s1,lsl#1	@ tp2
    666 
    667 	and	$t1,$s1,$mask80
    668 	and	$s2,$s1,$mask7f
    669 	sub	$t1,$t1,$t1,lsr#7
    670 	and	$t1,$t1,$mask1b
    671 	eor	$s2,$t1,$s2,lsl#1	@ tp4
    672 
    673 	and	$t1,$s2,$mask80
    674 	and	$s3,$s2,$mask7f
    675 	sub	$t1,$t1,$t1,lsr#7
    676 	and	$t1,$t1,$mask1b
    677 	eor	$s3,$t1,$s3,lsl#1	@ tp8
    678 
        	@ result = tpe ^ ROTATE(tpb,8) ^ ROTATE(tpd,16) ^ ROTATE(tp9,24);
        	@ ROTATE is a left rotation, i.e. ror by 32 minus the count
    679 	eor	$t1,$s1,$s2
    680 	eor	$t2,$s0,$s3		@ tp9
    681 	eor	$t1,$t1,$s3		@ tpe
    682 	eor	$t1,$t1,$s1,ror#24
    683 	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    684 	eor	$t1,$t1,$s2,ror#16
    685 	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    686 	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    687 
    688 	ldr	$s0,[$key,#4]		@ prefetch tp1
    689 	str	$t1,[$key],#4
    690 	subs	$rounds,$rounds,#1
    691 	bne	.Lmix
    692 
    693 	mov	r0,#0
    694 	ldmia   sp!,{r4-r12,lr}	@ undoes both the stmdb above and the initial push of lr
    695 	tst	lr,#1
    696 	moveq	pc,lr			@ be binary compatible with V4, yet
    697 	bx	lr			@ interoperable with Thumb ISA:-)
    698 .size	AES_set_decrypt_key,.-AES_set_decrypt_key
    699 
    700 .type	AES_Td,%object
    701 .align	5
        @ AES_Td -- decryption tables, laid out like AES_Te:
        @   +0     256 words: inverse T-table. The decryption round indexes this
        @          single table for all of its Td0..Td3 lookups and rotates the
        @          loaded word as needed.
        @   +1024  Td4: the 256-byte inverse S-box.
        @ AES_decrypt locates the table relative to its own address (pc).
    702 AES_Td:
    703 .word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    704 .word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    705 .word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    706 .word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    707 .word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    708 .word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    709 .word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    710 .word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    711 .word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    712 .word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    713 .word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    714 .word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    715 .word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    716 .word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    717 .word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    718 .word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    719 .word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    720 .word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    721 .word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    722 .word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    723 .word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    724 .word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    725 .word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    726 .word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    727 .word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    728 .word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    729 .word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    730 .word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    731 .word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    732 .word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    733 .word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    734 .word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    735 .word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    736 .word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    737 .word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    738 .word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    739 .word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    740 .word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    741 .word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    742 .word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    743 .word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    744 .word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    745 .word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    746 .word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    747 .word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    748 .word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    749 .word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    750 .word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    751 .word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    752 .word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    753 .word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    754 .word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    755 .word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    756 .word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    757 .word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    758 .word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    759 .word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    760 .word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    761 .word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    762 .word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    763 .word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    764 .word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    765 .word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    766 .word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    767 @ Td4[256]
        @ Inverse S-box, one byte per entry, at AES_Td+1024.
    768 .byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    769 .byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    770 .byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    771 .byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    772 .byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    773 .byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    774 .byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    775 .byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    776 .byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    777 .byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    778 .byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    779 .byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    780 .byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    781 .byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    782 .byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    783 .byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    784 .byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    785 .byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    786 .byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    787 .byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    788 .byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    789 .byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    790 .byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    791 .byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    792 .byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    793 .byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    794 .byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    795 .byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    796 .byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    797 .byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    798 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    799 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    800 .size	AES_Td,.-AES_Td
    801 
    802 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
    803 @ 		 const AES_KEY *key) {
        @
        @ Decrypts one 16-byte block; same structure as AES_encrypt, but with
        @ the AES_Td table and the _armv4_AES_decrypt core.
        @ In:    r0 = in, r1 = out, r2 = key
        @ Out:   16 bytes stored at out; no return value is produced
        @ Saves: r4-r12 and lr are pushed on entry and restored on exit. r1 is
        @        pushed together with them so that out survives the core call.
    804 .global AES_decrypt
    805 .type   AES_decrypt,%function
    806 .align	5
    807 AES_decrypt:
    808 	sub	r3,pc,#8		@ AES_decrypt
    809 	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) lands at the lowest address, i.e. [sp]
    810 	mov	$rounds,r0		@ inp
    811 	mov	$key,r2
    812 	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
    813 
        	@ Assemble each state word from four single-byte loads, first
        	@ input byte in bits 31..24.
    814 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    815 	ldrb	$t1,[$rounds,#2]	@ manner...
    816 	ldrb	$t2,[$rounds,#1]
    817 	ldrb	$t3,[$rounds,#0]
    818 	orr	$s0,$s0,$t1,lsl#8
    819 	orr	$s0,$s0,$t2,lsl#16
    820 	orr	$s0,$s0,$t3,lsl#24
    821 	ldrb	$s1,[$rounds,#7]
    822 	ldrb	$t1,[$rounds,#6]
    823 	ldrb	$t2,[$rounds,#5]
    824 	ldrb	$t3,[$rounds,#4]
    825 	orr	$s1,$s1,$t1,lsl#8
    826 	orr	$s1,$s1,$t2,lsl#16
    827 	orr	$s1,$s1,$t3,lsl#24
    828 	ldrb	$s2,[$rounds,#11]
    829 	ldrb	$t1,[$rounds,#10]
    830 	ldrb	$t2,[$rounds,#9]
    831 	ldrb	$t3,[$rounds,#8]
    832 	orr	$s2,$s2,$t1,lsl#8
    833 	orr	$s2,$s2,$t2,lsl#16
    834 	orr	$s2,$s2,$t3,lsl#24
    835 	ldrb	$s3,[$rounds,#15]
    836 	ldrb	$t1,[$rounds,#14]
    837 	ldrb	$t2,[$rounds,#13]
    838 	ldrb	$t3,[$rounds,#12]
    839 	orr	$s3,$s3,$t1,lsl#8
    840 	orr	$s3,$s3,$t2,lsl#16
    841 	orr	$s3,$s3,$t3,lsl#24
    842 
    843 	bl	_armv4_AES_decrypt
    844 
        	@ Store the four result words back byte by byte, most
        	@ significant byte first (strb writes the low 8 bits).
    845 	ldr	$rounds,[sp],#4		@ pop out
    846 	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    847 	mov	$t2,$s0,lsr#16		@ manner...
    848 	mov	$t3,$s0,lsr#8
    849 	strb	$t1,[$rounds,#0]
    850 	strb	$t2,[$rounds,#1]
    851 	strb	$t3,[$rounds,#2]
    852 	strb	$s0,[$rounds,#3]
    853 	mov	$t1,$s1,lsr#24
    854 	mov	$t2,$s1,lsr#16
    855 	mov	$t3,$s1,lsr#8
    856 	strb	$t1,[$rounds,#4]
    857 	strb	$t2,[$rounds,#5]
    858 	strb	$t3,[$rounds,#6]
    859 	strb	$s1,[$rounds,#7]
    860 	mov	$t1,$s2,lsr#24
    861 	mov	$t2,$s2,lsr#16
    862 	mov	$t3,$s2,lsr#8
    863 	strb	$t1,[$rounds,#8]
    864 	strb	$t2,[$rounds,#9]
    865 	strb	$t3,[$rounds,#10]
    866 	strb	$s2,[$rounds,#11]
    867 	mov	$t1,$s3,lsr#24
    868 	mov	$t2,$s3,lsr#16
    869 	mov	$t3,$s3,lsr#8
    870 	strb	$t1,[$rounds,#12]
    871 	strb	$t2,[$rounds,#13]
    872 	strb	$t3,[$rounds,#14]
    873 	strb	$s3,[$rounds,#15]
    874 
        	@ Return: a plain mov when bit 0 of lr is clear, bx otherwise.
    875 	ldmia   sp!,{r4-r12,lr}
    876 	tst	lr,#1
    877 	moveq	pc,lr			@ be binary compatible with V4, yet
    878 	bx	lr			@ interoperable with Thumb ISA:-)
    879 .size	AES_decrypt,.-AES_decrypt
    880 
     881 .type   _armv4_AES_decrypt,%function
     882 .align	2
         @ _armv4_AES_decrypt: internal block-decryption core (not exported).
         @
         @ In:   s0-s3 (r0-r3)  the four state words, no round key applied yet
         @       key   (r11)    pointer to the key schedule; the round count
         @                      is read from byte offset 240 of that structure
         @       tbl   (r10)    address of AES_Td
         @ Out:  s0-s3          the transformed state words
         @
         @ Overwrites t1-t3 and i1-i3 (r4-r9), rounds (r12) and the flags.
         @ key is advanced by 16 per round key consumed and is left pointing
         @ at the final round key.  tbl is moved to the byte table for the
         @ last round and moved back before returning.  lr serves as the
         @ 0xff byte mask, so the return address is kept on the stack.
     883 _armv4_AES_decrypt:
     884 	str	lr,[sp,#-4]!		@ push lr
     885 	ldr	$t1,[$key],#16		@ first round key; key += 16
     886 	ldr	$t2,[$key,#-12]
     887 	ldr	$t3,[$key,#-8]
     888 	ldr	$i1,[$key,#-4]
     889 	ldr	$rounds,[$key,#240-16]	@ offset 240 from the original key pointer
     890 	eor	$s0,$s0,$t1
     891 	eor	$s1,$s1,$t2
     892 	eor	$s2,$s2,$t3
     893 	eor	$s3,$s3,$i1
     894 	sub	$rounds,$rounds,#1	@ the last round is done after the loop
     895 	mov	lr,#255			@ byte mask for index extraction
     896 
         	@ Main rounds.  Every lookup reads the same word table at tbl;
         	@ the ror amounts applied while terms are XORed together give
         	@ the Td1/Td2/Td3 variants named in the comments.  t2 and t3
         	@ collect the terms destined for the new s2 and s3: each later
         	@ "ror#8" on them rotates everything collected so far once more.
     897 .Ldec_loop:
     898 	and	$i1,lr,$s0,lsr#16
     899 	and	$i2,lr,$s0,lsr#8
     900 	and	$i3,lr,$s0
     901 	mov	$s0,$s0,lsr#24
     902 	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
     903 	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
     904 	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
     905 	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
     906 
     907 	and	$i1,lr,$s1		@ i0
     908 	and	$i2,lr,$s1,lsr#16
     909 	and	$i3,lr,$s1,lsr#8
     910 	mov	$s1,$s1,lsr#24
     911 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
     912 	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
     913 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
     914 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
     915 	eor	$s0,$s0,$i1,ror#24
     916 	eor	$s1,$s1,$t1,ror#8
     917 	eor	$t2,$i2,$t2,ror#8	@ partial sum for new s2
     918 	eor	$t3,$i3,$t3,ror#8	@ partial sum for new s3
     919 
     920 	and	$i1,lr,$s2,lsr#8	@ i0
     921 	and	$i2,lr,$s2		@ i1
     922 	and	$i3,lr,$s2,lsr#16
     923 	mov	$s2,$s2,lsr#24
     924 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
     925 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
     926 	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
     927 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
     928 	eor	$s0,$s0,$i1,ror#16
     929 	eor	$s1,$s1,$i2,ror#24
     930 	eor	$s2,$s2,$t2,ror#8	@ fold the partial sum into s2
     931 	eor	$t3,$i3,$t3,ror#8
     932 
     933 	and	$i1,lr,$s3,lsr#16	@ i0
     934 	and	$i2,lr,$s3,lsr#8	@ i1
     935 	and	$i3,lr,$s3		@ i2
     936 	mov	$s3,$s3,lsr#24
     937 	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
     938 	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
     939 	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
     940 	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
     941 	eor	$s0,$s0,$i1,ror#8
     942 	eor	$s1,$s1,$i2,ror#16
     943 	eor	$s2,$s2,$i3,ror#24
     944 	eor	$s3,$s3,$t3,ror#8	@ fold the partial sum into s3
     945 
         	@ XOR in the next round key and advance key by 16.
     946 	ldr	$t1,[$key],#16
     947 	ldr	$t2,[$key,#-12]
     948 	ldr	$t3,[$key,#-8]
     949 	ldr	$i1,[$key,#-4]
     950 	eor	$s0,$s0,$t1
     951 	eor	$s1,$s1,$t2
     952 	eor	$s2,$s2,$t3
     953 	eor	$s3,$s3,$i1
     954 
     955 	subs	$rounds,$rounds,#1
     956 	bne	.Ldec_loop
     957 
         	@ Last round: byte-wide lookups in the table that follows the
         	@ 1024-byte word table; results are placed with shifts instead
         	@ of rotates.
     958 	add	$tbl,$tbl,#1024
     959 
         	@ Eight word loads at 32-byte intervals across the byte table.
         	@ None of the loaded values is used: every destination register
         	@ is overwritten below before it is read.
         	@ NOTE(review): presumably this warms the cache, one load per
         	@ 32-byte line, ahead of the data-dependent lookups - confirm.
     960 	ldr	$t1,[$tbl,#0]		@ prefetch Td4
     961 	ldr	$t2,[$tbl,#32]
     962 	ldr	$t3,[$tbl,#64]
     963 	ldr	$i1,[$tbl,#96]
     964 	ldr	$i2,[$tbl,#128]
     965 	ldr	$i3,[$tbl,#160]
     966 	ldr	$t1,[$tbl,#192]
     967 	ldr	$t2,[$tbl,#224]
     968 
     969 	and	$i1,lr,$s0,lsr#16
     970 	and	$i2,lr,$s0,lsr#8
     971 	and	$i3,lr,$s0
     972 	ldrb	$s0,[$tbl,$s0,lsr#24]	@ Td4[s0>>24]
     973 	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
     974 	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
     975 	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
     976 
     977 	and	$i1,lr,$s1		@ i0
     978 	and	$i2,lr,$s1,lsr#16
     979 	and	$i3,lr,$s1,lsr#8
     980 	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
     981 	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
     982 	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
     983 	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
     984 	eor	$s0,$i1,$s0,lsl#24
     985 	eor	$s1,$t1,$s1,lsl#8
     986 	eor	$t2,$t2,$i2,lsl#8	@ partial result for new s2
     987 	eor	$t3,$t3,$i3,lsl#8	@ partial result for new s3
     988 
     989 	and	$i1,lr,$s2,lsr#8	@ i0
     990 	and	$i2,lr,$s2		@ i1
     991 	and	$i3,lr,$s2,lsr#16
     992 	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
     993 	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
     994 	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
     995 	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
     996 	eor	$s0,$s0,$i1,lsl#8
     997 	eor	$s1,$i2,$s1,lsl#16
     998 	eor	$s2,$t2,$s2,lsl#16
     999 	eor	$t3,$t3,$i3,lsl#16
    1000 
    1001 	and	$i1,lr,$s3,lsr#16	@ i0
    1002 	and	$i2,lr,$s3,lsr#8	@ i1
    1003 	and	$i3,lr,$s3		@ i2
    1004 	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
    1005 	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
    1006 	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
    1007 	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
    1008 	eor	$s0,$s0,$i1,lsl#16
    1009 	eor	$s1,$s1,$i2,lsl#8
    1010 	eor	$s2,$i3,$s2,lsl#8
    1011 	eor	$s3,$t3,$s3,lsl#24
    1012 
         	@ The mask is no longer needed, so lr gets the return address
         	@ back.  The final round key is read without advancing key.
    1013 	ldr	lr,[sp],#4		@ pop lr
    1014 	ldr	$t1,[$key,#0]
    1015 	ldr	$t2,[$key,#4]
    1016 	ldr	$t3,[$key,#8]
    1017 	ldr	$i1,[$key,#12]
    1018 	eor	$s0,$s0,$t1
    1019 	eor	$s1,$s1,$t2
    1020 	eor	$s2,$s2,$t3
    1021 	eor	$s3,$s3,$i1
    1022 
    1023 	sub	$tbl,$tbl,#1024		@ undo the +1024 applied for the last round
    1024 	mov	pc,lr			@ return
    1025 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1026 .asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   1027 .align	2
   1028 ___
   1029 
   1030 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   1031 print $code;
   1032