      1 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
      2 +++ openssl-0.9.8h/crypto/0.9.9-dev/README.android	2009-09-03 15:42:39.000000000 -0700
      3 @@ -0,0 +1,6 @@
      4 +This directory does not exist in the OpenSSL distribution.
      5 +
      6 +It has been added to import assembler code from OpenSSL 0.9.9-dev
      7 +(ftp://ftp.openssl.org/snapshot/).  The assembler files (.s) were
      8 +generated by running the Perl files (.pl), with ".align 2" appended
      9 +to avoid assembler error messages where needed.
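         +
         +A minimal regeneration sketch in Perl (hypothetical; the exact commands
         +used for this import are not recorded here).  Each generator prints its
         +assembler source on stdout:
         +
         +    for my $pl (glob "*.pl") {
         +        (my $s = $pl) =~ s/\.pl$/.s/;
         +        my $asm = qx(perl $pl);           # capture generated assembler
         +        die "$pl failed\n" if $?;
         +        $asm .= ".align\t2\n";            # the workaround described above
         +        open my $out, '>', $s or die "$s: $!\n";
         +        print $out $asm;
         +    }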
     10 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
     11 +++ openssl-0.9.8h/crypto/0.9.9-dev/aes/aes-armv4.pl	2009-09-03 15:42:39.000000000 -0700
     12 @@ -0,0 +1,1030 @@
     13 +#!/usr/bin/env perl
     14 +
     15 +# ====================================================================
      16 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
     17 +# project. The module is, however, dual licensed under OpenSSL and
     18 +# CRYPTOGAMS licenses depending on where you obtain it. For further
     19 +# details see http://www.openssl.org/~appro/cryptogams/.
     20 +# ====================================================================
     21 +
     22 +# AES for ARMv4
     23 +
     24 +# January 2007.
     25 +#
      26 +# The code uses a single 1K S-box and is >2 times faster than code
      27 +# generated by gcc-3.4.1, thanks to a unique feature of the ARMv4 ISA:
      28 +# a logical or arithmetic operation can be merged with a shift or
      29 +# rotate in a single instruction, emitting the combined result every
      30 +# cycle. The module is endian-neutral. Performance is ~42 cycles/byte
      31 +# for a 128-bit key.
     32 +
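         +# (To illustrate: the ARM barrel shifter lets a data-processing
         +# instruction rotate one operand for free, e.g.
         +#	eor	r0,r0,r4,ror#24		@ r0 ^= ROTATE(r4,24), one cycle
         +# which is exactly the pattern the table-lookup rounds below rely on.)
         +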
     33 +# May 2007.
     34 +#
     35 +# AES_set_[en|de]crypt_key is added.
     36 +
     37 +$s0="r0";
     38 +$s1="r1";
     39 +$s2="r2";
     40 +$s3="r3";
     41 +$t1="r4";
     42 +$t2="r5";
     43 +$t3="r6";
     44 +$i1="r7";
     45 +$i2="r8";
     46 +$i3="r9";
     47 +
     48 +$tbl="r10";
     49 +$key="r11";
     50 +$rounds="r12";
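         +# Register map: $s0-$s3 hold the 128-bit state, $t1-$t3 and $i1-$i3 are
         +# scratch, $tbl points at the lookup tables, $key at the round keys; in
         +# the wrapper routines $rounds doubles as the in/out pointer.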
     51 +
     52 +$code=<<___;
     53 +.text
     54 +.code	32
     55 +
     56 +.type	AES_Te,%object
     57 +.align	5
     58 +AES_Te:
     59 +.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
     60 +.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
     61 +.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
     62 +.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
     63 +.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
     64 +.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
     65 +.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
     66 +.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
     67 +.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
     68 +.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
     69 +.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
     70 +.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
     71 +.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
     72 +.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
     73 +.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
     74 +.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
     75 +.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
     76 +.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
     77 +.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
     78 +.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
     79 +.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
     80 +.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
     81 +.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
     82 +.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
     83 +.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
     84 +.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
     85 +.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
     86 +.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
     87 +.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
     88 +.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
     89 +.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
     90 +.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
     91 +.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
     92 +.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
     93 +.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
     94 +.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
     95 +.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
     96 +.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
     97 +.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
     98 +.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
     99 +.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
    100 +.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
    101 +.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
    102 +.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
    103 +.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
    104 +.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
    105 +.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
    106 +.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
    107 +.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
    108 +.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
    109 +.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
    110 +.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
    111 +.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
    112 +.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
    113 +.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
    114 +.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
    115 +.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
    116 +.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
    117 +.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
    118 +.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
    119 +.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
    120 +.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
    121 +.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
    122 +.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
    123 +@ Te4[256]
    124 +.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
    125 +.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
    126 +.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
    127 +.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
    128 +.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
    129 +.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
    130 +.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
    131 +.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
    132 +.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
    133 +.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
    134 +.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
    135 +.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
    136 +.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
    137 +.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
    138 +.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
    139 +.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
    140 +.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
    141 +.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
    142 +.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
    143 +.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
    144 +.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
    145 +.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
    146 +.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
    147 +.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
    148 +.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
    149 +.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
    150 +.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
    151 +.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
    152 +.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
    153 +.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
    154 +.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
    155 +.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
    156 +@ rcon[]
    157 +.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
    158 +.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
    159 +.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
    160 +.size	AES_Te,.-AES_Te
    161 +
    162 +@ void AES_encrypt(const unsigned char *in, unsigned char *out,
    163 +@ 		 const AES_KEY *key) {
    164 +.global AES_encrypt
    165 +.type   AES_encrypt,%function
    166 +.align	5
    167 +AES_encrypt:
    168 +	sub	r3,pc,#8		@ AES_encrypt
    169 +	stmdb   sp!,{r1,r4-r12,lr}
    170 +	mov	$rounds,r0		@ inp
    171 +	mov	$key,r2
    172 +	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
    173 +
    174 +	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    175 +	ldrb	$t1,[$rounds,#2]	@ manner...
    176 +	ldrb	$t2,[$rounds,#1]
    177 +	ldrb	$t3,[$rounds,#0]
    178 +	orr	$s0,$s0,$t1,lsl#8
    179 +	orr	$s0,$s0,$t2,lsl#16
    180 +	orr	$s0,$s0,$t3,lsl#24
    181 +	ldrb	$s1,[$rounds,#7]
    182 +	ldrb	$t1,[$rounds,#6]
    183 +	ldrb	$t2,[$rounds,#5]
    184 +	ldrb	$t3,[$rounds,#4]
    185 +	orr	$s1,$s1,$t1,lsl#8
    186 +	orr	$s1,$s1,$t2,lsl#16
    187 +	orr	$s1,$s1,$t3,lsl#24
    188 +	ldrb	$s2,[$rounds,#11]
    189 +	ldrb	$t1,[$rounds,#10]
    190 +	ldrb	$t2,[$rounds,#9]
    191 +	ldrb	$t3,[$rounds,#8]
    192 +	orr	$s2,$s2,$t1,lsl#8
    193 +	orr	$s2,$s2,$t2,lsl#16
    194 +	orr	$s2,$s2,$t3,lsl#24
    195 +	ldrb	$s3,[$rounds,#15]
    196 +	ldrb	$t1,[$rounds,#14]
    197 +	ldrb	$t2,[$rounds,#13]
    198 +	ldrb	$t3,[$rounds,#12]
    199 +	orr	$s3,$s3,$t1,lsl#8
    200 +	orr	$s3,$s3,$t2,lsl#16
    201 +	orr	$s3,$s3,$t3,lsl#24
    202 +
    203 +	bl	_armv4_AES_encrypt
    204 +
    205 +	ldr	$rounds,[sp],#4		@ pop out
    206 +	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    207 +	mov	$t2,$s0,lsr#16		@ manner...
    208 +	mov	$t3,$s0,lsr#8
    209 +	strb	$t1,[$rounds,#0]
    210 +	strb	$t2,[$rounds,#1]
    211 +	strb	$t3,[$rounds,#2]
    212 +	strb	$s0,[$rounds,#3]
    213 +	mov	$t1,$s1,lsr#24
    214 +	mov	$t2,$s1,lsr#16
    215 +	mov	$t3,$s1,lsr#8
    216 +	strb	$t1,[$rounds,#4]
    217 +	strb	$t2,[$rounds,#5]
    218 +	strb	$t3,[$rounds,#6]
    219 +	strb	$s1,[$rounds,#7]
    220 +	mov	$t1,$s2,lsr#24
    221 +	mov	$t2,$s2,lsr#16
    222 +	mov	$t3,$s2,lsr#8
    223 +	strb	$t1,[$rounds,#8]
    224 +	strb	$t2,[$rounds,#9]
    225 +	strb	$t3,[$rounds,#10]
    226 +	strb	$s2,[$rounds,#11]
    227 +	mov	$t1,$s3,lsr#24
    228 +	mov	$t2,$s3,lsr#16
    229 +	mov	$t3,$s3,lsr#8
    230 +	strb	$t1,[$rounds,#12]
    231 +	strb	$t2,[$rounds,#13]
    232 +	strb	$t3,[$rounds,#14]
    233 +	strb	$s3,[$rounds,#15]
    234 +
    235 +	ldmia   sp!,{r4-r12,lr}
    236 +	tst	lr,#1
    237 +	moveq	pc,lr			@ be binary compatible with V4, yet
    238 +	bx	lr			@ interoperable with Thumb ISA:-)
    239 +.size	AES_encrypt,.-AES_encrypt
    240 +
    241 +.type   _armv4_AES_encrypt,%function
    242 +.align	2
    243 +_armv4_AES_encrypt:
    244 +	str	lr,[sp,#-4]!		@ push lr
    245 +	ldr	$t1,[$key],#16
    246 +	ldr	$t2,[$key,#-12]
    247 +	ldr	$t3,[$key,#-8]
    248 +	ldr	$i1,[$key,#-4]
    249 +	ldr	$rounds,[$key,#240-16]
    250 +	eor	$s0,$s0,$t1
    251 +	eor	$s1,$s1,$t2
    252 +	eor	$s2,$s2,$t3
    253 +	eor	$s3,$s3,$i1
    254 +	sub	$rounds,$rounds,#1
    255 +	mov	lr,#255
    256 +
    257 +.Lenc_loop:
    258 +	and	$i2,lr,$s0,lsr#8
    259 +	and	$i3,lr,$s0,lsr#16
    260 +	and	$i1,lr,$s0
    261 +	mov	$s0,$s0,lsr#24
    262 +	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
    263 +	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
    264 +	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
    265 +	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
    266 +
    267 +	and	$i1,lr,$s1,lsr#16	@ i0
    268 +	and	$i2,lr,$s1
    269 +	and	$i3,lr,$s1,lsr#8
    270 +	mov	$s1,$s1,lsr#24
    271 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
    272 +	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
    273 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
    274 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
    275 +	eor	$s0,$s0,$i1,ror#8
    276 +	eor	$s1,$s1,$t1,ror#24
    277 +	eor	$t2,$t2,$i2,ror#8
    278 +	eor	$t3,$t3,$i3,ror#8
    279 +
    280 +	and	$i1,lr,$s2,lsr#8	@ i0
    281 +	and	$i2,lr,$s2,lsr#16	@ i1
    282 +	and	$i3,lr,$s2
    283 +	mov	$s2,$s2,lsr#24
    284 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
    285 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
    286 +	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
    287 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
    288 +	eor	$s0,$s0,$i1,ror#16
    289 +	eor	$s1,$s1,$i2,ror#8
    290 +	eor	$s2,$s2,$t2,ror#16
    291 +	eor	$t3,$t3,$i3,ror#16
    292 +
    293 +	and	$i1,lr,$s3		@ i0
    294 +	and	$i2,lr,$s3,lsr#8	@ i1
    295 +	and	$i3,lr,$s3,lsr#16	@ i2
    296 +	mov	$s3,$s3,lsr#24
    297 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
    298 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
    299 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
    300 +	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
    301 +	eor	$s0,$s0,$i1,ror#24
    302 +	eor	$s1,$s1,$i2,ror#16
    303 +	eor	$s2,$s2,$i3,ror#8
    304 +	eor	$s3,$s3,$t3,ror#8
    305 +
    306 +	ldr	$t1,[$key],#16
    307 +	ldr	$t2,[$key,#-12]
    308 +	ldr	$t3,[$key,#-8]
    309 +	ldr	$i1,[$key,#-4]
    310 +	eor	$s0,$s0,$t1
    311 +	eor	$s1,$s1,$t2
    312 +	eor	$s2,$s2,$t3
    313 +	eor	$s3,$s3,$i1
    314 +
    315 +	subs	$rounds,$rounds,#1
    316 +	bne	.Lenc_loop
    317 +
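         +@ Advance Te by 2 bytes: each AES_Te word is (2*S[i],S[i],S[i],3*S[i]),
         +@ so its byte at offset 2 is S[i] on either endianness and the scaled
         +@ ldrb's below read last-round S-box values straight out of Te0.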
    318 +	add	$tbl,$tbl,#2
    319 +
    320 +	and	$i1,lr,$s0
    321 +	and	$i2,lr,$s0,lsr#8
    322 +	and	$i3,lr,$s0,lsr#16
    323 +	mov	$s0,$s0,lsr#24
    324 +	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
    325 +	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
    326 +	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
    327 +	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
    328 +
    329 +	and	$i1,lr,$s1,lsr#16	@ i0
    330 +	and	$i2,lr,$s1
    331 +	and	$i3,lr,$s1,lsr#8
    332 +	mov	$s1,$s1,lsr#24
    333 +	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
    334 +	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
    335 +	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
    336 +	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
    337 +	eor	$s0,$i1,$s0,lsl#8
    338 +	eor	$s1,$t1,$s1,lsl#24
    339 +	eor	$t2,$i2,$t2,lsl#8
    340 +	eor	$t3,$i3,$t3,lsl#8
    341 +
    342 +	and	$i1,lr,$s2,lsr#8	@ i0
    343 +	and	$i2,lr,$s2,lsr#16	@ i1
    344 +	and	$i3,lr,$s2
    345 +	mov	$s2,$s2,lsr#24
    346 +	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
    347 +	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
    348 +	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
    349 +	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
    350 +	eor	$s0,$i1,$s0,lsl#8
    351 +	eor	$s1,$s1,$i2,lsl#16
    352 +	eor	$s2,$t2,$s2,lsl#24
    353 +	eor	$t3,$i3,$t3,lsl#8
    354 +
    355 +	and	$i1,lr,$s3		@ i0
    356 +	and	$i2,lr,$s3,lsr#8	@ i1
    357 +	and	$i3,lr,$s3,lsr#16	@ i2
    358 +	mov	$s3,$s3,lsr#24
    359 +	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
    360 +	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
    361 +	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
    362 +	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
    363 +	eor	$s0,$i1,$s0,lsl#8
    364 +	eor	$s1,$s1,$i2,lsl#8
    365 +	eor	$s2,$s2,$i3,lsl#16
    366 +	eor	$s3,$t3,$s3,lsl#24
    367 +
    368 +	ldr	lr,[sp],#4		@ pop lr
    369 +	ldr	$t1,[$key,#0]
    370 +	ldr	$t2,[$key,#4]
    371 +	ldr	$t3,[$key,#8]
    372 +	ldr	$i1,[$key,#12]
    373 +	eor	$s0,$s0,$t1
    374 +	eor	$s1,$s1,$t2
    375 +	eor	$s2,$s2,$t3
    376 +	eor	$s3,$s3,$i1
    377 +
    378 +	sub	$tbl,$tbl,#2
    379 +	mov	pc,lr			@ return
    380 +.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
    381 +
    382 +.global AES_set_encrypt_key
    383 +.type   AES_set_encrypt_key,%function
    384 +.align	5
    385 +AES_set_encrypt_key:
    386 +	sub	r3,pc,#8		@ AES_set_encrypt_key
    387 +	teq	r0,#0
    388 +	moveq	r0,#-1
    389 +	beq	.Labrt
    390 +	teq	r2,#0
    391 +	moveq	r0,#-1
    392 +	beq	.Labrt
    393 +
    394 +	teq	r1,#128
    395 +	beq	.Lok
    396 +	teq	r1,#192
    397 +	beq	.Lok
    398 +	teq	r1,#256
    399 +	movne	r0,#-1
    400 +	bne	.Labrt
    401 +
    402 +.Lok:	stmdb   sp!,{r4-r12,lr}
    403 +	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
    404 +
    405 +	mov	$rounds,r0		@ inp
    406 +	mov	lr,r1			@ bits
    407 +	mov	$key,r2			@ key
    408 +
    409 +	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    410 +	ldrb	$t1,[$rounds,#2]	@ manner...
    411 +	ldrb	$t2,[$rounds,#1]
    412 +	ldrb	$t3,[$rounds,#0]
    413 +	orr	$s0,$s0,$t1,lsl#8
    414 +	orr	$s0,$s0,$t2,lsl#16
    415 +	orr	$s0,$s0,$t3,lsl#24
    416 +	ldrb	$s1,[$rounds,#7]
    417 +	ldrb	$t1,[$rounds,#6]
    418 +	ldrb	$t2,[$rounds,#5]
    419 +	ldrb	$t3,[$rounds,#4]
    420 +	orr	$s1,$s1,$t1,lsl#8
    421 +	orr	$s1,$s1,$t2,lsl#16
    422 +	orr	$s1,$s1,$t3,lsl#24
    423 +	ldrb	$s2,[$rounds,#11]
    424 +	ldrb	$t1,[$rounds,#10]
    425 +	ldrb	$t2,[$rounds,#9]
    426 +	ldrb	$t3,[$rounds,#8]
    427 +	orr	$s2,$s2,$t1,lsl#8
    428 +	orr	$s2,$s2,$t2,lsl#16
    429 +	orr	$s2,$s2,$t3,lsl#24
    430 +	ldrb	$s3,[$rounds,#15]
    431 +	ldrb	$t1,[$rounds,#14]
    432 +	ldrb	$t2,[$rounds,#13]
    433 +	ldrb	$t3,[$rounds,#12]
    434 +	orr	$s3,$s3,$t1,lsl#8
    435 +	orr	$s3,$s3,$t2,lsl#16
    436 +	orr	$s3,$s3,$t3,lsl#24
    437 +	str	$s0,[$key],#16
    438 +	str	$s1,[$key,#-12]
    439 +	str	$s2,[$key,#-8]
    440 +	str	$s3,[$key,#-4]
    441 +
    442 +	teq	lr,#128
    443 +	bne	.Lnot128
    444 +	mov	$rounds,#10
    445 +	str	$rounds,[$key,#240-16]
    446 +	add	$t3,$tbl,#256			@ rcon
    447 +	mov	lr,#255
    448 +
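         +@ Each iteration computes rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon[i]:
         +@ the byte extracts run the rotated bytes of rk[3] through Te4, and the
         +@ lsl#24/16/8 merges reassemble the substituted, rotated word.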
    449 +.L128_loop:
    450 +	and	$t2,lr,$s3,lsr#24
    451 +	and	$i1,lr,$s3,lsr#16
    452 +	and	$i2,lr,$s3,lsr#8
    453 +	and	$i3,lr,$s3
    454 +	ldrb	$t2,[$tbl,$t2]
    455 +	ldrb	$i1,[$tbl,$i1]
    456 +	ldrb	$i2,[$tbl,$i2]
    457 +	ldrb	$i3,[$tbl,$i3]
    458 +	ldr	$t1,[$t3],#4			@ rcon[i++]
    459 +	orr	$t2,$t2,$i1,lsl#24
    460 +	orr	$t2,$t2,$i2,lsl#16
    461 +	orr	$t2,$t2,$i3,lsl#8
    462 +	eor	$t2,$t2,$t1
    463 +	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
    464 +	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
    465 +	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
    466 +	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
    467 +	str	$s0,[$key],#16
    468 +	str	$s1,[$key,#-12]
    469 +	str	$s2,[$key,#-8]
    470 +	str	$s3,[$key,#-4]
    471 +
    472 +	subs	$rounds,$rounds,#1
    473 +	bne	.L128_loop
    474 +	sub	r2,$key,#176
    475 +	b	.Ldone
    476 +
    477 +.Lnot128:
    478 +	ldrb	$i2,[$rounds,#19]
    479 +	ldrb	$t1,[$rounds,#18]
    480 +	ldrb	$t2,[$rounds,#17]
    481 +	ldrb	$t3,[$rounds,#16]
    482 +	orr	$i2,$i2,$t1,lsl#8
    483 +	orr	$i2,$i2,$t2,lsl#16
    484 +	orr	$i2,$i2,$t3,lsl#24
    485 +	ldrb	$i3,[$rounds,#23]
    486 +	ldrb	$t1,[$rounds,#22]
    487 +	ldrb	$t2,[$rounds,#21]
    488 +	ldrb	$t3,[$rounds,#20]
    489 +	orr	$i3,$i3,$t1,lsl#8
    490 +	orr	$i3,$i3,$t2,lsl#16
    491 +	orr	$i3,$i3,$t3,lsl#24
    492 +	str	$i2,[$key],#8
    493 +	str	$i3,[$key,#-4]
    494 +
    495 +	teq	lr,#192
    496 +	bne	.Lnot192
    497 +	mov	$rounds,#12
    498 +	str	$rounds,[$key,#240-24]
    499 +	add	$t3,$tbl,#256			@ rcon
    500 +	mov	lr,#255
    501 +	mov	$rounds,#8
    502 +
    503 +.L192_loop:
    504 +	and	$t2,lr,$i3,lsr#24
    505 +	and	$i1,lr,$i3,lsr#16
    506 +	and	$i2,lr,$i3,lsr#8
    507 +	and	$i3,lr,$i3
    508 +	ldrb	$t2,[$tbl,$t2]
    509 +	ldrb	$i1,[$tbl,$i1]
    510 +	ldrb	$i2,[$tbl,$i2]
    511 +	ldrb	$i3,[$tbl,$i3]
    512 +	ldr	$t1,[$t3],#4			@ rcon[i++]
    513 +	orr	$t2,$t2,$i1,lsl#24
    514 +	orr	$t2,$t2,$i2,lsl#16
    515 +	orr	$t2,$t2,$i3,lsl#8
    516 +	eor	$i3,$t2,$t1
    517 +	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
    518 +	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
    519 +	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
    520 +	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
    521 +	str	$s0,[$key],#24
    522 +	str	$s1,[$key,#-20]
    523 +	str	$s2,[$key,#-16]
    524 +	str	$s3,[$key,#-12]
    525 +
    526 +	subs	$rounds,$rounds,#1
    527 +	subeq	r2,$key,#216
    528 +	beq	.Ldone
    529 +
    530 +	ldr	$i1,[$key,#-32]
    531 +	ldr	$i2,[$key,#-28]
    532 +	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
    533 +	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
    534 +	str	$i1,[$key,#-8]
    535 +	str	$i3,[$key,#-4]
    536 +	b	.L192_loop
    537 +
    538 +.Lnot192:
    539 +	ldrb	$i2,[$rounds,#27]
    540 +	ldrb	$t1,[$rounds,#26]
    541 +	ldrb	$t2,[$rounds,#25]
    542 +	ldrb	$t3,[$rounds,#24]
    543 +	orr	$i2,$i2,$t1,lsl#8
    544 +	orr	$i2,$i2,$t2,lsl#16
    545 +	orr	$i2,$i2,$t3,lsl#24
    546 +	ldrb	$i3,[$rounds,#31]
    547 +	ldrb	$t1,[$rounds,#30]
    548 +	ldrb	$t2,[$rounds,#29]
    549 +	ldrb	$t3,[$rounds,#28]
    550 +	orr	$i3,$i3,$t1,lsl#8
    551 +	orr	$i3,$i3,$t2,lsl#16
    552 +	orr	$i3,$i3,$t3,lsl#24
    553 +	str	$i2,[$key],#8
    554 +	str	$i3,[$key,#-4]
    555 +
    556 +	mov	$rounds,#14
    557 +	str	$rounds,[$key,#240-32]
    558 +	add	$t3,$tbl,#256			@ rcon
    559 +	mov	lr,#255
    560 +	mov	$rounds,#7
    561 +
    562 +.L256_loop:
    563 +	and	$t2,lr,$i3,lsr#24
    564 +	and	$i1,lr,$i3,lsr#16
    565 +	and	$i2,lr,$i3,lsr#8
    566 +	and	$i3,lr,$i3
    567 +	ldrb	$t2,[$tbl,$t2]
    568 +	ldrb	$i1,[$tbl,$i1]
    569 +	ldrb	$i2,[$tbl,$i2]
    570 +	ldrb	$i3,[$tbl,$i3]
    571 +	ldr	$t1,[$t3],#4			@ rcon[i++]
    572 +	orr	$t2,$t2,$i1,lsl#24
    573 +	orr	$t2,$t2,$i2,lsl#16
    574 +	orr	$t2,$t2,$i3,lsl#8
    575 +	eor	$i3,$t2,$t1
    576 +	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
    577 +	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
    578 +	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
    579 +	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
    580 +	str	$s0,[$key],#32
    581 +	str	$s1,[$key,#-28]
    582 +	str	$s2,[$key,#-24]
    583 +	str	$s3,[$key,#-20]
    584 +
    585 +	subs	$rounds,$rounds,#1
    586 +	subeq	r2,$key,#256
    587 +	beq	.Ldone
    588 +
    589 +	and	$t2,lr,$s3
    590 +	and	$i1,lr,$s3,lsr#8
    591 +	and	$i2,lr,$s3,lsr#16
    592 +	and	$i3,lr,$s3,lsr#24
    593 +	ldrb	$t2,[$tbl,$t2]
    594 +	ldrb	$i1,[$tbl,$i1]
    595 +	ldrb	$i2,[$tbl,$i2]
    596 +	ldrb	$i3,[$tbl,$i3]
    597 +	orr	$t2,$t2,$i1,lsl#8
    598 +	orr	$t2,$t2,$i2,lsl#16
    599 +	orr	$t2,$t2,$i3,lsl#24
    600 +
    601 +	ldr	$t1,[$key,#-48]
    602 +	ldr	$i1,[$key,#-44]
    603 +	ldr	$i2,[$key,#-40]
    604 +	ldr	$i3,[$key,#-36]
    605 +	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
    606 +	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
    607 +	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
    608 +	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
    609 +	str	$t1,[$key,#-16]
    610 +	str	$i1,[$key,#-12]
    611 +	str	$i2,[$key,#-8]
    612 +	str	$i3,[$key,#-4]
    613 +	b	.L256_loop
    614 +
    615 +.Ldone:	mov	r0,#0
    616 +	ldmia   sp!,{r4-r12,lr}
    617 +.Labrt:	tst	lr,#1
    618 +	moveq	pc,lr			@ be binary compatible with V4, yet
    619 +	bx	lr			@ interoperable with Thumb ISA:-)
    620 +.size	AES_set_encrypt_key,.-AES_set_encrypt_key
    621 +
    622 +.global AES_set_decrypt_key
    623 +.type   AES_set_decrypt_key,%function
    624 +.align	5
    625 +AES_set_decrypt_key:
    626 +	str	lr,[sp,#-4]!            @ push lr
    627 +	bl	AES_set_encrypt_key
    628 +	teq	r0,#0
    629 +	ldrne	lr,[sp],#4              @ pop lr
    630 +	bne	.Labrt
    631 +
    632 +	stmdb   sp!,{r4-r12}
    633 +
    634 +	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
    635 +	mov	$key,r2			@ which is AES_KEY *key
    636 +	mov	$i1,r2
    637 +	add	$i2,r2,$rounds,lsl#4
    638 +
    639 +.Linv:	ldr	$s0,[$i1]
    640 +	ldr	$s1,[$i1,#4]
    641 +	ldr	$s2,[$i1,#8]
    642 +	ldr	$s3,[$i1,#12]
    643 +	ldr	$t1,[$i2]
    644 +	ldr	$t2,[$i2,#4]
    645 +	ldr	$t3,[$i2,#8]
    646 +	ldr	$i3,[$i2,#12]
    647 +	str	$s0,[$i2],#-16
    648 +	str	$s1,[$i2,#16+4]
    649 +	str	$s2,[$i2,#16+8]
    650 +	str	$s3,[$i2,#16+12]
    651 +	str	$t1,[$i1],#16
    652 +	str	$t2,[$i1,#-12]
    653 +	str	$t3,[$i1,#-8]
    654 +	str	$i3,[$i1,#-4]
    655 +	teq	$i1,$i2
    656 +	bne	.Linv
    657 +___
    658 +$mask80=$i1;
    659 +$mask1b=$i2;
    660 +$mask7f=$i3;
    661 +$code.=<<___;
    662 +	ldr	$s0,[$key,#16]!		@ prefetch tp1
    663 +	mov	$mask80,#0x80
    664 +	mov	$mask1b,#0x1b
    665 +	orr	$mask80,$mask80,#0x8000
    666 +	orr	$mask1b,$mask1b,#0x1b00
    667 +	orr	$mask80,$mask80,$mask80,lsl#16
    668 +	orr	$mask1b,$mask1b,$mask1b,lsl#16
    669 +	sub	$rounds,$rounds,#1
    670 +	mvn	$mask7f,$mask80
    671 +	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
    672 +
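         +@ .Lmix applies InvMixColumns to each round-key word, four bytes at a
         +@ time: tp2 = xtime(tp1) via the usual mask trick (bytes whose top bit
         +@ was set contribute 0x1b), likewise tp4 and tp8; the rotates then
         +@ combine tp1,tp2,tp4,tp8 per the InvMixColumns decomposition.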
    673 +.Lmix:	and	$t1,$s0,$mask80
    674 +	and	$s1,$s0,$mask7f
    675 +	sub	$t1,$t1,$t1,lsr#7
    676 +	and	$t1,$t1,$mask1b
    677 +	eor	$s1,$t1,$s1,lsl#1	@ tp2
    678 +
    679 +	and	$t1,$s1,$mask80
    680 +	and	$s2,$s1,$mask7f
    681 +	sub	$t1,$t1,$t1,lsr#7
    682 +	and	$t1,$t1,$mask1b
    683 +	eor	$s2,$t1,$s2,lsl#1	@ tp4
    684 +
    685 +	and	$t1,$s2,$mask80
    686 +	and	$s3,$s2,$mask7f
    687 +	sub	$t1,$t1,$t1,lsr#7
    688 +	and	$t1,$t1,$mask1b
    689 +	eor	$s3,$t1,$s3,lsl#1	@ tp8
    690 +
    691 +	eor	$t1,$s1,$s2
    692 +	eor	$t2,$s0,$s3		@ tp9
    693 +	eor	$t1,$t1,$s3		@ tpe
    694 +	eor	$t1,$t1,$s1,ror#24
    695 +	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
    696 +	eor	$t1,$t1,$s2,ror#16
    697 +	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
    698 +	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
    699 +
    700 +	ldr	$s0,[$key,#4]		@ prefetch tp1
    701 +	str	$t1,[$key],#4
    702 +	subs	$rounds,$rounds,#1
    703 +	bne	.Lmix
    704 +
    705 +	mov	r0,#0
    706 +	ldmia   sp!,{r4-r12,lr}
    707 +	tst	lr,#1
    708 +	moveq	pc,lr			@ be binary compatible with V4, yet
    709 +	bx	lr			@ interoperable with Thumb ISA:-)
    710 +.size	AES_set_decrypt_key,.-AES_set_decrypt_key
    711 +
    712 +.type	AES_Td,%object
    713 +.align	5
    714 +AES_Td:
    715 +.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
    716 +.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
    717 +.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
    718 +.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
    719 +.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
    720 +.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
    721 +.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
    722 +.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
    723 +.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
    724 +.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
    725 +.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
    726 +.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
    727 +.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
    728 +.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
    729 +.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
    730 +.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
    731 +.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
    732 +.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
    733 +.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
    734 +.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
    735 +.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
    736 +.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
    737 +.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
    738 +.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
    739 +.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
    740 +.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
    741 +.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
    742 +.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
    743 +.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
    744 +.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
    745 +.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
    746 +.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
    747 +.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
    748 +.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
    749 +.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
    750 +.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
    751 +.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
    752 +.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
    753 +.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
    754 +.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
    755 +.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
    756 +.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
    757 +.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
    758 +.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
    759 +.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
    760 +.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
    761 +.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
    762 +.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
    763 +.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
    764 +.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
    765 +.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
    766 +.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
    767 +.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
    768 +.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
    769 +.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
    770 +.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
    771 +.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
    772 +.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
    773 +.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
    774 +.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
    775 +.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
    776 +.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
    777 +.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
    778 +.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
    779 +@ Td4[256]
    780 +.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
    781 +.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
    782 +.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
    783 +.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
    784 +.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
    785 +.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
    786 +.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
    787 +.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
    788 +.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
    789 +.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
    790 +.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
    791 +.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
    792 +.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
    793 +.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
    794 +.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
    795 +.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
    796 +.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
    797 +.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
    798 +.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
    799 +.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
    800 +.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
    801 +.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
    802 +.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
    803 +.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
    804 +.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
    805 +.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
    806 +.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
    807 +.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
    808 +.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
    809 +.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
    810 +.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
    811 +.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
    812 +.size	AES_Td,.-AES_Td
    813 +
    814 +@ void AES_decrypt(const unsigned char *in, unsigned char *out,
    815 +@ 		 const AES_KEY *key) {
    816 +.global AES_decrypt
    817 +.type   AES_decrypt,%function
    818 +.align	5
    819 +AES_decrypt:
    820 +	sub	r3,pc,#8		@ AES_decrypt
    821 +	stmdb   sp!,{r1,r4-r12,lr}
    822 +	mov	$rounds,r0		@ inp
    823 +	mov	$key,r2
    824 +	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
    825 +
    826 +	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
    827 +	ldrb	$t1,[$rounds,#2]	@ manner...
    828 +	ldrb	$t2,[$rounds,#1]
    829 +	ldrb	$t3,[$rounds,#0]
    830 +	orr	$s0,$s0,$t1,lsl#8
    831 +	orr	$s0,$s0,$t2,lsl#16
    832 +	orr	$s0,$s0,$t3,lsl#24
    833 +	ldrb	$s1,[$rounds,#7]
    834 +	ldrb	$t1,[$rounds,#6]
    835 +	ldrb	$t2,[$rounds,#5]
    836 +	ldrb	$t3,[$rounds,#4]
    837 +	orr	$s1,$s1,$t1,lsl#8
    838 +	orr	$s1,$s1,$t2,lsl#16
    839 +	orr	$s1,$s1,$t3,lsl#24
    840 +	ldrb	$s2,[$rounds,#11]
    841 +	ldrb	$t1,[$rounds,#10]
    842 +	ldrb	$t2,[$rounds,#9]
    843 +	ldrb	$t3,[$rounds,#8]
    844 +	orr	$s2,$s2,$t1,lsl#8
    845 +	orr	$s2,$s2,$t2,lsl#16
    846 +	orr	$s2,$s2,$t3,lsl#24
    847 +	ldrb	$s3,[$rounds,#15]
    848 +	ldrb	$t1,[$rounds,#14]
    849 +	ldrb	$t2,[$rounds,#13]
    850 +	ldrb	$t3,[$rounds,#12]
    851 +	orr	$s3,$s3,$t1,lsl#8
    852 +	orr	$s3,$s3,$t2,lsl#16
    853 +	orr	$s3,$s3,$t3,lsl#24
    854 +
    855 +	bl	_armv4_AES_decrypt
    856 +
    857 +	ldr	$rounds,[sp],#4		@ pop out
    858 +	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
    859 +	mov	$t2,$s0,lsr#16		@ manner...
    860 +	mov	$t3,$s0,lsr#8
    861 +	strb	$t1,[$rounds,#0]
    862 +	strb	$t2,[$rounds,#1]
    863 +	strb	$t3,[$rounds,#2]
    864 +	strb	$s0,[$rounds,#3]
    865 +	mov	$t1,$s1,lsr#24
    866 +	mov	$t2,$s1,lsr#16
    867 +	mov	$t3,$s1,lsr#8
    868 +	strb	$t1,[$rounds,#4]
    869 +	strb	$t2,[$rounds,#5]
    870 +	strb	$t3,[$rounds,#6]
    871 +	strb	$s1,[$rounds,#7]
    872 +	mov	$t1,$s2,lsr#24
    873 +	mov	$t2,$s2,lsr#16
    874 +	mov	$t3,$s2,lsr#8
    875 +	strb	$t1,[$rounds,#8]
    876 +	strb	$t2,[$rounds,#9]
    877 +	strb	$t3,[$rounds,#10]
    878 +	strb	$s2,[$rounds,#11]
    879 +	mov	$t1,$s3,lsr#24
    880 +	mov	$t2,$s3,lsr#16
    881 +	mov	$t3,$s3,lsr#8
    882 +	strb	$t1,[$rounds,#12]
    883 +	strb	$t2,[$rounds,#13]
    884 +	strb	$t3,[$rounds,#14]
    885 +	strb	$s3,[$rounds,#15]
    886 +
    887 +	ldmia   sp!,{r4-r12,lr}
    888 +	tst	lr,#1
    889 +	moveq	pc,lr			@ be binary compatible with V4, yet
    890 +	bx	lr			@ interoperable with Thumb ISA:-)
    891 +.size	AES_decrypt,.-AES_decrypt
    892 +
    893 +.type   _armv4_AES_decrypt,%function
    894 +.align	2
    895 +_armv4_AES_decrypt:
    896 +	str	lr,[sp,#-4]!		@ push lr
    897 +	ldr	$t1,[$key],#16
    898 +	ldr	$t2,[$key,#-12]
    899 +	ldr	$t3,[$key,#-8]
    900 +	ldr	$i1,[$key,#-4]
    901 +	ldr	$rounds,[$key,#240-16]
    902 +	eor	$s0,$s0,$t1
    903 +	eor	$s1,$s1,$t2
    904 +	eor	$s2,$s2,$t3
    905 +	eor	$s3,$s3,$i1
    906 +	sub	$rounds,$rounds,#1
    907 +	mov	lr,#255
    908 +
    909 +.Ldec_loop:
    910 +	and	$i1,lr,$s0,lsr#16
    911 +	and	$i2,lr,$s0,lsr#8
    912 +	and	$i3,lr,$s0
    913 +	mov	$s0,$s0,lsr#24
    914 +	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
    915 +	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
    916 +	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
    917 +	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
    918 +
    919 +	and	$i1,lr,$s1		@ i0
    920 +	and	$i2,lr,$s1,lsr#16
    921 +	and	$i3,lr,$s1,lsr#8
    922 +	mov	$s1,$s1,lsr#24
    923 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
    924 +	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
    925 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
    926 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
    927 +	eor	$s0,$s0,$i1,ror#24
    928 +	eor	$s1,$s1,$t1,ror#8
    929 +	eor	$t2,$i2,$t2,ror#8
    930 +	eor	$t3,$i3,$t3,ror#8
    931 +
    932 +	and	$i1,lr,$s2,lsr#8	@ i0
    933 +	and	$i2,lr,$s2		@ i1
    934 +	and	$i3,lr,$s2,lsr#16
    935 +	mov	$s2,$s2,lsr#24
    936 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
    937 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
    938 +	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
    939 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
    940 +	eor	$s0,$s0,$i1,ror#16
    941 +	eor	$s1,$s1,$i2,ror#24
    942 +	eor	$s2,$s2,$t2,ror#8
    943 +	eor	$t3,$i3,$t3,ror#8
    944 +
    945 +	and	$i1,lr,$s3,lsr#16	@ i0
    946 +	and	$i2,lr,$s3,lsr#8	@ i1
    947 +	and	$i3,lr,$s3		@ i2
    948 +	mov	$s3,$s3,lsr#24
    949 +	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
    950 +	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
    951 +	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
    952 +	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
    953 +	eor	$s0,$s0,$i1,ror#8
    954 +	eor	$s1,$s1,$i2,ror#16
    955 +	eor	$s2,$s2,$i3,ror#24
    956 +	eor	$s3,$s3,$t3,ror#8
    957 +
    958 +	ldr	$t1,[$key],#16
    959 +	ldr	$t2,[$key,#-12]
    960 +	ldr	$t3,[$key,#-8]
    961 +	ldr	$i1,[$key,#-4]
    962 +	eor	$s0,$s0,$t1
    963 +	eor	$s1,$s1,$t2
    964 +	eor	$s2,$s2,$t3
    965 +	eor	$s3,$s3,$i1
    966 +
    967 +	subs	$rounds,$rounds,#1
    968 +	bne	.Ldec_loop
    969 +
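         +@ Unlike Te0, the Td0 words contain no plain InvS-box byte, so a
         +@ separate 256-byte Td4 table sits at AES_Td+1024; the loads below
         +@ just pull its cache lines in ahead of the byte lookups.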
    970 +	add	$tbl,$tbl,#1024
    971 +
    972 +	ldr	$t1,[$tbl,#0]		@ prefetch Td4
    973 +	ldr	$t2,[$tbl,#32]
    974 +	ldr	$t3,[$tbl,#64]
    975 +	ldr	$i1,[$tbl,#96]
    976 +	ldr	$i2,[$tbl,#128]
    977 +	ldr	$i3,[$tbl,#160]
    978 +	ldr	$t1,[$tbl,#192]
    979 +	ldr	$t2,[$tbl,#224]
    980 +
    981 +	and	$i1,lr,$s0,lsr#16
    982 +	and	$i2,lr,$s0,lsr#8
    983 +	and	$i3,lr,$s0
    984 +	ldrb	$s0,[$tbl,$s0,lsr#24]	@ Td4[s0>>24]
    985 +	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
    986 +	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
    987 +	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
    988 +
    989 +	and	$i1,lr,$s1		@ i0
    990 +	and	$i2,lr,$s1,lsr#16
    991 +	and	$i3,lr,$s1,lsr#8
    992 +	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
    993 +	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
    994 +	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
    995 +	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
    996 +	eor	$s0,$i1,$s0,lsl#24
    997 +	eor	$s1,$t1,$s1,lsl#8
    998 +	eor	$t2,$t2,$i2,lsl#8
    999 +	eor	$t3,$t3,$i3,lsl#8
   1000 +
   1001 +	and	$i1,lr,$s2,lsr#8	@ i0
   1002 +	and	$i2,lr,$s2		@ i1
   1003 +	and	$i3,lr,$s2,lsr#16
   1004 +	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
   1005 +	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
   1006 +	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
   1007 +	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
   1008 +	eor	$s0,$s0,$i1,lsl#8
   1009 +	eor	$s1,$i2,$s1,lsl#16
   1010 +	eor	$s2,$t2,$s2,lsl#16
   1011 +	eor	$t3,$t3,$i3,lsl#16
   1012 +
   1013 +	and	$i1,lr,$s3,lsr#16	@ i0
   1014 +	and	$i2,lr,$s3,lsr#8	@ i1
   1015 +	and	$i3,lr,$s3		@ i2
   1016 +	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
   1017 +	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
   1018 +	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
   1019 +	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
   1020 +	eor	$s0,$s0,$i1,lsl#16
   1021 +	eor	$s1,$s1,$i2,lsl#8
   1022 +	eor	$s2,$i3,$s2,lsl#8
   1023 +	eor	$s3,$t3,$s3,lsl#24
   1024 +
   1025 +	ldr	lr,[sp],#4		@ pop lr
   1026 +	ldr	$t1,[$key,#0]
   1027 +	ldr	$t2,[$key,#4]
   1028 +	ldr	$t3,[$key,#8]
   1029 +	ldr	$i1,[$key,#12]
   1030 +	eor	$s0,$s0,$t1
   1031 +	eor	$s1,$s1,$t2
   1032 +	eor	$s2,$s2,$t3
   1033 +	eor	$s3,$s3,$i1
   1034 +
   1035 +	sub	$tbl,$tbl,#1024
   1036 +	mov	pc,lr			@ return
   1037 +.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
   1038 +.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   1039 +___
   1040 +
   1041 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
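         +# (0xe12fff1e is the instruction encoding of "bx lr"; emitting it as a
         +# .word keeps ARMv4-only assemblers happy, while the tst/moveq guard
         +# above already falls back to "mov pc,lr" for ARM-state callers.)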
   1042 +print $code;
   1043 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   1044 +++ openssl-0.9.8h/crypto/0.9.9-dev/aes/aes-armv4.s	2009-09-03 15:42:39.000000000 -0700
   1045 @@ -0,0 +1,982 @@
   1046 +.text
   1047 +.code	32
   1048 +
   1049 +.type	AES_Te,%object
   1050 +.align	5
   1051 +AES_Te:
   1052 +.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
   1053 +.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
   1054 +.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
   1055 +.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
   1056 +.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
   1057 +.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
   1058 +.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
   1059 +.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
   1060 +.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
   1061 +.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
   1062 +.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
   1063 +.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
   1064 +.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
   1065 +.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
   1066 +.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
   1067 +.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
   1068 +.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
   1069 +.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
   1070 +.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
   1071 +.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
   1072 +.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
   1073 +.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
   1074 +.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
   1075 +.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
   1076 +.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
   1077 +.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
   1078 +.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
   1079 +.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
   1080 +.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
   1081 +.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
   1082 +.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
   1083 +.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
   1084 +.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
   1085 +.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
   1086 +.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
   1087 +.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
   1088 +.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
   1089 +.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
   1090 +.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
   1091 +.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
   1092 +.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
   1093 +.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
   1094 +.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
   1095 +.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
   1096 +.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
   1097 +.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
   1098 +.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
   1099 +.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
   1100 +.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
   1101 +.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
   1102 +.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
   1103 +.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
   1104 +.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
   1105 +.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
   1106 +.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
   1107 +.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
   1108 +.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
   1109 +.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
   1110 +.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
   1111 +.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
   1112 +.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
   1113 +.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
   1114 +.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
   1115 +.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
   1116 +@ Te4[256]
   1117 +.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
   1118 +.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
   1119 +.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
   1120 +.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
   1121 +.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
   1122 +.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
   1123 +.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
   1124 +.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
   1125 +.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
   1126 +.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
   1127 +.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
   1128 +.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
   1129 +.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
   1130 +.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
   1131 +.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
   1132 +.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
   1133 +.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
   1134 +.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
   1135 +.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
   1136 +.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
   1137 +.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
   1138 +.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
   1139 +.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
   1140 +.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
   1141 +.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
   1142 +.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
   1143 +.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
   1144 +.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
   1145 +.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
   1146 +.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
   1147 +.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
   1148 +.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
   1149 +@ rcon[]
   1150 +.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
   1151 +.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
   1152 +.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
   1153 +.size	AES_Te,.-AES_Te
   1154 +
   1155 +@ void AES_encrypt(const unsigned char *in, unsigned char *out,
   1156 +@ 		 const AES_KEY *key) {
   1157 +.global AES_encrypt
   1158 +.type   AES_encrypt,%function
   1159 +.align	5
   1160 +AES_encrypt:
   1161 +	sub	r3,pc,#8		@ AES_encrypt
   1162 +	stmdb   sp!,{r1,r4-r12,lr}
   1163 +	mov	r12,r0		@ inp
   1164 +	mov	r11,r2
   1165 +	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
   1166 +
   1167 +	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
   1168 +	ldrb	r4,[r12,#2]	@ manner...
   1169 +	ldrb	r5,[r12,#1]
   1170 +	ldrb	r6,[r12,#0]
   1171 +	orr	r0,r0,r4,lsl#8
   1172 +	orr	r0,r0,r5,lsl#16
   1173 +	orr	r0,r0,r6,lsl#24
   1174 +	ldrb	r1,[r12,#7]
   1175 +	ldrb	r4,[r12,#6]
   1176 +	ldrb	r5,[r12,#5]
   1177 +	ldrb	r6,[r12,#4]
   1178 +	orr	r1,r1,r4,lsl#8
   1179 +	orr	r1,r1,r5,lsl#16
   1180 +	orr	r1,r1,r6,lsl#24
   1181 +	ldrb	r2,[r12,#11]
   1182 +	ldrb	r4,[r12,#10]
   1183 +	ldrb	r5,[r12,#9]
   1184 +	ldrb	r6,[r12,#8]
   1185 +	orr	r2,r2,r4,lsl#8
   1186 +	orr	r2,r2,r5,lsl#16
   1187 +	orr	r2,r2,r6,lsl#24
   1188 +	ldrb	r3,[r12,#15]
   1189 +	ldrb	r4,[r12,#14]
   1190 +	ldrb	r5,[r12,#13]
   1191 +	ldrb	r6,[r12,#12]
   1192 +	orr	r3,r3,r4,lsl#8
   1193 +	orr	r3,r3,r5,lsl#16
   1194 +	orr	r3,r3,r6,lsl#24
   1195 +
   1196 +	bl	_armv4_AES_encrypt
   1197 +
   1198 +	ldr	r12,[sp],#4		@ pop out
   1199 +	mov	r4,r0,lsr#24		@ write output in endian-neutral
   1200 +	mov	r5,r0,lsr#16		@ manner...
   1201 +	mov	r6,r0,lsr#8
   1202 +	strb	r4,[r12,#0]
   1203 +	strb	r5,[r12,#1]
   1204 +	strb	r6,[r12,#2]
   1205 +	strb	r0,[r12,#3]
   1206 +	mov	r4,r1,lsr#24
   1207 +	mov	r5,r1,lsr#16
   1208 +	mov	r6,r1,lsr#8
   1209 +	strb	r4,[r12,#4]
   1210 +	strb	r5,[r12,#5]
   1211 +	strb	r6,[r12,#6]
   1212 +	strb	r1,[r12,#7]
   1213 +	mov	r4,r2,lsr#24
   1214 +	mov	r5,r2,lsr#16
   1215 +	mov	r6,r2,lsr#8
   1216 +	strb	r4,[r12,#8]
   1217 +	strb	r5,[r12,#9]
   1218 +	strb	r6,[r12,#10]
   1219 +	strb	r2,[r12,#11]
   1220 +	mov	r4,r3,lsr#24
   1221 +	mov	r5,r3,lsr#16
   1222 +	mov	r6,r3,lsr#8
   1223 +	strb	r4,[r12,#12]
   1224 +	strb	r5,[r12,#13]
   1225 +	strb	r6,[r12,#14]
   1226 +	strb	r3,[r12,#15]
   1227 +
   1228 +	ldmia   sp!,{r4-r12,lr}
   1229 +	tst	lr,#1
   1230 +	moveq	pc,lr			@ be binary compatible with V4, yet
   1231 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1232 +.size	AES_encrypt,.-AES_encrypt
   1233 +
   1234 +.type   _armv4_AES_encrypt,%function
   1235 +.align	2
   1236 +_armv4_AES_encrypt:
   1237 +	str	lr,[sp,#-4]!		@ push lr
   1238 +	ldr	r4,[r11],#16
   1239 +	ldr	r5,[r11,#-12]
   1240 +	ldr	r6,[r11,#-8]
   1241 +	ldr	r7,[r11,#-4]
   1242 +	ldr	r12,[r11,#240-16]
   1243 +	eor	r0,r0,r4
   1244 +	eor	r1,r1,r5
   1245 +	eor	r2,r2,r6
   1246 +	eor	r3,r3,r7
   1247 +	sub	r12,r12,#1
   1248 +	mov	lr,#255
   1249 +
   1250 +.Lenc_loop:
   1251 +	and	r8,lr,r0,lsr#8
   1252 +	and	r9,lr,r0,lsr#16
   1253 +	and	r7,lr,r0
   1254 +	mov	r0,r0,lsr#24
   1255 +	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
   1256 +	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
   1257 +	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
   1258 +	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
   1259 +
   1260 +	and	r7,lr,r1,lsr#16	@ i0
   1261 +	and	r8,lr,r1
   1262 +	and	r9,lr,r1,lsr#8
   1263 +	mov	r1,r1,lsr#24
   1264 +	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
   1265 +	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
   1266 +	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
   1267 +	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
   1268 +	eor	r0,r0,r7,ror#8
   1269 +	eor	r1,r1,r4,ror#24
   1270 +	eor	r5,r5,r8,ror#8
   1271 +	eor	r6,r6,r9,ror#8
   1272 +
   1273 +	and	r7,lr,r2,lsr#8	@ i0
   1274 +	and	r8,lr,r2,lsr#16	@ i1
   1275 +	and	r9,lr,r2
   1276 +	mov	r2,r2,lsr#24
   1277 +	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
   1278 +	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
   1279 +	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
   1280 +	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
   1281 +	eor	r0,r0,r7,ror#16
   1282 +	eor	r1,r1,r8,ror#8
   1283 +	eor	r2,r2,r5,ror#16
   1284 +	eor	r6,r6,r9,ror#16
   1285 +
   1286 +	and	r7,lr,r3		@ i0
   1287 +	and	r8,lr,r3,lsr#8	@ i1
   1288 +	and	r9,lr,r3,lsr#16	@ i2
   1289 +	mov	r3,r3,lsr#24
   1290 +	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
   1291 +	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
   1292 +	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
   1293 +	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
   1294 +	eor	r0,r0,r7,ror#24
   1295 +	eor	r1,r1,r8,ror#16
   1296 +	eor	r2,r2,r9,ror#8
   1297 +	eor	r3,r3,r6,ror#8
   1298 +
   1299 +	ldr	r4,[r11],#16
   1300 +	ldr	r5,[r11,#-12]
   1301 +	ldr	r6,[r11,#-8]
   1302 +	ldr	r7,[r11,#-4]
   1303 +	eor	r0,r0,r4
   1304 +	eor	r1,r1,r5
   1305 +	eor	r2,r2,r6
   1306 +	eor	r3,r3,r7
   1307 +
   1308 +	subs	r12,r12,#1
   1309 +	bne	.Lenc_loop
   1310 +
   1311 +	add	r10,r10,#2
   1312 +
   1313 +	and	r7,lr,r0
   1314 +	and	r8,lr,r0,lsr#8
   1315 +	and	r9,lr,r0,lsr#16
   1316 +	mov	r0,r0,lsr#24
   1317 +	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
   1318 +	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
   1319 +	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
   1320 +	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
   1321 +
   1322 +	and	r7,lr,r1,lsr#16	@ i0
   1323 +	and	r8,lr,r1
   1324 +	and	r9,lr,r1,lsr#8
   1325 +	mov	r1,r1,lsr#24
   1326 +	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
   1327 +	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
   1328 +	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
   1329 +	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
   1330 +	eor	r0,r7,r0,lsl#8
   1331 +	eor	r1,r4,r1,lsl#24
   1332 +	eor	r5,r8,r5,lsl#8
   1333 +	eor	r6,r9,r6,lsl#8
   1334 +
   1335 +	and	r7,lr,r2,lsr#8	@ i0
   1336 +	and	r8,lr,r2,lsr#16	@ i1
   1337 +	and	r9,lr,r2
   1338 +	mov	r2,r2,lsr#24
   1339 +	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
   1340 +	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
   1341 +	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
   1342 +	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
   1343 +	eor	r0,r7,r0,lsl#8
   1344 +	eor	r1,r1,r8,lsl#16
   1345 +	eor	r2,r5,r2,lsl#24
   1346 +	eor	r6,r9,r6,lsl#8
   1347 +
   1348 +	and	r7,lr,r3		@ i0
   1349 +	and	r8,lr,r3,lsr#8	@ i1
   1350 +	and	r9,lr,r3,lsr#16	@ i2
   1351 +	mov	r3,r3,lsr#24
   1352 +	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
   1353 +	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
   1354 +	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
   1355 +	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
   1356 +	eor	r0,r7,r0,lsl#8
   1357 +	eor	r1,r1,r8,lsl#8
   1358 +	eor	r2,r2,r9,lsl#16
   1359 +	eor	r3,r6,r3,lsl#24
   1360 +
   1361 +	ldr	lr,[sp],#4		@ pop lr
   1362 +	ldr	r4,[r11,#0]
   1363 +	ldr	r5,[r11,#4]
   1364 +	ldr	r6,[r11,#8]
   1365 +	ldr	r7,[r11,#12]
   1366 +	eor	r0,r0,r4
   1367 +	eor	r1,r1,r5
   1368 +	eor	r2,r2,r6
   1369 +	eor	r3,r3,r7
   1370 +
   1371 +	sub	r10,r10,#2
   1372 +	mov	pc,lr			@ return
   1373 +.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
   1374 +
   1375 +.global AES_set_encrypt_key
   1376 +.type   AES_set_encrypt_key,%function
   1377 +.align	5
   1378 +AES_set_encrypt_key:
   1379 +	sub	r3,pc,#8		@ AES_set_encrypt_key
   1380 +	teq	r0,#0
   1381 +	moveq	r0,#-1
   1382 +	beq	.Labrt
   1383 +	teq	r2,#0
   1384 +	moveq	r0,#-1
   1385 +	beq	.Labrt
   1386 +
   1387 +	teq	r1,#128
   1388 +	beq	.Lok
   1389 +	teq	r1,#192
   1390 +	beq	.Lok
   1391 +	teq	r1,#256
   1392 +	movne	r0,#-1
   1393 +	bne	.Labrt
   1394 +
   1395 +.Lok:	stmdb   sp!,{r4-r12,lr}
   1396 +	sub	r10,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
   1397 +
   1398 +	mov	r12,r0		@ inp
   1399 +	mov	lr,r1			@ bits
   1400 +	mov	r11,r2			@ key
   1401 +
   1402 +	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
   1403 +	ldrb	r4,[r12,#2]	@ manner...
   1404 +	ldrb	r5,[r12,#1]
   1405 +	ldrb	r6,[r12,#0]
   1406 +	orr	r0,r0,r4,lsl#8
   1407 +	orr	r0,r0,r5,lsl#16
   1408 +	orr	r0,r0,r6,lsl#24
   1409 +	ldrb	r1,[r12,#7]
   1410 +	ldrb	r4,[r12,#6]
   1411 +	ldrb	r5,[r12,#5]
   1412 +	ldrb	r6,[r12,#4]
   1413 +	orr	r1,r1,r4,lsl#8
   1414 +	orr	r1,r1,r5,lsl#16
   1415 +	orr	r1,r1,r6,lsl#24
   1416 +	ldrb	r2,[r12,#11]
   1417 +	ldrb	r4,[r12,#10]
   1418 +	ldrb	r5,[r12,#9]
   1419 +	ldrb	r6,[r12,#8]
   1420 +	orr	r2,r2,r4,lsl#8
   1421 +	orr	r2,r2,r5,lsl#16
   1422 +	orr	r2,r2,r6,lsl#24
   1423 +	ldrb	r3,[r12,#15]
   1424 +	ldrb	r4,[r12,#14]
   1425 +	ldrb	r5,[r12,#13]
   1426 +	ldrb	r6,[r12,#12]
   1427 +	orr	r3,r3,r4,lsl#8
   1428 +	orr	r3,r3,r5,lsl#16
   1429 +	orr	r3,r3,r6,lsl#24
   1430 +	str	r0,[r11],#16
   1431 +	str	r1,[r11,#-12]
   1432 +	str	r2,[r11,#-8]
   1433 +	str	r3,[r11,#-4]
   1434 +
   1435 +	teq	lr,#128
   1436 +	bne	.Lnot128
   1437 +	mov	r12,#10
   1438 +	str	r12,[r11,#240-16]
   1439 +	add	r6,r10,#256			@ rcon
   1440 +	mov	lr,#255
   1441 +
   1442 +.L128_loop:
   1443 +	and	r5,lr,r3,lsr#24
   1444 +	and	r7,lr,r3,lsr#16
   1445 +	and	r8,lr,r3,lsr#8
   1446 +	and	r9,lr,r3
   1447 +	ldrb	r5,[r10,r5]
   1448 +	ldrb	r7,[r10,r7]
   1449 +	ldrb	r8,[r10,r8]
   1450 +	ldrb	r9,[r10,r9]
   1451 +	ldr	r4,[r6],#4			@ rcon[i++]
   1452 +	orr	r5,r5,r7,lsl#24
   1453 +	orr	r5,r5,r8,lsl#16
   1454 +	orr	r5,r5,r9,lsl#8
   1455 +	eor	r5,r5,r4
   1456 +	eor	r0,r0,r5			@ rk[4]=rk[0]^...
   1457 +	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
   1458 +	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
   1459 +	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
   1460 +	str	r0,[r11],#16
   1461 +	str	r1,[r11,#-12]
   1462 +	str	r2,[r11,#-8]
   1463 +	str	r3,[r11,#-4]
   1464 +
   1465 +	subs	r12,r12,#1
   1466 +	bne	.L128_loop
   1467 +	sub	r2,r11,#176
   1468 +	b	.Ldone
   1469 +
   1470 +.Lnot128:
   1471 +	ldrb	r8,[r12,#19]
   1472 +	ldrb	r4,[r12,#18]
   1473 +	ldrb	r5,[r12,#17]
   1474 +	ldrb	r6,[r12,#16]
   1475 +	orr	r8,r8,r4,lsl#8
   1476 +	orr	r8,r8,r5,lsl#16
   1477 +	orr	r8,r8,r6,lsl#24
   1478 +	ldrb	r9,[r12,#23]
   1479 +	ldrb	r4,[r12,#22]
   1480 +	ldrb	r5,[r12,#21]
   1481 +	ldrb	r6,[r12,#20]
   1482 +	orr	r9,r9,r4,lsl#8
   1483 +	orr	r9,r9,r5,lsl#16
   1484 +	orr	r9,r9,r6,lsl#24
   1485 +	str	r8,[r11],#8
   1486 +	str	r9,[r11,#-4]
   1487 +
   1488 +	teq	lr,#192
   1489 +	bne	.Lnot192
   1490 +	mov	r12,#12
   1491 +	str	r12,[r11,#240-24]
   1492 +	add	r6,r10,#256			@ rcon
   1493 +	mov	lr,#255
   1494 +	mov	r12,#8
   1495 +
   1496 +.L192_loop:
   1497 +	and	r5,lr,r9,lsr#24
   1498 +	and	r7,lr,r9,lsr#16
   1499 +	and	r8,lr,r9,lsr#8
   1500 +	and	r9,lr,r9
   1501 +	ldrb	r5,[r10,r5]
   1502 +	ldrb	r7,[r10,r7]
   1503 +	ldrb	r8,[r10,r8]
   1504 +	ldrb	r9,[r10,r9]
   1505 +	ldr	r4,[r6],#4			@ rcon[i++]
   1506 +	orr	r5,r5,r7,lsl#24
   1507 +	orr	r5,r5,r8,lsl#16
   1508 +	orr	r5,r5,r9,lsl#8
   1509 +	eor	r9,r5,r4
   1510 +	eor	r0,r0,r9			@ rk[6]=rk[0]^...
   1511 +	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
   1512 +	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
   1513 +	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
   1514 +	str	r0,[r11],#24
   1515 +	str	r1,[r11,#-20]
   1516 +	str	r2,[r11,#-16]
   1517 +	str	r3,[r11,#-12]
   1518 +
   1519 +	subs	r12,r12,#1
   1520 +	subeq	r2,r11,#216
   1521 +	beq	.Ldone
   1522 +
   1523 +	ldr	r7,[r11,#-32]
   1524 +	ldr	r8,[r11,#-28]
   1525 +	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
   1526 +	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
   1527 +	str	r7,[r11,#-8]
   1528 +	str	r9,[r11,#-4]
   1529 +	b	.L192_loop
   1530 +
   1531 +.Lnot192:
   1532 +	ldrb	r8,[r12,#27]
   1533 +	ldrb	r4,[r12,#26]
   1534 +	ldrb	r5,[r12,#25]
   1535 +	ldrb	r6,[r12,#24]
   1536 +	orr	r8,r8,r4,lsl#8
   1537 +	orr	r8,r8,r5,lsl#16
   1538 +	orr	r8,r8,r6,lsl#24
   1539 +	ldrb	r9,[r12,#31]
   1540 +	ldrb	r4,[r12,#30]
   1541 +	ldrb	r5,[r12,#29]
   1542 +	ldrb	r6,[r12,#28]
   1543 +	orr	r9,r9,r4,lsl#8
   1544 +	orr	r9,r9,r5,lsl#16
   1545 +	orr	r9,r9,r6,lsl#24
   1546 +	str	r8,[r11],#8
   1547 +	str	r9,[r11,#-4]
   1548 +
   1549 +	mov	r12,#14
   1550 +	str	r12,[r11,#240-32]
   1551 +	add	r6,r10,#256			@ rcon
   1552 +	mov	lr,#255
   1553 +	mov	r12,#7
   1554 +
   1555 +.L256_loop:
   1556 +	and	r5,lr,r9,lsr#24
   1557 +	and	r7,lr,r9,lsr#16
   1558 +	and	r8,lr,r9,lsr#8
   1559 +	and	r9,lr,r9
   1560 +	ldrb	r5,[r10,r5]
   1561 +	ldrb	r7,[r10,r7]
   1562 +	ldrb	r8,[r10,r8]
   1563 +	ldrb	r9,[r10,r9]
   1564 +	ldr	r4,[r6],#4			@ rcon[i++]
   1565 +	orr	r5,r5,r7,lsl#24
   1566 +	orr	r5,r5,r8,lsl#16
   1567 +	orr	r5,r5,r9,lsl#8
   1568 +	eor	r9,r5,r4
   1569 +	eor	r0,r0,r9			@ rk[8]=rk[0]^...
   1570 +	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
   1571 +	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
   1572 +	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
   1573 +	str	r0,[r11],#32
   1574 +	str	r1,[r11,#-28]
   1575 +	str	r2,[r11,#-24]
   1576 +	str	r3,[r11,#-20]
   1577 +
   1578 +	subs	r12,r12,#1
   1579 +	subeq	r2,r11,#256
   1580 +	beq	.Ldone
   1581 +
   1582 +	and	r5,lr,r3
   1583 +	and	r7,lr,r3,lsr#8
   1584 +	and	r8,lr,r3,lsr#16
   1585 +	and	r9,lr,r3,lsr#24
   1586 +	ldrb	r5,[r10,r5]
   1587 +	ldrb	r7,[r10,r7]
   1588 +	ldrb	r8,[r10,r8]
   1589 +	ldrb	r9,[r10,r9]
   1590 +	orr	r5,r5,r7,lsl#8
   1591 +	orr	r5,r5,r8,lsl#16
   1592 +	orr	r5,r5,r9,lsl#24
   1593 +
   1594 +	ldr	r4,[r11,#-48]
   1595 +	ldr	r7,[r11,#-44]
   1596 +	ldr	r8,[r11,#-40]
   1597 +	ldr	r9,[r11,#-36]
   1598 +	eor	r4,r4,r5			@ rk[12]=rk[4]^...
   1599 +	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
   1600 +	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
   1601 +	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
   1602 +	str	r4,[r11,#-16]
   1603 +	str	r7,[r11,#-12]
   1604 +	str	r8,[r11,#-8]
   1605 +	str	r9,[r11,#-4]
   1606 +	b	.L256_loop
   1607 +
   1608 +.Ldone:	mov	r0,#0
   1609 +	ldmia   sp!,{r4-r12,lr}
   1610 +.Labrt:	tst	lr,#1
   1611 +	moveq	pc,lr			@ be binary compatible with V4, yet
   1612 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1613 +.size	AES_set_encrypt_key,.-AES_set_encrypt_key
   1614 +
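The .L128_loop above is the AES-128 key schedule: each iteration rotates and substitutes the previous round-key word, folds in the round constant fetched from the rcon table at r10+256, then chains three more XORs, exactly as the rk[4]..rk[7] comments indicate. (In the epilogue, .word 0xe12fff1e is the machine encoding of "bx lr", emitted as data so the file still assembles for plain ARMv4 while the tst/moveq pair keeps ARM callers working.) For reference, a C sketch of the same expansion; sbox and rcon here stand for the byte and word tables the code indexes via r10, and the function name is illustrative:

    #include <stdint.h>

    extern const uint8_t  sbox[256];   /* S-box bytes, indexed via r10      */
    extern const uint32_t rcon[10];    /* round constants, table at r10+256 */

    /* Expand a loaded 128-bit key (rk[0..3]) into the 44-word schedule;
     * this is what .L128_loop computes ten times over. */
    static void aes128_expand(uint32_t rk[44])
    {
        int i;
        for (i = 0; i < 10; i++) {
            uint32_t t = rk[3];        /* SubWord(RotWord(t)) ^ rcon[i] */
            rk[4] = rk[0] ^ rcon[i]
                  ^ ((uint32_t)sbox[(t >> 16) & 0xff] << 24)
                  ^ ((uint32_t)sbox[(t >>  8) & 0xff] << 16)
                  ^ ((uint32_t)sbox[ t        & 0xff] <<  8)
                  ^  (uint32_t)sbox[ t >> 24        ];
            rk[5] = rk[1] ^ rk[4];     /* rk[5]=rk[1]^rk[4], as commented */
            rk[6] = rk[2] ^ rk[5];
            rk[7] = rk[3] ^ rk[6];
            rk += 4;
        }
    }

The .Lnot128 and .Lnot192 paths above perform the analogous expansion with 6- and 8-word strides for 192- and 256-bit keys.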
   1615 +.global AES_set_decrypt_key
   1616 +.type   AES_set_decrypt_key,%function
   1617 +.align	5
   1618 +AES_set_decrypt_key:
   1619 +	str	lr,[sp,#-4]!            @ push lr
   1620 +	bl	AES_set_encrypt_key
   1621 +	teq	r0,#0
   1622 +	ldrne	lr,[sp],#4              @ pop lr
   1623 +	bne	.Labrt
   1624 +
   1625 +	stmdb   sp!,{r4-r12}
   1626 +
   1627 +	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2,
   1628 +	mov	r11,r2			@ which is AES_KEY *key
   1629 +	mov	r7,r2
   1630 +	add	r8,r2,r12,lsl#4
   1631 +
   1632 +.Linv:	ldr	r0,[r7]
   1633 +	ldr	r1,[r7,#4]
   1634 +	ldr	r2,[r7,#8]
   1635 +	ldr	r3,[r7,#12]
   1636 +	ldr	r4,[r8]
   1637 +	ldr	r5,[r8,#4]
   1638 +	ldr	r6,[r8,#8]
   1639 +	ldr	r9,[r8,#12]
   1640 +	str	r0,[r8],#-16
   1641 +	str	r1,[r8,#16+4]
   1642 +	str	r2,[r8,#16+8]
   1643 +	str	r3,[r8,#16+12]
   1644 +	str	r4,[r7],#16
   1645 +	str	r5,[r7,#-12]
   1646 +	str	r6,[r7,#-8]
   1647 +	str	r9,[r7,#-4]
   1648 +	teq	r7,r8
   1649 +	bne	.Linv
   1650 +	ldr	r0,[r11,#16]!		@ prefetch tp1
   1651 +	mov	r7,#0x80
   1652 +	mov	r8,#0x1b
   1653 +	orr	r7,r7,#0x8000
   1654 +	orr	r8,r8,#0x1b00
   1655 +	orr	r7,r7,r7,lsl#16
   1656 +	orr	r8,r8,r8,lsl#16
   1657 +	sub	r12,r12,#1
   1658 +	mvn	r9,r7
   1659 +	mov	r12,r12,lsl#2	@ (rounds-1)*4
   1660 +
   1661 +.Lmix:	and	r4,r0,r7
   1662 +	and	r1,r0,r9
   1663 +	sub	r4,r4,r4,lsr#7
   1664 +	and	r4,r4,r8
   1665 +	eor	r1,r4,r1,lsl#1	@ tp2
   1666 +
   1667 +	and	r4,r1,r7
   1668 +	and	r2,r1,r9
   1669 +	sub	r4,r4,r4,lsr#7
   1670 +	and	r4,r4,r8
   1671 +	eor	r2,r4,r2,lsl#1	@ tp4
   1672 +
   1673 +	and	r4,r2,r7
   1674 +	and	r3,r2,r9
   1675 +	sub	r4,r4,r4,lsr#7
   1676 +	and	r4,r4,r8
   1677 +	eor	r3,r4,r3,lsl#1	@ tp8
   1678 +
   1679 +	eor	r4,r1,r2
   1680 +	eor	r5,r0,r3		@ tp9
   1681 +	eor	r4,r4,r3		@ tpe
   1682 +	eor	r4,r4,r1,ror#24
   1683 +	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
   1684 +	eor	r4,r4,r2,ror#16
   1685 +	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
   1686 +	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)
   1687 +
   1688 +	ldr	r0,[r11,#4]		@ prefetch tp1
   1689 +	str	r4,[r11],#4
   1690 +	subs	r12,r12,#1
   1691 +	bne	.Lmix
   1692 +
   1693 +	mov	r0,#0
   1694 +	ldmia   sp!,{r4-r12,lr}
   1695 +	tst	lr,#1
   1696 +	moveq	pc,lr			@ be binary compatible with V4, yet
   1697 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1698 +.size	AES_set_decrypt_key,.-AES_set_decrypt_key
   1699 +
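The .Linv loop above swaps the round keys end-for-end, and .Lmix then applies InvMixColumns to every word in place; r7, r9 and r8 hold the 0x80808080, 0x7f7f7f7f and 0x1b1b1b1b masks so that one 32-bit operation doubles four GF(2^8) bytes at once. A C sketch of the same computation (names are illustrative; ROTATE(x,n) in the comments is a left rotate, rendered below as a right rotate by 32-n):

    #include <stdint.h>

    static uint32_t ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    /* Word-wise "xtime": multiply four packed GF(2^8) bytes by x. */
    static uint32_t xtime_word(uint32_t tp1)
    {
        uint32_t hi = tp1 & 0x80808080u;
        uint32_t lo = tp1 & 0x7f7f7f7fu;
        /* (hi - (hi >> 7)) & 0x1b1b1b1b leaves 0x1b in each byte whose
         * top bit was set: the reduction by x^8+x^4+x^3+x+1 */
        return (lo << 1) ^ ((hi - (hi >> 7)) & 0x1b1b1b1bu);
    }

    /* InvMixColumns of one round-key word, per the tp2/tp4/tp8 comments. */
    static uint32_t inv_mix_column(uint32_t tp1)
    {
        uint32_t tp2 = xtime_word(tp1);
        uint32_t tp4 = xtime_word(tp2);
        uint32_t tp8 = xtime_word(tp4);
        uint32_t tp9 = tp8 ^ tp1;
        uint32_t tpb = tp9 ^ tp2;
        uint32_t tpd = tp9 ^ tp4;
        uint32_t tpe = tp8 ^ tp4 ^ tp2;
        return tpe ^ ror32(tpb, 24) ^ ror32(tpd, 16) ^ ror32(tp9, 8);
    }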
   1700 +.type	AES_Td,%object
   1701 +.align	5
   1702 +AES_Td:
   1703 +.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
   1704 +.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
   1705 +.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
   1706 +.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
   1707 +.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
   1708 +.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
   1709 +.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
   1710 +.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
   1711 +.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
   1712 +.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
   1713 +.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
   1714 +.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
   1715 +.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
   1716 +.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
   1717 +.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
   1718 +.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
   1719 +.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
   1720 +.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
   1721 +.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
   1722 +.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
   1723 +.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
   1724 +.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
   1725 +.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
   1726 +.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
   1727 +.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
   1728 +.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
   1729 +.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
   1730 +.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
   1731 +.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
   1732 +.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
   1733 +.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
   1734 +.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
   1735 +.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
   1736 +.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
   1737 +.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
   1738 +.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
   1739 +.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
   1740 +.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
   1741 +.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
   1742 +.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
   1743 +.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
   1744 +.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
   1745 +.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
   1746 +.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
   1747 +.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
   1748 +.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
   1749 +.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
   1750 +.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
   1751 +.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
   1752 +.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
   1753 +.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
   1754 +.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
   1755 +.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
   1756 +.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
   1757 +.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
   1758 +.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
   1759 +.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
   1760 +.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
   1761 +.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
   1762 +.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
   1763 +.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
   1764 +.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
   1765 +.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
   1766 +.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
   1767 +@ Td4[256]
   1768 +.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
   1769 +.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
   1770 +.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
   1771 +.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
   1772 +.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
   1773 +.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
   1774 +.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
   1775 +.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
   1776 +.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
   1777 +.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
   1778 +.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
   1779 +.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
   1780 +.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
   1781 +.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
   1782 +.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
   1783 +.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
   1784 +.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
   1785 +.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
   1786 +.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
   1787 +.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
   1788 +.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
   1789 +.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
   1790 +.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
   1791 +.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
   1792 +.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
   1793 +.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
   1794 +.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
   1795 +.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
   1796 +.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
   1797 +.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
   1798 +.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
   1799 +.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
   1800 +.size	AES_Td,.-AES_Td
   1801 +
   1802 +@ void AES_decrypt(const unsigned char *in, unsigned char *out,
   1803 +@ 		 const AES_KEY *key) {
   1804 +.global AES_decrypt
   1805 +.type   AES_decrypt,%function
   1806 +.align	5
   1807 +AES_decrypt:
   1808 +	sub	r3,pc,#8		@ AES_decrypt
   1809 +	stmdb   sp!,{r1,r4-r12,lr}
   1810 +	mov	r12,r0		@ inp
   1811 +	mov	r11,r2
   1812 +	sub	r10,r3,#AES_decrypt-AES_Td		@ Td
   1813 +
   1814 +	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
   1815 +	ldrb	r4,[r12,#2]	@ manner...
   1816 +	ldrb	r5,[r12,#1]
   1817 +	ldrb	r6,[r12,#0]
   1818 +	orr	r0,r0,r4,lsl#8
   1819 +	orr	r0,r0,r5,lsl#16
   1820 +	orr	r0,r0,r6,lsl#24
   1821 +	ldrb	r1,[r12,#7]
   1822 +	ldrb	r4,[r12,#6]
   1823 +	ldrb	r5,[r12,#5]
   1824 +	ldrb	r6,[r12,#4]
   1825 +	orr	r1,r1,r4,lsl#8
   1826 +	orr	r1,r1,r5,lsl#16
   1827 +	orr	r1,r1,r6,lsl#24
   1828 +	ldrb	r2,[r12,#11]
   1829 +	ldrb	r4,[r12,#10]
   1830 +	ldrb	r5,[r12,#9]
   1831 +	ldrb	r6,[r12,#8]
   1832 +	orr	r2,r2,r4,lsl#8
   1833 +	orr	r2,r2,r5,lsl#16
   1834 +	orr	r2,r2,r6,lsl#24
   1835 +	ldrb	r3,[r12,#15]
   1836 +	ldrb	r4,[r12,#14]
   1837 +	ldrb	r5,[r12,#13]
   1838 +	ldrb	r6,[r12,#12]
   1839 +	orr	r3,r3,r4,lsl#8
   1840 +	orr	r3,r3,r5,lsl#16
   1841 +	orr	r3,r3,r6,lsl#24
   1842 +
   1843 +	bl	_armv4_AES_decrypt
   1844 +
   1845 +	ldr	r12,[sp],#4		@ pop out
   1846 +	mov	r4,r0,lsr#24		@ write output in endian-neutral
   1847 +	mov	r5,r0,lsr#16		@ manner...
   1848 +	mov	r6,r0,lsr#8
   1849 +	strb	r4,[r12,#0]
   1850 +	strb	r5,[r12,#1]
   1851 +	strb	r6,[r12,#2]
   1852 +	strb	r0,[r12,#3]
   1853 +	mov	r4,r1,lsr#24
   1854 +	mov	r5,r1,lsr#16
   1855 +	mov	r6,r1,lsr#8
   1856 +	strb	r4,[r12,#4]
   1857 +	strb	r5,[r12,#5]
   1858 +	strb	r6,[r12,#6]
   1859 +	strb	r1,[r12,#7]
   1860 +	mov	r4,r2,lsr#24
   1861 +	mov	r5,r2,lsr#16
   1862 +	mov	r6,r2,lsr#8
   1863 +	strb	r4,[r12,#8]
   1864 +	strb	r5,[r12,#9]
   1865 +	strb	r6,[r12,#10]
   1866 +	strb	r2,[r12,#11]
   1867 +	mov	r4,r3,lsr#24
   1868 +	mov	r5,r3,lsr#16
   1869 +	mov	r6,r3,lsr#8
   1870 +	strb	r4,[r12,#12]
   1871 +	strb	r5,[r12,#13]
   1872 +	strb	r6,[r12,#14]
   1873 +	strb	r3,[r12,#15]
   1874 +
   1875 +	ldmia   sp!,{r4-r12,lr}
   1876 +	tst	lr,#1
   1877 +	moveq	pc,lr			@ be binary compatible with V4, yet
   1878 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1879 +.size	AES_decrypt,.-AES_decrypt
   1880 +
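The ldrb/orr sequences in AES_decrypt assemble each input word a byte at a time, and the lsr/strb sequences take the result apart again, which is what makes the routine independent of CPU endianness and buffer alignment. In C terms this is the familiar pair below (a sketch mirroring OpenSSL's GETU32/PUTU32 macros; the function names are illustrative):

    #include <stdint.h>

    static uint32_t get_u32(const unsigned char *p)   /* big-endian load  */
    {
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] <<  8) |  (uint32_t)p[3];
    }

    static void put_u32(unsigned char *p, uint32_t v) /* big-endian store */
    {
        p[0] = (unsigned char)(v >> 24); p[1] = (unsigned char)(v >> 16);
        p[2] = (unsigned char)(v >>  8); p[3] = (unsigned char)v;
    }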
   1881 +.type   _armv4_AES_decrypt,%function
   1882 +.align	2
   1883 +_armv4_AES_decrypt:
   1884 +	str	lr,[sp,#-4]!		@ push lr
   1885 +	ldr	r4,[r11],#16
   1886 +	ldr	r5,[r11,#-12]
   1887 +	ldr	r6,[r11,#-8]
   1888 +	ldr	r7,[r11,#-4]
   1889 +	ldr	r12,[r11,#240-16]
   1890 +	eor	r0,r0,r4
   1891 +	eor	r1,r1,r5
   1892 +	eor	r2,r2,r6
   1893 +	eor	r3,r3,r7
   1894 +	sub	r12,r12,#1
   1895 +	mov	lr,#255
   1896 +
   1897 +.Ldec_loop:
   1898 +	and	r7,lr,r0,lsr#16
   1899 +	and	r8,lr,r0,lsr#8
   1900 +	and	r9,lr,r0
   1901 +	mov	r0,r0,lsr#24
   1902 +	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
   1903 +	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
   1904 +	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
   1905 +	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
   1906 +
   1907 +	and	r7,lr,r1		@ i0
   1908 +	and	r8,lr,r1,lsr#16
   1909 +	and	r9,lr,r1,lsr#8
   1910 +	mov	r1,r1,lsr#24
   1911 +	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
   1912 +	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
   1913 +	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
   1914 +	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
   1915 +	eor	r0,r0,r7,ror#24
   1916 +	eor	r1,r1,r4,ror#8
   1917 +	eor	r5,r8,r5,ror#8
   1918 +	eor	r6,r9,r6,ror#8
   1919 +
   1920 +	and	r7,lr,r2,lsr#8	@ i0
   1921 +	and	r8,lr,r2		@ i1
   1922 +	and	r9,lr,r2,lsr#16
   1923 +	mov	r2,r2,lsr#24
   1924 +	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
   1925 +	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
   1926 +	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
   1927 +	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
   1928 +	eor	r0,r0,r7,ror#16
   1929 +	eor	r1,r1,r8,ror#24
   1930 +	eor	r2,r2,r5,ror#8
   1931 +	eor	r6,r9,r6,ror#8
   1932 +
   1933 +	and	r7,lr,r3,lsr#16	@ i0
   1934 +	and	r8,lr,r3,lsr#8	@ i1
   1935 +	and	r9,lr,r3		@ i2
   1936 +	mov	r3,r3,lsr#24
   1937 +	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
   1938 +	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
   1939 +	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
   1940 +	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
   1941 +	eor	r0,r0,r7,ror#8
   1942 +	eor	r1,r1,r8,ror#16
   1943 +	eor	r2,r2,r9,ror#24
   1944 +	eor	r3,r3,r6,ror#8
   1945 +
   1946 +	ldr	r4,[r11],#16
   1947 +	ldr	r5,[r11,#-12]
   1948 +	ldr	r6,[r11,#-8]
   1949 +	ldr	r7,[r11,#-4]
   1950 +	eor	r0,r0,r4
   1951 +	eor	r1,r1,r5
   1952 +	eor	r2,r2,r6
   1953 +	eor	r3,r3,r7
   1954 +
   1955 +	subs	r12,r12,#1
   1956 +	bne	.Ldec_loop
   1957 +
   1958 +	add	r10,r10,#1024
   1959 +
   1960 +	ldr	r4,[r10,#0]		@ prefetch Td4
   1961 +	ldr	r5,[r10,#32]
   1962 +	ldr	r6,[r10,#64]
   1963 +	ldr	r7,[r10,#96]
   1964 +	ldr	r8,[r10,#128]
   1965 +	ldr	r9,[r10,#160]
   1966 +	ldr	r4,[r10,#192]
   1967 +	ldr	r5,[r10,#224]
   1968 +
   1969 +	and	r7,lr,r0,lsr#16
   1970 +	and	r8,lr,r0,lsr#8
   1971 +	and	r9,lr,r0
   1972 +	ldrb	r0,[r10,r0,lsr#24]	@ Td4[s0>>24]
   1973 +	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
   1974 +	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
   1975 +	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
   1976 +
   1977 +	and	r7,lr,r1		@ i0
   1978 +	and	r8,lr,r1,lsr#16
   1979 +	and	r9,lr,r1,lsr#8
   1980 +	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
   1981 +	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
   1982 +	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
   1983 +	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
   1984 +	eor	r0,r7,r0,lsl#24
   1985 +	eor	r1,r4,r1,lsl#8
   1986 +	eor	r5,r5,r8,lsl#8
   1987 +	eor	r6,r6,r9,lsl#8
   1988 +
   1989 +	and	r7,lr,r2,lsr#8	@ i0
   1990 +	and	r8,lr,r2		@ i1
   1991 +	and	r9,lr,r2,lsr#16
   1992 +	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
   1993 +	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
   1994 +	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
   1995 +	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
   1996 +	eor	r0,r0,r7,lsl#8
   1997 +	eor	r1,r8,r1,lsl#16
   1998 +	eor	r2,r5,r2,lsl#16
   1999 +	eor	r6,r6,r9,lsl#16
   2000 +
   2001 +	and	r7,lr,r3,lsr#16	@ i0
   2002 +	and	r8,lr,r3,lsr#8	@ i1
   2003 +	and	r9,lr,r3		@ i2
   2004 +	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
   2005 +	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
   2006 +	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
   2007 +	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
   2008 +	eor	r0,r0,r7,lsl#16
   2009 +	eor	r1,r1,r8,lsl#8
   2010 +	eor	r2,r9,r2,lsl#8
   2011 +	eor	r3,r6,r3,lsl#24
   2012 +
   2013 +	ldr	lr,[sp],#4		@ pop lr
   2014 +	ldr	r4,[r11,#0]
   2015 +	ldr	r5,[r11,#4]
   2016 +	ldr	r6,[r11,#8]
   2017 +	ldr	r7,[r11,#12]
   2018 +	eor	r0,r0,r4
   2019 +	eor	r1,r1,r5
   2020 +	eor	r2,r2,r6
   2021 +	eor	r3,r3,r7
   2022 +
   2023 +	sub	r10,r10,#1024
   2024 +	mov	pc,lr			@ return
   2025 +.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
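Inside .Ldec_loop each state word is rebuilt from four table lookups. A conventional implementation keeps four 1KB tables Td0..Td3; this module stores only Td0 (the AES_Td data above) and synthesizes the other three with the rotate that comes for free on its eor instructions, since Tdn[x] == ROR(Td0[x], 8*n). One round in the classical four-table form, as a C sketch (helper names are illustrative):

    #include <stdint.h>

    extern const uint32_t Td0[256];       /* the 1KB AES_Td table */

    static uint32_t ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    /* the rotations the module applies on its eor instructions */
    #define Td1(x) ror32(Td0[x],  8)
    #define Td2(x) ror32(Td0[x], 16)
    #define Td3(x) ror32(Td0[x], 24)

    static void aes_dec_round(uint32_t t[4], const uint32_t s[4],
                              const uint32_t rk[4])
    {
        t[0] = Td0[s[0] >> 24] ^ Td1((s[3] >> 16) & 0xff)
             ^ Td2((s[2] >> 8) & 0xff) ^ Td3(s[1] & 0xff) ^ rk[0];
        t[1] = Td0[s[1] >> 24] ^ Td1((s[0] >> 16) & 0xff)
             ^ Td2((s[3] >> 8) & 0xff) ^ Td3(s[2] & 0xff) ^ rk[1];
        t[2] = Td0[s[2] >> 24] ^ Td1((s[1] >> 16) & 0xff)
             ^ Td2((s[0] >> 8) & 0xff) ^ Td3(s[3] & 0xff) ^ rk[2];
        t[3] = Td0[s[3] >> 24] ^ Td1((s[2] >> 16) & 0xff)
             ^ Td2((s[1] >> 8) & 0xff) ^ Td3(s[0] & 0xff) ^ rk[3];
    }

The final round (after the add r10,r10,#1024) instead uses the 256-byte Td4 S-box, since InvMixColumns is omitted there.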
    2026 +.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
   2027 +.align	2
   2028 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   2029 +++ openssl-0.9.8h/crypto/0.9.9-dev/bn/armv4-mont.pl	2009-09-03 15:42:39.000000000 -0700
   2030 @@ -0,0 +1,200 @@
   2031 +#!/usr/bin/env perl
   2032 +
   2033 +# ====================================================================
    2034 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
   2035 +# project. The module is, however, dual licensed under OpenSSL and
   2036 +# CRYPTOGAMS licenses depending on where you obtain it. For further
   2037 +# details see http://www.openssl.org/~appro/cryptogams/.
   2038 +# ====================================================================
   2039 +
   2040 +# January 2007.
   2041 +
   2042 +# Montgomery multiplication for ARMv4.
   2043 +#
    2044 +# Performance improvement naturally varies among CPU implementations
    2045 +# and compilers. The code was observed to provide a +65-35% improvement
    2046 +# [depending on key length, less for longer keys] on ARM920T, and
    2047 +# +115-80% on Intel IXP425. This is relative to the pre-bn_mul_mont
    2048 +# code base and to compiler-generated code with in-lined umull and even
    2049 +# umlal instructions. In other words, the speed-up does not come from
    2050 +# access to some "secret" instruction unavailable to the compiler.
   2051 +#
    2052 +# The code is interoperable with the Thumb ISA and is rather compact,
    2053 +# less than 1/2KB. A Windows CE port would be trivial, as it is purely
    2054 +# a matter of decorations; the ABI and instruction syntax are identical.
   2055 +
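bn_mul_mont computes r = a*b*R^-1 mod n with R = 2^(32*num), interleaving the multiplication with the reduction one word at a time; the .L1st, .Louter and .Linner loops below correspond to the standard CIOS ("coarsely integrated operand scanning") formulation, and .Lsub/.Lcopy perform the final conditional subtraction. A reference sketch in C, assuming 32-bit words (the names, and the variable-length array standing in for the alloca(4*num), are illustrative, not OpenSSL's):

    #include <stdint.h>

    typedef uint32_t u32;
    typedef uint64_t u64;

    /* rp = ap*bp*2^(-32*num) mod np; n0 = -np[0]^(-1) mod 2^32 */
    static void mont_mul_ref(u32 *rp, const u32 *ap, const u32 *bp,
                             const u32 *np, u32 n0, int num)
    {
        u32 tp[num + 2], carry, m, mask;
        u64 t;
        int i, j;

        for (j = 0; j < num + 2; j++) tp[j] = 0;

        for (i = 0; i < num; i++) {              /* .Louter */
            carry = 0;                           /* tp += ap[]*bp[i] */
            for (j = 0; j < num; j++) {
                t = (u64)ap[j] * bp[i] + tp[j] + carry;
                tp[j] = (u32)t; carry = (u32)(t >> 32);
            }
            t = (u64)tp[num] + carry;
            tp[num] = (u32)t; tp[num + 1] = (u32)(t >> 32);

            m = tp[0] * n0;                      /* "tp[0]"*n0 */
            t = (u64)np[0] * m + tp[0];          /* low word becomes 0 */
            carry = (u32)(t >> 32);
            for (j = 1; j < num; j++) {          /* tp = (tp+np[]*m)>>32 */
                t = (u64)np[j] * m + tp[j] + carry;
                tp[j - 1] = (u32)t; carry = (u32)(t >> 32);
            }
            t = (u64)tp[num] + carry;
            tp[num - 1] = (u32)t;
            tp[num] = tp[num + 1] + (u32)(t >> 32);
        }

        carry = 0;                               /* .Lsub: rp = tp - np */
        for (j = 0; j < num; j++) {
            t = (u64)tp[j] - np[j] - carry;
            rp[j] = (u32)t; carry = (u32)(t >> 32) & 1;
        }
        /* .Lcopy: keep tp on borrow, the difference otherwise */
        mask = (u32)0 - (u32)(tp[num] < carry);
        for (j = 0; j < num; j++)
            rp[j] = (tp[j] & mask) | (rp[j] & ~mask);
    }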
   2056 +$num="r0";	# starts as num argument, but holds &tp[num-1]
   2057 +$ap="r1";
   2058 +$bp="r2"; $bi="r2"; $rp="r2";
   2059 +$np="r3";
   2060 +$tp="r4";
   2061 +$aj="r5";
   2062 +$nj="r6";
   2063 +$tj="r7";
   2064 +$n0="r8";
   2065 +###########	# r9 is reserved by ELF as platform specific, e.g. TLS pointer
   2066 +$alo="r10";	# sl, gcc uses it to keep @GOT
   2067 +$ahi="r11";	# fp
   2068 +$nlo="r12";	# ip
   2069 +###########	# r13 is stack pointer
   2070 +$nhi="r14";	# lr
   2071 +###########	# r15 is program counter
   2072 +
   2073 +#### argument block layout relative to &tp[num-1], a.k.a. $num
   2074 +$_rp="$num,#12*4";
   2075 +# ap permanently resides in r1
   2076 +$_bp="$num,#13*4";
   2077 +# np permanently resides in r3
   2078 +$_n0="$num,#14*4";
   2079 +$_num="$num,#15*4";	$_bpend=$_num;
   2080 +
   2081 +$code=<<___;
   2082 +.text
   2083 +
   2084 +.global	bn_mul_mont
   2085 +.type	bn_mul_mont,%function
   2086 +
   2087 +.align	2
   2088 +bn_mul_mont:
   2089 +	stmdb	sp!,{r0,r2}		@ sp points at argument block
   2090 +	ldr	$num,[sp,#3*4]		@ load num
   2091 +	cmp	$num,#2
   2092 +	movlt	r0,#0
   2093 +	addlt	sp,sp,#2*4
   2094 +	blt	.Labrt
   2095 +
   2096 +	stmdb	sp!,{r4-r12,lr}		@ save 10 registers
   2097 +
   2098 +	mov	$num,$num,lsl#2		@ rescale $num for byte count
   2099 +	sub	sp,sp,$num		@ alloca(4*num)
   2100 +	sub	sp,sp,#4		@ +extra dword
   2101 +	sub	$num,$num,#4		@ "num=num-1"
   2102 +	add	$tp,$bp,$num		@ &bp[num-1]
   2103 +
   2104 +	add	$num,sp,$num		@ $num to point at &tp[num-1]
   2105 +	ldr	$n0,[$_n0]		@ &n0
   2106 +	ldr	$bi,[$bp]		@ bp[0]
   2107 +	ldr	$aj,[$ap],#4		@ ap[0],ap++
   2108 +	ldr	$nj,[$np],#4		@ np[0],np++
   2109 +	ldr	$n0,[$n0]		@ *n0
   2110 +	str	$tp,[$_bpend]		@ save &bp[num]
   2111 +
   2112 +	umull	$alo,$ahi,$aj,$bi	@ ap[0]*bp[0]
   2113 +	str	$n0,[$_n0]		@ save n0 value
   2114 +	mul	$n0,$alo,$n0		@ "tp[0]"*n0
   2115 +	mov	$nlo,#0
   2116 +	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"t[0]"
   2117 +	mov	$tp,sp
   2118 +
   2119 +.L1st:
   2120 +	ldr	$aj,[$ap],#4		@ ap[j],ap++
   2121 +	mov	$alo,$ahi
   2122 +	mov	$ahi,#0
   2123 +	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[0]
   2124 +	ldr	$nj,[$np],#4		@ np[j],np++
   2125 +	mov	$nhi,#0
   2126 +	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
   2127 +	adds	$nlo,$nlo,$alo
   2128 +	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
   2129 +	adc	$nlo,$nhi,#0
   2130 +	cmp	$tp,$num
   2131 +	bne	.L1st
   2132 +
   2133 +	adds	$nlo,$nlo,$ahi
   2134 +	mov	$nhi,#0
   2135 +	adc	$nhi,$nhi,#0
   2136 +	ldr	$tp,[$_bp]		@ restore bp
   2137 +	str	$nlo,[$num]		@ tp[num-1]=
   2138 +	ldr	$n0,[$_n0]		@ restore n0
   2139 +	str	$nhi,[$num,#4]		@ tp[num]=
   2140 +
   2142 +.Louter:
   2143 +	sub	$tj,$num,sp		@ "original" $num-1 value
   2144 +	sub	$ap,$ap,$tj		@ "rewind" ap to &ap[1]
   2145 +	sub	$np,$np,$tj		@ "rewind" np to &np[1]
   2146 +	ldr	$bi,[$tp,#4]!		@ *(++bp)
   2147 +	ldr	$aj,[$ap,#-4]		@ ap[0]
   2148 +	ldr	$nj,[$np,#-4]		@ np[0]
   2149 +	ldr	$alo,[sp]		@ tp[0]
   2150 +	ldr	$tj,[sp,#4]		@ tp[1]
   2151 +
   2152 +	mov	$ahi,#0
   2153 +	umlal	$alo,$ahi,$aj,$bi	@ ap[0]*bp[i]+tp[0]
   2154 +	str	$tp,[$_bp]		@ save bp
   2155 +	mul	$n0,$alo,$n0
   2156 +	mov	$nlo,#0
   2157 +	umlal	$alo,$nlo,$nj,$n0	@ np[0]*n0+"tp[0]"
   2158 +	mov	$tp,sp
   2159 +
   2160 +.Linner:
   2161 +	ldr	$aj,[$ap],#4		@ ap[j],ap++
   2162 +	adds	$alo,$ahi,$tj		@ +=tp[j]
   2163 +	mov	$ahi,#0
   2164 +	umlal	$alo,$ahi,$aj,$bi	@ ap[j]*bp[i]
   2165 +	ldr	$nj,[$np],#4		@ np[j],np++
   2166 +	mov	$nhi,#0
   2167 +	umlal	$nlo,$nhi,$nj,$n0	@ np[j]*n0
   2168 +	ldr	$tj,[$tp,#8]		@ tp[j+1]
   2169 +	adc	$ahi,$ahi,#0
   2170 +	adds	$nlo,$nlo,$alo
   2171 +	str	$nlo,[$tp],#4		@ tp[j-1]=,tp++
   2172 +	adc	$nlo,$nhi,#0
   2173 +	cmp	$tp,$num
   2174 +	bne	.Linner
   2175 +
   2176 +	adds	$nlo,$nlo,$ahi
   2177 +	mov	$nhi,#0
   2178 +	adc	$nhi,$nhi,#0
   2179 +	adds	$nlo,$nlo,$tj
   2180 +	adc	$nhi,$nhi,#0
   2181 +	ldr	$tp,[$_bp]		@ restore bp
   2182 +	ldr	$tj,[$_bpend]		@ restore &bp[num]
   2183 +	str	$nlo,[$num]		@ tp[num-1]=
   2184 +	ldr	$n0,[$_n0]		@ restore n0
   2185 +	str	$nhi,[$num,#4]		@ tp[num]=
   2186 +
   2187 +	cmp	$tp,$tj
   2188 +	bne	.Louter
   2189 +
   2191 +	ldr	$rp,[$_rp]		@ pull rp
   2192 +	add	$num,$num,#4		@ $num to point at &tp[num]
   2193 +	sub	$aj,$num,sp		@ "original" num value
   2194 +	mov	$tp,sp			@ "rewind" $tp
   2195 +	mov	$ap,$tp			@ "borrow" $ap
   2196 +	sub	$np,$np,$aj		@ "rewind" $np to &np[0]
   2197 +
   2198 +	subs	$tj,$tj,$tj		@ "clear" carry flag
   2199 +.Lsub:	ldr	$tj,[$tp],#4
   2200 +	ldr	$nj,[$np],#4
   2201 +	sbcs	$tj,$tj,$nj		@ tp[j]-np[j]
   2202 +	str	$tj,[$rp],#4		@ rp[j]=
   2203 +	teq	$tp,$num		@ preserve carry
   2204 +	bne	.Lsub
   2205 +	sbcs	$nhi,$nhi,#0		@ upmost carry
   2206 +	mov	$tp,sp			@ "rewind" $tp
   2207 +	sub	$rp,$rp,$aj		@ "rewind" $rp
   2208 +
   2209 +	and	$ap,$tp,$nhi
   2210 +	bic	$np,$rp,$nhi
   2211 +	orr	$ap,$ap,$np		@ ap=borrow?tp:rp
   2212 +
   2213 +.Lcopy:	ldr	$tj,[$ap],#4		@ copy or in-place refresh
   2214 +	str	sp,[$tp],#4		@ zap tp
   2215 +	str	$tj,[$rp],#4
   2216 +	cmp	$tp,$num
   2217 +	bne	.Lcopy
   2218 +
   2219 +	add	sp,$num,#4		@ skip over tp[num+1]
   2220 +	ldmia	sp!,{r4-r12,lr}		@ restore registers
   2221 +	add	sp,sp,#2*4		@ skip over {r0,r2}
   2222 +	mov	r0,#1
   2223 +.Labrt:	tst	lr,#1
   2224 +	moveq	pc,lr			@ be binary compatible with V4, yet
   2225 +	bx	lr			@ interoperable with Thumb ISA:-)
   2226 +.size	bn_mul_mont,.-bn_mul_mont
   2227 +.asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   2228 +___
   2229 +
   2230 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   2231 +print $code;
   2232 +close STDOUT;
   2233 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   2234 +++ openssl-0.9.8h/crypto/0.9.9-dev/bn/armv4-mont.s	2009-09-03 15:42:39.000000000 -0700
   2235 @@ -0,0 +1,145 @@
   2236 +.text
   2237 +
   2238 +.global	bn_mul_mont
   2239 +.type	bn_mul_mont,%function
   2240 +
   2241 +.align	2
   2242 +bn_mul_mont:
   2243 +	stmdb	sp!,{r0,r2}		@ sp points at argument block
   2244 +	ldr	r0,[sp,#3*4]		@ load num
   2245 +	cmp	r0,#2
   2246 +	movlt	r0,#0
   2247 +	addlt	sp,sp,#2*4
   2248 +	blt	.Labrt
   2249 +
   2250 +	stmdb	sp!,{r4-r12,lr}		@ save 10 registers
   2251 +
   2252 +	mov	r0,r0,lsl#2		@ rescale r0 for byte count
   2253 +	sub	sp,sp,r0		@ alloca(4*num)
   2254 +	sub	sp,sp,#4		@ +extra dword
   2255 +	sub	r0,r0,#4		@ "num=num-1"
   2256 +	add	r4,r2,r0		@ &bp[num-1]
   2257 +
   2258 +	add	r0,sp,r0		@ r0 to point at &tp[num-1]
   2259 +	ldr	r8,[r0,#14*4]		@ &n0
   2260 +	ldr	r2,[r2]		@ bp[0]
   2261 +	ldr	r5,[r1],#4		@ ap[0],ap++
   2262 +	ldr	r6,[r3],#4		@ np[0],np++
   2263 +	ldr	r8,[r8]		@ *n0
   2264 +	str	r4,[r0,#15*4]		@ save &bp[num]
   2265 +
   2266 +	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
   2267 +	str	r8,[r0,#14*4]		@ save n0 value
   2268 +	mul	r8,r10,r8		@ "tp[0]"*n0
   2269 +	mov	r12,#0
   2270 +	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
   2271 +	mov	r4,sp
   2272 +
   2273 +.L1st:
   2274 +	ldr	r5,[r1],#4		@ ap[j],ap++
   2275 +	mov	r10,r11
   2276 +	mov	r11,#0
   2277 +	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
   2278 +	ldr	r6,[r3],#4		@ np[j],np++
   2279 +	mov	r14,#0
   2280 +	umlal	r12,r14,r6,r8	@ np[j]*n0
   2281 +	adds	r12,r12,r10
   2282 +	str	r12,[r4],#4		@ tp[j-1]=,tp++
   2283 +	adc	r12,r14,#0
   2284 +	cmp	r4,r0
   2285 +	bne	.L1st
   2286 +
   2287 +	adds	r12,r12,r11
   2288 +	mov	r14,#0
   2289 +	adc	r14,r14,#0
   2290 +	ldr	r4,[r0,#13*4]		@ restore bp
   2291 +	str	r12,[r0]		@ tp[num-1]=
   2292 +	ldr	r8,[r0,#14*4]		@ restore n0
   2293 +	str	r14,[r0,#4]		@ tp[num]=
   2294 +
   2296 +.Louter:
   2297 +	sub	r7,r0,sp		@ "original" r0-1 value
   2298 +	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
   2299 +	sub	r3,r3,r7		@ "rewind" np to &np[1]
   2300 +	ldr	r2,[r4,#4]!		@ *(++bp)
   2301 +	ldr	r5,[r1,#-4]		@ ap[0]
   2302 +	ldr	r6,[r3,#-4]		@ np[0]
   2303 +	ldr	r10,[sp]		@ tp[0]
   2304 +	ldr	r7,[sp,#4]		@ tp[1]
   2305 +
   2306 +	mov	r11,#0
   2307 +	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
   2308 +	str	r4,[r0,#13*4]		@ save bp
   2309 +	mul	r8,r10,r8
   2310 +	mov	r12,#0
   2311 +	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
   2312 +	mov	r4,sp
   2313 +
   2314 +.Linner:
   2315 +	ldr	r5,[r1],#4		@ ap[j],ap++
   2316 +	adds	r10,r11,r7		@ +=tp[j]
   2317 +	mov	r11,#0
   2318 +	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
   2319 +	ldr	r6,[r3],#4		@ np[j],np++
   2320 +	mov	r14,#0
   2321 +	umlal	r12,r14,r6,r8	@ np[j]*n0
   2322 +	ldr	r7,[r4,#8]		@ tp[j+1]
   2323 +	adc	r11,r11,#0
   2324 +	adds	r12,r12,r10
   2325 +	str	r12,[r4],#4		@ tp[j-1]=,tp++
   2326 +	adc	r12,r14,#0
   2327 +	cmp	r4,r0
   2328 +	bne	.Linner
   2329 +
   2330 +	adds	r12,r12,r11
   2331 +	mov	r14,#0
   2332 +	adc	r14,r14,#0
   2333 +	adds	r12,r12,r7
   2334 +	adc	r14,r14,#0
   2335 +	ldr	r4,[r0,#13*4]		@ restore bp
   2336 +	ldr	r7,[r0,#15*4]		@ restore &bp[num]
   2337 +	str	r12,[r0]		@ tp[num-1]=
   2338 +	ldr	r8,[r0,#14*4]		@ restore n0
   2339 +	str	r14,[r0,#4]		@ tp[num]=
   2340 +
   2341 +	cmp	r4,r7
   2342 +	bne	.Louter
   2343 +
   2345 +	ldr	r2,[r0,#12*4]		@ pull rp
   2346 +	add	r0,r0,#4		@ r0 to point at &tp[num]
   2347 +	sub	r5,r0,sp		@ "original" num value
   2348 +	mov	r4,sp			@ "rewind" r4
   2349 +	mov	r1,r4			@ "borrow" r1
   2350 +	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
   2351 +
   2352 +	subs	r7,r7,r7		@ "clear" carry flag
   2353 +.Lsub:	ldr	r7,[r4],#4
   2354 +	ldr	r6,[r3],#4
   2355 +	sbcs	r7,r7,r6		@ tp[j]-np[j]
   2356 +	str	r7,[r2],#4		@ rp[j]=
   2357 +	teq	r4,r0		@ preserve carry
   2358 +	bne	.Lsub
   2359 +	sbcs	r14,r14,#0		@ upmost carry
   2360 +	mov	r4,sp			@ "rewind" r4
   2361 +	sub	r2,r2,r5		@ "rewind" r2
   2362 +
   2363 +	and	r1,r4,r14
   2364 +	bic	r3,r2,r14
   2365 +	orr	r1,r1,r3		@ ap=borrow?tp:rp
   2366 +
   2367 +.Lcopy:	ldr	r7,[r1],#4		@ copy or in-place refresh
   2368 +	str	sp,[r4],#4		@ zap tp
   2369 +	str	r7,[r2],#4
   2370 +	cmp	r4,r0
   2371 +	bne	.Lcopy
   2372 +
   2373 +	add	sp,r0,#4		@ skip over tp[num+1]
   2374 +	ldmia	sp!,{r4-r12,lr}		@ restore registers
   2375 +	add	sp,sp,#2*4		@ skip over {r0,r2}
   2376 +	mov	r0,#1
   2377 +.Labrt:	tst	lr,#1
   2378 +	moveq	pc,lr			@ be binary compatible with V4, yet
   2379 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   2380 +.size	bn_mul_mont,.-bn_mul_mont
    2381 +.asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
   2382 +.align	2
   2383 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   2384 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha1-armv4-large.pl	2009-09-03 15:42:39.000000000 -0700
   2385 @@ -0,0 +1,231 @@
   2386 +#!/usr/bin/env perl
   2387 +
   2388 +# ====================================================================
    2389 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
   2390 +# project. The module is, however, dual licensed under OpenSSL and
   2391 +# CRYPTOGAMS licenses depending on where you obtain it. For further
   2392 +# details see http://www.openssl.org/~appro/cryptogams/.
   2393 +# ====================================================================
   2394 +
   2395 +# sha1_block procedure for ARMv4.
   2396 +#
   2397 +# January 2007.
   2398 +
   2399 +# Size/performance trade-off
   2400 +# ====================================================================
   2401 +# impl		size in bytes	comp cycles[*]	measured performance
   2402 +# ====================================================================
   2403 +# thumb		304		3212		4420
   2404 +# armv4-small	392/+29%	1958/+64%	2250/+96%
   2405 +# armv4-compact	740/+89%	1552/+26%	1840/+22%
   2406 +# armv4-large	1420/+92%	1307/+19%	1500/+23%
   2407 +# full unroll	~5100/+260%	~1260/+4%	~1500/+0%
   2408 +# ====================================================================
   2409 +# thumb		= same as 'small' but in Thumb instructions[**] and
   2410 +#		  with recurring code in two private functions;
   2411 +# small		= detached Xload/update, loops are folded;
   2412 +# compact	= detached Xload/update, 5x unroll;
   2413 +# large		= interleaved Xload/update, 5x unroll;
   2414 +# full unroll	= interleaved Xload/update, full unroll, estimated[!];
   2415 +#
   2416 +# [*]	Manually counted instructions in "grand" loop body. Measured
   2417 +#	performance is affected by prologue and epilogue overhead,
   2418 +#	i-cache availability, branch penalties, etc.
    2419 +# [**]	While each Thumb instruction is half the size of an ARM one,
    2420 +#	Thumb instructions are not as diverse: e.g., there are only two
    2421 +#	arithmetic instructions with 3 arguments, no [fixed] rotate, and
    2422 +#	addressing modes are limited. As a result it takes more
    2423 +#	instructions to do the same job in Thumb, so the code is never
    2424 +#	twice as small and is always slower.
   2425 +
   2426 +$output=shift;
   2427 +open STDOUT,">$output";
   2428 +
   2429 +$ctx="r0";
   2430 +$inp="r1";
   2431 +$len="r2";
   2432 +$a="r3";
   2433 +$b="r4";
   2434 +$c="r5";
   2435 +$d="r6";
   2436 +$e="r7";
   2437 +$K="r8";
   2438 +$t0="r10";
   2439 +$t1="r11";
   2440 +$t2="r12";
   2441 +$Xi="r14";
   2442 +@V=($a,$b,$c,$d,$e);
   2443 +
   2444 +# One can optimize this for aligned access on big-endian architecture,
    2445 +# but the code's endian neutrality makes it too pretty:-)
   2446 +sub Xload {
   2447 +my ($a,$b,$c,$d,$e)=@_;
   2448 +$code.=<<___;
   2449 +	ldrb	$t0,[$inp],#4
   2450 +	ldrb	$t1,[$inp,#-3]
   2451 +	ldrb	$t2,[$inp,#-2]
   2452 +	add	$e,$K,$e,ror#2			@ E+=K_00_19
   2453 +	orr	$t0,$t1,$t0,lsl#8
   2454 +	ldrb	$t1,[$inp,#-1]
   2455 +	orr	$t0,$t2,$t0,lsl#8
   2456 +	add	$e,$e,$a,ror#27			@ E+=ROR(A,27)
   2457 +	orr	$t0,$t1,$t0,lsl#8
   2458 +	add	$e,$e,$t0			@ E+=X[i]
   2459 +	eor	$t1,$c,$d			@ F_xx_xx
   2460 +	str	$t0,[$Xi,#-4]!
   2461 +___
   2462 +}
   2463 +sub Xupdate {
   2464 +my ($a,$b,$c,$d,$e,$flag)=@_;
   2465 +$code.=<<___;
   2466 +	ldr	$t0,[$Xi,#15*4]
   2467 +	ldr	$t1,[$Xi,#13*4]
   2468 +	ldr	$t2,[$Xi,#7*4]
   2469 +	add	$e,$K,$e,ror#2			@ E+=K_xx_xx
   2470 +	eor	$t0,$t0,$t1
   2471 +	ldr	$t1,[$Xi,#2*4]
   2472 +	add	$e,$e,$a,ror#27			@ E+=ROR(A,27)
   2473 +	eor	$t0,$t0,$t2
   2474 +	eor	$t0,$t0,$t1
   2475 +___
   2476 +$code.=<<___ if (!defined($flag));
   2477 +	eor	$t1,$c,$d			@ F_xx_xx, but not in 40_59
   2478 +___
   2479 +$code.=<<___;
   2480 +	mov	$t0,$t0,ror#31
   2481 +	add	$e,$e,$t0			@ E+=X[i]
   2482 +	str	$t0,[$Xi,#-4]!
   2483 +___
   2484 +}
   2485 +
   2486 +sub BODY_00_15 {
   2487 +my ($a,$b,$c,$d,$e)=@_;
   2488 +	&Xload(@_);
   2489 +$code.=<<___;
   2490 +	and	$t1,$b,$t1,ror#2
   2491 +	eor	$t1,$t1,$d,ror#2		@ F_00_19(B,C,D)
   2492 +	add	$e,$e,$t1			@ E+=F_00_19(B,C,D)
   2493 +___
   2494 +}
   2495 +
   2496 +sub BODY_16_19 {
   2497 +my ($a,$b,$c,$d,$e)=@_;
   2498 +	&Xupdate(@_);
   2499 +$code.=<<___;
   2500 +	and	$t1,$b,$t1,ror#2
   2501 +	eor	$t1,$t1,$d,ror#2		@ F_00_19(B,C,D)
   2502 +	add	$e,$e,$t1			@ E+=F_00_19(B,C,D)
   2503 +___
   2504 +}
   2505 +
   2506 +sub BODY_20_39 {
   2507 +my ($a,$b,$c,$d,$e)=@_;
   2508 +	&Xupdate(@_);
   2509 +$code.=<<___;
   2510 +	eor	$t1,$b,$t1,ror#2		@ F_20_39(B,C,D)
   2511 +	add	$e,$e,$t1			@ E+=F_20_39(B,C,D)
   2512 +___
   2513 +}
   2514 +
   2515 +sub BODY_40_59 {
   2516 +my ($a,$b,$c,$d,$e)=@_;
   2517 +	&Xupdate(@_,1);
   2518 +$code.=<<___;
   2519 +	and	$t1,$b,$c,ror#2
   2520 +	orr	$t2,$b,$c,ror#2
   2521 +	and	$t2,$t2,$d,ror#2
   2522 +	orr	$t1,$t1,$t2			@ F_40_59(B,C,D)
   2523 +	add	$e,$e,$t1			@ E+=F_40_59(B,C,D)
   2524 +___
   2525 +}
   2526 +
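The four BODY_* subs above emit the 80 SHA-1 rounds: Xload feeds the first 16 big-endian message words, Xupdate implements the recurrence X[i] = ROL(X[i-3] ^ X[i-8] ^ X[i-14] ^ X[i-16], 1) (the ror#31), and the F_xx_xx comments name the three round functions. For reference, the whole compression function in plain C (a sketch; the module additionally keeps B..E pre-rotated by 2 and shares the 20_39 and 60_79 code by parking a flag in the carry bit, cf. the cmn/cmp sp,#0 below):

    #include <stdint.h>

    static uint32_t rol(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Compress one 64-byte block already loaded big-endian into X[0..15];
     * X[] is clobbered.  Reference sketch, not the module's entry point. */
    static void sha1_compress_ref(uint32_t h[5], uint32_t X[16])
    {
        uint32_t a = h[0], b = h[1], c = h[2], d = h[3], e = h[4], f, k, t;
        int i;

        for (i = 0; i < 80; i++) {
            if (i >= 16)                         /* the Xupdate recurrence */
                X[i & 15] = rol(X[(i - 3) & 15] ^ X[(i - 8) & 15] ^
                                X[(i - 14) & 15] ^ X[i & 15], 1);
            if (i < 20)      { f = ((c ^ d) & b) ^ d;       k = 0x5a827999; }
            else if (i < 40) { f = b ^ c ^ d;               k = 0x6ed9eba1; }
            else if (i < 60) { f = (b & c) | ((b | c) & d); k = 0x8f1bbcdc; }
            else             { f = b ^ c ^ d;               k = 0xca62c1d6; }
            t = rol(a, 5) + f + e + k + X[i & 15];
            e = d; d = c; c = rol(b, 30); b = a; a = t;
        }
        h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
    }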
   2527 +$code=<<___;
   2528 +.text
   2529 +
   2530 +.global	sha1_block_data_order
   2531 +.type	sha1_block_data_order,%function
   2532 +
   2533 +.align	2
   2534 +sha1_block_data_order:
   2535 +	stmdb	sp!,{r4-r12,lr}
   2536 +	add	$len,$inp,$len,lsl#6	@ $len to point at the end of $inp
   2537 +	ldmia	$ctx,{$a,$b,$c,$d,$e}
   2538 +.Lloop:
   2539 +	ldr	$K,.LK_00_19
   2540 +	mov	$Xi,sp
   2541 +	sub	sp,sp,#15*4
   2542 +	mov	$c,$c,ror#30
   2543 +	mov	$d,$d,ror#30
   2544 +	mov	$e,$e,ror#30		@ [6]
   2545 +.L_00_15:
   2546 +___
   2547 +for($i=0;$i<5;$i++) {
   2548 +	&BODY_00_15(@V);	unshift(@V,pop(@V));
   2549 +}
   2550 +$code.=<<___;
   2551 +	teq	$Xi,sp
   2552 +	bne	.L_00_15		@ [((11+4)*5+2)*3]
   2553 +___
   2554 +	&BODY_00_15(@V);	unshift(@V,pop(@V));
   2555 +	&BODY_16_19(@V);	unshift(@V,pop(@V));
   2556 +	&BODY_16_19(@V);	unshift(@V,pop(@V));
   2557 +	&BODY_16_19(@V);	unshift(@V,pop(@V));
   2558 +	&BODY_16_19(@V);	unshift(@V,pop(@V));
   2559 +$code.=<<___;
   2560 +
   2561 +	ldr	$K,.LK_20_39		@ [+15+16*4]
   2562 +	sub	sp,sp,#25*4
   2563 +	cmn	sp,#0			@ [+3], clear carry to denote 20_39
   2564 +.L_20_39_or_60_79:
   2565 +___
   2566 +for($i=0;$i<5;$i++) {
   2567 +	&BODY_20_39(@V);	unshift(@V,pop(@V));
   2568 +}
   2569 +$code.=<<___;
   2570 +	teq	$Xi,sp			@ preserve carry
   2571 +	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
   2572 +	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
   2573 +
   2574 +	ldr	$K,.LK_40_59
   2575 +	sub	sp,sp,#20*4		@ [+2]
   2576 +.L_40_59:
   2577 +___
   2578 +for($i=0;$i<5;$i++) {
   2579 +	&BODY_40_59(@V);	unshift(@V,pop(@V));
   2580 +}
   2581 +$code.=<<___;
   2582 +	teq	$Xi,sp
   2583 +	bne	.L_40_59		@ [+((12+5)*5+2)*4]
   2584 +
   2585 +	ldr	$K,.LK_60_79
   2586 +	sub	sp,sp,#20*4
   2587 +	cmp	sp,#0			@ set carry to denote 60_79
   2588 +	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
   2589 +.L_done:
   2590 +	add	sp,sp,#80*4		@ "deallocate" stack frame
   2591 +	ldmia	$ctx,{$K,$t0,$t1,$t2,$Xi}
   2592 +	add	$a,$K,$a
   2593 +	add	$b,$t0,$b
   2594 +	add	$c,$t1,$c,ror#2
   2595 +	add	$d,$t2,$d,ror#2
   2596 +	add	$e,$Xi,$e,ror#2
   2597 +	stmia	$ctx,{$a,$b,$c,$d,$e}
   2598 +	teq	$inp,$len
   2599 +	bne	.Lloop			@ [+18], total 1307
   2600 +
   2601 +	ldmia	sp!,{r4-r12,lr}
   2602 +	tst	lr,#1
   2603 +	moveq	pc,lr			@ be binary compatible with V4, yet
   2604 +	bx	lr			@ interoperable with Thumb ISA:-)
   2605 +.align	2
   2606 +.LK_00_19:	.word	0x5a827999
   2607 +.LK_20_39:	.word	0x6ed9eba1
   2608 +.LK_40_59:	.word	0x8f1bbcdc
   2609 +.LK_60_79:	.word	0xca62c1d6
   2610 +.size	sha1_block_data_order,.-sha1_block_data_order
   2611 +.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   2612 +___
   2613 +
   2614 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   2615 +print $code;
   2616 +close STDOUT; # enforce flush
   2617 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   2618 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha1-armv4-large.s	2009-09-03 15:42:39.000000000 -0700
   2619 @@ -0,0 +1,376 @@
   2620 +.text
   2621 +
   2622 +.global	sha1_block_data_order
   2623 +.type	sha1_block_data_order,%function
   2624 +
   2625 +.align	2
   2626 +sha1_block_data_order:
   2627 +	stmdb	sp!,{r4-r12,lr}
   2628 +	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
   2629 +	ldmia	r0,{r3,r4,r5,r6,r7}
   2630 +.Lloop:
   2631 +	ldr	r8,.LK_00_19
   2632 +	mov	r14,sp
   2633 +	sub	sp,sp,#15*4
   2634 +	mov	r5,r5,ror#30
   2635 +	mov	r6,r6,ror#30
   2636 +	mov	r7,r7,ror#30		@ [6]
   2637 +.L_00_15:
   2638 +	ldrb	r10,[r1],#4
   2639 +	ldrb	r11,[r1,#-3]
   2640 +	ldrb	r12,[r1,#-2]
   2641 +	add	r7,r8,r7,ror#2			@ E+=K_00_19
   2642 +	orr	r10,r11,r10,lsl#8
   2643 +	ldrb	r11,[r1,#-1]
   2644 +	orr	r10,r12,r10,lsl#8
   2645 +	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
   2646 +	orr	r10,r11,r10,lsl#8
   2647 +	add	r7,r7,r10			@ E+=X[i]
   2648 +	eor	r11,r5,r6			@ F_xx_xx
   2649 +	str	r10,[r14,#-4]!
   2650 +	and	r11,r4,r11,ror#2
   2651 +	eor	r11,r11,r6,ror#2		@ F_00_19(B,C,D)
   2652 +	add	r7,r7,r11			@ E+=F_00_19(B,C,D)
   2653 +	ldrb	r10,[r1],#4
   2654 +	ldrb	r11,[r1,#-3]
   2655 +	ldrb	r12,[r1,#-2]
   2656 +	add	r6,r8,r6,ror#2			@ E+=K_00_19
   2657 +	orr	r10,r11,r10,lsl#8
   2658 +	ldrb	r11,[r1,#-1]
   2659 +	orr	r10,r12,r10,lsl#8
   2660 +	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
   2661 +	orr	r10,r11,r10,lsl#8
   2662 +	add	r6,r6,r10			@ E+=X[i]
   2663 +	eor	r11,r4,r5			@ F_xx_xx
   2664 +	str	r10,[r14,#-4]!
   2665 +	and	r11,r3,r11,ror#2
   2666 +	eor	r11,r11,r5,ror#2		@ F_00_19(B,C,D)
   2667 +	add	r6,r6,r11			@ E+=F_00_19(B,C,D)
   2668 +	ldrb	r10,[r1],#4
   2669 +	ldrb	r11,[r1,#-3]
   2670 +	ldrb	r12,[r1,#-2]
   2671 +	add	r5,r8,r5,ror#2			@ E+=K_00_19
   2672 +	orr	r10,r11,r10,lsl#8
   2673 +	ldrb	r11,[r1,#-1]
   2674 +	orr	r10,r12,r10,lsl#8
   2675 +	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
   2676 +	orr	r10,r11,r10,lsl#8
   2677 +	add	r5,r5,r10			@ E+=X[i]
   2678 +	eor	r11,r3,r4			@ F_xx_xx
   2679 +	str	r10,[r14,#-4]!
   2680 +	and	r11,r7,r11,ror#2
   2681 +	eor	r11,r11,r4,ror#2		@ F_00_19(B,C,D)
   2682 +	add	r5,r5,r11			@ E+=F_00_19(B,C,D)
   2683 +	ldrb	r10,[r1],#4
   2684 +	ldrb	r11,[r1,#-3]
   2685 +	ldrb	r12,[r1,#-2]
   2686 +	add	r4,r8,r4,ror#2			@ E+=K_00_19
   2687 +	orr	r10,r11,r10,lsl#8
   2688 +	ldrb	r11,[r1,#-1]
   2689 +	orr	r10,r12,r10,lsl#8
   2690 +	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
   2691 +	orr	r10,r11,r10,lsl#8
   2692 +	add	r4,r4,r10			@ E+=X[i]
   2693 +	eor	r11,r7,r3			@ F_xx_xx
   2694 +	str	r10,[r14,#-4]!
   2695 +	and	r11,r6,r11,ror#2
   2696 +	eor	r11,r11,r3,ror#2		@ F_00_19(B,C,D)
   2697 +	add	r4,r4,r11			@ E+=F_00_19(B,C,D)
   2698 +	ldrb	r10,[r1],#4
   2699 +	ldrb	r11,[r1,#-3]
   2700 +	ldrb	r12,[r1,#-2]
   2701 +	add	r3,r8,r3,ror#2			@ E+=K_00_19
   2702 +	orr	r10,r11,r10,lsl#8
   2703 +	ldrb	r11,[r1,#-1]
   2704 +	orr	r10,r12,r10,lsl#8
   2705 +	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
   2706 +	orr	r10,r11,r10,lsl#8
   2707 +	add	r3,r3,r10			@ E+=X[i]
   2708 +	eor	r11,r6,r7			@ F_xx_xx
   2709 +	str	r10,[r14,#-4]!
   2710 +	and	r11,r5,r11,ror#2
   2711 +	eor	r11,r11,r7,ror#2		@ F_00_19(B,C,D)
   2712 +	add	r3,r3,r11			@ E+=F_00_19(B,C,D)
   2713 +	teq	r14,sp
   2714 +	bne	.L_00_15		@ [((11+4)*5+2)*3]
   2715 +	ldrb	r10,[r1],#4
   2716 +	ldrb	r11,[r1,#-3]
   2717 +	ldrb	r12,[r1,#-2]
   2718 +	add	r7,r8,r7,ror#2			@ E+=K_00_19
   2719 +	orr	r10,r11,r10,lsl#8
   2720 +	ldrb	r11,[r1,#-1]
   2721 +	orr	r10,r12,r10,lsl#8
   2722 +	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
   2723 +	orr	r10,r11,r10,lsl#8
   2724 +	add	r7,r7,r10			@ E+=X[i]
   2725 +	eor	r11,r5,r6			@ F_xx_xx
   2726 +	str	r10,[r14,#-4]!
   2727 +	and	r11,r4,r11,ror#2
   2728 +	eor	r11,r11,r6,ror#2		@ F_00_19(B,C,D)
   2729 +	add	r7,r7,r11			@ E+=F_00_19(B,C,D)
   2730 +	ldr	r10,[r14,#15*4]
   2731 +	ldr	r11,[r14,#13*4]
   2732 +	ldr	r12,[r14,#7*4]
   2733 +	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
   2734 +	eor	r10,r10,r11
   2735 +	ldr	r11,[r14,#2*4]
   2736 +	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
   2737 +	eor	r10,r10,r12
   2738 +	eor	r10,r10,r11
   2739 +	eor	r11,r4,r5			@ F_xx_xx, but not in 40_59
   2740 +	mov	r10,r10,ror#31
   2741 +	add	r6,r6,r10			@ E+=X[i]
   2742 +	str	r10,[r14,#-4]!
   2743 +	and	r11,r3,r11,ror#2
   2744 +	eor	r11,r11,r5,ror#2		@ F_00_19(B,C,D)
   2745 +	add	r6,r6,r11			@ E+=F_00_19(B,C,D)
   2746 +	ldr	r10,[r14,#15*4]
   2747 +	ldr	r11,[r14,#13*4]
   2748 +	ldr	r12,[r14,#7*4]
   2749 +	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
   2750 +	eor	r10,r10,r11
   2751 +	ldr	r11,[r14,#2*4]
   2752 +	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
   2753 +	eor	r10,r10,r12
   2754 +	eor	r10,r10,r11
   2755 +	eor	r11,r3,r4			@ F_xx_xx, but not in 40_59
   2756 +	mov	r10,r10,ror#31
   2757 +	add	r5,r5,r10			@ E+=X[i]
   2758 +	str	r10,[r14,#-4]!
   2759 +	and	r11,r7,r11,ror#2
   2760 +	eor	r11,r11,r4,ror#2		@ F_00_19(B,C,D)
   2761 +	add	r5,r5,r11			@ E+=F_00_19(B,C,D)
   2762 +	ldr	r10,[r14,#15*4]
   2763 +	ldr	r11,[r14,#13*4]
   2764 +	ldr	r12,[r14,#7*4]
   2765 +	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
   2766 +	eor	r10,r10,r11
   2767 +	ldr	r11,[r14,#2*4]
   2768 +	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
   2769 +	eor	r10,r10,r12
   2770 +	eor	r10,r10,r11
   2771 +	eor	r11,r7,r3			@ F_xx_xx, but not in 40_59
   2772 +	mov	r10,r10,ror#31
   2773 +	add	r4,r4,r10			@ E+=X[i]
   2774 +	str	r10,[r14,#-4]!
   2775 +	and	r11,r6,r11,ror#2
   2776 +	eor	r11,r11,r3,ror#2		@ F_00_19(B,C,D)
   2777 +	add	r4,r4,r11			@ E+=F_00_19(B,C,D)
   2778 +	ldr	r10,[r14,#15*4]
   2779 +	ldr	r11,[r14,#13*4]
   2780 +	ldr	r12,[r14,#7*4]
   2781 +	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
   2782 +	eor	r10,r10,r11
   2783 +	ldr	r11,[r14,#2*4]
   2784 +	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
   2785 +	eor	r10,r10,r12
   2786 +	eor	r10,r10,r11
   2787 +	eor	r11,r6,r7			@ F_xx_xx, but not in 40_59
   2788 +	mov	r10,r10,ror#31
   2789 +	add	r3,r3,r10			@ E+=X[i]
   2790 +	str	r10,[r14,#-4]!
   2791 +	and	r11,r5,r11,ror#2
   2792 +	eor	r11,r11,r7,ror#2		@ F_00_19(B,C,D)
   2793 +	add	r3,r3,r11			@ E+=F_00_19(B,C,D)
   2794 +
   2795 +	ldr	r8,.LK_20_39		@ [+15+16*4]
   2796 +	sub	sp,sp,#25*4
   2797 +	cmn	sp,#0			@ [+3], clear carry to denote 20_39
   2798 +.L_20_39_or_60_79:
   2799 +	ldr	r10,[r14,#15*4]
   2800 +	ldr	r11,[r14,#13*4]
   2801 +	ldr	r12,[r14,#7*4]
   2802 +	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
   2803 +	eor	r10,r10,r11
   2804 +	ldr	r11,[r14,#2*4]
   2805 +	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
   2806 +	eor	r10,r10,r12
   2807 +	eor	r10,r10,r11
   2808 +	eor	r11,r5,r6			@ F_xx_xx, but not in 40_59
   2809 +	mov	r10,r10,ror#31
   2810 +	add	r7,r7,r10			@ E+=X[i]
   2811 +	str	r10,[r14,#-4]!
   2812 +	eor	r11,r4,r11,ror#2		@ F_20_39(B,C,D)
   2813 +	add	r7,r7,r11			@ E+=F_20_39(B,C,D)
   2814 +	ldr	r10,[r14,#15*4]
   2815 +	ldr	r11,[r14,#13*4]
   2816 +	ldr	r12,[r14,#7*4]
   2817 +	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
   2818 +	eor	r10,r10,r11
   2819 +	ldr	r11,[r14,#2*4]
   2820 +	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
   2821 +	eor	r10,r10,r12
   2822 +	eor	r10,r10,r11
   2823 +	eor	r11,r4,r5			@ F_xx_xx, but not in 40_59
   2824 +	mov	r10,r10,ror#31
   2825 +	add	r6,r6,r10			@ E+=X[i]
   2826 +	str	r10,[r14,#-4]!
   2827 +	eor	r11,r3,r11,ror#2		@ F_20_39(B,C,D)
   2828 +	add	r6,r6,r11			@ E+=F_20_39(B,C,D)
   2829 +	ldr	r10,[r14,#15*4]
   2830 +	ldr	r11,[r14,#13*4]
   2831 +	ldr	r12,[r14,#7*4]
   2832 +	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
   2833 +	eor	r10,r10,r11
   2834 +	ldr	r11,[r14,#2*4]
   2835 +	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
   2836 +	eor	r10,r10,r12
   2837 +	eor	r10,r10,r11
   2838 +	eor	r11,r3,r4			@ F_xx_xx, but not in 40_59
   2839 +	mov	r10,r10,ror#31
   2840 +	add	r5,r5,r10			@ E+=X[i]
   2841 +	str	r10,[r14,#-4]!
   2842 +	eor	r11,r7,r11,ror#2		@ F_20_39(B,C,D)
   2843 +	add	r5,r5,r11			@ E+=F_20_39(B,C,D)
   2844 +	ldr	r10,[r14,#15*4]
   2845 +	ldr	r11,[r14,#13*4]
   2846 +	ldr	r12,[r14,#7*4]
   2847 +	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
   2848 +	eor	r10,r10,r11
   2849 +	ldr	r11,[r14,#2*4]
   2850 +	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
   2851 +	eor	r10,r10,r12
   2852 +	eor	r10,r10,r11
   2853 +	eor	r11,r7,r3			@ F_xx_xx, but not in 40_59
   2854 +	mov	r10,r10,ror#31
   2855 +	add	r4,r4,r10			@ E+=X[i]
   2856 +	str	r10,[r14,#-4]!
   2857 +	eor	r11,r6,r11,ror#2		@ F_20_39(B,C,D)
   2858 +	add	r4,r4,r11			@ E+=F_20_39(B,C,D)
   2859 +	ldr	r10,[r14,#15*4]
   2860 +	ldr	r11,[r14,#13*4]
   2861 +	ldr	r12,[r14,#7*4]
   2862 +	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
   2863 +	eor	r10,r10,r11
   2864 +	ldr	r11,[r14,#2*4]
   2865 +	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
   2866 +	eor	r10,r10,r12
   2867 +	eor	r10,r10,r11
   2868 +	eor	r11,r6,r7			@ F_xx_xx, but not in 40_59
   2869 +	mov	r10,r10,ror#31
   2870 +	add	r3,r3,r10			@ E+=X[i]
   2871 +	str	r10,[r14,#-4]!
   2872 +	eor	r11,r5,r11,ror#2		@ F_20_39(B,C,D)
   2873 +	add	r3,r3,r11			@ E+=F_20_39(B,C,D)
   2874 +	teq	r14,sp			@ preserve carry
   2875 +	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
   2876 +	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
   2877 +
   2878 +	ldr	r8,.LK_40_59
   2879 +	sub	sp,sp,#20*4		@ [+2]
   2880 +.L_40_59:
   2881 +	ldr	r10,[r14,#15*4]
   2882 +	ldr	r11,[r14,#13*4]
   2883 +	ldr	r12,[r14,#7*4]
   2884 +	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
   2885 +	eor	r10,r10,r11
   2886 +	ldr	r11,[r14,#2*4]
   2887 +	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
   2888 +	eor	r10,r10,r12
   2889 +	eor	r10,r10,r11
   2890 +	mov	r10,r10,ror#31
   2891 +	add	r7,r7,r10			@ E+=X[i]
   2892 +	str	r10,[r14,#-4]!
   2893 +	and	r11,r4,r5,ror#2
   2894 +	orr	r12,r4,r5,ror#2
   2895 +	and	r12,r12,r6,ror#2
   2896 +	orr	r11,r11,r12			@ F_40_59(B,C,D)
   2897 +	add	r7,r7,r11			@ E+=F_40_59(B,C,D)
   2898 +	ldr	r10,[r14,#15*4]
   2899 +	ldr	r11,[r14,#13*4]
   2900 +	ldr	r12,[r14,#7*4]
   2901 +	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
   2902 +	eor	r10,r10,r11
   2903 +	ldr	r11,[r14,#2*4]
   2904 +	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
   2905 +	eor	r10,r10,r12
   2906 +	eor	r10,r10,r11
   2907 +	mov	r10,r10,ror#31
   2908 +	add	r6,r6,r10			@ E+=X[i]
   2909 +	str	r10,[r14,#-4]!
   2910 +	and	r11,r3,r4,ror#2
   2911 +	orr	r12,r3,r4,ror#2
   2912 +	and	r12,r12,r5,ror#2
   2913 +	orr	r11,r11,r12			@ F_40_59(B,C,D)
   2914 +	add	r6,r6,r11			@ E+=F_40_59(B,C,D)
   2915 +	ldr	r10,[r14,#15*4]
   2916 +	ldr	r11,[r14,#13*4]
   2917 +	ldr	r12,[r14,#7*4]
   2918 +	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
   2919 +	eor	r10,r10,r11
   2920 +	ldr	r11,[r14,#2*4]
   2921 +	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
   2922 +	eor	r10,r10,r12
   2923 +	eor	r10,r10,r11
   2924 +	mov	r10,r10,ror#31
   2925 +	add	r5,r5,r10			@ E+=X[i]
   2926 +	str	r10,[r14,#-4]!
   2927 +	and	r11,r7,r3,ror#2
   2928 +	orr	r12,r7,r3,ror#2
   2929 +	and	r12,r12,r4,ror#2
   2930 +	orr	r11,r11,r12			@ F_40_59(B,C,D)
   2931 +	add	r5,r5,r11			@ E+=F_40_59(B,C,D)
   2932 +	ldr	r10,[r14,#15*4]
   2933 +	ldr	r11,[r14,#13*4]
   2934 +	ldr	r12,[r14,#7*4]
   2935 +	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
   2936 +	eor	r10,r10,r11
   2937 +	ldr	r11,[r14,#2*4]
   2938 +	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
   2939 +	eor	r10,r10,r12
   2940 +	eor	r10,r10,r11
   2941 +	mov	r10,r10,ror#31
   2942 +	add	r4,r4,r10			@ E+=X[i]
   2943 +	str	r10,[r14,#-4]!
   2944 +	and	r11,r6,r7,ror#2
   2945 +	orr	r12,r6,r7,ror#2
   2946 +	and	r12,r12,r3,ror#2
   2947 +	orr	r11,r11,r12			@ F_40_59(B,C,D)
   2948 +	add	r4,r4,r11			@ E+=F_40_59(B,C,D)
   2949 +	ldr	r10,[r14,#15*4]
   2950 +	ldr	r11,[r14,#13*4]
   2951 +	ldr	r12,[r14,#7*4]
   2952 +	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
   2953 +	eor	r10,r10,r11
   2954 +	ldr	r11,[r14,#2*4]
   2955 +	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
   2956 +	eor	r10,r10,r12
   2957 +	eor	r10,r10,r11
   2958 +	mov	r10,r10,ror#31
   2959 +	add	r3,r3,r10			@ E+=X[i]
   2960 +	str	r10,[r14,#-4]!
   2961 +	and	r11,r5,r6,ror#2
   2962 +	orr	r12,r5,r6,ror#2
   2963 +	and	r12,r12,r7,ror#2
   2964 +	orr	r11,r11,r12			@ F_40_59(B,C,D)
   2965 +	add	r3,r3,r11			@ E+=F_40_59(B,C,D)
   2966 +	teq	r14,sp
   2967 +	bne	.L_40_59		@ [+((12+5)*5+2)*4]
   2968 +
   2969 +	ldr	r8,.LK_60_79
   2970 +	sub	sp,sp,#20*4
   2971 +	cmp	sp,#0			@ set carry to denote 60_79
   2972 +	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
   2973 +.L_done:
   2974 +	add	sp,sp,#80*4		@ "deallocate" stack frame
   2975 +	ldmia	r0,{r8,r10,r11,r12,r14}
   2976 +	add	r3,r8,r3
   2977 +	add	r4,r10,r4
   2978 +	add	r5,r11,r5,ror#2
   2979 +	add	r6,r12,r6,ror#2
   2980 +	add	r7,r14,r7,ror#2
   2981 +	stmia	r0,{r3,r4,r5,r6,r7}
   2982 +	teq	r1,r2
   2983 +	bne	.Lloop			@ [+18], total 1307
   2984 +
   2985 +	ldmia	sp!,{r4-r12,lr}
   2986 +	tst	lr,#1
   2987 +	moveq	pc,lr			@ be binary compatible with V4, yet
   2988 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   2989 +.align	2
   2990 +.LK_00_19:	.word	0x5a827999
   2991 +.LK_20_39:	.word	0x6ed9eba1
   2992 +.LK_40_59:	.word	0x8f1bbcdc
   2993 +.LK_60_79:	.word	0xca62c1d6
   2994 +.size	sha1_block_data_order,.-sha1_block_data_order
    2995 +.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
   2996 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   2997 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha256-armv4.pl	2009-09-03 15:42:39.000000000 -0700
   2998 @@ -0,0 +1,180 @@
   2999 +#!/usr/bin/env perl
   3000 +
   3001 +# ====================================================================
    3002 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
   3003 +# project. The module is, however, dual licensed under OpenSSL and
   3004 +# CRYPTOGAMS licenses depending on where you obtain it. For further
   3005 +# details see http://www.openssl.org/~appro/cryptogams/.
   3006 +# ====================================================================
   3007 +
   3008 +# SHA256 block procedure for ARMv4. May 2007.
   3009 +
    3010 +# Performance is ~2x better than gcc 3.4 generated code and in
    3011 +# "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
    3012 +# per byte.
   3013 +
   3014 +$output=shift;
   3015 +open STDOUT,">$output";
   3016 +
   3017 +$ctx="r0";	$t0="r0";
   3018 +$inp="r1";
   3019 +$len="r2";	$t1="r2";
   3020 +$T1="r3";
   3021 +$A="r4";
   3022 +$B="r5";
   3023 +$C="r6";
   3024 +$D="r7";
   3025 +$E="r8";
   3026 +$F="r9";
   3027 +$G="r10";
   3028 +$H="r11";
   3029 +@V=($A,$B,$C,$D,$E,$F,$G,$H);
   3030 +$t2="r12";
   3031 +$Ktbl="r14";
   3032 +
   3033 +@Sigma0=( 2,13,22);
   3034 +@Sigma1=( 6,11,25);
   3035 +@sigma0=( 7,18, 3);
   3036 +@sigma1=(17,19,10);
   3037 +
   3038 +sub BODY_00_15 {
   3039 +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
   3040 +
   3041 +$code.=<<___ if ($i<16);
   3042 +	ldrb	$T1,[$inp,#3]			@ $i
   3043 +	ldrb	$t2,[$inp,#2]
   3044 +	ldrb	$t1,[$inp,#1]
   3045 +	ldrb	$t0,[$inp],#4
   3046 +	orr	$T1,$T1,$t2,lsl#8
   3047 +	orr	$T1,$T1,$t1,lsl#16
   3048 +	orr	$T1,$T1,$t0,lsl#24
   3049 +	`"str	$inp,[sp,#17*4]"	if ($i==15)`
   3050 +___
   3051 +$code.=<<___;
   3052 +	ldr	$t2,[$Ktbl],#4			@ *K256++
   3053 +	str	$T1,[sp,#`$i%16`*4]
   3054 +	mov	$t0,$e,ror#$Sigma1[0]
   3055 +	eor	$t0,$t0,$e,ror#$Sigma1[1]
   3056 +	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)
   3057 +	add	$T1,$T1,$t0
   3058 +	eor	$t1,$f,$g
   3059 +	and	$t1,$t1,$e
   3060 +	eor	$t1,$t1,$g			@ Ch(e,f,g)
   3061 +	add	$T1,$T1,$t1
   3062 +	add	$T1,$T1,$h
   3063 +	add	$T1,$T1,$t2
   3064 +	mov	$h,$a,ror#$Sigma0[0]
   3065 +	eor	$h,$h,$a,ror#$Sigma0[1]
   3066 +	eor	$h,$h,$a,ror#$Sigma0[2]		@ Sigma0(a)
   3067 +	orr	$t0,$a,$b
   3068 +	and	$t0,$t0,$c
   3069 +	and	$t1,$a,$b
   3070 +	orr	$t0,$t0,$t1			@ Maj(a,b,c)
   3071 +	add	$h,$h,$t0
   3072 +	add	$d,$d,$T1
   3073 +	add	$h,$h,$T1
   3074 +___
   3075 +}
   3076 +
   3077 +sub BODY_16_XX {
   3078 +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
   3079 +
   3080 +$code.=<<___;
   3081 +	ldr	$t1,[sp,#`($i+1)%16`*4]	@ $i
   3082 +	ldr	$t2,[sp,#`($i+14)%16`*4]
   3083 +	ldr	$T1,[sp,#`($i+0)%16`*4]
   3084 +	ldr	$inp,[sp,#`($i+9)%16`*4]
   3085 +	mov	$t0,$t1,ror#$sigma0[0]
   3086 +	eor	$t0,$t0,$t1,ror#$sigma0[1]
   3087 +	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
   3088 +	mov	$t1,$t2,ror#$sigma1[0]
   3089 +	eor	$t1,$t1,$t2,ror#$sigma1[1]
   3090 +	eor	$t1,$t1,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
   3091 +	add	$T1,$T1,$t0
   3092 +	add	$T1,$T1,$t1
   3093 +	add	$T1,$T1,$inp
   3094 +___
   3095 +	&BODY_00_15(@_);
   3096 +}
   3097 +
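BODY_00_15 emits one complete SHA-256 round (Sigma1 and Ch feeding T1, Sigma0 and Maj feeding T2, with Maj computed as ((a|b)&c)|(a&b)), and BODY_16_XX prepends the sigma0/sigma1 message-schedule update. The same step in C (a sketch with illustrative names; X[] is the rolling 16-word window the module keeps at sp):

    #include <stdint.h>

    static uint32_t ror(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    /* One round plus, for i >= 16, the schedule update; s[] holds the
     * state a..h and Ki is the K256[i] constant. */
    static void sha256_round_ref(uint32_t s[8], uint32_t X[16],
                                 uint32_t Ki, int i)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7], T1, T2;

        if (i >= 16) {                                    /* BODY_16_XX */
            uint32_t x1 = X[(i + 1) & 15], x14 = X[(i + 14) & 15];
            X[i & 15] += X[(i + 9) & 15]
                       + (ror(x1,   7) ^ ror(x1,  18) ^ (x1  >>  3))
                       + (ror(x14, 17) ^ ror(x14, 19) ^ (x14 >> 10));
        }
        T1 = h + (ror(e, 6) ^ ror(e, 11) ^ ror(e, 25))    /* Sigma1(e) */
               + (((f ^ g) & e) ^ g)                      /* Ch(e,f,g) */
               + Ki + X[i & 15];
        T2 = (ror(a, 2) ^ ror(a, 13) ^ ror(a, 22))        /* Sigma0(a) */
           + (((a | b) & c) | (a & b));                   /* Maj(a,b,c) */
        s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;
        s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;
    }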
   3098 +$code=<<___;
   3099 +.text
   3100 +.code	32
   3101 +
   3102 +.type	K256,%object
   3103 +.align	5
   3104 +K256:
   3105 +.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
   3106 +.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
   3107 +.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
   3108 +.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
   3109 +.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
   3110 +.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
   3111 +.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
   3112 +.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
   3113 +.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
   3114 +.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
   3115 +.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
   3116 +.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
   3117 +.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
   3118 +.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
   3119 +.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
   3120 +.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
   3121 +.size	K256,.-K256
   3122 +
   3123 +.global	sha256_block_data_order
   3124 +.type	sha256_block_data_order,%function
   3125 +sha256_block_data_order:
   3126 +	sub	r3,pc,#8		@ sha256_block_data_order
   3127 +	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
   3128 +	stmdb	sp!,{$ctx,$inp,$len,r4-r12,lr}
   3129 +	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
   3130 +	sub	$Ktbl,r3,#256		@ K256
   3131 +	sub	sp,sp,#16*4		@ alloca(X[16])
   3132 +.Loop:
   3133 +___
   3134 +for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
   3135 +$code.=".Lrounds_16_xx:\n";
   3136 +for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
   3137 +$code.=<<___;
   3138 +	and	$t2,$t2,#0xff
   3139 +	cmp	$t2,#0xf2
   3140 +	bne	.Lrounds_16_xx
   3141 +
   3142 +	ldr	$T1,[sp,#16*4]		@ pull ctx
   3143 +	ldr	$t0,[$T1,#0]
   3144 +	ldr	$t1,[$T1,#4]
   3145 +	ldr	$t2,[$T1,#8]
   3146 +	add	$A,$A,$t0
   3147 +	ldr	$t0,[$T1,#12]
   3148 +	add	$B,$B,$t1
   3149 +	ldr	$t1,[$T1,#16]
   3150 +	add	$C,$C,$t2
   3151 +	ldr	$t2,[$T1,#20]
   3152 +	add	$D,$D,$t0
   3153 +	ldr	$t0,[$T1,#24]
   3154 +	add	$E,$E,$t1
   3155 +	ldr	$t1,[$T1,#28]
   3156 +	add	$F,$F,$t2
   3157 +	ldr	$inp,[sp,#17*4]		@ pull inp
   3158 +	ldr	$t2,[sp,#18*4]		@ pull inp+len
   3159 +	add	$G,$G,$t0
   3160 +	add	$H,$H,$t1
   3161 +	stmia	$T1,{$A,$B,$C,$D,$E,$F,$G,$H}
   3162 +	cmp	$inp,$t2
   3163 +	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
   3164 +	bne	.Loop
   3165 +
   3166 +	add	sp,sp,#`16+3`*4	@ destroy frame
   3167 +	ldmia	sp!,{r4-r12,lr}
   3168 +	tst	lr,#1
   3169 +	moveq	pc,lr			@ be binary compatible with V4, yet
   3170 +	bx	lr			@ interoperable with Thumb ISA:-)
   3171 +.size   sha256_block_data_order,.-sha256_block_data_order
   3172 +.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   3173 +___
   3174 +
   3175 +$code =~ s/\`([^\`]*)\`/eval $1/gem;
   3176 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   3177 +print $code;
   3178 +close STDOUT; # enforce flush
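
Note how the round loop above terminates without a round counter: `and $t2,$t2,#0xff; cmp $t2,#0xf2` inspects the low byte of the K256 constant consumed by the last round of each pass through .Lrounds_16_xx. Those passes cover rounds 16-31, 32-47 and 48-63, ending on K[31]=0x14292967, K[47]=0x106aa070 and K[63]=0xc67178f2 respectively, and only the final constant has low byte 0xf2. A one-line check (stand-alone, not part of the script):

	printf "%02x ", $_ & 0xff for 0x14292967, 0x106aa070, 0xc67178f2;   # prints: 67 70 f2
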
   3179 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   3180 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha256-armv4.s	2009-09-03 15:42:39.000000000 -0700
   3181 @@ -0,0 +1,1110 @@
   3182 +.text
   3183 +.code	32
   3184 +
   3185 +.type	K256,%object
   3186 +.align	5
   3187 +K256:
   3188 +.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
   3189 +.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
   3190 +.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
   3191 +.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
   3192 +.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
   3193 +.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
   3194 +.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
   3195 +.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
   3196 +.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
   3197 +.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
   3198 +.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
   3199 +.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
   3200 +.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
   3201 +.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
   3202 +.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
   3203 +.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
   3204 +.size	K256,.-K256
   3205 +
   3206 +.global	sha256_block_data_order
   3207 +.type	sha256_block_data_order,%function
   3208 +sha256_block_data_order:
   3209 +	sub	r3,pc,#8		@ sha256_block_data_order
   3210 +	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
   3211 +	stmdb	sp!,{r0,r1,r2,r4-r12,lr}
   3212 +	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
   3213 +	sub	r14,r3,#256		@ K256
   3214 +	sub	sp,sp,#16*4		@ alloca(X[16])
   3215 +.Loop:
   3216 +	ldrb	r3,[r1,#3]			@ 0
   3217 +	ldrb	r12,[r1,#2]
   3218 +	ldrb	r2,[r1,#1]
   3219 +	ldrb	r0,[r1],#4
   3220 +	orr	r3,r3,r12,lsl#8
   3221 +	orr	r3,r3,r2,lsl#16
   3222 +	orr	r3,r3,r0,lsl#24
   3223 +	
   3224 +	ldr	r12,[r14],#4			@ *K256++
   3225 +	str	r3,[sp,#0*4]
   3226 +	mov	r0,r8,ror#6
   3227 +	eor	r0,r0,r8,ror#11
   3228 +	eor	r0,r0,r8,ror#25	@ Sigma1(e)
   3229 +	add	r3,r3,r0
   3230 +	eor	r2,r9,r10
   3231 +	and	r2,r2,r8
   3232 +	eor	r2,r2,r10			@ Ch(e,f,g)
   3233 +	add	r3,r3,r2
   3234 +	add	r3,r3,r11
   3235 +	add	r3,r3,r12
   3236 +	mov	r11,r4,ror#2
   3237 +	eor	r11,r11,r4,ror#13
   3238 +	eor	r11,r11,r4,ror#22		@ Sigma0(a)
   3239 +	orr	r0,r4,r5
   3240 +	and	r0,r0,r6
   3241 +	and	r2,r4,r5
   3242 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3243 +	add	r11,r11,r0
   3244 +	add	r7,r7,r3
   3245 +	add	r11,r11,r3
   3246 +	ldrb	r3,[r1,#3]			@ 1
   3247 +	ldrb	r12,[r1,#2]
   3248 +	ldrb	r2,[r1,#1]
   3249 +	ldrb	r0,[r1],#4
   3250 +	orr	r3,r3,r12,lsl#8
   3251 +	orr	r3,r3,r2,lsl#16
   3252 +	orr	r3,r3,r0,lsl#24
   3253 +	
   3254 +	ldr	r12,[r14],#4			@ *K256++
   3255 +	str	r3,[sp,#1*4]
   3256 +	mov	r0,r7,ror#6
   3257 +	eor	r0,r0,r7,ror#11
   3258 +	eor	r0,r0,r7,ror#25	@ Sigma1(e)
   3259 +	add	r3,r3,r0
   3260 +	eor	r2,r8,r9
   3261 +	and	r2,r2,r7
   3262 +	eor	r2,r2,r9			@ Ch(e,f,g)
   3263 +	add	r3,r3,r2
   3264 +	add	r3,r3,r10
   3265 +	add	r3,r3,r12
   3266 +	mov	r10,r11,ror#2
   3267 +	eor	r10,r10,r11,ror#13
   3268 +	eor	r10,r10,r11,ror#22		@ Sigma0(a)
   3269 +	orr	r0,r11,r4
   3270 +	and	r0,r0,r5
   3271 +	and	r2,r11,r4
   3272 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3273 +	add	r10,r10,r0
   3274 +	add	r6,r6,r3
   3275 +	add	r10,r10,r3
   3276 +	ldrb	r3,[r1,#3]			@ 2
   3277 +	ldrb	r12,[r1,#2]
   3278 +	ldrb	r2,[r1,#1]
   3279 +	ldrb	r0,[r1],#4
   3280 +	orr	r3,r3,r12,lsl#8
   3281 +	orr	r3,r3,r2,lsl#16
   3282 +	orr	r3,r3,r0,lsl#24
   3283 +	
   3284 +	ldr	r12,[r14],#4			@ *K256++
   3285 +	str	r3,[sp,#2*4]
   3286 +	mov	r0,r6,ror#6
   3287 +	eor	r0,r0,r6,ror#11
   3288 +	eor	r0,r0,r6,ror#25	@ Sigma1(e)
   3289 +	add	r3,r3,r0
   3290 +	eor	r2,r7,r8
   3291 +	and	r2,r2,r6
   3292 +	eor	r2,r2,r8			@ Ch(e,f,g)
   3293 +	add	r3,r3,r2
   3294 +	add	r3,r3,r9
   3295 +	add	r3,r3,r12
   3296 +	mov	r9,r10,ror#2
   3297 +	eor	r9,r9,r10,ror#13
   3298 +	eor	r9,r9,r10,ror#22		@ Sigma0(a)
   3299 +	orr	r0,r10,r11
   3300 +	and	r0,r0,r4
   3301 +	and	r2,r10,r11
   3302 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3303 +	add	r9,r9,r0
   3304 +	add	r5,r5,r3
   3305 +	add	r9,r9,r3
   3306 +	ldrb	r3,[r1,#3]			@ 3
   3307 +	ldrb	r12,[r1,#2]
   3308 +	ldrb	r2,[r1,#1]
   3309 +	ldrb	r0,[r1],#4
   3310 +	orr	r3,r3,r12,lsl#8
   3311 +	orr	r3,r3,r2,lsl#16
   3312 +	orr	r3,r3,r0,lsl#24
   3313 +	
   3314 +	ldr	r12,[r14],#4			@ *K256++
   3315 +	str	r3,[sp,#3*4]
   3316 +	mov	r0,r5,ror#6
   3317 +	eor	r0,r0,r5,ror#11
   3318 +	eor	r0,r0,r5,ror#25	@ Sigma1(e)
   3319 +	add	r3,r3,r0
   3320 +	eor	r2,r6,r7
   3321 +	and	r2,r2,r5
   3322 +	eor	r2,r2,r7			@ Ch(e,f,g)
   3323 +	add	r3,r3,r2
   3324 +	add	r3,r3,r8
   3325 +	add	r3,r3,r12
   3326 +	mov	r8,r9,ror#2
   3327 +	eor	r8,r8,r9,ror#13
   3328 +	eor	r8,r8,r9,ror#22		@ Sigma0(a)
   3329 +	orr	r0,r9,r10
   3330 +	and	r0,r0,r11
   3331 +	and	r2,r9,r10
   3332 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3333 +	add	r8,r8,r0
   3334 +	add	r4,r4,r3
   3335 +	add	r8,r8,r3
   3336 +	ldrb	r3,[r1,#3]			@ 4
   3337 +	ldrb	r12,[r1,#2]
   3338 +	ldrb	r2,[r1,#1]
   3339 +	ldrb	r0,[r1],#4
   3340 +	orr	r3,r3,r12,lsl#8
   3341 +	orr	r3,r3,r2,lsl#16
   3342 +	orr	r3,r3,r0,lsl#24
   3343 +	
   3344 +	ldr	r12,[r14],#4			@ *K256++
   3345 +	str	r3,[sp,#4*4]
   3346 +	mov	r0,r4,ror#6
   3347 +	eor	r0,r0,r4,ror#11
   3348 +	eor	r0,r0,r4,ror#25	@ Sigma1(e)
   3349 +	add	r3,r3,r0
   3350 +	eor	r2,r5,r6
   3351 +	and	r2,r2,r4
   3352 +	eor	r2,r2,r6			@ Ch(e,f,g)
   3353 +	add	r3,r3,r2
   3354 +	add	r3,r3,r7
   3355 +	add	r3,r3,r12
   3356 +	mov	r7,r8,ror#2
   3357 +	eor	r7,r7,r8,ror#13
   3358 +	eor	r7,r7,r8,ror#22		@ Sigma0(a)
   3359 +	orr	r0,r8,r9
   3360 +	and	r0,r0,r10
   3361 +	and	r2,r8,r9
   3362 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3363 +	add	r7,r7,r0
   3364 +	add	r11,r11,r3
   3365 +	add	r7,r7,r3
   3366 +	ldrb	r3,[r1,#3]			@ 5
   3367 +	ldrb	r12,[r1,#2]
   3368 +	ldrb	r2,[r1,#1]
   3369 +	ldrb	r0,[r1],#4
   3370 +	orr	r3,r3,r12,lsl#8
   3371 +	orr	r3,r3,r2,lsl#16
   3372 +	orr	r3,r3,r0,lsl#24
   3373 +	
   3374 +	ldr	r12,[r14],#4			@ *K256++
   3375 +	str	r3,[sp,#5*4]
   3376 +	mov	r0,r11,ror#6
   3377 +	eor	r0,r0,r11,ror#11
   3378 +	eor	r0,r0,r11,ror#25	@ Sigma1(e)
   3379 +	add	r3,r3,r0
   3380 +	eor	r2,r4,r5
   3381 +	and	r2,r2,r11
   3382 +	eor	r2,r2,r5			@ Ch(e,f,g)
   3383 +	add	r3,r3,r2
   3384 +	add	r3,r3,r6
   3385 +	add	r3,r3,r12
   3386 +	mov	r6,r7,ror#2
   3387 +	eor	r6,r6,r7,ror#13
   3388 +	eor	r6,r6,r7,ror#22		@ Sigma0(a)
   3389 +	orr	r0,r7,r8
   3390 +	and	r0,r0,r9
   3391 +	and	r2,r7,r8
   3392 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3393 +	add	r6,r6,r0
   3394 +	add	r10,r10,r3
   3395 +	add	r6,r6,r3
   3396 +	ldrb	r3,[r1,#3]			@ 6
   3397 +	ldrb	r12,[r1,#2]
   3398 +	ldrb	r2,[r1,#1]
   3399 +	ldrb	r0,[r1],#4
   3400 +	orr	r3,r3,r12,lsl#8
   3401 +	orr	r3,r3,r2,lsl#16
   3402 +	orr	r3,r3,r0,lsl#24
   3403 +	
   3404 +	ldr	r12,[r14],#4			@ *K256++
   3405 +	str	r3,[sp,#6*4]
   3406 +	mov	r0,r10,ror#6
   3407 +	eor	r0,r0,r10,ror#11
   3408 +	eor	r0,r0,r10,ror#25	@ Sigma1(e)
   3409 +	add	r3,r3,r0
   3410 +	eor	r2,r11,r4
   3411 +	and	r2,r2,r10
   3412 +	eor	r2,r2,r4			@ Ch(e,f,g)
   3413 +	add	r3,r3,r2
   3414 +	add	r3,r3,r5
   3415 +	add	r3,r3,r12
   3416 +	mov	r5,r6,ror#2
   3417 +	eor	r5,r5,r6,ror#13
   3418 +	eor	r5,r5,r6,ror#22		@ Sigma0(a)
   3419 +	orr	r0,r6,r7
   3420 +	and	r0,r0,r8
   3421 +	and	r2,r6,r7
   3422 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3423 +	add	r5,r5,r0
   3424 +	add	r9,r9,r3
   3425 +	add	r5,r5,r3
   3426 +	ldrb	r3,[r1,#3]			@ 7
   3427 +	ldrb	r12,[r1,#2]
   3428 +	ldrb	r2,[r1,#1]
   3429 +	ldrb	r0,[r1],#4
   3430 +	orr	r3,r3,r12,lsl#8
   3431 +	orr	r3,r3,r2,lsl#16
   3432 +	orr	r3,r3,r0,lsl#24
   3433 +	
   3434 +	ldr	r12,[r14],#4			@ *K256++
   3435 +	str	r3,[sp,#7*4]
   3436 +	mov	r0,r9,ror#6
   3437 +	eor	r0,r0,r9,ror#11
   3438 +	eor	r0,r0,r9,ror#25	@ Sigma1(e)
   3439 +	add	r3,r3,r0
   3440 +	eor	r2,r10,r11
   3441 +	and	r2,r2,r9
   3442 +	eor	r2,r2,r11			@ Ch(e,f,g)
   3443 +	add	r3,r3,r2
   3444 +	add	r3,r3,r4
   3445 +	add	r3,r3,r12
   3446 +	mov	r4,r5,ror#2
   3447 +	eor	r4,r4,r5,ror#13
   3448 +	eor	r4,r4,r5,ror#22		@ Sigma0(a)
   3449 +	orr	r0,r5,r6
   3450 +	and	r0,r0,r7
   3451 +	and	r2,r5,r6
   3452 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3453 +	add	r4,r4,r0
   3454 +	add	r8,r8,r3
   3455 +	add	r4,r4,r3
   3456 +	ldrb	r3,[r1,#3]			@ 8
   3457 +	ldrb	r12,[r1,#2]
   3458 +	ldrb	r2,[r1,#1]
   3459 +	ldrb	r0,[r1],#4
   3460 +	orr	r3,r3,r12,lsl#8
   3461 +	orr	r3,r3,r2,lsl#16
   3462 +	orr	r3,r3,r0,lsl#24
   3463 +	
   3464 +	ldr	r12,[r14],#4			@ *K256++
   3465 +	str	r3,[sp,#8*4]
   3466 +	mov	r0,r8,ror#6
   3467 +	eor	r0,r0,r8,ror#11
   3468 +	eor	r0,r0,r8,ror#25	@ Sigma1(e)
   3469 +	add	r3,r3,r0
   3470 +	eor	r2,r9,r10
   3471 +	and	r2,r2,r8
   3472 +	eor	r2,r2,r10			@ Ch(e,f,g)
   3473 +	add	r3,r3,r2
   3474 +	add	r3,r3,r11
   3475 +	add	r3,r3,r12
   3476 +	mov	r11,r4,ror#2
   3477 +	eor	r11,r11,r4,ror#13
   3478 +	eor	r11,r11,r4,ror#22		@ Sigma0(a)
   3479 +	orr	r0,r4,r5
   3480 +	and	r0,r0,r6
   3481 +	and	r2,r4,r5
   3482 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3483 +	add	r11,r11,r0
   3484 +	add	r7,r7,r3
   3485 +	add	r11,r11,r3
   3486 +	ldrb	r3,[r1,#3]			@ 9
   3487 +	ldrb	r12,[r1,#2]
   3488 +	ldrb	r2,[r1,#1]
   3489 +	ldrb	r0,[r1],#4
   3490 +	orr	r3,r3,r12,lsl#8
   3491 +	orr	r3,r3,r2,lsl#16
   3492 +	orr	r3,r3,r0,lsl#24
   3493 +	
   3494 +	ldr	r12,[r14],#4			@ *K256++
   3495 +	str	r3,[sp,#9*4]
   3496 +	mov	r0,r7,ror#6
   3497 +	eor	r0,r0,r7,ror#11
   3498 +	eor	r0,r0,r7,ror#25	@ Sigma1(e)
   3499 +	add	r3,r3,r0
   3500 +	eor	r2,r8,r9
   3501 +	and	r2,r2,r7
   3502 +	eor	r2,r2,r9			@ Ch(e,f,g)
   3503 +	add	r3,r3,r2
   3504 +	add	r3,r3,r10
   3505 +	add	r3,r3,r12
   3506 +	mov	r10,r11,ror#2
   3507 +	eor	r10,r10,r11,ror#13
   3508 +	eor	r10,r10,r11,ror#22		@ Sigma0(a)
   3509 +	orr	r0,r11,r4
   3510 +	and	r0,r0,r5
   3511 +	and	r2,r11,r4
   3512 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3513 +	add	r10,r10,r0
   3514 +	add	r6,r6,r3
   3515 +	add	r10,r10,r3
   3516 +	ldrb	r3,[r1,#3]			@ 10
   3517 +	ldrb	r12,[r1,#2]
   3518 +	ldrb	r2,[r1,#1]
   3519 +	ldrb	r0,[r1],#4
   3520 +	orr	r3,r3,r12,lsl#8
   3521 +	orr	r3,r3,r2,lsl#16
   3522 +	orr	r3,r3,r0,lsl#24
   3523 +	
   3524 +	ldr	r12,[r14],#4			@ *K256++
   3525 +	str	r3,[sp,#10*4]
   3526 +	mov	r0,r6,ror#6
   3527 +	eor	r0,r0,r6,ror#11
   3528 +	eor	r0,r0,r6,ror#25	@ Sigma1(e)
   3529 +	add	r3,r3,r0
   3530 +	eor	r2,r7,r8
   3531 +	and	r2,r2,r6
   3532 +	eor	r2,r2,r8			@ Ch(e,f,g)
   3533 +	add	r3,r3,r2
   3534 +	add	r3,r3,r9
   3535 +	add	r3,r3,r12
   3536 +	mov	r9,r10,ror#2
   3537 +	eor	r9,r9,r10,ror#13
   3538 +	eor	r9,r9,r10,ror#22		@ Sigma0(a)
   3539 +	orr	r0,r10,r11
   3540 +	and	r0,r0,r4
   3541 +	and	r2,r10,r11
   3542 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3543 +	add	r9,r9,r0
   3544 +	add	r5,r5,r3
   3545 +	add	r9,r9,r3
   3546 +	ldrb	r3,[r1,#3]			@ 11
   3547 +	ldrb	r12,[r1,#2]
   3548 +	ldrb	r2,[r1,#1]
   3549 +	ldrb	r0,[r1],#4
   3550 +	orr	r3,r3,r12,lsl#8
   3551 +	orr	r3,r3,r2,lsl#16
   3552 +	orr	r3,r3,r0,lsl#24
   3553 +	
   3554 +	ldr	r12,[r14],#4			@ *K256++
   3555 +	str	r3,[sp,#11*4]
   3556 +	mov	r0,r5,ror#6
   3557 +	eor	r0,r0,r5,ror#11
   3558 +	eor	r0,r0,r5,ror#25	@ Sigma1(e)
   3559 +	add	r3,r3,r0
   3560 +	eor	r2,r6,r7
   3561 +	and	r2,r2,r5
   3562 +	eor	r2,r2,r7			@ Ch(e,f,g)
   3563 +	add	r3,r3,r2
   3564 +	add	r3,r3,r8
   3565 +	add	r3,r3,r12
   3566 +	mov	r8,r9,ror#2
   3567 +	eor	r8,r8,r9,ror#13
   3568 +	eor	r8,r8,r9,ror#22		@ Sigma0(a)
   3569 +	orr	r0,r9,r10
   3570 +	and	r0,r0,r11
   3571 +	and	r2,r9,r10
   3572 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3573 +	add	r8,r8,r0
   3574 +	add	r4,r4,r3
   3575 +	add	r8,r8,r3
   3576 +	ldrb	r3,[r1,#3]			@ 12
   3577 +	ldrb	r12,[r1,#2]
   3578 +	ldrb	r2,[r1,#1]
   3579 +	ldrb	r0,[r1],#4
   3580 +	orr	r3,r3,r12,lsl#8
   3581 +	orr	r3,r3,r2,lsl#16
   3582 +	orr	r3,r3,r0,lsl#24
   3583 +	
   3584 +	ldr	r12,[r14],#4			@ *K256++
   3585 +	str	r3,[sp,#12*4]
   3586 +	mov	r0,r4,ror#6
   3587 +	eor	r0,r0,r4,ror#11
   3588 +	eor	r0,r0,r4,ror#25	@ Sigma1(e)
   3589 +	add	r3,r3,r0
   3590 +	eor	r2,r5,r6
   3591 +	and	r2,r2,r4
   3592 +	eor	r2,r2,r6			@ Ch(e,f,g)
   3593 +	add	r3,r3,r2
   3594 +	add	r3,r3,r7
   3595 +	add	r3,r3,r12
   3596 +	mov	r7,r8,ror#2
   3597 +	eor	r7,r7,r8,ror#13
   3598 +	eor	r7,r7,r8,ror#22		@ Sigma0(a)
   3599 +	orr	r0,r8,r9
   3600 +	and	r0,r0,r10
   3601 +	and	r2,r8,r9
   3602 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3603 +	add	r7,r7,r0
   3604 +	add	r11,r11,r3
   3605 +	add	r7,r7,r3
   3606 +	ldrb	r3,[r1,#3]			@ 13
   3607 +	ldrb	r12,[r1,#2]
   3608 +	ldrb	r2,[r1,#1]
   3609 +	ldrb	r0,[r1],#4
   3610 +	orr	r3,r3,r12,lsl#8
   3611 +	orr	r3,r3,r2,lsl#16
   3612 +	orr	r3,r3,r0,lsl#24
   3613 +	
   3614 +	ldr	r12,[r14],#4			@ *K256++
   3615 +	str	r3,[sp,#13*4]
   3616 +	mov	r0,r11,ror#6
   3617 +	eor	r0,r0,r11,ror#11
   3618 +	eor	r0,r0,r11,ror#25	@ Sigma1(e)
   3619 +	add	r3,r3,r0
   3620 +	eor	r2,r4,r5
   3621 +	and	r2,r2,r11
   3622 +	eor	r2,r2,r5			@ Ch(e,f,g)
   3623 +	add	r3,r3,r2
   3624 +	add	r3,r3,r6
   3625 +	add	r3,r3,r12
   3626 +	mov	r6,r7,ror#2
   3627 +	eor	r6,r6,r7,ror#13
   3628 +	eor	r6,r6,r7,ror#22		@ Sigma0(a)
   3629 +	orr	r0,r7,r8
   3630 +	and	r0,r0,r9
   3631 +	and	r2,r7,r8
   3632 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3633 +	add	r6,r6,r0
   3634 +	add	r10,r10,r3
   3635 +	add	r6,r6,r3
   3636 +	ldrb	r3,[r1,#3]			@ 14
   3637 +	ldrb	r12,[r1,#2]
   3638 +	ldrb	r2,[r1,#1]
   3639 +	ldrb	r0,[r1],#4
   3640 +	orr	r3,r3,r12,lsl#8
   3641 +	orr	r3,r3,r2,lsl#16
   3642 +	orr	r3,r3,r0,lsl#24
   3643 +	
   3644 +	ldr	r12,[r14],#4			@ *K256++
   3645 +	str	r3,[sp,#14*4]
   3646 +	mov	r0,r10,ror#6
   3647 +	eor	r0,r0,r10,ror#11
   3648 +	eor	r0,r0,r10,ror#25	@ Sigma1(e)
   3649 +	add	r3,r3,r0
   3650 +	eor	r2,r11,r4
   3651 +	and	r2,r2,r10
   3652 +	eor	r2,r2,r4			@ Ch(e,f,g)
   3653 +	add	r3,r3,r2
   3654 +	add	r3,r3,r5
   3655 +	add	r3,r3,r12
   3656 +	mov	r5,r6,ror#2
   3657 +	eor	r5,r5,r6,ror#13
   3658 +	eor	r5,r5,r6,ror#22		@ Sigma0(a)
   3659 +	orr	r0,r6,r7
   3660 +	and	r0,r0,r8
   3661 +	and	r2,r6,r7
   3662 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3663 +	add	r5,r5,r0
   3664 +	add	r9,r9,r3
   3665 +	add	r5,r5,r3
   3666 +	ldrb	r3,[r1,#3]			@ 15
   3667 +	ldrb	r12,[r1,#2]
   3668 +	ldrb	r2,[r1,#1]
   3669 +	ldrb	r0,[r1],#4
   3670 +	orr	r3,r3,r12,lsl#8
   3671 +	orr	r3,r3,r2,lsl#16
   3672 +	orr	r3,r3,r0,lsl#24
   3673 +	str	r1,[sp,#17*4]
   3674 +	ldr	r12,[r14],#4			@ *K256++
   3675 +	str	r3,[sp,#15*4]
   3676 +	mov	r0,r9,ror#6
   3677 +	eor	r0,r0,r9,ror#11
   3678 +	eor	r0,r0,r9,ror#25	@ Sigma1(e)
   3679 +	add	r3,r3,r0
   3680 +	eor	r2,r10,r11
   3681 +	and	r2,r2,r9
   3682 +	eor	r2,r2,r11			@ Ch(e,f,g)
   3683 +	add	r3,r3,r2
   3684 +	add	r3,r3,r4
   3685 +	add	r3,r3,r12
   3686 +	mov	r4,r5,ror#2
   3687 +	eor	r4,r4,r5,ror#13
   3688 +	eor	r4,r4,r5,ror#22		@ Sigma0(a)
   3689 +	orr	r0,r5,r6
   3690 +	and	r0,r0,r7
   3691 +	and	r2,r5,r6
   3692 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3693 +	add	r4,r4,r0
   3694 +	add	r8,r8,r3
   3695 +	add	r4,r4,r3
   3696 +.Lrounds_16_xx:
   3697 +	ldr	r2,[sp,#1*4]	@ 16
   3698 +	ldr	r12,[sp,#14*4]
   3699 +	ldr	r3,[sp,#0*4]
   3700 +	ldr	r1,[sp,#9*4]
   3701 +	mov	r0,r2,ror#7
   3702 +	eor	r0,r0,r2,ror#18
   3703 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3704 +	mov	r2,r12,ror#17
   3705 +	eor	r2,r2,r12,ror#19
   3706 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3707 +	add	r3,r3,r0
   3708 +	add	r3,r3,r2
   3709 +	add	r3,r3,r1
   3710 +	ldr	r12,[r14],#4			@ *K256++
   3711 +	str	r3,[sp,#0*4]
   3712 +	mov	r0,r8,ror#6
   3713 +	eor	r0,r0,r8,ror#11
   3714 +	eor	r0,r0,r8,ror#25	@ Sigma1(e)
   3715 +	add	r3,r3,r0
   3716 +	eor	r2,r9,r10
   3717 +	and	r2,r2,r8
   3718 +	eor	r2,r2,r10			@ Ch(e,f,g)
   3719 +	add	r3,r3,r2
   3720 +	add	r3,r3,r11
   3721 +	add	r3,r3,r12
   3722 +	mov	r11,r4,ror#2
   3723 +	eor	r11,r11,r4,ror#13
   3724 +	eor	r11,r11,r4,ror#22		@ Sigma0(a)
   3725 +	orr	r0,r4,r5
   3726 +	and	r0,r0,r6
   3727 +	and	r2,r4,r5
   3728 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3729 +	add	r11,r11,r0
   3730 +	add	r7,r7,r3
   3731 +	add	r11,r11,r3
   3732 +	ldr	r2,[sp,#2*4]	@ 17
   3733 +	ldr	r12,[sp,#15*4]
   3734 +	ldr	r3,[sp,#1*4]
   3735 +	ldr	r1,[sp,#10*4]
   3736 +	mov	r0,r2,ror#7
   3737 +	eor	r0,r0,r2,ror#18
   3738 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3739 +	mov	r2,r12,ror#17
   3740 +	eor	r2,r2,r12,ror#19
   3741 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3742 +	add	r3,r3,r0
   3743 +	add	r3,r3,r2
   3744 +	add	r3,r3,r1
   3745 +	ldr	r12,[r14],#4			@ *K256++
   3746 +	str	r3,[sp,#1*4]
   3747 +	mov	r0,r7,ror#6
   3748 +	eor	r0,r0,r7,ror#11
   3749 +	eor	r0,r0,r7,ror#25	@ Sigma1(e)
   3750 +	add	r3,r3,r0
   3751 +	eor	r2,r8,r9
   3752 +	and	r2,r2,r7
   3753 +	eor	r2,r2,r9			@ Ch(e,f,g)
   3754 +	add	r3,r3,r2
   3755 +	add	r3,r3,r10
   3756 +	add	r3,r3,r12
   3757 +	mov	r10,r11,ror#2
   3758 +	eor	r10,r10,r11,ror#13
   3759 +	eor	r10,r10,r11,ror#22		@ Sigma0(a)
   3760 +	orr	r0,r11,r4
   3761 +	and	r0,r0,r5
   3762 +	and	r2,r11,r4
   3763 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3764 +	add	r10,r10,r0
   3765 +	add	r6,r6,r3
   3766 +	add	r10,r10,r3
   3767 +	ldr	r2,[sp,#3*4]	@ 18
   3768 +	ldr	r12,[sp,#0*4]
   3769 +	ldr	r3,[sp,#2*4]
   3770 +	ldr	r1,[sp,#11*4]
   3771 +	mov	r0,r2,ror#7
   3772 +	eor	r0,r0,r2,ror#18
   3773 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3774 +	mov	r2,r12,ror#17
   3775 +	eor	r2,r2,r12,ror#19
   3776 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3777 +	add	r3,r3,r0
   3778 +	add	r3,r3,r2
   3779 +	add	r3,r3,r1
   3780 +	ldr	r12,[r14],#4			@ *K256++
   3781 +	str	r3,[sp,#2*4]
   3782 +	mov	r0,r6,ror#6
   3783 +	eor	r0,r0,r6,ror#11
   3784 +	eor	r0,r0,r6,ror#25	@ Sigma1(e)
   3785 +	add	r3,r3,r0
   3786 +	eor	r2,r7,r8
   3787 +	and	r2,r2,r6
   3788 +	eor	r2,r2,r8			@ Ch(e,f,g)
   3789 +	add	r3,r3,r2
   3790 +	add	r3,r3,r9
   3791 +	add	r3,r3,r12
   3792 +	mov	r9,r10,ror#2
   3793 +	eor	r9,r9,r10,ror#13
   3794 +	eor	r9,r9,r10,ror#22		@ Sigma0(a)
   3795 +	orr	r0,r10,r11
   3796 +	and	r0,r0,r4
   3797 +	and	r2,r10,r11
   3798 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3799 +	add	r9,r9,r0
   3800 +	add	r5,r5,r3
   3801 +	add	r9,r9,r3
   3802 +	ldr	r2,[sp,#4*4]	@ 19
   3803 +	ldr	r12,[sp,#1*4]
   3804 +	ldr	r3,[sp,#3*4]
   3805 +	ldr	r1,[sp,#12*4]
   3806 +	mov	r0,r2,ror#7
   3807 +	eor	r0,r0,r2,ror#18
   3808 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3809 +	mov	r2,r12,ror#17
   3810 +	eor	r2,r2,r12,ror#19
   3811 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3812 +	add	r3,r3,r0
   3813 +	add	r3,r3,r2
   3814 +	add	r3,r3,r1
   3815 +	ldr	r12,[r14],#4			@ *K256++
   3816 +	str	r3,[sp,#3*4]
   3817 +	mov	r0,r5,ror#6
   3818 +	eor	r0,r0,r5,ror#11
   3819 +	eor	r0,r0,r5,ror#25	@ Sigma1(e)
   3820 +	add	r3,r3,r0
   3821 +	eor	r2,r6,r7
   3822 +	and	r2,r2,r5
   3823 +	eor	r2,r2,r7			@ Ch(e,f,g)
   3824 +	add	r3,r3,r2
   3825 +	add	r3,r3,r8
   3826 +	add	r3,r3,r12
   3827 +	mov	r8,r9,ror#2
   3828 +	eor	r8,r8,r9,ror#13
   3829 +	eor	r8,r8,r9,ror#22		@ Sigma0(a)
   3830 +	orr	r0,r9,r10
   3831 +	and	r0,r0,r11
   3832 +	and	r2,r9,r10
   3833 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3834 +	add	r8,r8,r0
   3835 +	add	r4,r4,r3
   3836 +	add	r8,r8,r3
   3837 +	ldr	r2,[sp,#5*4]	@ 20
   3838 +	ldr	r12,[sp,#2*4]
   3839 +	ldr	r3,[sp,#4*4]
   3840 +	ldr	r1,[sp,#13*4]
   3841 +	mov	r0,r2,ror#7
   3842 +	eor	r0,r0,r2,ror#18
   3843 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3844 +	mov	r2,r12,ror#17
   3845 +	eor	r2,r2,r12,ror#19
   3846 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3847 +	add	r3,r3,r0
   3848 +	add	r3,r3,r2
   3849 +	add	r3,r3,r1
   3850 +	ldr	r12,[r14],#4			@ *K256++
   3851 +	str	r3,[sp,#4*4]
   3852 +	mov	r0,r4,ror#6
   3853 +	eor	r0,r0,r4,ror#11
   3854 +	eor	r0,r0,r4,ror#25	@ Sigma1(e)
   3855 +	add	r3,r3,r0
   3856 +	eor	r2,r5,r6
   3857 +	and	r2,r2,r4
   3858 +	eor	r2,r2,r6			@ Ch(e,f,g)
   3859 +	add	r3,r3,r2
   3860 +	add	r3,r3,r7
   3861 +	add	r3,r3,r12
   3862 +	mov	r7,r8,ror#2
   3863 +	eor	r7,r7,r8,ror#13
   3864 +	eor	r7,r7,r8,ror#22		@ Sigma0(a)
   3865 +	orr	r0,r8,r9
   3866 +	and	r0,r0,r10
   3867 +	and	r2,r8,r9
   3868 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3869 +	add	r7,r7,r0
   3870 +	add	r11,r11,r3
   3871 +	add	r7,r7,r3
   3872 +	ldr	r2,[sp,#6*4]	@ 21
   3873 +	ldr	r12,[sp,#3*4]
   3874 +	ldr	r3,[sp,#5*4]
   3875 +	ldr	r1,[sp,#14*4]
   3876 +	mov	r0,r2,ror#7
   3877 +	eor	r0,r0,r2,ror#18
   3878 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3879 +	mov	r2,r12,ror#17
   3880 +	eor	r2,r2,r12,ror#19
   3881 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3882 +	add	r3,r3,r0
   3883 +	add	r3,r3,r2
   3884 +	add	r3,r3,r1
   3885 +	ldr	r12,[r14],#4			@ *K256++
   3886 +	str	r3,[sp,#5*4]
   3887 +	mov	r0,r11,ror#6
   3888 +	eor	r0,r0,r11,ror#11
   3889 +	eor	r0,r0,r11,ror#25	@ Sigma1(e)
   3890 +	add	r3,r3,r0
   3891 +	eor	r2,r4,r5
   3892 +	and	r2,r2,r11
   3893 +	eor	r2,r2,r5			@ Ch(e,f,g)
   3894 +	add	r3,r3,r2
   3895 +	add	r3,r3,r6
   3896 +	add	r3,r3,r12
   3897 +	mov	r6,r7,ror#2
   3898 +	eor	r6,r6,r7,ror#13
   3899 +	eor	r6,r6,r7,ror#22		@ Sigma0(a)
   3900 +	orr	r0,r7,r8
   3901 +	and	r0,r0,r9
   3902 +	and	r2,r7,r8
   3903 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3904 +	add	r6,r6,r0
   3905 +	add	r10,r10,r3
   3906 +	add	r6,r6,r3
   3907 +	ldr	r2,[sp,#7*4]	@ 22
   3908 +	ldr	r12,[sp,#4*4]
   3909 +	ldr	r3,[sp,#6*4]
   3910 +	ldr	r1,[sp,#15*4]
   3911 +	mov	r0,r2,ror#7
   3912 +	eor	r0,r0,r2,ror#18
   3913 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3914 +	mov	r2,r12,ror#17
   3915 +	eor	r2,r2,r12,ror#19
   3916 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3917 +	add	r3,r3,r0
   3918 +	add	r3,r3,r2
   3919 +	add	r3,r3,r1
   3920 +	ldr	r12,[r14],#4			@ *K256++
   3921 +	str	r3,[sp,#6*4]
   3922 +	mov	r0,r10,ror#6
   3923 +	eor	r0,r0,r10,ror#11
   3924 +	eor	r0,r0,r10,ror#25	@ Sigma1(e)
   3925 +	add	r3,r3,r0
   3926 +	eor	r2,r11,r4
   3927 +	and	r2,r2,r10
   3928 +	eor	r2,r2,r4			@ Ch(e,f,g)
   3929 +	add	r3,r3,r2
   3930 +	add	r3,r3,r5
   3931 +	add	r3,r3,r12
   3932 +	mov	r5,r6,ror#2
   3933 +	eor	r5,r5,r6,ror#13
   3934 +	eor	r5,r5,r6,ror#22		@ Sigma0(a)
   3935 +	orr	r0,r6,r7
   3936 +	and	r0,r0,r8
   3937 +	and	r2,r6,r7
   3938 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3939 +	add	r5,r5,r0
   3940 +	add	r9,r9,r3
   3941 +	add	r5,r5,r3
   3942 +	ldr	r2,[sp,#8*4]	@ 23
   3943 +	ldr	r12,[sp,#5*4]
   3944 +	ldr	r3,[sp,#7*4]
   3945 +	ldr	r1,[sp,#0*4]
   3946 +	mov	r0,r2,ror#7
   3947 +	eor	r0,r0,r2,ror#18
   3948 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3949 +	mov	r2,r12,ror#17
   3950 +	eor	r2,r2,r12,ror#19
   3951 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3952 +	add	r3,r3,r0
   3953 +	add	r3,r3,r2
   3954 +	add	r3,r3,r1
   3955 +	ldr	r12,[r14],#4			@ *K256++
   3956 +	str	r3,[sp,#7*4]
   3957 +	mov	r0,r9,ror#6
   3958 +	eor	r0,r0,r9,ror#11
   3959 +	eor	r0,r0,r9,ror#25	@ Sigma1(e)
   3960 +	add	r3,r3,r0
   3961 +	eor	r2,r10,r11
   3962 +	and	r2,r2,r9
   3963 +	eor	r2,r2,r11			@ Ch(e,f,g)
   3964 +	add	r3,r3,r2
   3965 +	add	r3,r3,r4
   3966 +	add	r3,r3,r12
   3967 +	mov	r4,r5,ror#2
   3968 +	eor	r4,r4,r5,ror#13
   3969 +	eor	r4,r4,r5,ror#22		@ Sigma0(a)
   3970 +	orr	r0,r5,r6
   3971 +	and	r0,r0,r7
   3972 +	and	r2,r5,r6
   3973 +	orr	r0,r0,r2			@ Maj(a,b,c)
   3974 +	add	r4,r4,r0
   3975 +	add	r8,r8,r3
   3976 +	add	r4,r4,r3
   3977 +	ldr	r2,[sp,#9*4]	@ 24
   3978 +	ldr	r12,[sp,#6*4]
   3979 +	ldr	r3,[sp,#8*4]
   3980 +	ldr	r1,[sp,#1*4]
   3981 +	mov	r0,r2,ror#7
   3982 +	eor	r0,r0,r2,ror#18
   3983 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   3984 +	mov	r2,r12,ror#17
   3985 +	eor	r2,r2,r12,ror#19
   3986 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   3987 +	add	r3,r3,r0
   3988 +	add	r3,r3,r2
   3989 +	add	r3,r3,r1
   3990 +	ldr	r12,[r14],#4			@ *K256++
   3991 +	str	r3,[sp,#8*4]
   3992 +	mov	r0,r8,ror#6
   3993 +	eor	r0,r0,r8,ror#11
   3994 +	eor	r0,r0,r8,ror#25	@ Sigma1(e)
   3995 +	add	r3,r3,r0
   3996 +	eor	r2,r9,r10
   3997 +	and	r2,r2,r8
   3998 +	eor	r2,r2,r10			@ Ch(e,f,g)
   3999 +	add	r3,r3,r2
   4000 +	add	r3,r3,r11
   4001 +	add	r3,r3,r12
   4002 +	mov	r11,r4,ror#2
   4003 +	eor	r11,r11,r4,ror#13
   4004 +	eor	r11,r11,r4,ror#22		@ Sigma0(a)
   4005 +	orr	r0,r4,r5
   4006 +	and	r0,r0,r6
   4007 +	and	r2,r4,r5
   4008 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4009 +	add	r11,r11,r0
   4010 +	add	r7,r7,r3
   4011 +	add	r11,r11,r3
   4012 +	ldr	r2,[sp,#10*4]	@ 25
   4013 +	ldr	r12,[sp,#7*4]
   4014 +	ldr	r3,[sp,#9*4]
   4015 +	ldr	r1,[sp,#2*4]
   4016 +	mov	r0,r2,ror#7
   4017 +	eor	r0,r0,r2,ror#18
   4018 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4019 +	mov	r2,r12,ror#17
   4020 +	eor	r2,r2,r12,ror#19
   4021 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4022 +	add	r3,r3,r0
   4023 +	add	r3,r3,r2
   4024 +	add	r3,r3,r1
   4025 +	ldr	r12,[r14],#4			@ *K256++
   4026 +	str	r3,[sp,#9*4]
   4027 +	mov	r0,r7,ror#6
   4028 +	eor	r0,r0,r7,ror#11
   4029 +	eor	r0,r0,r7,ror#25	@ Sigma1(e)
   4030 +	add	r3,r3,r0
   4031 +	eor	r2,r8,r9
   4032 +	and	r2,r2,r7
   4033 +	eor	r2,r2,r9			@ Ch(e,f,g)
   4034 +	add	r3,r3,r2
   4035 +	add	r3,r3,r10
   4036 +	add	r3,r3,r12
   4037 +	mov	r10,r11,ror#2
   4038 +	eor	r10,r10,r11,ror#13
   4039 +	eor	r10,r10,r11,ror#22		@ Sigma0(a)
   4040 +	orr	r0,r11,r4
   4041 +	and	r0,r0,r5
   4042 +	and	r2,r11,r4
   4043 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4044 +	add	r10,r10,r0
   4045 +	add	r6,r6,r3
   4046 +	add	r10,r10,r3
   4047 +	ldr	r2,[sp,#11*4]	@ 26
   4048 +	ldr	r12,[sp,#8*4]
   4049 +	ldr	r3,[sp,#10*4]
   4050 +	ldr	r1,[sp,#3*4]
   4051 +	mov	r0,r2,ror#7
   4052 +	eor	r0,r0,r2,ror#18
   4053 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4054 +	mov	r2,r12,ror#17
   4055 +	eor	r2,r2,r12,ror#19
   4056 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4057 +	add	r3,r3,r0
   4058 +	add	r3,r3,r2
   4059 +	add	r3,r3,r1
   4060 +	ldr	r12,[r14],#4			@ *K256++
   4061 +	str	r3,[sp,#10*4]
   4062 +	mov	r0,r6,ror#6
   4063 +	eor	r0,r0,r6,ror#11
   4064 +	eor	r0,r0,r6,ror#25	@ Sigma1(e)
   4065 +	add	r3,r3,r0
   4066 +	eor	r2,r7,r8
   4067 +	and	r2,r2,r6
   4068 +	eor	r2,r2,r8			@ Ch(e,f,g)
   4069 +	add	r3,r3,r2
   4070 +	add	r3,r3,r9
   4071 +	add	r3,r3,r12
   4072 +	mov	r9,r10,ror#2
   4073 +	eor	r9,r9,r10,ror#13
   4074 +	eor	r9,r9,r10,ror#22		@ Sigma0(a)
   4075 +	orr	r0,r10,r11
   4076 +	and	r0,r0,r4
   4077 +	and	r2,r10,r11
   4078 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4079 +	add	r9,r9,r0
   4080 +	add	r5,r5,r3
   4081 +	add	r9,r9,r3
   4082 +	ldr	r2,[sp,#12*4]	@ 27
   4083 +	ldr	r12,[sp,#9*4]
   4084 +	ldr	r3,[sp,#11*4]
   4085 +	ldr	r1,[sp,#4*4]
   4086 +	mov	r0,r2,ror#7
   4087 +	eor	r0,r0,r2,ror#18
   4088 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4089 +	mov	r2,r12,ror#17
   4090 +	eor	r2,r2,r12,ror#19
   4091 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4092 +	add	r3,r3,r0
   4093 +	add	r3,r3,r2
   4094 +	add	r3,r3,r1
   4095 +	ldr	r12,[r14],#4			@ *K256++
   4096 +	str	r3,[sp,#11*4]
   4097 +	mov	r0,r5,ror#6
   4098 +	eor	r0,r0,r5,ror#11
   4099 +	eor	r0,r0,r5,ror#25	@ Sigma1(e)
   4100 +	add	r3,r3,r0
   4101 +	eor	r2,r6,r7
   4102 +	and	r2,r2,r5
   4103 +	eor	r2,r2,r7			@ Ch(e,f,g)
   4104 +	add	r3,r3,r2
   4105 +	add	r3,r3,r8
   4106 +	add	r3,r3,r12
   4107 +	mov	r8,r9,ror#2
   4108 +	eor	r8,r8,r9,ror#13
   4109 +	eor	r8,r8,r9,ror#22		@ Sigma0(a)
   4110 +	orr	r0,r9,r10
   4111 +	and	r0,r0,r11
   4112 +	and	r2,r9,r10
   4113 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4114 +	add	r8,r8,r0
   4115 +	add	r4,r4,r3
   4116 +	add	r8,r8,r3
   4117 +	ldr	r2,[sp,#13*4]	@ 28
   4118 +	ldr	r12,[sp,#10*4]
   4119 +	ldr	r3,[sp,#12*4]
   4120 +	ldr	r1,[sp,#5*4]
   4121 +	mov	r0,r2,ror#7
   4122 +	eor	r0,r0,r2,ror#18
   4123 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4124 +	mov	r2,r12,ror#17
   4125 +	eor	r2,r2,r12,ror#19
   4126 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4127 +	add	r3,r3,r0
   4128 +	add	r3,r3,r2
   4129 +	add	r3,r3,r1
   4130 +	ldr	r12,[r14],#4			@ *K256++
   4131 +	str	r3,[sp,#12*4]
   4132 +	mov	r0,r4,ror#6
   4133 +	eor	r0,r0,r4,ror#11
   4134 +	eor	r0,r0,r4,ror#25	@ Sigma1(e)
   4135 +	add	r3,r3,r0
   4136 +	eor	r2,r5,r6
   4137 +	and	r2,r2,r4
   4138 +	eor	r2,r2,r6			@ Ch(e,f,g)
   4139 +	add	r3,r3,r2
   4140 +	add	r3,r3,r7
   4141 +	add	r3,r3,r12
   4142 +	mov	r7,r8,ror#2
   4143 +	eor	r7,r7,r8,ror#13
   4144 +	eor	r7,r7,r8,ror#22		@ Sigma0(a)
   4145 +	orr	r0,r8,r9
   4146 +	and	r0,r0,r10
   4147 +	and	r2,r8,r9
   4148 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4149 +	add	r7,r7,r0
   4150 +	add	r11,r11,r3
   4151 +	add	r7,r7,r3
   4152 +	ldr	r2,[sp,#14*4]	@ 29
   4153 +	ldr	r12,[sp,#11*4]
   4154 +	ldr	r3,[sp,#13*4]
   4155 +	ldr	r1,[sp,#6*4]
   4156 +	mov	r0,r2,ror#7
   4157 +	eor	r0,r0,r2,ror#18
   4158 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4159 +	mov	r2,r12,ror#17
   4160 +	eor	r2,r2,r12,ror#19
   4161 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4162 +	add	r3,r3,r0
   4163 +	add	r3,r3,r2
   4164 +	add	r3,r3,r1
   4165 +	ldr	r12,[r14],#4			@ *K256++
   4166 +	str	r3,[sp,#13*4]
   4167 +	mov	r0,r11,ror#6
   4168 +	eor	r0,r0,r11,ror#11
   4169 +	eor	r0,r0,r11,ror#25	@ Sigma1(e)
   4170 +	add	r3,r3,r0
   4171 +	eor	r2,r4,r5
   4172 +	and	r2,r2,r11
   4173 +	eor	r2,r2,r5			@ Ch(e,f,g)
   4174 +	add	r3,r3,r2
   4175 +	add	r3,r3,r6
   4176 +	add	r3,r3,r12
   4177 +	mov	r6,r7,ror#2
   4178 +	eor	r6,r6,r7,ror#13
   4179 +	eor	r6,r6,r7,ror#22		@ Sigma0(a)
   4180 +	orr	r0,r7,r8
   4181 +	and	r0,r0,r9
   4182 +	and	r2,r7,r8
   4183 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4184 +	add	r6,r6,r0
   4185 +	add	r10,r10,r3
   4186 +	add	r6,r6,r3
   4187 +	ldr	r2,[sp,#15*4]	@ 30
   4188 +	ldr	r12,[sp,#12*4]
   4189 +	ldr	r3,[sp,#14*4]
   4190 +	ldr	r1,[sp,#7*4]
   4191 +	mov	r0,r2,ror#7
   4192 +	eor	r0,r0,r2,ror#18
   4193 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4194 +	mov	r2,r12,ror#17
   4195 +	eor	r2,r2,r12,ror#19
   4196 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4197 +	add	r3,r3,r0
   4198 +	add	r3,r3,r2
   4199 +	add	r3,r3,r1
   4200 +	ldr	r12,[r14],#4			@ *K256++
   4201 +	str	r3,[sp,#14*4]
   4202 +	mov	r0,r10,ror#6
   4203 +	eor	r0,r0,r10,ror#11
   4204 +	eor	r0,r0,r10,ror#25	@ Sigma1(e)
   4205 +	add	r3,r3,r0
   4206 +	eor	r2,r11,r4
   4207 +	and	r2,r2,r10
   4208 +	eor	r2,r2,r4			@ Ch(e,f,g)
   4209 +	add	r3,r3,r2
   4210 +	add	r3,r3,r5
   4211 +	add	r3,r3,r12
   4212 +	mov	r5,r6,ror#2
   4213 +	eor	r5,r5,r6,ror#13
   4214 +	eor	r5,r5,r6,ror#22		@ Sigma0(a)
   4215 +	orr	r0,r6,r7
   4216 +	and	r0,r0,r8
   4217 +	and	r2,r6,r7
   4218 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4219 +	add	r5,r5,r0
   4220 +	add	r9,r9,r3
   4221 +	add	r5,r5,r3
   4222 +	ldr	r2,[sp,#0*4]	@ 31
   4223 +	ldr	r12,[sp,#13*4]
   4224 +	ldr	r3,[sp,#15*4]
   4225 +	ldr	r1,[sp,#8*4]
   4226 +	mov	r0,r2,ror#7
   4227 +	eor	r0,r0,r2,ror#18
   4228 +	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   4229 +	mov	r2,r12,ror#17
   4230 +	eor	r2,r2,r12,ror#19
   4231 +	eor	r2,r2,r12,lsr#10	@ sigma1(X[i+14])
   4232 +	add	r3,r3,r0
   4233 +	add	r3,r3,r2
   4234 +	add	r3,r3,r1
   4235 +	ldr	r12,[r14],#4			@ *K256++
   4236 +	str	r3,[sp,#15*4]
   4237 +	mov	r0,r9,ror#6
   4238 +	eor	r0,r0,r9,ror#11
   4239 +	eor	r0,r0,r9,ror#25	@ Sigma1(e)
   4240 +	add	r3,r3,r0
   4241 +	eor	r2,r10,r11
   4242 +	and	r2,r2,r9
   4243 +	eor	r2,r2,r11			@ Ch(e,f,g)
   4244 +	add	r3,r3,r2
   4245 +	add	r3,r3,r4
   4246 +	add	r3,r3,r12
   4247 +	mov	r4,r5,ror#2
   4248 +	eor	r4,r4,r5,ror#13
   4249 +	eor	r4,r4,r5,ror#22		@ Sigma0(a)
   4250 +	orr	r0,r5,r6
   4251 +	and	r0,r0,r7
   4252 +	and	r2,r5,r6
   4253 +	orr	r0,r0,r2			@ Maj(a,b,c)
   4254 +	add	r4,r4,r0
   4255 +	add	r8,r8,r3
   4256 +	add	r4,r4,r3
   4257 +	and	r12,r12,#0xff
   4258 +	cmp	r12,#0xf2
   4259 +	bne	.Lrounds_16_xx
   4260 +
   4261 +	ldr	r3,[sp,#16*4]		@ pull ctx
   4262 +	ldr	r0,[r3,#0]
   4263 +	ldr	r2,[r3,#4]
   4264 +	ldr	r12,[r3,#8]
   4265 +	add	r4,r4,r0
   4266 +	ldr	r0,[r3,#12]
   4267 +	add	r5,r5,r2
   4268 +	ldr	r2,[r3,#16]
   4269 +	add	r6,r6,r12
   4270 +	ldr	r12,[r3,#20]
   4271 +	add	r7,r7,r0
   4272 +	ldr	r0,[r3,#24]
   4273 +	add	r8,r8,r2
   4274 +	ldr	r2,[r3,#28]
   4275 +	add	r9,r9,r12
   4276 +	ldr	r1,[sp,#17*4]		@ pull inp
   4277 +	ldr	r12,[sp,#18*4]		@ pull inp+len
   4278 +	add	r10,r10,r0
   4279 +	add	r11,r11,r2
   4280 +	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
   4281 +	cmp	r1,r12
   4282 +	sub	r14,r14,#256	@ rewind Ktbl
   4283 +	bne	.Loop
   4284 +
   4285 +	add	sp,sp,#19*4	@ destroy frame
   4286 +	ldmia	sp!,{r4-r12,lr}
   4287 +	tst	lr,#1
   4288 +	moveq	pc,lr			@ be binary compatible with V4, yet
   4289 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   4290 +.size   sha256_block_data_order,.-sha256_block_data_order
   4291 +.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
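
The four ldrb/orr groups that open every round above assemble each input word one byte at a time in big-endian order, which is what makes the module endian-neutral: the loaded value is the same whether the CPU runs little- or big-endian. A sketch of the same computation (hypothetical byte values, plain Perl):

	my @in = (0x01, 0x02, 0x03, 0x04);                        # four input bytes, in memory order
	my $w  = $in[3] | $in[2] << 8 | $in[1] << 16 | $in[0] << 24;
	printf "0x%08x\n", $w;                                    # 0x01020304, i.e. a big-endian load
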
   4292 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   4293 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha512-armv4.pl	2009-09-03 15:42:39.000000000 -0700
   4294 @@ -0,0 +1,398 @@
   4295 +#!/usr/bin/env perl
   4296 +
   4297 +# ====================================================================
   4298 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
   4299 +# project. The module is, however, dual licensed under OpenSSL and
   4300 +# CRYPTOGAMS licenses depending on where you obtain it. For further
   4301 +# details see http://www.openssl.org/~appro/cryptogams/.
   4302 +# ====================================================================
   4303 +
   4304 +# SHA512 block procedure for ARMv4. September 2007.
   4305 +
   4306 +# This code is ~4.5 (four and a half) times faster than code generated
   4307 +# by gcc 3.4, and it spends ~72 clock cycles per byte.
   4308 +
   4309 +# Byte order [in]dependence. =========================================
   4310 +#
   4311 +# The caller is expected to maintain a specific *dword* order in h[0-7],
   4312 +# namely with the most significant dword at the *lower* address, which is
   4313 +# reflected in the two parameters below. *Byte* order within these dwords
   4314 +# in turn follows the *native* byte order of the current platform.
   4315 +$hi=0;
   4316 +$lo=4;
   4317 +# ====================================================================
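
A minimal illustration of that layout (assumes a 64-bit perl; $hi_w/$lo_w are illustrative names): with the most significant dword first, h[i] occupies byte offsets 8*i+$hi and 8*i+$lo as follows:

	my $h    = 0x6a09e667f3bcc908;          # SHA-512 H0, used here only as an example value
	my $hi_w = ($h >> 32) & 0xffffffff;     # 0x6a09e667, stored at byte offset $hi (0)
	my $lo_w =  $h        & 0xffffffff;     # 0xf3bcc908, stored at byte offset $lo (4)
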
   4318 +
   4319 +$output=shift;
   4320 +open STDOUT,">$output";
   4321 +
   4322 +$ctx="r0";
   4323 +$inp="r1";
   4324 +$len="r2";
   4325 +$Tlo="r3";
   4326 +$Thi="r4";
   4327 +$Alo="r5";
   4328 +$Ahi="r6";
   4329 +$Elo="r7";
   4330 +$Ehi="r8";
   4331 +$t0="r9";
   4332 +$t1="r10";
   4333 +$t2="r11";
   4334 +$t3="r12";
   4335 +############	r13 is stack pointer
   4336 +$Ktbl="r14";
   4337 +############	r15 is program counter
   4338 +
   4339 +$Aoff=8*0;
   4340 +$Boff=8*1;
   4341 +$Coff=8*2;
   4342 +$Doff=8*3;
   4343 +$Eoff=8*4;
   4344 +$Foff=8*5;
   4345 +$Goff=8*6;
   4346 +$Hoff=8*7;
   4347 +$Xoff=8*8;
   4348 +
   4349 +sub BODY_00_15() {
   4350 +my $magic = shift;
   4351 +$code.=<<___;
   4352 +	ldr	$t2,[sp,#$Hoff+0]	@ h.lo
   4353 +	ldr	$t3,[sp,#$Hoff+4]	@ h.hi
   4354 +	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
   4355 +	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
   4356 +	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
   4357 +	mov	$t0,$Elo,lsr#14
   4358 +	mov	$t1,$Ehi,lsr#14
   4359 +	eor	$t0,$t0,$Ehi,lsl#18
   4360 +	eor	$t1,$t1,$Elo,lsl#18
   4361 +	eor	$t0,$t0,$Elo,lsr#18
   4362 +	eor	$t1,$t1,$Ehi,lsr#18
   4363 +	eor	$t0,$t0,$Ehi,lsl#14
   4364 +	eor	$t1,$t1,$Elo,lsl#14
   4365 +	eor	$t0,$t0,$Ehi,lsr#9
   4366 +	eor	$t1,$t1,$Elo,lsr#9
   4367 +	eor	$t0,$t0,$Elo,lsl#23
   4368 +	eor	$t1,$t1,$Ehi,lsl#23	@ Sigma1(e)
   4369 +	adds	$Tlo,$Tlo,$t0
   4370 +	adc	$Thi,$Thi,$t1		@ T += Sigma1(e)
   4371 +	adds	$Tlo,$Tlo,$t2
   4372 +	adc	$Thi,$Thi,$t3		@ T += h
   4373 +
   4374 +	ldr	$t0,[sp,#$Foff+0]	@ f.lo
   4375 +	ldr	$t1,[sp,#$Foff+4]	@ f.hi
   4376 +	ldr	$t2,[sp,#$Goff+0]	@ g.lo
   4377 +	ldr	$t3,[sp,#$Goff+4]	@ g.hi
   4378 +	str	$Elo,[sp,#$Eoff+0]
   4379 +	str	$Ehi,[sp,#$Eoff+4]
   4380 +	str	$Alo,[sp,#$Aoff+0]
   4381 +	str	$Ahi,[sp,#$Aoff+4]
   4382 +
   4383 +	eor	$t0,$t0,$t2
   4384 +	eor	$t1,$t1,$t3
   4385 +	and	$t0,$t0,$Elo
   4386 +	and	$t1,$t1,$Ehi
   4387 +	eor	$t0,$t0,$t2
   4388 +	eor	$t1,$t1,$t3		@ Ch(e,f,g)
   4389 +
   4390 +	ldr	$t2,[$Ktbl,#4]		@ K[i].lo
   4391 +	ldr	$t3,[$Ktbl,#0]		@ K[i].hi
   4392 +	ldr	$Elo,[sp,#$Doff+0]	@ d.lo
   4393 +	ldr	$Ehi,[sp,#$Doff+4]	@ d.hi
   4394 +
   4395 +	adds	$Tlo,$Tlo,$t0
   4396 +	adc	$Thi,$Thi,$t1		@ T += Ch(e,f,g)
   4397 +	adds	$Tlo,$Tlo,$t2
   4398 +	adc	$Thi,$Thi,$t3		@ T += K[i]
   4399 +	adds	$Elo,$Elo,$Tlo
   4400 +	adc	$Ehi,$Ehi,$Thi		@ d += T
   4401 +
   4402 +	and	$t0,$t2,#0xff
   4403 +	teq	$t0,#$magic
   4404 +	orreq	$Ktbl,$Ktbl,#1
   4405 +
   4406 +	ldr	$t2,[sp,#$Boff+0]	@ b.lo
   4407 +	ldr	$t3,[sp,#$Coff+0]	@ c.lo
   4408 +	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
   4409 +	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
   4410 +	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
   4411 +	mov	$t0,$Alo,lsr#28
   4412 +	mov	$t1,$Ahi,lsr#28
   4413 +	eor	$t0,$t0,$Ahi,lsl#4
   4414 +	eor	$t1,$t1,$Alo,lsl#4
   4415 +	eor	$t0,$t0,$Ahi,lsr#2
   4416 +	eor	$t1,$t1,$Alo,lsr#2
   4417 +	eor	$t0,$t0,$Alo,lsl#30
   4418 +	eor	$t1,$t1,$Ahi,lsl#30
   4419 +	eor	$t0,$t0,$Ahi,lsr#7
   4420 +	eor	$t1,$t1,$Alo,lsr#7
   4421 +	eor	$t0,$t0,$Alo,lsl#25
   4422 +	eor	$t1,$t1,$Ahi,lsl#25	@ Sigma0(a)
   4423 +	adds	$Tlo,$Tlo,$t0
   4424 +	adc	$Thi,$Thi,$t1		@ T += Sigma0(a)
   4425 +
   4426 +	and	$t0,$Alo,$t2
   4427 +	orr	$Alo,$Alo,$t2
   4428 +	ldr	$t1,[sp,#$Boff+4]	@ b.hi
   4429 +	ldr	$t2,[sp,#$Coff+4]	@ c.hi
   4430 +	and	$Alo,$Alo,$t3
   4431 +	orr	$Alo,$Alo,$t0		@ Maj(a,b,c).lo
   4432 +	and	$t3,$Ahi,$t1
   4433 +	orr	$Ahi,$Ahi,$t1
   4434 +	and	$Ahi,$Ahi,$t2
   4435 +	orr	$Ahi,$Ahi,$t3		@ Maj(a,b,c).hi
   4436 +	adds	$Alo,$Alo,$Tlo
   4437 +	adc	$Ahi,$Ahi,$Thi		@ h += T
   4438 +
   4439 +	sub	sp,sp,#8
   4440 +	add	$Ktbl,$Ktbl,#8
   4441 +___
   4442 +}
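
The Sigma/sigma comment blocks inside BODY_00_15 record how each 64-bit rotate splits into pairs of 32-bit shifts; e.g. ROTR(x,41) = ROTR(x,32+9), so its low word is hi>>9 ^ lo<<23. A stand-alone Perl cross-check of the Sigma1 low-word identity (assumes a 64-bit perl; ror64 is an illustrative helper, not part of the module):

	sub ror64 { my ($x,$n) = @_; (($x >> $n) | ($x << (64 - $n))) & 0xffffffffffffffff }
	my $x = 0x510e527fade682d1;                           # SHA-512 H4, an arbitrary test value
	my ($hi,$lo) = ($x >> 32, $x & 0xffffffff);
	my $want = ror64($x,14) ^ ror64($x,18) ^ ror64($x,41);
	my $lo_w = ($lo >> 14 ^ (($hi << 18) & 0xffffffff))   # ROTR(x,14).lo
	         ^ ($lo >> 18 ^ (($hi << 14) & 0xffffffff))   # ROTR(x,18).lo
	         ^ ($hi >> 9  ^ (($lo << 23) & 0xffffffff));  # ROTR(x,41).lo
	print $lo_w == ($want & 0xffffffff) ? "ok\n" : "mismatch\n";
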
   4443 +$code=<<___;
   4444 +.text
   4445 +.code	32
   4446 +.type	K512,%object
   4447 +.align	5
   4448 +K512:
   4449 +.word	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
   4450 +.word	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
   4451 +.word	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
   4452 +.word	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
   4453 +.word	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
   4454 +.word	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
   4455 +.word	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
   4456 +.word	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
   4457 +.word	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
   4458 +.word	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
   4459 +.word	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
   4460 +.word	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
   4461 +.word	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
   4462 +.word	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
   4463 +.word	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
   4464 +.word	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
   4465 +.word	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
   4466 +.word	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
   4467 +.word	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
   4468 +.word	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
   4469 +.word	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
   4470 +.word	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
   4471 +.word	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
   4472 +.word	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
   4473 +.word	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
   4474 +.word	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
   4475 +.word	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
   4476 +.word	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
   4477 +.word	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
   4478 +.word	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
   4479 +.word	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
   4480 +.word	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
   4481 +.word	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
   4482 +.word	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
   4483 +.word	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
   4484 +.word	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
   4485 +.word	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
   4486 +.word	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
   4487 +.word	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
   4488 +.word	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
   4489 +.size	K512,.-K512
   4490 +
   4491 +.global	sha512_block_data_order
   4492 +.type	sha512_block_data_order,%function
   4493 +sha512_block_data_order:
   4494 +	sub	r3,pc,#8		@ sha512_block_data_order
   4495 +	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
   4496 +	stmdb	sp!,{r4-r12,lr}
   4497 +	sub	$Ktbl,r3,#640		@ K512
   4498 +	sub	sp,sp,#9*8
   4499 +
   4500 +	ldr	$Elo,[$ctx,#$Eoff+$lo]
   4501 +	ldr	$Ehi,[$ctx,#$Eoff+$hi]
   4502 +	ldr	$t0, [$ctx,#$Goff+$lo]
   4503 +	ldr	$t1, [$ctx,#$Goff+$hi]
   4504 +	ldr	$t2, [$ctx,#$Hoff+$lo]
   4505 +	ldr	$t3, [$ctx,#$Hoff+$hi]
   4506 +.Loop:
   4507 +	str	$t0, [sp,#$Goff+0]
   4508 +	str	$t1, [sp,#$Goff+4]
   4509 +	str	$t2, [sp,#$Hoff+0]
   4510 +	str	$t3, [sp,#$Hoff+4]
   4511 +	ldr	$Alo,[$ctx,#$Aoff+$lo]
   4512 +	ldr	$Ahi,[$ctx,#$Aoff+$hi]
   4513 +	ldr	$Tlo,[$ctx,#$Boff+$lo]
   4514 +	ldr	$Thi,[$ctx,#$Boff+$hi]
   4515 +	ldr	$t0, [$ctx,#$Coff+$lo]
   4516 +	ldr	$t1, [$ctx,#$Coff+$hi]
   4517 +	ldr	$t2, [$ctx,#$Doff+$lo]
   4518 +	ldr	$t3, [$ctx,#$Doff+$hi]
   4519 +	str	$Tlo,[sp,#$Boff+0]
   4520 +	str	$Thi,[sp,#$Boff+4]
   4521 +	str	$t0, [sp,#$Coff+0]
   4522 +	str	$t1, [sp,#$Coff+4]
   4523 +	str	$t2, [sp,#$Doff+0]
   4524 +	str	$t3, [sp,#$Doff+4]
   4525 +	ldr	$Tlo,[$ctx,#$Foff+$lo]
   4526 +	ldr	$Thi,[$ctx,#$Foff+$hi]
   4527 +	str	$Tlo,[sp,#$Foff+0]
   4528 +	str	$Thi,[sp,#$Foff+4]
   4529 +
   4530 +.L00_15:
   4531 +	ldrb	$Tlo,[$inp,#7]
   4532 +	ldrb	$t0, [$inp,#6]
   4533 +	ldrb	$t1, [$inp,#5]
   4534 +	ldrb	$t2, [$inp,#4]
   4535 +	ldrb	$Thi,[$inp,#3]
   4536 +	ldrb	$t3, [$inp,#2]
   4537 +	orr	$Tlo,$Tlo,$t0,lsl#8
   4538 +	ldrb	$t0, [$inp,#1]
   4539 +	orr	$Tlo,$Tlo,$t1,lsl#16
   4540 +	ldrb	$t1, [$inp],#8
   4541 +	orr	$Tlo,$Tlo,$t2,lsl#24
   4542 +	orr	$Thi,$Thi,$t3,lsl#8
   4543 +	orr	$Thi,$Thi,$t0,lsl#16
   4544 +	orr	$Thi,$Thi,$t1,lsl#24
   4545 +	str	$Tlo,[sp,#$Xoff+0]
   4546 +	str	$Thi,[sp,#$Xoff+4]
   4547 +___
   4548 +	&BODY_00_15(0x94);
   4549 +$code.=<<___;
   4550 +	tst	$Ktbl,#1
   4551 +	beq	.L00_15
   4552 +	bic	$Ktbl,$Ktbl,#1
   4553 +
   4554 +.L16_79:
   4555 +	ldr	$t0,[sp,#`$Xoff+8*(16-1)`+0]
   4556 +	ldr	$t1,[sp,#`$Xoff+8*(16-1)`+4]
   4557 +	ldr	$t2,[sp,#`$Xoff+8*(16-14)`+0]
   4558 +	ldr	$t3,[sp,#`$Xoff+8*(16-14)`+4]
   4559 +
   4560 +	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
   4561 +	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
   4562 +	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
   4563 +	mov	$Tlo,$t0,lsr#1
   4564 +	mov	$Thi,$t1,lsr#1
   4565 +	eor	$Tlo,$Tlo,$t1,lsl#31
   4566 +	eor	$Thi,$Thi,$t0,lsl#31
   4567 +	eor	$Tlo,$Tlo,$t0,lsr#8
   4568 +	eor	$Thi,$Thi,$t1,lsr#8
   4569 +	eor	$Tlo,$Tlo,$t1,lsl#24
   4570 +	eor	$Thi,$Thi,$t0,lsl#24
   4571 +	eor	$Tlo,$Tlo,$t0,lsr#7
   4572 +	eor	$Thi,$Thi,$t1,lsr#7
   4573 +	eor	$Tlo,$Tlo,$t1,lsl#25
   4574 +
   4575 +	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
   4576 +	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
   4577 +	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
   4578 +	mov	$t0,$t2,lsr#19
   4579 +	mov	$t1,$t3,lsr#19
   4580 +	eor	$t0,$t0,$t3,lsl#13
   4581 +	eor	$t1,$t1,$t2,lsl#13
   4582 +	eor	$t0,$t0,$t3,lsr#29
   4583 +	eor	$t1,$t1,$t2,lsr#29
   4584 +	eor	$t0,$t0,$t2,lsl#3
   4585 +	eor	$t1,$t1,$t3,lsl#3
   4586 +	eor	$t0,$t0,$t2,lsr#6
   4587 +	eor	$t1,$t1,$t3,lsr#6
   4588 +	eor	$t0,$t0,$t3,lsl#26
   4589 +
   4590 +	ldr	$t2,[sp,#`$Xoff+8*(16-9)`+0]
   4591 +	ldr	$t3,[sp,#`$Xoff+8*(16-9)`+4]
   4592 +	adds	$Tlo,$Tlo,$t0
   4593 +	adc	$Thi,$Thi,$t1
   4594 +
   4595 +	ldr	$t0,[sp,#`$Xoff+8*16`+0]
   4596 +	ldr	$t1,[sp,#`$Xoff+8*16`+4]
   4597 +	adds	$Tlo,$Tlo,$t2
   4598 +	adc	$Thi,$Thi,$t3
   4599 +	adds	$Tlo,$Tlo,$t0
   4600 +	adc	$Thi,$Thi,$t1
   4601 +	str	$Tlo,[sp,#$Xoff+0]
   4602 +	str	$Thi,[sp,#$Xoff+4]
   4603 +___
   4604 +	&BODY_00_15(0x17);
   4605 +$code.=<<___;
   4606 +	tst	$Ktbl,#1
   4607 +	beq	.L16_79
   4608 +	bic	$Ktbl,$Ktbl,#1
   4609 +
   4610 +	ldr	$Tlo,[sp,#$Boff+0]
   4611 +	ldr	$Thi,[sp,#$Boff+4]
   4612 +	ldr	$t0, [$ctx,#$Aoff+$lo]
   4613 +	ldr	$t1, [$ctx,#$Aoff+$hi]
   4614 +	ldr	$t2, [$ctx,#$Boff+$lo]
   4615 +	ldr	$t3, [$ctx,#$Boff+$hi]
   4616 +	adds	$t0,$Alo,$t0
   4617 +	adc	$t1,$Ahi,$t1
   4618 +	adds	$t2,$Tlo,$t2
   4619 +	adc	$t3,$Thi,$t3
   4620 +	str	$t0, [$ctx,#$Aoff+$lo]
   4621 +	str	$t1, [$ctx,#$Aoff+$hi]
   4622 +	str	$t2, [$ctx,#$Boff+$lo]
   4623 +	str	$t3, [$ctx,#$Boff+$hi]
   4624 +
   4625 +	ldr	$Alo,[sp,#$Coff+0]
   4626 +	ldr	$Ahi,[sp,#$Coff+4]
   4627 +	ldr	$Tlo,[sp,#$Doff+0]
   4628 +	ldr	$Thi,[sp,#$Doff+4]
   4629 +	ldr	$t0, [$ctx,#$Coff+$lo]
   4630 +	ldr	$t1, [$ctx,#$Coff+$hi]
   4631 +	ldr	$t2, [$ctx,#$Doff+$lo]
   4632 +	ldr	$t3, [$ctx,#$Doff+$hi]
   4633 +	adds	$t0,$Alo,$t0
   4634 +	adc	$t1,$Ahi,$t1
   4635 +	adds	$t2,$Tlo,$t2
   4636 +	adc	$t3,$Thi,$t3
   4637 +	str	$t0, [$ctx,#$Coff+$lo]
   4638 +	str	$t1, [$ctx,#$Coff+$hi]
   4639 +	str	$t2, [$ctx,#$Doff+$lo]
   4640 +	str	$t3, [$ctx,#$Doff+$hi]
   4641 +
   4642 +	ldr	$Tlo,[sp,#$Foff+0]
   4643 +	ldr	$Thi,[sp,#$Foff+4]
   4644 +	ldr	$t0, [$ctx,#$Eoff+$lo]
   4645 +	ldr	$t1, [$ctx,#$Eoff+$hi]
   4646 +	ldr	$t2, [$ctx,#$Foff+$lo]
   4647 +	ldr	$t3, [$ctx,#$Foff+$hi]
   4648 +	adds	$Elo,$Elo,$t0
   4649 +	adc	$Ehi,$Ehi,$t1
   4650 +	adds	$t2,$Tlo,$t2
   4651 +	adc	$t3,$Thi,$t3
   4652 +	str	$Elo,[$ctx,#$Eoff+$lo]
   4653 +	str	$Ehi,[$ctx,#$Eoff+$hi]
   4654 +	str	$t2, [$ctx,#$Foff+$lo]
   4655 +	str	$t3, [$ctx,#$Foff+$hi]
   4656 +
   4657 +	ldr	$Alo,[sp,#$Goff+0]
   4658 +	ldr	$Ahi,[sp,#$Goff+4]
   4659 +	ldr	$Tlo,[sp,#$Hoff+0]
   4660 +	ldr	$Thi,[sp,#$Hoff+4]
   4661 +	ldr	$t0, [$ctx,#$Goff+$lo]
   4662 +	ldr	$t1, [$ctx,#$Goff+$hi]
   4663 +	ldr	$t2, [$ctx,#$Hoff+$lo]
   4664 +	ldr	$t3, [$ctx,#$Hoff+$hi]
   4665 +	adds	$t0,$Alo,$t0
   4666 +	adc	$t1,$Ahi,$t1
   4667 +	adds	$t2,$Tlo,$t2
   4668 +	adc	$t3,$Thi,$t3
   4669 +	str	$t0, [$ctx,#$Goff+$lo]
   4670 +	str	$t1, [$ctx,#$Goff+$hi]
   4671 +	str	$t2, [$ctx,#$Hoff+$lo]
   4672 +	str	$t3, [$ctx,#$Hoff+$hi]
   4673 +
   4674 +	add	sp,sp,#640
   4675 +	sub	$Ktbl,$Ktbl,#640
   4676 +
   4677 +	teq	$inp,$len
   4678 +	bne	.Loop
   4679 +
   4680 +	add	sp,sp,#8*9		@ destroy frame
   4681 +	ldmia	sp!,{r4-r12,lr}
   4682 +	tst	lr,#1
   4683 +	moveq	pc,lr			@ be binary compatible with V4, yet
   4684 +	bx	lr			@ interoperable with Thumb ISA:-)
   4685 +.size   sha512_block_data_order,.-sha512_block_data_order
   4686 +.asciz  "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
   4687 +___
   4688 +
   4689 +$code =~ s/\`([^\`]*)\`/eval $1/gem;
   4690 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
   4691 +print $code;
   4692 +close STDOUT; # enforce flush
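
As in the SHA-256 script, the final substitution swaps the `bx lr` mnemonic for its raw ARM opcode, since assemblers invoked with -march=armv4 reject `bx` even though the preceding `tst lr,#1; moveq pc,lr` ensures the instruction is only reached when the caller is in Thumb state, i.e. on a Thumb-aware core. The constant can be reproduced from the BX encoding (cond=AL, Rm=lr):

	printf "0x%08x\n", (0xe << 28) | (0x12fff1 << 4) | 14;   # 0xe12fff1e == `bx lr`
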
   4693 --- /dev/null	2009-04-24 06:09:48.000000000 -0700
   4694 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha512-armv4.s	2009-09-03 15:42:39.000000000 -0700
   4695 @@ -0,0 +1,415 @@
   4696 +.text
   4697 +.code	32
   4698 +.type	K512,%object
   4699 +.align	5
   4700 +K512:
   4701 +.word	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
   4702 +.word	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
   4703 +.word	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
   4704 +.word	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
   4705 +.word	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
   4706 +.word	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
   4707 +.word	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
   4708 +.word	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
   4709 +.word	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
   4710 +.word	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
   4711 +.word	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
   4712 +.word	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
   4713 +.word	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
   4714 +.word	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
   4715 +.word	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
   4716 +.word	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
   4717 +.word	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
   4718 +.word	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
   4719 +.word	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
   4720 +.word	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
   4721 +.word	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
   4722 +.word	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
   4723 +.word	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
   4724 +.word	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
   4725 +.word	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
   4726 +.word	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
   4727 +.word	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
   4728 +.word	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
   4729 +.word	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
   4730 +.word	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
   4731 +.word	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
   4732 +.word	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
   4733 +.word	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
   4734 +.word	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
   4735 +.word	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
   4736 +.word	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
   4737 +.word	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
   4738 +.word	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
   4739 +.word	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
   4740 +.word	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
   4741 +.size	K512,.-K512
   4742 +
   4743 +.global	sha512_block_data_order
   4744 +.type	sha512_block_data_order,%function
   4745 +sha512_block_data_order:
   4746 +	sub	r3,pc,#8		@ sha512_block_data_order
   4747 +	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
   4748 +	stmdb	sp!,{r4-r12,lr}
   4749 +	sub	r14,r3,#640		@ K512
   4750 +	sub	sp,sp,#9*8
   4751 +
   4752 +	ldr	r7,[r0,#32+4]
   4753 +	ldr	r8,[r0,#32+0]
   4754 +	ldr	r9, [r0,#48+4]
   4755 +	ldr	r10, [r0,#48+0]
   4756 +	ldr	r11, [r0,#56+4]
   4757 +	ldr	r12, [r0,#56+0]
   4758 +.Loop:
   4759 +	str	r9, [sp,#48+0]
   4760 +	str	r10, [sp,#48+4]
   4761 +	str	r11, [sp,#56+0]
   4762 +	str	r12, [sp,#56+4]
   4763 +	ldr	r5,[r0,#0+4]
   4764 +	ldr	r6,[r0,#0+0]
   4765 +	ldr	r3,[r0,#8+4]
   4766 +	ldr	r4,[r0,#8+0]
   4767 +	ldr	r9, [r0,#16+4]
   4768 +	ldr	r10, [r0,#16+0]
   4769 +	ldr	r11, [r0,#24+4]
   4770 +	ldr	r12, [r0,#24+0]
   4771 +	str	r3,[sp,#8+0]
   4772 +	str	r4,[sp,#8+4]
   4773 +	str	r9, [sp,#16+0]
   4774 +	str	r10, [sp,#16+4]
   4775 +	str	r11, [sp,#24+0]
   4776 +	str	r12, [sp,#24+4]
   4777 +	ldr	r3,[r0,#40+4]
   4778 +	ldr	r4,[r0,#40+0]
   4779 +	str	r3,[sp,#40+0]
   4780 +	str	r4,[sp,#40+4]
   4781 +
   4782 +.L00_15:
   4783 +	ldrb	r3,[r1,#7]
   4784 +	ldrb	r9, [r1,#6]
   4785 +	ldrb	r10, [r1,#5]
   4786 +	ldrb	r11, [r1,#4]
   4787 +	ldrb	r4,[r1,#3]
   4788 +	ldrb	r12, [r1,#2]
   4789 +	orr	r3,r3,r9,lsl#8
   4790 +	ldrb	r9, [r1,#1]
   4791 +	orr	r3,r3,r10,lsl#16
   4792 +	ldrb	r10, [r1],#8
   4793 +	orr	r3,r3,r11,lsl#24
   4794 +	orr	r4,r4,r12,lsl#8
   4795 +	orr	r4,r4,r9,lsl#16
   4796 +	orr	r4,r4,r10,lsl#24
   4797 +	str	r3,[sp,#64+0]
   4798 +	str	r4,[sp,#64+4]
   4799 +	ldr	r11,[sp,#56+0]	@ h.lo
   4800 +	ldr	r12,[sp,#56+4]	@ h.hi
   4801 +	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
   4802 +	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
   4803 +	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
   4804 +	mov	r9,r7,lsr#14
   4805 +	mov	r10,r8,lsr#14
   4806 +	eor	r9,r9,r8,lsl#18
   4807 +	eor	r10,r10,r7,lsl#18
   4808 +	eor	r9,r9,r7,lsr#18
   4809 +	eor	r10,r10,r8,lsr#18
   4810 +	eor	r9,r9,r8,lsl#14
   4811 +	eor	r10,r10,r7,lsl#14
   4812 +	eor	r9,r9,r8,lsr#9
   4813 +	eor	r10,r10,r7,lsr#9
   4814 +	eor	r9,r9,r7,lsl#23
   4815 +	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
   4816 +	adds	r3,r3,r9
   4817 +	adc	r4,r4,r10		@ T += Sigma1(e)
   4818 +	adds	r3,r3,r11
   4819 +	adc	r4,r4,r12		@ T += h
   4820 +
   4821 +	ldr	r9,[sp,#40+0]	@ f.lo
   4822 +	ldr	r10,[sp,#40+4]	@ f.hi
   4823 +	ldr	r11,[sp,#48+0]	@ g.lo
   4824 +	ldr	r12,[sp,#48+4]	@ g.hi
   4825 +	str	r7,[sp,#32+0]
   4826 +	str	r8,[sp,#32+4]
   4827 +	str	r5,[sp,#0+0]
   4828 +	str	r6,[sp,#0+4]
   4829 +
   4830 +	eor	r9,r9,r11
   4831 +	eor	r10,r10,r12
   4832 +	and	r9,r9,r7
   4833 +	and	r10,r10,r8
   4834 +	eor	r9,r9,r11
   4835 +	eor	r10,r10,r12		@ Ch(e,f,g)
   4836 +
   4837 +	ldr	r11,[r14,#4]		@ K[i].lo
   4838 +	ldr	r12,[r14,#0]		@ K[i].hi
   4839 +	ldr	r7,[sp,#24+0]	@ d.lo
   4840 +	ldr	r8,[sp,#24+4]	@ d.hi
   4841 +
   4842 +	adds	r3,r3,r9
   4843 +	adc	r4,r4,r10		@ T += Ch(e,f,g)
   4844 +	adds	r3,r3,r11
   4845 +	adc	r4,r4,r12		@ T += K[i]
   4846 +	adds	r7,r7,r3
   4847 +	adc	r8,r8,r4		@ d += T
   4848 +
   4849 +	and	r9,r11,#0xff
   4850 +	teq	r9,#148
   4851 +	orreq	r14,r14,#1
   4852 +
   4853 +	ldr	r11,[sp,#8+0]	@ b.lo
   4854 +	ldr	r12,[sp,#16+0]	@ c.lo
   4855 +	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
   4856 +	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
   4857 +	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
   4858 +	mov	r9,r5,lsr#28
   4859 +	mov	r10,r6,lsr#28
   4860 +	eor	r9,r9,r6,lsl#4
   4861 +	eor	r10,r10,r5,lsl#4
   4862 +	eor	r9,r9,r6,lsr#2
   4863 +	eor	r10,r10,r5,lsr#2
   4864 +	eor	r9,r9,r5,lsl#30
   4865 +	eor	r10,r10,r6,lsl#30
   4866 +	eor	r9,r9,r6,lsr#7
   4867 +	eor	r10,r10,r5,lsr#7
   4868 +	eor	r9,r9,r5,lsl#25
   4869 +	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
   4870 +	adds	r3,r3,r9
   4871 +	adc	r4,r4,r10		@ T += Sigma0(a)
   4872 +
   4873 +	and	r9,r5,r11
   4874 +	orr	r5,r5,r11
   4875 +	ldr	r10,[sp,#8+4]	@ b.hi
   4876 +	ldr	r11,[sp,#16+4]	@ c.hi
   4877 +	and	r5,r5,r12
   4878 +	orr	r5,r5,r9		@ Maj(a,b,c).lo
   4879 +	and	r12,r6,r10
   4880 +	orr	r6,r6,r10
   4881 +	and	r6,r6,r11
   4882 +	orr	r6,r6,r12		@ Maj(a,b,c).hi
   4883 +	adds	r5,r5,r3
   4884 +	adc	r6,r6,r4		@ h += T
   4885 +
   4886 +	sub	sp,sp,#8
   4887 +	add	r14,r14,#8
   4888 +	tst	r14,#1
   4889 +	beq	.L00_15
   4890 +	bic	r14,r14,#1
   4891 +
   4892 +.L16_79:
   4893 +	ldr	r9,[sp,#184+0]
   4894 +	ldr	r10,[sp,#184+4]
   4895 +	ldr	r11,[sp,#80+0]
   4896 +	ldr	r12,[sp,#80+4]
   4897 +
   4898 +	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
   4899 +	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
   4900 +	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
   4901 +	mov	r3,r9,lsr#1
   4902 +	mov	r4,r10,lsr#1
   4903 +	eor	r3,r3,r10,lsl#31
   4904 +	eor	r4,r4,r9,lsl#31
   4905 +	eor	r3,r3,r9,lsr#8
   4906 +	eor	r4,r4,r10,lsr#8
   4907 +	eor	r3,r3,r10,lsl#24
   4908 +	eor	r4,r4,r9,lsl#24
   4909 +	eor	r3,r3,r9,lsr#7
   4910 +	eor	r4,r4,r10,lsr#7
   4911 +	eor	r3,r3,r10,lsl#25
   4912 +
   4913 +	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
   4914 +	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
   4915 +	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
   4916 +	mov	r9,r11,lsr#19
   4917 +	mov	r10,r12,lsr#19
   4918 +	eor	r9,r9,r12,lsl#13
   4919 +	eor	r10,r10,r11,lsl#13
   4920 +	eor	r9,r9,r12,lsr#29
   4921 +	eor	r10,r10,r11,lsr#29
   4922 +	eor	r9,r9,r11,lsl#3
   4923 +	eor	r10,r10,r12,lsl#3
   4924 +	eor	r9,r9,r11,lsr#6
   4925 +	eor	r10,r10,r12,lsr#6
   4926 +	eor	r9,r9,r12,lsl#26
   4927 +
   4928 +	ldr	r11,[sp,#120+0]
   4929 +	ldr	r12,[sp,#120+4]
   4930 +	adds	r3,r3,r9
   4931 +	adc	r4,r4,r10
   4932 +
   4933 +	ldr	r9,[sp,#192+0]
   4934 +	ldr	r10,[sp,#192+4]
   4935 +	adds	r3,r3,r11
   4936 +	adc	r4,r4,r12
   4937 +	adds	r3,r3,r9
   4938 +	adc	r4,r4,r10
   4939 +	str	r3,[sp,#64+0]
   4940 +	str	r4,[sp,#64+4]
   4941 +	ldr	r11,[sp,#56+0]	@ h.lo
   4942 +	ldr	r12,[sp,#56+4]	@ h.hi
   4943 +	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
   4944 +	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
   4945 +	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
   4946 +	mov	r9,r7,lsr#14
   4947 +	mov	r10,r8,lsr#14
   4948 +	eor	r9,r9,r8,lsl#18
   4949 +	eor	r10,r10,r7,lsl#18
   4950 +	eor	r9,r9,r7,lsr#18
   4951 +	eor	r10,r10,r8,lsr#18
   4952 +	eor	r9,r9,r8,lsl#14
   4953 +	eor	r10,r10,r7,lsl#14
   4954 +	eor	r9,r9,r8,lsr#9
   4955 +	eor	r10,r10,r7,lsr#9
   4956 +	eor	r9,r9,r7,lsl#23
   4957 +	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
   4958 +	adds	r3,r3,r9
   4959 +	adc	r4,r4,r10		@ T += Sigma1(e)
   4960 +	adds	r3,r3,r11
   4961 +	adc	r4,r4,r12		@ T += h
   4962 +
   4963 +	ldr	r9,[sp,#40+0]	@ f.lo
   4964 +	ldr	r10,[sp,#40+4]	@ f.hi
   4965 +	ldr	r11,[sp,#48+0]	@ g.lo
   4966 +	ldr	r12,[sp,#48+4]	@ g.hi
   4967 +	str	r7,[sp,#32+0]
   4968 +	str	r8,[sp,#32+4]
   4969 +	str	r5,[sp,#0+0]
   4970 +	str	r6,[sp,#0+4]
   4971 +
   4972 +	eor	r9,r9,r11
   4973 +	eor	r10,r10,r12
   4974 +	and	r9,r9,r7
   4975 +	and	r10,r10,r8
   4976 +	eor	r9,r9,r11
   4977 +	eor	r10,r10,r12		@ Ch(e,f,g)
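         +	@ note: Ch(e,f,g) = (e&f)^(~e&g), computed via the
         +	@ equivalent form ((f^g)&e)^g: three ops per half and only
         +	@ one temporary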
   4978 +
   4979 +	ldr	r11,[r14,#4]		@ K[i].lo
   4980 +	ldr	r12,[r14,#0]		@ K[i].hi
   4981 +	ldr	r7,[sp,#24+0]	@ d.lo
   4982 +	ldr	r8,[sp,#24+4]	@ d.hi
   4983 +
   4984 +	adds	r3,r3,r9
   4985 +	adc	r4,r4,r10		@ T += Ch(e,f,g)
   4986 +	adds	r3,r3,r11
   4987 +	adc	r4,r4,r12		@ T += K[i]
   4988 +	adds	r7,r7,r3
   4989 +	adc	r8,r8,r4		@ d += T
   4990 +
   4991 +	and	r9,r11,#0xff
   4992 +	teq	r9,#23
   4993 +	orreq	r14,r14,#1
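         +	@ note: r11 holds K[i].lo; low byte 0x17 (=23) identifies
         +	@ the final round constant 0x6c44198c4a475817, so the flag
         +	@ bit in r14 is set on round 79 and the beq below falls
         +	@ through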
   4994 +
   4995 +	ldr	r11,[sp,#8+0]	@ b.lo
   4996 +	ldr	r12,[sp,#16+0]	@ c.lo
   4997 +	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
   4998 +	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
   4999 +	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
   5000 +	mov	r9,r5,lsr#28
   5001 +	mov	r10,r6,lsr#28
   5002 +	eor	r9,r9,r6,lsl#4
   5003 +	eor	r10,r10,r5,lsl#4
   5004 +	eor	r9,r9,r6,lsr#2
   5005 +	eor	r10,r10,r5,lsr#2
   5006 +	eor	r9,r9,r5,lsl#30
   5007 +	eor	r10,r10,r6,lsl#30
   5008 +	eor	r9,r9,r6,lsr#7
   5009 +	eor	r10,r10,r5,lsr#7
   5010 +	eor	r9,r9,r5,lsl#25
   5011 +	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
   5012 +	adds	r3,r3,r9
   5013 +	adc	r4,r4,r10		@ T += Sigma0(a)
   5014 +
   5015 +	and	r9,r5,r11
   5016 +	orr	r5,r5,r11
   5017 +	ldr	r10,[sp,#8+4]	@ b.hi
   5018 +	ldr	r11,[sp,#16+4]	@ c.hi
   5019 +	and	r5,r5,r12
   5020 +	orr	r5,r5,r9		@ Maj(a,b,c).lo
   5021 +	and	r12,r6,r10
   5022 +	orr	r6,r6,r10
   5023 +	and	r6,r6,r11
   5024 +	orr	r6,r6,r12		@ Maj(a,b,c).hi
   5025 +	adds	r5,r5,r3
   5026 +	adc	r6,r6,r4		@ h += T
   5027 +
   5028 +	sub	sp,sp,#8
   5029 +	add	r14,r14,#8
   5030 +	tst	r14,#1
   5031 +	beq	.L16_79
   5032 +	bic	r14,r14,#1
   5033 +
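         +	@ all 80 rounds done: fold the working variables back into
         +	@ the context at r0; each 64-bit state word is kept
         +	@ hi-word-first (hi at +0, lo at +4)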
   5034 +	ldr	r3,[sp,#8+0]
   5035 +	ldr	r4,[sp,#8+4]
   5036 +	ldr	r9, [r0,#0+4]
   5037 +	ldr	r10, [r0,#0+0]
   5038 +	ldr	r11, [r0,#8+4]
   5039 +	ldr	r12, [r0,#8+0]
   5040 +	adds	r9,r5,r9
   5041 +	adc	r10,r6,r10
   5042 +	adds	r11,r3,r11
   5043 +	adc	r12,r4,r12
   5044 +	str	r9, [r0,#0+4]
   5045 +	str	r10, [r0,#0+0]
   5046 +	str	r11, [r0,#8+4]
   5047 +	str	r12, [r0,#8+0]
   5048 +
   5049 +	ldr	r5,[sp,#16+0]
   5050 +	ldr	r6,[sp,#16+4]
   5051 +	ldr	r3,[sp,#24+0]
   5052 +	ldr	r4,[sp,#24+4]
   5053 +	ldr	r9, [r0,#16+4]
   5054 +	ldr	r10, [r0,#16+0]
   5055 +	ldr	r11, [r0,#24+4]
   5056 +	ldr	r12, [r0,#24+0]
   5057 +	adds	r9,r5,r9
   5058 +	adc	r10,r6,r10
   5059 +	adds	r11,r3,r11
   5060 +	adc	r12,r4,r12
   5061 +	str	r9, [r0,#16+4]
   5062 +	str	r10, [r0,#16+0]
   5063 +	str	r11, [r0,#24+4]
   5064 +	str	r12, [r0,#24+0]
   5065 +
   5066 +	ldr	r3,[sp,#40+0]
   5067 +	ldr	r4,[sp,#40+4]
   5068 +	ldr	r9, [r0,#32+4]
   5069 +	ldr	r10, [r0,#32+0]
   5070 +	ldr	r11, [r0,#40+4]
   5071 +	ldr	r12, [r0,#40+0]
   5072 +	adds	r7,r7,r9
   5073 +	adc	r8,r8,r10
   5074 +	adds	r11,r3,r11
   5075 +	adc	r12,r4,r12
   5076 +	str	r7,[r0,#32+4]
   5077 +	str	r8,[r0,#32+0]
   5078 +	str	r11, [r0,#40+4]
   5079 +	str	r12, [r0,#40+0]
   5080 +
   5081 +	ldr	r5,[sp,#48+0]
   5082 +	ldr	r6,[sp,#48+4]
   5083 +	ldr	r3,[sp,#56+0]
   5084 +	ldr	r4,[sp,#56+4]
   5085 +	ldr	r9, [r0,#48+4]
   5086 +	ldr	r10, [r0,#48+0]
   5087 +	ldr	r11, [r0,#56+4]
   5088 +	ldr	r12, [r0,#56+0]
   5089 +	adds	r9,r5,r9
   5090 +	adc	r10,r6,r10
   5091 +	adds	r11,r3,r11
   5092 +	adc	r12,r4,r12
   5093 +	str	r9, [r0,#48+4]
   5094 +	str	r10, [r0,#48+0]
   5095 +	str	r11, [r0,#56+4]
   5096 +	str	r12, [r0,#56+0]
   5097 +
   5098 +	add	sp,sp,#640
   5099 +	sub	r14,r14,#640
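         +	@ 80 rounds x 8 bytes each: pop the W[] storage off the
         +	@ stack and rewind r14 to the start of K512 for the next
         +	@ block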
   5100 +
   5101 +	teq	r1,r2
   5102 +	bne	.Loop
   5103 +
   5104 +	add	sp,sp,#8*9		@ destroy frame
   5105 +	ldmia	sp!,{r4-r12,lr}
   5106 +	tst	lr,#1
   5107 +	moveq	pc,lr			@ be binary compatible with V4, yet
   5108 +	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
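         +	@ (0xe12fff1e is the encoding of "bx lr", emitted as a raw
         +	@ word so that pre-ARMv4T assemblers still accept the file)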
   5109 +.size   sha512_block_data_order,.-sha512_block_data_order
    5110 +.asciz  "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
   5111