1 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2 +++ openssl-0.9.8h/crypto/0.9.9-dev/README.android 2009-09-03 15:42:39.000000000 -0700 3 @@ -0,0 +1,6 @@ 4 +This directory does not exist in the OpenSSL distribution. 5 + 6 +It has been added to import assembler code from OpenSSL 0.9.9-dev 7 +(ftp://ftp.openssl.org/snapshot/). The assembler files (.s) were 8 +generated by running the Perl files (.pl), with ".align 2" appended 9 +where needed to avoid assembler error messages. 10 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 11 +++ openssl-0.9.8h/crypto/0.9.9-dev/aes/aes-armv4.pl 2009-09-03 15:42:39.000000000 -0700 12 @@ -0,0 +1,1030 @@ 13 +#!/usr/bin/env perl 14 + 15 +# ==================================================================== 16 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 17 +# project. The module is, however, dual licensed under OpenSSL and 18 +# CRYPTOGAMS licenses depending on where you obtain it. For further 19 +# details see http://www.openssl.org/~appro/cryptogams/. 20 +# ==================================================================== 21 + 22 +# AES for ARMv4 23 + 24 +# January 2007. 25 +# 26 +# The code uses a single 1K S-box and is >2 times faster than code 27 +# generated by gcc-3.4.1. This is thanks to a unique feature of the 28 +# ARMv4 ISA, which allows merging a logical or arithmetic operation 29 +# with a shift or rotate in one instruction and emitting the combined 30 +# result every cycle. The module is endian-neutral. The performance is 31 +# ~42 cycles/byte for a 128-bit key. 32 + 33 +# May 2007. 34 +# 35 +# AES_set_[en|de]crypt_key is added. 
36 + 37 +$s0="r0"; 38 +$s1="r1"; 39 +$s2="r2"; 40 +$s3="r3"; 41 +$t1="r4"; 42 +$t2="r5"; 43 +$t3="r6"; 44 +$i1="r7"; 45 +$i2="r8"; 46 +$i3="r9"; 47 + 48 +$tbl="r10"; 49 +$key="r11"; 50 +$rounds="r12"; 51 + 52 +$code=<<___; 53 +.text 54 +.code 32 55 + 56 +.type AES_Te,%object 57 +.align 5 58 +AES_Te: 59 +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d 60 +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 61 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d 62 +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a 63 +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 64 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b 65 +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea 66 +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b 67 +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a 68 +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f 69 +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 70 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f 71 +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e 72 +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 73 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d 74 +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f 75 +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e 76 +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb 77 +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce 78 +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 79 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c 80 +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed 81 +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b 82 +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a 83 +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 84 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 85 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 86 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 87 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a 88 +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 
0xf1f5f504 89 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 90 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d 91 +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f 92 +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 93 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 94 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 95 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f 96 +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 97 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c 98 +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 99 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e 100 +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 101 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 102 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b 103 +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 104 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 105 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 106 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 107 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 108 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 109 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 110 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 111 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa 112 +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 113 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 114 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 115 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 116 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 117 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 118 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a 119 +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 120 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 121 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 122 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a 123 +@ Te4[256] 124 +.byte 
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 125 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 126 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 127 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 128 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 129 +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 130 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 131 +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 132 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 133 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 134 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 135 +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 136 +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 137 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 138 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 139 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 140 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 141 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 142 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 143 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 144 +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 145 +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 146 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 147 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 148 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 149 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 150 +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 151 +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 152 +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 153 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 154 +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 155 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 156 +@ rcon[] 157 +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 158 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 159 +.word 0x1B000000, 
0x36000000, 0, 0, 0, 0, 0, 0 160 +.size AES_Te,.-AES_Te 161 + 162 +@ void AES_encrypt(const unsigned char *in, unsigned char *out, 163 +@ const AES_KEY *key) { 164 +.global AES_encrypt 165 +.type AES_encrypt,%function 166 +.align 5 167 +AES_encrypt: 168 + sub r3,pc,#8 @ AES_encrypt 169 + stmdb sp!,{r1,r4-r12,lr} 170 + mov $rounds,r0 @ inp 171 + mov $key,r2 172 + sub $tbl,r3,#AES_encrypt-AES_Te @ Te 173 + 174 + ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 175 + ldrb $t1,[$rounds,#2] @ manner... 176 + ldrb $t2,[$rounds,#1] 177 + ldrb $t3,[$rounds,#0] 178 + orr $s0,$s0,$t1,lsl#8 179 + orr $s0,$s0,$t2,lsl#16 180 + orr $s0,$s0,$t3,lsl#24 181 + ldrb $s1,[$rounds,#7] 182 + ldrb $t1,[$rounds,#6] 183 + ldrb $t2,[$rounds,#5] 184 + ldrb $t3,[$rounds,#4] 185 + orr $s1,$s1,$t1,lsl#8 186 + orr $s1,$s1,$t2,lsl#16 187 + orr $s1,$s1,$t3,lsl#24 188 + ldrb $s2,[$rounds,#11] 189 + ldrb $t1,[$rounds,#10] 190 + ldrb $t2,[$rounds,#9] 191 + ldrb $t3,[$rounds,#8] 192 + orr $s2,$s2,$t1,lsl#8 193 + orr $s2,$s2,$t2,lsl#16 194 + orr $s2,$s2,$t3,lsl#24 195 + ldrb $s3,[$rounds,#15] 196 + ldrb $t1,[$rounds,#14] 197 + ldrb $t2,[$rounds,#13] 198 + ldrb $t3,[$rounds,#12] 199 + orr $s3,$s3,$t1,lsl#8 200 + orr $s3,$s3,$t2,lsl#16 201 + orr $s3,$s3,$t3,lsl#24 202 + 203 + bl _armv4_AES_encrypt 204 + 205 + ldr $rounds,[sp],#4 @ pop out 206 + mov $t1,$s0,lsr#24 @ write output in endian-neutral 207 + mov $t2,$s0,lsr#16 @ manner... 
208 + mov $t3,$s0,lsr#8 209 + strb $t1,[$rounds,#0] 210 + strb $t2,[$rounds,#1] 211 + strb $t3,[$rounds,#2] 212 + strb $s0,[$rounds,#3] 213 + mov $t1,$s1,lsr#24 214 + mov $t2,$s1,lsr#16 215 + mov $t3,$s1,lsr#8 216 + strb $t1,[$rounds,#4] 217 + strb $t2,[$rounds,#5] 218 + strb $t3,[$rounds,#6] 219 + strb $s1,[$rounds,#7] 220 + mov $t1,$s2,lsr#24 221 + mov $t2,$s2,lsr#16 222 + mov $t3,$s2,lsr#8 223 + strb $t1,[$rounds,#8] 224 + strb $t2,[$rounds,#9] 225 + strb $t3,[$rounds,#10] 226 + strb $s2,[$rounds,#11] 227 + mov $t1,$s3,lsr#24 228 + mov $t2,$s3,lsr#16 229 + mov $t3,$s3,lsr#8 230 + strb $t1,[$rounds,#12] 231 + strb $t2,[$rounds,#13] 232 + strb $t3,[$rounds,#14] 233 + strb $s3,[$rounds,#15] 234 + 235 + ldmia sp!,{r4-r12,lr} 236 + tst lr,#1 237 + moveq pc,lr @ be binary compatible with V4, yet 238 + bx lr @ interoperable with Thumb ISA:-) 239 +.size AES_encrypt,.-AES_encrypt 240 + 241 +.type _armv4_AES_encrypt,%function 242 +.align 2 243 +_armv4_AES_encrypt: 244 + str lr,[sp,#-4]! @ push lr 245 + ldr $t1,[$key],#16 246 + ldr $t2,[$key,#-12] 247 + ldr $t3,[$key,#-8] 248 + ldr $i1,[$key,#-4] 249 + ldr $rounds,[$key,#240-16] 250 + eor $s0,$s0,$t1 251 + eor $s1,$s1,$t2 252 + eor $s2,$s2,$t3 253 + eor $s3,$s3,$i1 254 + sub $rounds,$rounds,#1 255 + mov lr,#255 256 + 257 +.Lenc_loop: 258 + and $i2,lr,$s0,lsr#8 259 + and $i3,lr,$s0,lsr#16 260 + and $i1,lr,$s0 261 + mov $s0,$s0,lsr#24 262 + ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0] 263 + ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24] 264 + ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8] 265 + ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16] 266 + 267 + and $i1,lr,$s1,lsr#16 @ i0 268 + and $i2,lr,$s1 269 + and $i3,lr,$s1,lsr#8 270 + mov $s1,$s1,lsr#24 271 + ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16] 272 + ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24] 273 + ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0] 274 + ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8] 275 + eor $s0,$s0,$i1,ror#8 276 + eor $s1,$s1,$t1,ror#24 277 + eor $t2,$t2,$i2,ror#8 278 + eor $t3,$t3,$i3,ror#8 279 + 280 + and 
$i1,lr,$s2,lsr#8 @ i0 281 + and $i2,lr,$s2,lsr#16 @ i1 282 + and $i3,lr,$s2 283 + mov $s2,$s2,lsr#24 284 + ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] 285 + ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] 286 + ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] 287 + ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] 288 + eor $s0,$s0,$i1,ror#16 289 + eor $s1,$s1,$i2,ror#8 290 + eor $s2,$s2,$t2,ror#16 291 + eor $t3,$t3,$i3,ror#16 292 + 293 + and $i1,lr,$s3 @ i0 294 + and $i2,lr,$s3,lsr#8 @ i1 295 + and $i3,lr,$s3,lsr#16 @ i2 296 + mov $s3,$s3,lsr#24 297 + ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] 298 + ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] 299 + ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] 300 + ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] 301 + eor $s0,$s0,$i1,ror#24 302 + eor $s1,$s1,$i2,ror#16 303 + eor $s2,$s2,$i3,ror#8 304 + eor $s3,$s3,$t3,ror#8 305 + 306 + ldr $t1,[$key],#16 307 + ldr $t2,[$key,#-12] 308 + ldr $t3,[$key,#-8] 309 + ldr $i1,[$key,#-4] 310 + eor $s0,$s0,$t1 311 + eor $s1,$s1,$t2 312 + eor $s2,$s2,$t3 313 + eor $s3,$s3,$i1 314 + 315 + subs $rounds,$rounds,#1 316 + bne .Lenc_loop 317 + 318 + add $tbl,$tbl,#2 319 + 320 + and $i1,lr,$s0 321 + and $i2,lr,$s0,lsr#8 322 + and $i3,lr,$s0,lsr#16 323 + mov $s0,$s0,lsr#24 324 + ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0] 325 + ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24] 326 + ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8] 327 + ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16] 328 + 329 + and $i1,lr,$s1,lsr#16 @ i0 330 + and $i2,lr,$s1 331 + and $i3,lr,$s1,lsr#8 332 + mov $s1,$s1,lsr#24 333 + ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16] 334 + ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24] 335 + ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0] 336 + ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8] 337 + eor $s0,$i1,$s0,lsl#8 338 + eor $s1,$t1,$s1,lsl#24 339 + eor $t2,$i2,$t2,lsl#8 340 + eor $t3,$i3,$t3,lsl#8 341 + 342 + and $i1,lr,$s2,lsr#8 @ i0 343 + and $i2,lr,$s2,lsr#16 @ i1 344 + and $i3,lr,$s2 345 + mov $s2,$s2,lsr#24 346 + ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] 347 + ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] 348 + 
ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] 349 + ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] 350 + eor $s0,$i1,$s0,lsl#8 351 + eor $s1,$s1,$i2,lsl#16 352 + eor $s2,$t2,$s2,lsl#24 353 + eor $t3,$i3,$t3,lsl#8 354 + 355 + and $i1,lr,$s3 @ i0 356 + and $i2,lr,$s3,lsr#8 @ i1 357 + and $i3,lr,$s3,lsr#16 @ i2 358 + mov $s3,$s3,lsr#24 359 + ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] 360 + ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] 361 + ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] 362 + ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] 363 + eor $s0,$i1,$s0,lsl#8 364 + eor $s1,$s1,$i2,lsl#8 365 + eor $s2,$s2,$i3,lsl#16 366 + eor $s3,$t3,$s3,lsl#24 367 + 368 + ldr lr,[sp],#4 @ pop lr 369 + ldr $t1,[$key,#0] 370 + ldr $t2,[$key,#4] 371 + ldr $t3,[$key,#8] 372 + ldr $i1,[$key,#12] 373 + eor $s0,$s0,$t1 374 + eor $s1,$s1,$t2 375 + eor $s2,$s2,$t3 376 + eor $s3,$s3,$i1 377 + 378 + sub $tbl,$tbl,#2 379 + mov pc,lr @ return 380 +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt 381 + 382 +.global AES_set_encrypt_key 383 +.type AES_set_encrypt_key,%function 384 +.align 5 385 +AES_set_encrypt_key: 386 + sub r3,pc,#8 @ AES_set_encrypt_key 387 + teq r0,#0 388 + moveq r0,#-1 389 + beq .Labrt 390 + teq r2,#0 391 + moveq r0,#-1 392 + beq .Labrt 393 + 394 + teq r1,#128 395 + beq .Lok 396 + teq r1,#192 397 + beq .Lok 398 + teq r1,#256 399 + movne r0,#-1 400 + bne .Labrt 401 + 402 +.Lok: stmdb sp!,{r4-r12,lr} 403 + sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 404 + 405 + mov $rounds,r0 @ inp 406 + mov lr,r1 @ bits 407 + mov $key,r2 @ key 408 + 409 + ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 410 + ldrb $t1,[$rounds,#2] @ manner... 
411 + ldrb $t2,[$rounds,#1] 412 + ldrb $t3,[$rounds,#0] 413 + orr $s0,$s0,$t1,lsl#8 414 + orr $s0,$s0,$t2,lsl#16 415 + orr $s0,$s0,$t3,lsl#24 416 + ldrb $s1,[$rounds,#7] 417 + ldrb $t1,[$rounds,#6] 418 + ldrb $t2,[$rounds,#5] 419 + ldrb $t3,[$rounds,#4] 420 + orr $s1,$s1,$t1,lsl#8 421 + orr $s1,$s1,$t2,lsl#16 422 + orr $s1,$s1,$t3,lsl#24 423 + ldrb $s2,[$rounds,#11] 424 + ldrb $t1,[$rounds,#10] 425 + ldrb $t2,[$rounds,#9] 426 + ldrb $t3,[$rounds,#8] 427 + orr $s2,$s2,$t1,lsl#8 428 + orr $s2,$s2,$t2,lsl#16 429 + orr $s2,$s2,$t3,lsl#24 430 + ldrb $s3,[$rounds,#15] 431 + ldrb $t1,[$rounds,#14] 432 + ldrb $t2,[$rounds,#13] 433 + ldrb $t3,[$rounds,#12] 434 + orr $s3,$s3,$t1,lsl#8 435 + orr $s3,$s3,$t2,lsl#16 436 + orr $s3,$s3,$t3,lsl#24 437 + str $s0,[$key],#16 438 + str $s1,[$key,#-12] 439 + str $s2,[$key,#-8] 440 + str $s3,[$key,#-4] 441 + 442 + teq lr,#128 443 + bne .Lnot128 444 + mov $rounds,#10 445 + str $rounds,[$key,#240-16] 446 + add $t3,$tbl,#256 @ rcon 447 + mov lr,#255 448 + 449 +.L128_loop: 450 + and $t2,lr,$s3,lsr#24 451 + and $i1,lr,$s3,lsr#16 452 + and $i2,lr,$s3,lsr#8 453 + and $i3,lr,$s3 454 + ldrb $t2,[$tbl,$t2] 455 + ldrb $i1,[$tbl,$i1] 456 + ldrb $i2,[$tbl,$i2] 457 + ldrb $i3,[$tbl,$i3] 458 + ldr $t1,[$t3],#4 @ rcon[i++] 459 + orr $t2,$t2,$i1,lsl#24 460 + orr $t2,$t2,$i2,lsl#16 461 + orr $t2,$t2,$i3,lsl#8 462 + eor $t2,$t2,$t1 463 + eor $s0,$s0,$t2 @ rk[4]=rk[0]^... 
464 + eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4] 465 + eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5] 466 + eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6] 467 + str $s0,[$key],#16 468 + str $s1,[$key,#-12] 469 + str $s2,[$key,#-8] 470 + str $s3,[$key,#-4] 471 + 472 + subs $rounds,$rounds,#1 473 + bne .L128_loop 474 + sub r2,$key,#176 475 + b .Ldone 476 + 477 +.Lnot128: 478 + ldrb $i2,[$rounds,#19] 479 + ldrb $t1,[$rounds,#18] 480 + ldrb $t2,[$rounds,#17] 481 + ldrb $t3,[$rounds,#16] 482 + orr $i2,$i2,$t1,lsl#8 483 + orr $i2,$i2,$t2,lsl#16 484 + orr $i2,$i2,$t3,lsl#24 485 + ldrb $i3,[$rounds,#23] 486 + ldrb $t1,[$rounds,#22] 487 + ldrb $t2,[$rounds,#21] 488 + ldrb $t3,[$rounds,#20] 489 + orr $i3,$i3,$t1,lsl#8 490 + orr $i3,$i3,$t2,lsl#16 491 + orr $i3,$i3,$t3,lsl#24 492 + str $i2,[$key],#8 493 + str $i3,[$key,#-4] 494 + 495 + teq lr,#192 496 + bne .Lnot192 497 + mov $rounds,#12 498 + str $rounds,[$key,#240-24] 499 + add $t3,$tbl,#256 @ rcon 500 + mov lr,#255 501 + mov $rounds,#8 502 + 503 +.L192_loop: 504 + and $t2,lr,$i3,lsr#24 505 + and $i1,lr,$i3,lsr#16 506 + and $i2,lr,$i3,lsr#8 507 + and $i3,lr,$i3 508 + ldrb $t2,[$tbl,$t2] 509 + ldrb $i1,[$tbl,$i1] 510 + ldrb $i2,[$tbl,$i2] 511 + ldrb $i3,[$tbl,$i3] 512 + ldr $t1,[$t3],#4 @ rcon[i++] 513 + orr $t2,$t2,$i1,lsl#24 514 + orr $t2,$t2,$i2,lsl#16 515 + orr $t2,$t2,$i3,lsl#8 516 + eor $i3,$t2,$t1 517 + eor $s0,$s0,$i3 @ rk[6]=rk[0]^... 
518 + eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6] 519 + eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7] 520 + eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8] 521 + str $s0,[$key],#24 522 + str $s1,[$key,#-20] 523 + str $s2,[$key,#-16] 524 + str $s3,[$key,#-12] 525 + 526 + subs $rounds,$rounds,#1 527 + subeq r2,$key,#216 528 + beq .Ldone 529 + 530 + ldr $i1,[$key,#-32] 531 + ldr $i2,[$key,#-28] 532 + eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9] 533 + eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10] 534 + str $i1,[$key,#-8] 535 + str $i3,[$key,#-4] 536 + b .L192_loop 537 + 538 +.Lnot192: 539 + ldrb $i2,[$rounds,#27] 540 + ldrb $t1,[$rounds,#26] 541 + ldrb $t2,[$rounds,#25] 542 + ldrb $t3,[$rounds,#24] 543 + orr $i2,$i2,$t1,lsl#8 544 + orr $i2,$i2,$t2,lsl#16 545 + orr $i2,$i2,$t3,lsl#24 546 + ldrb $i3,[$rounds,#31] 547 + ldrb $t1,[$rounds,#30] 548 + ldrb $t2,[$rounds,#29] 549 + ldrb $t3,[$rounds,#28] 550 + orr $i3,$i3,$t1,lsl#8 551 + orr $i3,$i3,$t2,lsl#16 552 + orr $i3,$i3,$t3,lsl#24 553 + str $i2,[$key],#8 554 + str $i3,[$key,#-4] 555 + 556 + mov $rounds,#14 557 + str $rounds,[$key,#240-32] 558 + add $t3,$tbl,#256 @ rcon 559 + mov lr,#255 560 + mov $rounds,#7 561 + 562 +.L256_loop: 563 + and $t2,lr,$i3,lsr#24 564 + and $i1,lr,$i3,lsr#16 565 + and $i2,lr,$i3,lsr#8 566 + and $i3,lr,$i3 567 + ldrb $t2,[$tbl,$t2] 568 + ldrb $i1,[$tbl,$i1] 569 + ldrb $i2,[$tbl,$i2] 570 + ldrb $i3,[$tbl,$i3] 571 + ldr $t1,[$t3],#4 @ rcon[i++] 572 + orr $t2,$t2,$i1,lsl#24 573 + orr $t2,$t2,$i2,lsl#16 574 + orr $t2,$t2,$i3,lsl#8 575 + eor $i3,$t2,$t1 576 + eor $s0,$s0,$i3 @ rk[8]=rk[0]^... 
577 + eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8] 578 + eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9] 579 + eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10] 580 + str $s0,[$key],#32 581 + str $s1,[$key,#-28] 582 + str $s2,[$key,#-24] 583 + str $s3,[$key,#-20] 584 + 585 + subs $rounds,$rounds,#1 586 + subeq r2,$key,#256 587 + beq .Ldone 588 + 589 + and $t2,lr,$s3 590 + and $i1,lr,$s3,lsr#8 591 + and $i2,lr,$s3,lsr#16 592 + and $i3,lr,$s3,lsr#24 593 + ldrb $t2,[$tbl,$t2] 594 + ldrb $i1,[$tbl,$i1] 595 + ldrb $i2,[$tbl,$i2] 596 + ldrb $i3,[$tbl,$i3] 597 + orr $t2,$t2,$i1,lsl#8 598 + orr $t2,$t2,$i2,lsl#16 599 + orr $t2,$t2,$i3,lsl#24 600 + 601 + ldr $t1,[$key,#-48] 602 + ldr $i1,[$key,#-44] 603 + ldr $i2,[$key,#-40] 604 + ldr $i3,[$key,#-36] 605 + eor $t1,$t1,$t2 @ rk[12]=rk[4]^... 606 + eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12] 607 + eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13] 608 + eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14] 609 + str $t1,[$key,#-16] 610 + str $i1,[$key,#-12] 611 + str $i2,[$key,#-8] 612 + str $i3,[$key,#-4] 613 + b .L256_loop 614 + 615 +.Ldone: mov r0,#0 616 + ldmia sp!,{r4-r12,lr} 617 +.Labrt: tst lr,#1 618 + moveq pc,lr @ be binary compatible with V4, yet 619 + bx lr @ interoperable with Thumb ISA:-) 620 +.size AES_set_encrypt_key,.-AES_set_encrypt_key 621 + 622 +.global AES_set_decrypt_key 623 +.type AES_set_decrypt_key,%function 624 +.align 5 625 +AES_set_decrypt_key: 626 + str lr,[sp,#-4]! 
@ push lr 627 + bl AES_set_encrypt_key 628 + teq r0,#0 629 + ldrne lr,[sp],#4 @ pop lr 630 + bne .Labrt 631 + 632 + stmdb sp!,{r4-r12} 633 + 634 + ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, 635 + mov $key,r2 @ which is AES_KEY *key 636 + mov $i1,r2 637 + add $i2,r2,$rounds,lsl#4 638 + 639 +.Linv: ldr $s0,[$i1] 640 + ldr $s1,[$i1,#4] 641 + ldr $s2,[$i1,#8] 642 + ldr $s3,[$i1,#12] 643 + ldr $t1,[$i2] 644 + ldr $t2,[$i2,#4] 645 + ldr $t3,[$i2,#8] 646 + ldr $i3,[$i2,#12] 647 + str $s0,[$i2],#-16 648 + str $s1,[$i2,#16+4] 649 + str $s2,[$i2,#16+8] 650 + str $s3,[$i2,#16+12] 651 + str $t1,[$i1],#16 652 + str $t2,[$i1,#-12] 653 + str $t3,[$i1,#-8] 654 + str $i3,[$i1,#-4] 655 + teq $i1,$i2 656 + bne .Linv 657 +___ 658 +$mask80=$i1; 659 +$mask1b=$i2; 660 +$mask7f=$i3; 661 +$code.=<<___; 662 + ldr $s0,[$key,#16]! @ prefetch tp1 663 + mov $mask80,#0x80 664 + mov $mask1b,#0x1b 665 + orr $mask80,$mask80,#0x8000 666 + orr $mask1b,$mask1b,#0x1b00 667 + orr $mask80,$mask80,$mask80,lsl#16 668 + orr $mask1b,$mask1b,$mask1b,lsl#16 669 + sub $rounds,$rounds,#1 670 + mvn $mask7f,$mask80 671 + mov $rounds,$rounds,lsl#2 @ (rounds-1)*4 672 + 673 +.Lmix: and $t1,$s0,$mask80 674 + and $s1,$s0,$mask7f 675 + sub $t1,$t1,$t1,lsr#7 676 + and $t1,$t1,$mask1b 677 + eor $s1,$t1,$s1,lsl#1 @ tp2 678 + 679 + and $t1,$s1,$mask80 680 + and $s2,$s1,$mask7f 681 + sub $t1,$t1,$t1,lsr#7 682 + and $t1,$t1,$mask1b 683 + eor $s2,$t1,$s2,lsl#1 @ tp4 684 + 685 + and $t1,$s2,$mask80 686 + and $s3,$s2,$mask7f 687 + sub $t1,$t1,$t1,lsr#7 688 + and $t1,$t1,$mask1b 689 + eor $s3,$t1,$s3,lsl#1 @ tp8 690 + 691 + eor $t1,$s1,$s2 692 + eor $t2,$s0,$s3 @ tp9 693 + eor $t1,$t1,$s3 @ tpe 694 + eor $t1,$t1,$s1,ror#24 695 + eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) 696 + eor $t1,$t1,$s2,ror#16 697 + eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) 698 + eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24) 699 + 700 + ldr $s0,[$key,#4] @ prefetch tp1 701 + str $t1,[$key],#4 702 + subs $rounds,$rounds,#1 703 + 
bne .Lmix 704 + 705 + mov r0,#0 706 + ldmia sp!,{r4-r12,lr} 707 + tst lr,#1 708 + moveq pc,lr @ be binary compatible with V4, yet 709 + bx lr @ interoperable with Thumb ISA:-) 710 +.size AES_set_decrypt_key,.-AES_set_decrypt_key 711 + 712 +.type AES_Td,%object 713 +.align 5 714 +AES_Td: 715 +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 716 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 717 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 718 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f 719 +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 720 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 721 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da 722 +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 723 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd 724 +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 725 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 726 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 727 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 728 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a 729 +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 730 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c 731 +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 732 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a 733 +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 734 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 735 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 736 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff 737 +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 738 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb 739 +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 740 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e 741 +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 742 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a 743 +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e 744 +.word 0x80c0c54f, 
0x61dc20a2, 0x5a774b69, 0x1c121a16 745 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d 746 +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 747 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd 748 +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 749 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 750 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 751 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d 752 +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 753 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 754 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef 755 +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 756 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 757 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 758 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 759 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 760 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b 761 +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 762 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 763 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 764 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 765 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 766 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f 767 +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df 768 +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f 769 +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e 770 +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 771 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 772 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c 773 +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf 774 +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 775 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f 776 +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 777 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 778 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 
0xd0b85742 779 +@ Td4[256] 780 +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 781 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 782 +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 783 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 784 +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 785 +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 786 +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 787 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 788 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 789 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 790 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 791 +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 792 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 793 +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 794 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 795 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 796 +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 797 +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 798 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 799 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 800 +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 801 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 802 +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 803 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 804 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 805 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 806 +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 807 +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 808 +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 809 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 810 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 811 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 812 +.size AES_Td,.-AES_Td 813 + 814 +@ void AES_decrypt(const unsigned char *in, unsigned char *out, 815 +@ const 
AES_KEY *key) { 816 +.global AES_decrypt 817 +.type AES_decrypt,%function 818 +.align 5 819 +AES_decrypt: 820 + sub r3,pc,#8 @ AES_decrypt 821 + stmdb sp!,{r1,r4-r12,lr} 822 + mov $rounds,r0 @ inp 823 + mov $key,r2 824 + sub $tbl,r3,#AES_decrypt-AES_Td @ Td 825 + 826 + ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 827 + ldrb $t1,[$rounds,#2] @ manner... 828 + ldrb $t2,[$rounds,#1] 829 + ldrb $t3,[$rounds,#0] 830 + orr $s0,$s0,$t1,lsl#8 831 + orr $s0,$s0,$t2,lsl#16 832 + orr $s0,$s0,$t3,lsl#24 833 + ldrb $s1,[$rounds,#7] 834 + ldrb $t1,[$rounds,#6] 835 + ldrb $t2,[$rounds,#5] 836 + ldrb $t3,[$rounds,#4] 837 + orr $s1,$s1,$t1,lsl#8 838 + orr $s1,$s1,$t2,lsl#16 839 + orr $s1,$s1,$t3,lsl#24 840 + ldrb $s2,[$rounds,#11] 841 + ldrb $t1,[$rounds,#10] 842 + ldrb $t2,[$rounds,#9] 843 + ldrb $t3,[$rounds,#8] 844 + orr $s2,$s2,$t1,lsl#8 845 + orr $s2,$s2,$t2,lsl#16 846 + orr $s2,$s2,$t3,lsl#24 847 + ldrb $s3,[$rounds,#15] 848 + ldrb $t1,[$rounds,#14] 849 + ldrb $t2,[$rounds,#13] 850 + ldrb $t3,[$rounds,#12] 851 + orr $s3,$s3,$t1,lsl#8 852 + orr $s3,$s3,$t2,lsl#16 853 + orr $s3,$s3,$t3,lsl#24 854 + 855 + bl _armv4_AES_decrypt 856 + 857 + ldr $rounds,[sp],#4 @ pop out 858 + mov $t1,$s0,lsr#24 @ write output in endian-neutral 859 + mov $t2,$s0,lsr#16 @ manner... 
860 + mov $t3,$s0,lsr#8 861 + strb $t1,[$rounds,#0] 862 + strb $t2,[$rounds,#1] 863 + strb $t3,[$rounds,#2] 864 + strb $s0,[$rounds,#3] 865 + mov $t1,$s1,lsr#24 866 + mov $t2,$s1,lsr#16 867 + mov $t3,$s1,lsr#8 868 + strb $t1,[$rounds,#4] 869 + strb $t2,[$rounds,#5] 870 + strb $t3,[$rounds,#6] 871 + strb $s1,[$rounds,#7] 872 + mov $t1,$s2,lsr#24 873 + mov $t2,$s2,lsr#16 874 + mov $t3,$s2,lsr#8 875 + strb $t1,[$rounds,#8] 876 + strb $t2,[$rounds,#9] 877 + strb $t3,[$rounds,#10] 878 + strb $s2,[$rounds,#11] 879 + mov $t1,$s3,lsr#24 880 + mov $t2,$s3,lsr#16 881 + mov $t3,$s3,lsr#8 882 + strb $t1,[$rounds,#12] 883 + strb $t2,[$rounds,#13] 884 + strb $t3,[$rounds,#14] 885 + strb $s3,[$rounds,#15] 886 + 887 + ldmia sp!,{r4-r12,lr} 888 + tst lr,#1 889 + moveq pc,lr @ be binary compatible with V4, yet 890 + bx lr @ interoperable with Thumb ISA:-) 891 +.size AES_decrypt,.-AES_decrypt 892 + 893 +.type _armv4_AES_decrypt,%function 894 +.align 2 895 +_armv4_AES_decrypt: 896 + str lr,[sp,#-4]! @ push lr 897 + ldr $t1,[$key],#16 898 + ldr $t2,[$key,#-12] 899 + ldr $t3,[$key,#-8] 900 + ldr $i1,[$key,#-4] 901 + ldr $rounds,[$key,#240-16] 902 + eor $s0,$s0,$t1 903 + eor $s1,$s1,$t2 904 + eor $s2,$s2,$t3 905 + eor $s3,$s3,$i1 906 + sub $rounds,$rounds,#1 907 + mov lr,#255 908 + 909 +.Ldec_loop: 910 + and $i1,lr,$s0,lsr#16 911 + and $i2,lr,$s0,lsr#8 912 + and $i3,lr,$s0 913 + mov $s0,$s0,lsr#24 914 + ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16] 915 + ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24] 916 + ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8] 917 + ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0] 918 + 919 + and $i1,lr,$s1 @ i0 920 + and $i2,lr,$s1,lsr#16 921 + and $i3,lr,$s1,lsr#8 922 + mov $s1,$s1,lsr#24 923 + ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0] 924 + ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24] 925 + ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16] 926 + ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8] 927 + eor $s0,$s0,$i1,ror#24 928 + eor $s1,$s1,$t1,ror#8 929 + eor $t2,$i2,$t2,ror#8 930 + eor $t3,$i3,$t3,ror#8 931 + 932 + and 
$i1,lr,$s2,lsr#8 @ i0 933 + and $i2,lr,$s2 @ i1 934 + and $i3,lr,$s2,lsr#16 935 + mov $s2,$s2,lsr#24 936 + ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] 937 + ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] 938 + ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] 939 + ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] 940 + eor $s0,$s0,$i1,ror#16 941 + eor $s1,$s1,$i2,ror#24 942 + eor $s2,$s2,$t2,ror#8 943 + eor $t3,$i3,$t3,ror#8 944 + 945 + and $i1,lr,$s3,lsr#16 @ i0 946 + and $i2,lr,$s3,lsr#8 @ i1 947 + and $i3,lr,$s3 @ i2 948 + mov $s3,$s3,lsr#24 949 + ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] 950 + ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] 951 + ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] 952 + ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] 953 + eor $s0,$s0,$i1,ror#8 954 + eor $s1,$s1,$i2,ror#16 955 + eor $s2,$s2,$i3,ror#24 956 + eor $s3,$s3,$t3,ror#8 957 + 958 + ldr $t1,[$key],#16 959 + ldr $t2,[$key,#-12] 960 + ldr $t3,[$key,#-8] 961 + ldr $i1,[$key,#-4] 962 + eor $s0,$s0,$t1 963 + eor $s1,$s1,$t2 964 + eor $s2,$s2,$t3 965 + eor $s3,$s3,$i1 966 + 967 + subs $rounds,$rounds,#1 968 + bne .Ldec_loop 969 + 970 + add $tbl,$tbl,#1024 971 + 972 + ldr $t1,[$tbl,#0] @ prefetch Td4 973 + ldr $t2,[$tbl,#32] 974 + ldr $t3,[$tbl,#64] 975 + ldr $i1,[$tbl,#96] 976 + ldr $i2,[$tbl,#128] 977 + ldr $i3,[$tbl,#160] 978 + ldr $t1,[$tbl,#192] 979 + ldr $t2,[$tbl,#224] 980 + 981 + and $i1,lr,$s0,lsr#16 982 + and $i2,lr,$s0,lsr#8 983 + and $i3,lr,$s0 984 + ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24] 985 + ldrb $t1,[$tbl,$i1] @ Td4[s0>>16] 986 + ldrb $t2,[$tbl,$i2] @ Td4[s0>>8] 987 + ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] 988 + 989 + and $i1,lr,$s1 @ i0 990 + and $i2,lr,$s1,lsr#16 991 + and $i3,lr,$s1,lsr#8 992 + ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] 993 + ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] 994 + ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] 995 + ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] 996 + eor $s0,$i1,$s0,lsl#24 997 + eor $s1,$t1,$s1,lsl#8 998 + eor $t2,$t2,$i2,lsl#8 999 + eor $t3,$t3,$i3,lsl#8 1000 + 1001 + and $i1,lr,$s2,lsr#8 @ i0 1002 + and $i2,lr,$s2 @ i1 1003 
+ and $i3,lr,$s2,lsr#16 1004 + ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] 1005 + ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] 1006 + ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] 1007 + ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] 1008 + eor $s0,$s0,$i1,lsl#8 1009 + eor $s1,$i2,$s1,lsl#16 1010 + eor $s2,$t2,$s2,lsl#16 1011 + eor $t3,$t3,$i3,lsl#16 1012 + 1013 + and $i1,lr,$s3,lsr#16 @ i0 1014 + and $i2,lr,$s3,lsr#8 @ i1 1015 + and $i3,lr,$s3 @ i2 1016 + ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] 1017 + ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] 1018 + ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] 1019 + ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] 1020 + eor $s0,$s0,$i1,lsl#16 1021 + eor $s1,$s1,$i2,lsl#8 1022 + eor $s2,$i3,$s2,lsl#8 1023 + eor $s3,$t3,$s3,lsl#24 1024 + 1025 + ldr lr,[sp],#4 @ pop lr 1026 + ldr $t1,[$key,#0] 1027 + ldr $t2,[$key,#4] 1028 + ldr $t3,[$key,#8] 1029 + ldr $i1,[$key,#12] 1030 + eor $s0,$s0,$t1 1031 + eor $s1,$s1,$t2 1032 + eor $s2,$s2,$t3 1033 + eor $s3,$s3,$i1 1034 + 1035 + sub $tbl,$tbl,#1024 1036 + mov pc,lr @ return 1037 +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt 1038 +.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 1039 +___ 1040 + 1041 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 1042 +print $code; 1043 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 1044 +++ openssl-0.9.8h/crypto/0.9.9-dev/aes/aes-armv4.s 2009-09-03 15:42:39.000000000 -0700 1045 @@ -0,0 +1,982 @@ 1046 +.text 1047 +.code 32 1048 + 1049 +.type AES_Te,%object 1050 +.align 5 1051 +AES_Te: 1052 +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d 1053 +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 1054 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d 1055 +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a 1056 +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 1057 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b 1058 +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea 1059 +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b 1060 +.word 0x75b7b7c2, 
0xe1fdfd1c, 0x3d9393ae, 0x4c26266a 1061 +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f 1062 +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 1063 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f 1064 +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e 1065 +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 1066 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d 1067 +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f 1068 +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e 1069 +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb 1070 +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce 1071 +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 1072 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c 1073 +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed 1074 +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b 1075 +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a 1076 +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 1077 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 1078 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 1079 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 1080 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a 1081 +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 1082 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 1083 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d 1084 +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f 1085 +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 1086 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 1087 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 1088 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f 1089 +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 1090 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c 1091 +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 1092 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e 1093 +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 1094 +.word 
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 1095 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b 1096 +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 1097 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 1098 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 1099 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 1100 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 1101 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 1102 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 1103 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 1104 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa 1105 +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 1106 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 1107 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 1108 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 1109 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 1110 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 1111 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a 1112 +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 1113 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 1114 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 1115 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a 1116 +@ Te4[256] 1117 +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 1118 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 1119 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 1120 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 1121 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 1122 +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 1123 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 1124 +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 1125 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 1126 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 1127 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 1128 +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 
0x58, 0xcf 1129 +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 1130 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 1131 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 1132 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 1133 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 1134 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 1135 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 1136 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 1137 +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 1138 +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 1139 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 1140 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 1141 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 1142 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 1143 +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 1144 +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 1145 +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 1146 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 1147 +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 1148 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 1149 +@ rcon[] 1150 +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 1151 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 1152 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 1153 +.size AES_Te,.-AES_Te 1154 + 1155 +@ void AES_encrypt(const unsigned char *in, unsigned char *out, 1156 +@ const AES_KEY *key) { 1157 +.global AES_encrypt 1158 +.type AES_encrypt,%function 1159 +.align 5 1160 +AES_encrypt: 1161 + sub r3,pc,#8 @ AES_encrypt 1162 + stmdb sp!,{r1,r4-r12,lr} 1163 + mov r12,r0 @ inp 1164 + mov r11,r2 1165 + sub r10,r3,#AES_encrypt-AES_Te @ Te 1166 + 1167 + ldrb r0,[r12,#3] @ load input data in endian-neutral 1168 + ldrb r4,[r12,#2] @ manner... 
1169 + ldrb r5,[r12,#1] 1170 + ldrb r6,[r12,#0] 1171 + orr r0,r0,r4,lsl#8 1172 + orr r0,r0,r5,lsl#16 1173 + orr r0,r0,r6,lsl#24 @ r0 = in[0..3], in[0] in the top byte 1174 + ldrb r1,[r12,#7] 1175 + ldrb r4,[r12,#6] 1176 + ldrb r5,[r12,#5] 1177 + ldrb r6,[r12,#4] 1178 + orr r1,r1,r4,lsl#8 1179 + orr r1,r1,r5,lsl#16 1180 + orr r1,r1,r6,lsl#24 @ r1 = in[4..7], same byte order 1181 + ldrb r2,[r12,#11] 1182 + ldrb r4,[r12,#10] 1183 + ldrb r5,[r12,#9] 1184 + ldrb r6,[r12,#8] 1185 + orr r2,r2,r4,lsl#8 1186 + orr r2,r2,r5,lsl#16 1187 + orr r2,r2,r6,lsl#24 @ r2 = in[8..11] 1188 + ldrb r3,[r12,#15] 1189 + ldrb r4,[r12,#14] 1190 + ldrb r5,[r12,#13] 1191 + ldrb r6,[r12,#12] 1192 + orr r3,r3,r4,lsl#8 1193 + orr r3,r3,r5,lsl#16 1194 + orr r3,r3,r6,lsl#24 @ r3 = in[12..15] 1195 + 1196 + bl _armv4_AES_encrypt @ block in r0-r3, key pointer in r11, table base in r10 1197 + 1198 + ldr r12,[sp],#4 @ pop out 1199 + mov r4,r0,lsr#24 @ write output in endian-neutral 1200 + mov r5,r0,lsr#16 @ manner... 1201 + mov r6,r0,lsr#8 1202 + strb r4,[r12,#0] 1203 + strb r5,[r12,#1] 1204 + strb r6,[r12,#2] 1205 + strb r0,[r12,#3] @ out[0..3] = r0, top byte first 1206 + mov r4,r1,lsr#24 1207 + mov r5,r1,lsr#16 1208 + mov r6,r1,lsr#8 1209 + strb r4,[r12,#4] 1210 + strb r5,[r12,#5] 1211 + strb r6,[r12,#6] 1212 + strb r1,[r12,#7] @ out[4..7] = r1 1213 + mov r4,r2,lsr#24 1214 + mov r5,r2,lsr#16 1215 + mov r6,r2,lsr#8 1216 + strb r4,[r12,#8] 1217 + strb r5,[r12,#9] 1218 + strb r6,[r12,#10] 1219 + strb r2,[r12,#11] @ out[8..11] = r2 1220 + mov r4,r3,lsr#24 1221 + mov r5,r3,lsr#16 1222 + mov r6,r3,lsr#8 1223 + strb r4,[r12,#12] 1224 + strb r5,[r12,#13] 1225 + strb r6,[r12,#14] 1226 + strb r3,[r12,#15] @ out[12..15] = r3 1227 + 1228 + ldmia sp!,{r4-r12,lr} @ restore the remaining registers saved on entry (r1 was popped above) 1229 + tst lr,#1 @ bit 0 of lr set: skip the mov and return through the .word below 1230 + moveq pc,lr @ be binary compatible with V4, yet 1231 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 1232 +.size AES_encrypt,.-AES_encrypt 1233 + @ _armv4_AES_encrypt: block in r0-r3 (input and result), r10 = AES_Te, r11 = key schedule (advanced on return); r4-r9 and r12 are overwritten 1234 +.type _armv4_AES_encrypt,%function 1235 +.align 2 1236 +_armv4_AES_encrypt: 1237 + str lr,[sp,#-4]! 
@ push lr 1238 + ldr r4,[r11],#16 @ first round key; r11 steps on to the next one 1239 + ldr r5,[r11,#-12] 1240 + ldr r6,[r11,#-8] 1241 + ldr r7,[r11,#-4] 1242 + ldr r12,[r11,#240-16] @ loop count comes from the word at offset 240 of the key structure 1243 + eor r0,r0,r4 1244 + eor r1,r1,r5 1245 + eor r2,r2,r6 1246 + eor r3,r3,r7 @ block ^= first round key 1247 + sub r12,r12,#1 @ the final round is handled after the loop 1248 + mov lr,#255 @ lr = byte mask used to form table indices 1249 + 1250 +.Lenc_loop: 1251 + and r8,lr,r0,lsr#8 1252 + and r9,lr,r0,lsr#16 1253 + and r7,lr,r0 1254 + mov r0,r0,lsr#24 1255 + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] 1256 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] 1257 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] 1258 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] 1259 + 1260 + and r7,lr,r1,lsr#16 @ i0 1261 + and r8,lr,r1 1262 + and r9,lr,r1,lsr#8 1263 + mov r1,r1,lsr#24 1264 + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] 1265 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] 1266 + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] 1267 + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] 1268 + eor r0,r0,r7,ror#8 1269 + eor r1,r1,r4,ror#24 1270 + eor r5,r5,r8,ror#8 1271 + eor r6,r6,r9,ror#8 1272 + 1273 + and r7,lr,r2,lsr#8 @ i0 1274 + and r8,lr,r2,lsr#16 @ i1 1275 + and r9,lr,r2 1276 + mov r2,r2,lsr#24 1277 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] 1278 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] 1279 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] 1280 + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] 1281 + eor r0,r0,r7,ror#16 1282 + eor r1,r1,r8,ror#8 1283 + eor r2,r2,r5,ror#16 1284 + eor r6,r6,r9,ror#16 1285 + 1286 + and r7,lr,r3 @ i0 1287 + and r8,lr,r3,lsr#8 @ i1 1288 + and r9,lr,r3,lsr#16 @ i2 1289 + mov r3,r3,lsr#24 1290 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] 1291 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] 1292 + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] 1293 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] 1294 + eor r0,r0,r7,ror#24 1295 + eor r1,r1,r8,ror#16 1296 + eor r2,r2,r9,ror#8 1297 + eor r3,r3,r6,ror#8 1298 + 1299 + ldr r4,[r11],#16 @ next round key; r11 steps on again 1300 + ldr r5,[r11,#-12] 1301 + ldr r6,[r11,#-8] 1302 + ldr r7,[r11,#-4] 1303 + eor r0,r0,r4 1304 + eor r1,r1,r5 1305 + eor r2,r2,r6 1306 + eor r3,r3,r7 @ block ^= round key 1307 + 1308 + subs r12,r12,#1 1309 + bne .Lenc_loop 1310 + 1311 + add r10,r10,#2 @ the byte loads below read the byte at offset 2 of each 4-byte table entry 1312 + 1313 + 
and r7,lr,r0 1314 + and r8,lr,r0,lsr#8 1315 + and r9,lr,r0,lsr#16 1316 + mov r0,r0,lsr#24 1317 + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] 1318 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] 1319 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] 1320 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] 1321 + 1322 + and r7,lr,r1,lsr#16 @ i0 1323 + and r8,lr,r1 1324 + and r9,lr,r1,lsr#8 1325 + mov r1,r1,lsr#24 1326 + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] 1327 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] 1328 + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] 1329 + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] 1330 + eor r0,r7,r0,lsl#8 1331 + eor r1,r4,r1,lsl#24 1332 + eor r5,r8,r5,lsl#8 1333 + eor r6,r9,r6,lsl#8 1334 + 1335 + and r7,lr,r2,lsr#8 @ i0 1336 + and r8,lr,r2,lsr#16 @ i1 1337 + and r9,lr,r2 1338 + mov r2,r2,lsr#24 1339 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] 1340 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] 1341 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] 1342 + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] 1343 + eor r0,r7,r0,lsl#8 1344 + eor r1,r1,r8,lsl#16 1345 + eor r2,r5,r2,lsl#24 1346 + eor r6,r9,r6,lsl#8 1347 + 1348 + and r7,lr,r3 @ i0 1349 + and r8,lr,r3,lsr#8 @ i1 1350 + and r9,lr,r3,lsr#16 @ i2 1351 + mov r3,r3,lsr#24 1352 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] 1353 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] 1354 + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] 1355 + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] 1356 + eor r0,r7,r0,lsl#8 1357 + eor r1,r1,r8,lsl#8 1358 + eor r2,r2,r9,lsl#16 1359 + eor r3,r6,r3,lsl#24 1360 + 1361 + ldr lr,[sp],#4 @ pop lr 1362 + ldr r4,[r11,#0] 1363 + ldr r5,[r11,#4] 1364 + ldr r6,[r11,#8] 1365 + ldr r7,[r11,#12] 1366 + eor r0,r0,r4 1367 + eor r1,r1,r5 1368 + eor r2,r2,r6 1369 + eor r3,r3,r7 @ block ^= last round key 1370 + 1371 + sub r10,r10,#2 @ undo the +2 so r10 is the table base again 1372 + mov pc,lr @ return 1373 +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt 1374 + @ AES_set_encrypt_key: r0 = key bytes, r1 = bits, r2 = key structure; r0 returns 0, or -1 for a null pointer or a bit count other than 128/192/256 1375 +.global AES_set_encrypt_key 1376 +.type AES_set_encrypt_key,%function 1377 +.align 5 1378 +AES_set_encrypt_key: 1379 + sub r3,pc,#8 @ AES_set_encrypt_key 1380 + teq r0,#0 1381 + moveq r0,#-1 @ null input pointer: return -1 1382 + beq .Labrt 1383 + teq 
r2,#0 1384 + moveq r0,#-1 @ null key-structure pointer: return -1 1385 + beq .Labrt 1386 + 1387 + teq r1,#128 1388 + beq .Lok 1389 + teq r1,#192 1390 + beq .Lok 1391 + teq r1,#256 1392 + movne r0,#-1 @ bits is none of 128, 192, 256: return -1 1393 + bne .Labrt 1394 + 1395 +.Lok: stmdb sp!,{r4-r12,lr} 1396 + sub r10,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 1397 + 1398 + mov r12,r0 @ inp 1399 + mov lr,r1 @ bits 1400 + mov r11,r2 @ key 1401 + 1402 + ldrb r0,[r12,#3] @ load input data in endian-neutral 1403 + ldrb r4,[r12,#2] @ manner... 1404 + ldrb r5,[r12,#1] 1405 + ldrb r6,[r12,#0] 1406 + orr r0,r0,r4,lsl#8 1407 + orr r0,r0,r5,lsl#16 1408 + orr r0,r0,r6,lsl#24 1409 + ldrb r1,[r12,#7] 1410 + ldrb r4,[r12,#6] 1411 + ldrb r5,[r12,#5] 1412 + ldrb r6,[r12,#4] 1413 + orr r1,r1,r4,lsl#8 1414 + orr r1,r1,r5,lsl#16 1415 + orr r1,r1,r6,lsl#24 1416 + ldrb r2,[r12,#11] 1417 + ldrb r4,[r12,#10] 1418 + ldrb r5,[r12,#9] 1419 + ldrb r6,[r12,#8] 1420 + orr r2,r2,r4,lsl#8 1421 + orr r2,r2,r5,lsl#16 1422 + orr r2,r2,r6,lsl#24 1423 + ldrb r3,[r12,#15] 1424 + ldrb r4,[r12,#14] 1425 + ldrb r5,[r12,#13] 1426 + ldrb r6,[r12,#12] 1427 + orr r3,r3,r4,lsl#8 1428 + orr r3,r3,r5,lsl#16 1429 + orr r3,r3,r6,lsl#24 1430 + str r0,[r11],#16 1431 + str r1,[r11,#-12] 1432 + str r2,[r11,#-8] 1433 + str r3,[r11,#-4] @ rk[0..3] = first 16 key bytes; r11 now points at rk[4] 1434 + 1435 + teq lr,#128 1436 + bne .Lnot128 1437 + mov r12,#10 @ stored below and also used as the loop count 1438 + str r12,[r11,#240-16] @ 10 goes to offset 240 of the key structure (r11 is 16 past its start) 1439 + add r6,r10,#256 @ rcon 1440 + mov lr,#255 @ byte mask 1441 + 1442 +.L128_loop: 1443 + and r5,lr,r3,lsr#24 1444 + and r7,lr,r3,lsr#16 1445 + and r8,lr,r3,lsr#8 1446 + and r9,lr,r3 1447 + ldrb r5,[r10,r5] 1448 + ldrb r7,[r10,r7] 1449 + ldrb r8,[r10,r8] 1450 + ldrb r9,[r10,r9] 1451 + ldr r4,[r6],#4 @ rcon[i++] 1452 + orr r5,r5,r7,lsl#24 1453 + orr r5,r5,r8,lsl#16 1454 + orr r5,r5,r9,lsl#8 @ r5 = Te4[] of each byte of r3, bytes rotated left by one position 1455 + eor r5,r5,r4 1456 + eor r0,r0,r5 @ rk[4]=rk[0]^... 
1457 + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] 1458 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] 1459 + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] 1460 + str r0,[r11],#16 1461 + str r1,[r11,#-12] 1462 + str r2,[r11,#-8] 1463 + str r3,[r11,#-4] 1464 + 1465 + subs r12,r12,#1 1466 + bne .L128_loop 1467 + sub r2,r11,#176 @ r2 = start of the key structure again (r11 advanced 16 + 10*16 bytes) 1468 + b .Ldone 1469 + 1470 +.Lnot128: 1471 + ldrb r8,[r12,#19] 1472 + ldrb r4,[r12,#18] 1473 + ldrb r5,[r12,#17] 1474 + ldrb r6,[r12,#16] 1475 + orr r8,r8,r4,lsl#8 1476 + orr r8,r8,r5,lsl#16 1477 + orr r8,r8,r6,lsl#24 1478 + ldrb r9,[r12,#23] 1479 + ldrb r4,[r12,#22] 1480 + ldrb r5,[r12,#21] 1481 + ldrb r6,[r12,#20] 1482 + orr r9,r9,r4,lsl#8 1483 + orr r9,r9,r5,lsl#16 1484 + orr r9,r9,r6,lsl#24 1485 + str r8,[r11],#8 1486 + str r9,[r11,#-4] @ rk[4], rk[5] = key bytes 16..23 1487 + 1488 + teq lr,#192 1489 + bne .Lnot192 1490 + mov r12,#12 1491 + str r12,[r11,#240-24] @ 12 goes to offset 240 of the key structure (r11 is 24 past its start) 1492 + add r6,r10,#256 @ rcon 1493 + mov lr,#255 @ byte mask 1494 + mov r12,#8 @ loop count 1495 + 1496 +.L192_loop: 1497 + and r5,lr,r9,lsr#24 1498 + and r7,lr,r9,lsr#16 1499 + and r8,lr,r9,lsr#8 1500 + and r9,lr,r9 1501 + ldrb r5,[r10,r5] 1502 + ldrb r7,[r10,r7] 1503 + ldrb r8,[r10,r8] 1504 + ldrb r9,[r10,r9] 1505 + ldr r4,[r6],#4 @ rcon[i++] 1506 + orr r5,r5,r7,lsl#24 1507 + orr r5,r5,r8,lsl#16 1508 + orr r5,r5,r9,lsl#8 1509 + eor r9,r5,r4 1510 + eor r0,r0,r9 @ rk[6]=rk[0]^... 
1511 + eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] 1512 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] 1513 + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] 1514 + str r0,[r11],#24 1515 + str r1,[r11,#-20] 1516 + str r2,[r11,#-16] 1517 + str r3,[r11,#-12] 1518 + 1519 + subs r12,r12,#1 1520 + subeq r2,r11,#216 @ finished: r2 = start of the key structure (r11 advanced 24 + 8*24 bytes) 1521 + beq .Ldone 1522 + 1523 + ldr r7,[r11,#-32] 1524 + ldr r8,[r11,#-28] 1525 + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] 1526 + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] 1527 + str r7,[r11,#-8] 1528 + str r9,[r11,#-4] 1529 + b .L192_loop 1530 + 1531 +.Lnot192: 1532 + ldrb r8,[r12,#27] 1533 + ldrb r4,[r12,#26] 1534 + ldrb r5,[r12,#25] 1535 + ldrb r6,[r12,#24] 1536 + orr r8,r8,r4,lsl#8 1537 + orr r8,r8,r5,lsl#16 1538 + orr r8,r8,r6,lsl#24 1539 + ldrb r9,[r12,#31] 1540 + ldrb r4,[r12,#30] 1541 + ldrb r5,[r12,#29] 1542 + ldrb r6,[r12,#28] 1543 + orr r9,r9,r4,lsl#8 1544 + orr r9,r9,r5,lsl#16 1545 + orr r9,r9,r6,lsl#24 1546 + str r8,[r11],#8 1547 + str r9,[r11,#-4] @ rk[6], rk[7] = key bytes 24..31 1548 + 1549 + mov r12,#14 1550 + str r12,[r11,#240-32] @ 14 goes to offset 240 of the key structure (r11 is 32 past its start) 1551 + add r6,r10,#256 @ rcon 1552 + mov lr,#255 @ byte mask 1553 + mov r12,#7 @ loop count 1554 + 1555 +.L256_loop: 1556 + and r5,lr,r9,lsr#24 1557 + and r7,lr,r9,lsr#16 1558 + and r8,lr,r9,lsr#8 1559 + and r9,lr,r9 1560 + ldrb r5,[r10,r5] 1561 + ldrb r7,[r10,r7] 1562 + ldrb r8,[r10,r8] 1563 + ldrb r9,[r10,r9] 1564 + ldr r4,[r6],#4 @ rcon[i++] 1565 + orr r5,r5,r7,lsl#24 1566 + orr r5,r5,r8,lsl#16 1567 + orr r5,r5,r9,lsl#8 1568 + eor r9,r5,r4 1569 + eor r0,r0,r9 @ rk[8]=rk[0]^... 
1570 + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] 1571 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] 1572 + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] 1573 + str r0,[r11],#32 1574 + str r1,[r11,#-28] 1575 + str r2,[r11,#-24] 1576 + str r3,[r11,#-20] 1577 + 1578 + subs r12,r12,#1 1579 + subeq r2,r11,#256 @ finished: r2 = start of the key structure (r11 advanced 32 + 7*32 bytes) 1580 + beq .Ldone 1581 + 1582 + and r5,lr,r3 1583 + and r7,lr,r3,lsr#8 1584 + and r8,lr,r3,lsr#16 1585 + and r9,lr,r3,lsr#24 1586 + ldrb r5,[r10,r5] 1587 + ldrb r7,[r10,r7] 1588 + ldrb r8,[r10,r8] 1589 + ldrb r9,[r10,r9] 1590 + orr r5,r5,r7,lsl#8 1591 + orr r5,r5,r8,lsl#16 1592 + orr r5,r5,r9,lsl#24 @ r5 = Te4[] of each byte of r3, bytes kept in place (no rotation, no rcon) 1593 + 1594 + ldr r4,[r11,#-48] 1595 + ldr r7,[r11,#-44] 1596 + ldr r8,[r11,#-40] 1597 + ldr r9,[r11,#-36] 1598 + eor r4,r4,r5 @ rk[12]=rk[4]^... 1599 + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] 1600 + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] 1601 + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] 1602 + str r4,[r11,#-16] 1603 + str r7,[r11,#-12] 1604 + str r8,[r11,#-8] 1605 + str r9,[r11,#-4] 1606 + b .L256_loop 1607 + 1608 +.Ldone: mov r0,#0 @ success 1609 + ldmia sp!,{r4-r12,lr} 1610 +.Labrt: tst lr,#1 @ shared return sequence; r0 already holds the result 1611 + moveq pc,lr @ be binary compatible with V4, yet 1612 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 1613 +.size AES_set_encrypt_key,.-AES_set_encrypt_key 1614 + @ AES_set_decrypt_key: calls AES_set_encrypt_key with the arguments it was given, then rewrites the resulting schedule in place 1615 +.global AES_set_decrypt_key 1616 +.type AES_set_decrypt_key,%function 1617 +.align 5 1618 +AES_set_decrypt_key: 1619 + str lr,[sp,#-4]! 
@ push lr 1620 + bl AES_set_encrypt_key @ build the encryption schedule first 1621 + teq r0,#0 1622 + ldrne lr,[sp],#4 @ pop lr 1623 + bne .Labrt @ nonzero result: hand it back through AES_set_encrypt_key's return sequence 1624 + 1625 + stmdb sp!,{r4-r12} @ with the lr pushed on entry this matches the final ldmia 1626 + 1627 + ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, 1628 + mov r11,r2 @ which is AES_KEY *key 1629 + mov r7,r2 @ r7 = first round key 1630 + add r8,r2,r12,lsl#4 @ r8 = key + 16 * (word at key+240), i.e. the last round key 1631 + 1632 +.Linv: ldr r0,[r7] @ swap the 16-byte round keys at r7 and r8, walking inwards 1633 + ldr r1,[r7,#4] 1634 + ldr r2,[r7,#8] 1635 + ldr r3,[r7,#12] 1636 + ldr r4,[r8] 1637 + ldr r5,[r8,#4] 1638 + ldr r6,[r8,#8] 1639 + ldr r9,[r8,#12] 1640 + str r0,[r8],#-16 1641 + str r1,[r8,#16+4] 1642 + str r2,[r8,#16+8] 1643 + str r3,[r8,#16+12] 1644 + str r4,[r7],#16 1645 + str r5,[r7,#-12] 1646 + str r6,[r7,#-8] 1647 + str r9,[r7,#-4] 1648 + teq r7,r8 @ stop once the two pointers meet 1649 + bne .Linv 1650 + ldr r0,[r11,#16]! @ prefetch tp1 1651 + mov r7,#0x80 1652 + mov r8,#0x1b 1653 + orr r7,r7,#0x8000 1654 + orr r8,r8,#0x1b00 1655 + orr r7,r7,r7,lsl#16 @ r7 = 0x80808080 1656 + orr r8,r8,r8,lsl#16 @ r8 = 0x1b1b1b1b 1657 + sub r12,r12,#1 1658 + mvn r9,r7 @ r9 = 0x7f7f7f7f 1659 + mov r12,r12,lsl#2 @ (rounds-1)*4 1660 + 1661 +.Lmix: and r4,r0,r7 @ top bit of each byte of tp1 1662 + and r1,r0,r9 1663 + sub r4,r4,r4,lsr#7 @ each flagged byte: 0x80 becomes 0x7f 1664 + and r4,r4,r8 @ each flagged byte: 0x1b 1665 + eor r1,r4,r1,lsl#1 @ tp2 1666 + 1667 + and r4,r1,r7 1668 + and r2,r1,r9 1669 + sub r4,r4,r4,lsr#7 1670 + and r4,r4,r8 1671 + eor r2,r4,r2,lsl#1 @ tp4 1672 + 1673 + and r4,r2,r7 1674 + and r3,r2,r9 1675 + sub r4,r4,r4,lsr#7 1676 + and r4,r4,r8 1677 + eor r3,r4,r3,lsl#1 @ tp8 1678 + 1679 + eor r4,r1,r2 1680 + eor r5,r0,r3 @ tp9 1681 + eor r4,r4,r3 @ tpe 1682 + eor r4,r4,r1,ror#24 1683 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) 1684 + eor r4,r4,r2,ror#16 1685 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) 1686 + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) 1687 + 1688 + ldr r0,[r11,#4] @ prefetch tp1 1689 + str r4,[r11],#4 @ overwrite the word just transformed, step to the next 1690 + subs r12,r12,#1 1691 + bne .Lmix 1692 + 1693 + mov r0,#0 @ success 1694 + ldmia sp!,{r4-r12,lr} 1695 + tst lr,#1 @ bit 0 of lr set: skip the mov and return through the .word below 1696 + moveq pc,lr @ be binary compatible with V4, yet 1697 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 1698 +.size AES_set_decrypt_key,.-AES_set_decrypt_key 1699 + 1700 
+.type AES_Td,%object 1701 +.align 5 1702 +AES_Td: 1703 +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 1704 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 1705 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 1706 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f 1707 +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 1708 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 1709 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da 1710 +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 1711 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd 1712 +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 1713 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 1714 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 1715 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 1716 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a 1717 +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 1718 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c 1719 +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 1720 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a 1721 +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 1722 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 1723 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 1724 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff 1725 +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 1726 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb 1727 +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 1728 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e 1729 +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 1730 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a 1731 +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e 1732 +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 1733 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d 1734 +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 1735 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd 
1736 +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 1737 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 1738 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 1739 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d 1740 +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 1741 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 1742 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef 1743 +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 1744 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 1745 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 1746 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 1747 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 1748 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b 1749 +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 1750 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 1751 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 1752 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 1753 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 1754 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f 1755 +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df 1756 +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f 1757 +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e 1758 +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 1759 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 1760 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c 1761 +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf 1762 +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 1763 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f 1764 +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 1765 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 1766 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 1767 +@ Td4[256] 1768 +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 1769 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 1770 +.byte 0x7c, 0xe3, 0x39, 0x82, 
0x9b, 0x2f, 0xff, 0x87 1771 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 1772 +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 1773 +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 1774 +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 1775 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 1776 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 1777 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 1778 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 1779 +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 1780 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 1781 +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 1782 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 1783 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 1784 +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 1785 +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 1786 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 1787 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 1788 +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 1789 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 1790 +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 1791 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 1792 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 1793 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 1794 +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 1795 +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 1796 +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 1797 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 1798 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 1799 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 1800 +.size AES_Td,.-AES_Td 1801 + 1802 +@ void AES_decrypt(const unsigned char *in, unsigned char *out, 1803 +@ const AES_KEY *key) { 1804 +.global AES_decrypt 1805 +.type AES_decrypt,%function 1806 +.align 5 1807 +AES_decrypt: 1808 + sub r3,pc,#8 @ AES_decrypt 
1809 + stmdb sp!,{r1,r4-r12,lr} @ r1 (out) is stored lowest, so it is the word popped first after the call 1810 + mov r12,r0 @ inp 1811 + mov r11,r2 @ r11 = key 1812 + sub r10,r3,#AES_decrypt-AES_Td @ Td 1813 + 1814 + ldrb r0,[r12,#3] @ load input data in endian-neutral 1815 + ldrb r4,[r12,#2] @ manner... 1816 + ldrb r5,[r12,#1] 1817 + ldrb r6,[r12,#0] 1818 + orr r0,r0,r4,lsl#8 1819 + orr r0,r0,r5,lsl#16 1820 + orr r0,r0,r6,lsl#24 @ r0 = in[0..3], in[0] in the top byte 1821 + ldrb r1,[r12,#7] 1822 + ldrb r4,[r12,#6] 1823 + ldrb r5,[r12,#5] 1824 + ldrb r6,[r12,#4] 1825 + orr r1,r1,r4,lsl#8 1826 + orr r1,r1,r5,lsl#16 1827 + orr r1,r1,r6,lsl#24 @ r1 = in[4..7], same byte order 1828 + ldrb r2,[r12,#11] 1829 + ldrb r4,[r12,#10] 1830 + ldrb r5,[r12,#9] 1831 + ldrb r6,[r12,#8] 1832 + orr r2,r2,r4,lsl#8 1833 + orr r2,r2,r5,lsl#16 1834 + orr r2,r2,r6,lsl#24 @ r2 = in[8..11] 1835 + ldrb r3,[r12,#15] 1836 + ldrb r4,[r12,#14] 1837 + ldrb r5,[r12,#13] 1838 + ldrb r6,[r12,#12] 1839 + orr r3,r3,r4,lsl#8 1840 + orr r3,r3,r5,lsl#16 1841 + orr r3,r3,r6,lsl#24 @ r3 = in[12..15] 1842 + 1843 + bl _armv4_AES_decrypt @ block in r0-r3, key pointer in r11, table base in r10 1844 + 1845 + ldr r12,[sp],#4 @ pop out 1846 + mov r4,r0,lsr#24 @ write output in endian-neutral 1847 + mov r5,r0,lsr#16 @ manner... 1848 + mov r6,r0,lsr#8 1849 + strb r4,[r12,#0] 1850 + strb r5,[r12,#1] 1851 + strb r6,[r12,#2] 1852 + strb r0,[r12,#3] @ out[0..3] = r0, top byte first 1853 + mov r4,r1,lsr#24 1854 + mov r5,r1,lsr#16 1855 + mov r6,r1,lsr#8 1856 + strb r4,[r12,#4] 1857 + strb r5,[r12,#5] 1858 + strb r6,[r12,#6] 1859 + strb r1,[r12,#7] @ out[4..7] = r1 1860 + mov r4,r2,lsr#24 1861 + mov r5,r2,lsr#16 1862 + mov r6,r2,lsr#8 1863 + strb r4,[r12,#8] 1864 + strb r5,[r12,#9] 1865 + strb r6,[r12,#10] 1866 + strb r2,[r12,#11] @ out[8..11] = r2 1867 + mov r4,r3,lsr#24 1868 + mov r5,r3,lsr#16 1869 + mov r6,r3,lsr#8 1870 + strb r4,[r12,#12] 1871 + strb r5,[r12,#13] 1872 + strb r6,[r12,#14] 1873 + strb r3,[r12,#15] @ out[12..15] = r3 1874 + 1875 + ldmia sp!,{r4-r12,lr} @ restore the remaining saved registers 1876 + tst lr,#1 @ bit 0 of lr set: skip the mov and return through the .word below 1877 + moveq pc,lr @ be binary compatible with V4, yet 1878 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 1879 +.size AES_decrypt,.-AES_decrypt 1880 + 1881 +.type _armv4_AES_decrypt,%function 1882 +.align 2 1883 +_armv4_AES_decrypt: 1884 + str lr,[sp,#-4]! 
@ push lr 1885 + ldr r4,[r11],#16 1886 + ldr r5,[r11,#-12] 1887 + ldr r6,[r11,#-8] 1888 + ldr r7,[r11,#-4] 1889 + ldr r12,[r11,#240-16] 1890 + eor r0,r0,r4 1891 + eor r1,r1,r5 1892 + eor r2,r2,r6 1893 + eor r3,r3,r7 1894 + sub r12,r12,#1 1895 + mov lr,#255 1896 + 1897 +.Ldec_loop: 1898 + and r7,lr,r0,lsr#16 1899 + and r8,lr,r0,lsr#8 1900 + and r9,lr,r0 1901 + mov r0,r0,lsr#24 1902 + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] 1903 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] 1904 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] 1905 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] 1906 + 1907 + and r7,lr,r1 @ i0 1908 + and r8,lr,r1,lsr#16 1909 + and r9,lr,r1,lsr#8 1910 + mov r1,r1,lsr#24 1911 + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] 1912 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] 1913 + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] 1914 + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] 1915 + eor r0,r0,r7,ror#24 1916 + eor r1,r1,r4,ror#8 1917 + eor r5,r8,r5,ror#8 1918 + eor r6,r9,r6,ror#8 1919 + 1920 + and r7,lr,r2,lsr#8 @ i0 1921 + and r8,lr,r2 @ i1 1922 + and r9,lr,r2,lsr#16 1923 + mov r2,r2,lsr#24 1924 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] 1925 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] 1926 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] 1927 + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] 1928 + eor r0,r0,r7,ror#16 1929 + eor r1,r1,r8,ror#24 1930 + eor r2,r2,r5,ror#8 1931 + eor r6,r9,r6,ror#8 1932 + 1933 + and r7,lr,r3,lsr#16 @ i0 1934 + and r8,lr,r3,lsr#8 @ i1 1935 + and r9,lr,r3 @ i2 1936 + mov r3,r3,lsr#24 1937 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] 1938 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] 1939 + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] 1940 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] 1941 + eor r0,r0,r7,ror#8 1942 + eor r1,r1,r8,ror#16 1943 + eor r2,r2,r9,ror#24 1944 + eor r3,r3,r6,ror#8 1945 + 1946 + ldr r4,[r11],#16 1947 + ldr r5,[r11,#-12] 1948 + ldr r6,[r11,#-8] 1949 + ldr r7,[r11,#-4] 1950 + eor r0,r0,r4 1951 + eor r1,r1,r5 1952 + eor r2,r2,r6 1953 + eor r3,r3,r7 1954 + 1955 + subs r12,r12,#1 1956 + bne .Ldec_loop 1957 + 1958 + add r10,r10,#1024 1959 + 1960 + 
ldr r4,[r10,#0] @ prefetch Td4 1961 + ldr r5,[r10,#32] 1962 + ldr r6,[r10,#64] 1963 + ldr r7,[r10,#96] 1964 + ldr r8,[r10,#128] 1965 + ldr r9,[r10,#160] 1966 + ldr r4,[r10,#192] 1967 + ldr r5,[r10,#224] 1968 + 1969 + and r7,lr,r0,lsr#16 1970 + and r8,lr,r0,lsr#8 1971 + and r9,lr,r0 1972 + ldrb r0,[r10,r0,lsr#24] @ Td4[s0>>24] 1973 + ldrb r4,[r10,r7] @ Td4[s0>>16] 1974 + ldrb r5,[r10,r8] @ Td4[s0>>8] 1975 + ldrb r6,[r10,r9] @ Td4[s0>>0] 1976 + 1977 + and r7,lr,r1 @ i0 1978 + and r8,lr,r1,lsr#16 1979 + and r9,lr,r1,lsr#8 1980 + ldrb r7,[r10,r7] @ Td4[s1>>0] 1981 + ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] 1982 + ldrb r8,[r10,r8] @ Td4[s1>>16] 1983 + ldrb r9,[r10,r9] @ Td4[s1>>8] 1984 + eor r0,r7,r0,lsl#24 1985 + eor r1,r4,r1,lsl#8 1986 + eor r5,r5,r8,lsl#8 1987 + eor r6,r6,r9,lsl#8 1988 + 1989 + and r7,lr,r2,lsr#8 @ i0 1990 + and r8,lr,r2 @ i1 1991 + and r9,lr,r2,lsr#16 1992 + ldrb r7,[r10,r7] @ Td4[s2>>8] 1993 + ldrb r8,[r10,r8] @ Td4[s2>>0] 1994 + ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] 1995 + ldrb r9,[r10,r9] @ Td4[s2>>16] 1996 + eor r0,r0,r7,lsl#8 1997 + eor r1,r8,r1,lsl#16 1998 + eor r2,r5,r2,lsl#16 1999 + eor r6,r6,r9,lsl#16 2000 + 2001 + and r7,lr,r3,lsr#16 @ i0 2002 + and r8,lr,r3,lsr#8 @ i1 2003 + and r9,lr,r3 @ i2 2004 + ldrb r7,[r10,r7] @ Td4[s3>>16] 2005 + ldrb r8,[r10,r8] @ Td4[s3>>8] 2006 + ldrb r9,[r10,r9] @ Td4[s3>>0] 2007 + ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] 2008 + eor r0,r0,r7,lsl#16 2009 + eor r1,r1,r8,lsl#8 2010 + eor r2,r9,r2,lsl#8 2011 + eor r3,r6,r3,lsl#24 2012 + 2013 + ldr lr,[sp],#4 @ pop lr 2014 + ldr r4,[r11,#0] 2015 + ldr r5,[r11,#4] 2016 + ldr r6,[r11,#8] 2017 + ldr r7,[r11,#12] 2018 + eor r0,r0,r4 2019 + eor r1,r1,r5 2020 + eor r2,r2,r6 2021 + eor r3,r3,r7 2022 + 2023 + sub r10,r10,#1024 2024 + mov pc,lr @ return 2025 +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt 2026 +.asciz "AES for ARMv4, CRYPTOGAMS by <appro (a] openssl.org>" 2027 +.align 2 2028 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2029 +++ 
openssl-0.9.8h/crypto/0.9.9-dev/bn/armv4-mont.pl 2009-09-03 15:42:39.000000000 -0700 2030 @@ -0,0 +1,200 @@ 2031 +#!/usr/bin/env perl 2032 + 2033 +# ==================================================================== 2034 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 2035 +# project. The module is, however, dual licensed under OpenSSL and 2036 +# CRYPTOGAMS licenses depending on where you obtain it. For further 2037 +# details see http://www.openssl.org/~appro/cryptogams/. 2038 +# ==================================================================== 2039 + 2040 +# January 2007. 2041 + 2042 +# Montgomery multiplication for ARMv4. 2043 +# 2044 +# Performance improvement naturally varies among CPU implementations 2045 +# and compilers. The code was observed to provide +65-35% improvement 2046 +# [depending on key length, less for longer keys] on ARM920T, and 2047 +# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code 2048 +# base and compiler generated code with in-lined umull and even umlal 2049 +# instructions. The latter means that this code didn't really have an 2050 +# "advantage" of utilizing some "secret" instruction. 2051 +# 2052 +# The code is interoperable with Thumb ISA and is rather compact, less 2053 +# than 1/2KB. A Windows CE port would be trivial, as it's exclusively 2054 +# about decorations; ABI and instruction syntax are identical. 2055 + 2056 +$num="r0"; # starts as num argument, but holds &tp[num-1] 2057 +$ap="r1"; 2058 +$bp="r2"; $bi="r2"; $rp="r2"; 2059 +$np="r3"; 2060 +$tp="r4"; 2061 +$aj="r5"; 2062 +$nj="r6"; 2063 +$tj="r7"; 2064 +$n0="r8"; 2065 +########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer 2066 +$alo="r10"; # sl, gcc uses it to keep @GOT 2067 +$ahi="r11"; # fp 2068 +$nlo="r12"; # ip 2069 +########### # r13 is stack pointer 2070 +$nhi="r14"; # lr 2071 +########### # r15 is program counter 2072 + 2073 +#### argument block layout relative to &tp[num-1], a.k.a.
$num 2074 +$_rp="$num,#12*4"; 2075 +# ap permanently resides in r1 2076 +$_bp="$num,#13*4"; 2077 +# np permanently resides in r3 2078 +$_n0="$num,#14*4"; 2079 +$_num="$num,#15*4"; $_bpend=$_num; 2080 + 2081 +$code=<<___; 2082 +.text 2083 + 2084 +.global bn_mul_mont 2085 +.type bn_mul_mont,%function 2086 + 2087 +.align 2 2088 +bn_mul_mont: 2089 + stmdb sp!,{r0,r2} @ sp points at argument block 2090 + ldr $num,[sp,#3*4] @ load num 2091 + cmp $num,#2 2092 + movlt r0,#0 2093 + addlt sp,sp,#2*4 2094 + blt .Labrt 2095 + 2096 + stmdb sp!,{r4-r12,lr} @ save 10 registers 2097 + 2098 + mov $num,$num,lsl#2 @ rescale $num for byte count 2099 + sub sp,sp,$num @ alloca(4*num) 2100 + sub sp,sp,#4 @ +extra dword 2101 + sub $num,$num,#4 @ "num=num-1" 2102 + add $tp,$bp,$num @ &bp[num-1] 2103 + 2104 + add $num,sp,$num @ $num to point at &tp[num-1] 2105 + ldr $n0,[$_n0] @ &n0 2106 + ldr $bi,[$bp] @ bp[0] 2107 + ldr $aj,[$ap],#4 @ ap[0],ap++ 2108 + ldr $nj,[$np],#4 @ np[0],np++ 2109 + ldr $n0,[$n0] @ *n0 2110 + str $tp,[$_bpend] @ save &bp[num] 2111 + 2112 + umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0] 2113 + str $n0,[$_n0] @ save n0 value 2114 + mul $n0,$alo,$n0 @ "tp[0]"*n0 2115 + mov $nlo,#0 2116 + umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]" 2117 + mov $tp,sp 2118 + 2119 +.L1st: 2120 + ldr $aj,[$ap],#4 @ ap[j],ap++ 2121 + mov $alo,$ahi 2122 + mov $ahi,#0 2123 + umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] 2124 + ldr $nj,[$np],#4 @ np[j],np++ 2125 + mov $nhi,#0 2126 + umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 2127 + adds $nlo,$nlo,$alo 2128 + str $nlo,[$tp],#4 @ tp[j-1]=,tp++ 2129 + adc $nlo,$nhi,#0 2130 + cmp $tp,$num 2131 + bne .L1st 2132 + 2133 + adds $nlo,$nlo,$ahi 2134 + mov $nhi,#0 2135 + adc $nhi,$nhi,#0 2136 + ldr $tp,[$_bp] @ restore bp 2137 + str $nlo,[$num] @ tp[num-1]= 2138 + ldr $n0,[$_n0] @ restore n0 2139 + str $nhi,[$num,#4] @ tp[num]= 2140 + 2142 +.Louter: 2143 + sub $tj,$num,sp @ "original" $num-1 value 2144 + sub $ap,$ap,$tj @ "rewind" ap to &ap[1] 2145 + sub $np,$np,$tj @ "rewind" np 
to &np[1] 2146 + ldr $bi,[$tp,#4]! @ *(++bp) 2147 + ldr $aj,[$ap,#-4] @ ap[0] 2148 + ldr $nj,[$np,#-4] @ np[0] 2149 + ldr $alo,[sp] @ tp[0] 2150 + ldr $tj,[sp,#4] @ tp[1] 2151 + 2152 + mov $ahi,#0 2153 + umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0] 2154 + str $tp,[$_bp] @ save bp 2155 + mul $n0,$alo,$n0 2156 + mov $nlo,#0 2157 + umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]" 2158 + mov $tp,sp 2159 + 2160 +.Linner: 2161 + ldr $aj,[$ap],#4 @ ap[j],ap++ 2162 + adds $alo,$ahi,$tj @ +=tp[j] 2163 + mov $ahi,#0 2164 + umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] 2165 + ldr $nj,[$np],#4 @ np[j],np++ 2166 + mov $nhi,#0 2167 + umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 2168 + ldr $tj,[$tp,#8] @ tp[j+1] 2169 + adc $ahi,$ahi,#0 2170 + adds $nlo,$nlo,$alo 2171 + str $nlo,[$tp],#4 @ tp[j-1]=,tp++ 2172 + adc $nlo,$nhi,#0 2173 + cmp $tp,$num 2174 + bne .Linner 2175 + 2176 + adds $nlo,$nlo,$ahi 2177 + mov $nhi,#0 2178 + adc $nhi,$nhi,#0 2179 + adds $nlo,$nlo,$tj 2180 + adc $nhi,$nhi,#0 2181 + ldr $tp,[$_bp] @ restore bp 2182 + ldr $tj,[$_bpend] @ restore &bp[num] 2183 + str $nlo,[$num] @ tp[num-1]= 2184 + ldr $n0,[$_n0] @ restore n0 2185 + str $nhi,[$num,#4] @ tp[num]= 2186 + 2187 + cmp $tp,$tj 2188 + bne .Louter 2189 + 2191 + ldr $rp,[$_rp] @ pull rp 2192 + add $num,$num,#4 @ $num to point at &tp[num] 2193 + sub $aj,$num,sp @ "original" num value 2194 + mov $tp,sp @ "rewind" $tp 2195 + mov $ap,$tp @ "borrow" $ap 2196 + sub $np,$np,$aj @ "rewind" $np to &np[0] 2197 + 2198 + subs $tj,$tj,$tj @ "clear" carry flag 2199 +.Lsub: ldr $tj,[$tp],#4 2200 + ldr $nj,[$np],#4 2201 + sbcs $tj,$tj,$nj @ tp[j]-np[j] 2202 + str $tj,[$rp],#4 @ rp[j]= 2203 + teq $tp,$num @ preserve carry 2204 + bne .Lsub 2205 + sbcs $nhi,$nhi,#0 @ upmost carry 2206 + mov $tp,sp @ "rewind" $tp 2207 + sub $rp,$rp,$aj @ "rewind" $rp 2208 + 2209 + and $ap,$tp,$nhi 2210 + bic $np,$rp,$nhi 2211 + orr $ap,$ap,$np @ ap=borrow?tp:rp 2212 + 2213 +.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh 2214 + str sp,[$tp],#4 @ zap tp 2215 + str 
$tj,[$rp],#4 2216 + cmp $tp,$num 2217 + bne .Lcopy 2218 + 2219 + add sp,$num,#4 @ skip over tp[num+1] 2220 + ldmia sp!,{r4-r12,lr} @ restore registers 2221 + add sp,sp,#2*4 @ skip over {r0,r2} 2222 + mov r0,#1 2223 +.Labrt: tst lr,#1 2224 + moveq pc,lr @ be binary compatible with V4, yet 2225 + bx lr @ interoperable with Thumb ISA:-) 2226 +.size bn_mul_mont,.-bn_mul_mont 2227 +.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 2228 +___ 2229 + 2230 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 2231 +print $code; 2232 +close STDOUT; 2233 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2234 +++ openssl-0.9.8h/crypto/0.9.9-dev/bn/armv4-mont.s 2009-09-03 15:42:39.000000000 -0700 2235 @@ -0,0 +1,145 @@ 2236 +.text 2237 + 2238 +.global bn_mul_mont 2239 +.type bn_mul_mont,%function 2240 + 2241 +.align 2 2242 +bn_mul_mont: 2243 + stmdb sp!,{r0,r2} @ sp points at argument block 2244 + ldr r0,[sp,#3*4] @ load num 2245 + cmp r0,#2 2246 + movlt r0,#0 2247 + addlt sp,sp,#2*4 2248 + blt .Labrt 2249 + 2250 + stmdb sp!,{r4-r12,lr} @ save 10 registers 2251 + 2252 + mov r0,r0,lsl#2 @ rescale r0 for byte count 2253 + sub sp,sp,r0 @ alloca(4*num) 2254 + sub sp,sp,#4 @ +extra dword 2255 + sub r0,r0,#4 @ "num=num-1" 2256 + add r4,r2,r0 @ &bp[num-1] 2257 + 2258 + add r0,sp,r0 @ r0 to point at &tp[num-1] 2259 + ldr r8,[r0,#14*4] @ &n0 2260 + ldr r2,[r2] @ bp[0] 2261 + ldr r5,[r1],#4 @ ap[0],ap++ 2262 + ldr r6,[r3],#4 @ np[0],np++ 2263 + ldr r8,[r8] @ *n0 2264 + str r4,[r0,#15*4] @ save &bp[num] 2265 + 2266 + umull r10,r11,r5,r2 @ ap[0]*bp[0] 2267 + str r8,[r0,#14*4] @ save n0 value 2268 + mul r8,r10,r8 @ "tp[0]"*n0 2269 + mov r12,#0 2270 + umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" 2271 + mov r4,sp 2272 + 2273 +.L1st: 2274 + ldr r5,[r1],#4 @ ap[j],ap++ 2275 + mov r10,r11 2276 + mov r11,#0 2277 + umlal r10,r11,r5,r2 @ ap[j]*bp[0] 2278 + ldr r6,[r3],#4 @ np[j],np++ 2279 + mov r14,#0 2280 + umlal r12,r14,r6,r8 @ 
np[j]*n0 2281 + adds r12,r12,r10 2282 + str r12,[r4],#4 @ tp[j-1]=,tp++ 2283 + adc r12,r14,#0 2284 + cmp r4,r0 2285 + bne .L1st 2286 + 2287 + adds r12,r12,r11 2288 + mov r14,#0 2289 + adc r14,r14,#0 2290 + ldr r4,[r0,#13*4] @ restore bp 2291 + str r12,[r0] @ tp[num-1]= 2292 + ldr r8,[r0,#14*4] @ restore n0 2293 + str r14,[r0,#4] @ tp[num]= 2294 + 2296 +.Louter: 2297 + sub r7,r0,sp @ "original" r0-1 value 2298 + sub r1,r1,r7 @ "rewind" ap to &ap[1] 2299 + sub r3,r3,r7 @ "rewind" np to &np[1] 2300 + ldr r2,[r4,#4]! @ *(++bp) 2301 + ldr r5,[r1,#-4] @ ap[0] 2302 + ldr r6,[r3,#-4] @ np[0] 2303 + ldr r10,[sp] @ tp[0] 2304 + ldr r7,[sp,#4] @ tp[1] 2305 + 2306 + mov r11,#0 2307 + umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] 2308 + str r4,[r0,#13*4] @ save bp 2309 + mul r8,r10,r8 2310 + mov r12,#0 2311 + umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" 2312 + mov r4,sp 2313 + 2314 +.Linner: 2315 + ldr r5,[r1],#4 @ ap[j],ap++ 2316 + adds r10,r11,r7 @ +=tp[j] 2317 + mov r11,#0 2318 + umlal r10,r11,r5,r2 @ ap[j]*bp[i] 2319 + ldr r6,[r3],#4 @ np[j],np++ 2320 + mov r14,#0 2321 + umlal r12,r14,r6,r8 @ np[j]*n0 2322 + ldr r7,[r4,#8] @ tp[j+1] 2323 + adc r11,r11,#0 2324 + adds r12,r12,r10 2325 + str r12,[r4],#4 @ tp[j-1]=,tp++ 2326 + adc r12,r14,#0 2327 + cmp r4,r0 2328 + bne .Linner 2329 + 2330 + adds r12,r12,r11 2331 + mov r14,#0 2332 + adc r14,r14,#0 2333 + adds r12,r12,r7 2334 + adc r14,r14,#0 2335 + ldr r4,[r0,#13*4] @ restore bp 2336 + ldr r7,[r0,#15*4] @ restore &bp[num] 2337 + str r12,[r0] @ tp[num-1]= 2338 + ldr r8,[r0,#14*4] @ restore n0 2339 + str r14,[r0,#4] @ tp[num]= 2340 + 2341 + cmp r4,r7 2342 + bne .Louter 2343 + 2345 + ldr r2,[r0,#12*4] @ pull rp 2346 + add r0,r0,#4 @ r0 to point at &tp[num] 2347 + sub r5,r0,sp @ "original" num value 2348 + mov r4,sp @ "rewind" r4 2349 + mov r1,r4 @ "borrow" r1 2350 + sub r3,r3,r5 @ "rewind" r3 to &np[0] 2351 + 2352 + subs r7,r7,r7 @ "clear" carry flag 2353 +.Lsub: ldr r7,[r4],#4 2354 + ldr r6,[r3],#4 2355 + sbcs r7,r7,r6 @ tp[j]-np[j] 2356 + 
str r7,[r2],#4 @ rp[j]= 2357 + teq r4,r0 @ preserve carry 2358 + bne .Lsub 2359 + sbcs r14,r14,#0 @ upmost carry 2360 + mov r4,sp @ "rewind" r4 2361 + sub r2,r2,r5 @ "rewind" r2 2362 + 2363 + and r1,r4,r14 2364 + bic r3,r2,r14 2365 + orr r1,r1,r3 @ ap=borrow?tp:rp 2366 + 2367 +.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh 2368 + str sp,[r4],#4 @ zap tp 2369 + str r7,[r2],#4 2370 + cmp r4,r0 2371 + bne .Lcopy 2372 + 2373 + add sp,r0,#4 @ skip over tp[num+1] 2374 + ldmia sp!,{r4-r12,lr} @ restore registers 2375 + add sp,sp,#2*4 @ skip over {r0,r2} 2376 + mov r0,#1 2377 +.Labrt: tst lr,#1 2378 + moveq pc,lr @ be binary compatible with V4, yet 2379 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 2380 +.size bn_mul_mont,.-bn_mul_mont 2381 +.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro (a] openssl.org>" 2382 +.align 2 2383 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2384 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha1-armv4-large.pl 2009-09-03 15:42:39.000000000 -0700 2385 @@ -0,0 +1,231 @@ 2386 +#!/usr/bin/env perl 2387 + 2388 +# ==================================================================== 2389 +# Written by Andy Polyakov <appro (a] fy.chalmers.se> for the OpenSSL 2390 +# project. The module is, however, dual licensed under OpenSSL and 2391 +# CRYPTOGAMS licenses depending on where you obtain it. For further 2392 +# details see http://www.openssl.org/~appro/cryptogams/. 2393 +# ==================================================================== 2394 + 2395 +# sha1_block procedure for ARMv4. 2396 +# 2397 +# January 2007. 
2398 + 2399 +# Size/performance trade-off 2400 +# ==================================================================== 2401 +# impl size in bytes comp cycles[*] measured performance 2402 +# ==================================================================== 2403 +# thumb 304 3212 4420 2404 +# armv4-small 392/+29% 1958/+64% 2250/+96% 2405 +# armv4-compact 740/+89% 1552/+26% 1840/+22% 2406 +# armv4-large 1420/+92% 1307/+19% 1500/+23% 2407 +# full unroll ~5100/+260% ~1260/+4% ~1500/+0% 2408 +# ==================================================================== 2409 +# thumb = same as 'small' but in Thumb instructions[**] and 2410 +# with recurring code in two private functions; 2411 +# small = detached Xload/update, loops are folded; 2412 +# compact = detached Xload/update, 5x unroll; 2413 +# large = interleaved Xload/update, 5x unroll; 2414 +# full unroll = interleaved Xload/update, full unroll, estimated[!]; 2415 +# 2416 +# [*] Manually counted instructions in "grand" loop body. Measured 2417 +# performance is affected by prologue and epilogue overhead, 2418 +# i-cache availability, branch penalties, etc. 2419 +# [**] While each Thumb instruction is half the size, they are not as 2420 +# diverse as ARM ones: e.g., there are only two arithmetic 2421 +# instructions with 3 arguments, no [fixed] rotate, and addressing 2422 +# modes are limited. As a result it takes more instructions to do 2423 +# the same job in Thumb; therefore the code is never twice as 2424 +# small and always slower.
2425 + 2426 +$output=shift; 2427 +open STDOUT,">$output"; 2428 + 2429 +$ctx="r0"; 2430 +$inp="r1"; 2431 +$len="r2"; 2432 +$a="r3"; 2433 +$b="r4"; 2434 +$c="r5"; 2435 +$d="r6"; 2436 +$e="r7"; 2437 +$K="r8"; 2438 +$t0="r10"; 2439 +$t1="r11"; 2440 +$t2="r12"; 2441 +$Xi="r14"; 2442 +@V=($a,$b,$c,$d,$e); 2443 + 2444 +# One can optimize this for aligned access on big-endian architecture, 2445 +# but code's endian neutrality makes it too pretty:-) 2446 +sub Xload { 2447 +my ($a,$b,$c,$d,$e)=@_; 2448 +$code.=<<___; 2449 + ldrb $t0,[$inp],#4 2450 + ldrb $t1,[$inp,#-3] 2451 + ldrb $t2,[$inp,#-2] 2452 + add $e,$K,$e,ror#2 @ E+=K_00_19 2453 + orr $t0,$t1,$t0,lsl#8 2454 + ldrb $t1,[$inp,#-1] 2455 + orr $t0,$t2,$t0,lsl#8 2456 + add $e,$e,$a,ror#27 @ E+=ROR(A,27) 2457 + orr $t0,$t1,$t0,lsl#8 2458 + add $e,$e,$t0 @ E+=X[i] 2459 + eor $t1,$c,$d @ F_xx_xx 2460 + str $t0,[$Xi,#-4]! 2461 +___ 2462 +} 2463 +sub Xupdate { 2464 +my ($a,$b,$c,$d,$e,$flag)=@_; 2465 +$code.=<<___; 2466 + ldr $t0,[$Xi,#15*4] 2467 + ldr $t1,[$Xi,#13*4] 2468 + ldr $t2,[$Xi,#7*4] 2469 + add $e,$K,$e,ror#2 @ E+=K_xx_xx 2470 + eor $t0,$t0,$t1 2471 + ldr $t1,[$Xi,#2*4] 2472 + add $e,$e,$a,ror#27 @ E+=ROR(A,27) 2473 + eor $t0,$t0,$t2 2474 + eor $t0,$t0,$t1 2475 +___ 2476 +$code.=<<___ if (!defined($flag)); 2477 + eor $t1,$c,$d @ F_xx_xx, but not in 40_59 2478 +___ 2479 +$code.=<<___; 2480 + mov $t0,$t0,ror#31 2481 + add $e,$e,$t0 @ E+=X[i] 2482 + str $t0,[$Xi,#-4]! 
2483 +___ 2484 +} 2485 + 2486 +sub BODY_00_15 { 2487 +my ($a,$b,$c,$d,$e)=@_; 2488 + &Xload(@_); 2489 +$code.=<<___; 2490 + and $t1,$b,$t1,ror#2 2491 + eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) 2492 + add $e,$e,$t1 @ E+=F_00_19(B,C,D) 2493 +___ 2494 +} 2495 + 2496 +sub BODY_16_19 { 2497 +my ($a,$b,$c,$d,$e)=@_; 2498 + &Xupdate(@_); 2499 +$code.=<<___; 2500 + and $t1,$b,$t1,ror#2 2501 + eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) 2502 + add $e,$e,$t1 @ E+=F_00_19(B,C,D) 2503 +___ 2504 +} 2505 + 2506 +sub BODY_20_39 { 2507 +my ($a,$b,$c,$d,$e)=@_; 2508 + &Xupdate(@_); 2509 +$code.=<<___; 2510 + eor $t1,$b,$t1,ror#2 @ F_20_39(B,C,D) 2511 + add $e,$e,$t1 @ E+=F_20_39(B,C,D) 2512 +___ 2513 +} 2514 + 2515 +sub BODY_40_59 { 2516 +my ($a,$b,$c,$d,$e)=@_; 2517 + &Xupdate(@_,1); 2518 +$code.=<<___; 2519 + and $t1,$b,$c,ror#2 2520 + orr $t2,$b,$c,ror#2 2521 + and $t2,$t2,$d,ror#2 2522 + orr $t1,$t1,$t2 @ F_40_59(B,C,D) 2523 + add $e,$e,$t1 @ E+=F_40_59(B,C,D) 2524 +___ 2525 +} 2526 + 2527 +$code=<<___; 2528 +.text 2529 + 2530 +.global sha1_block_data_order 2531 +.type sha1_block_data_order,%function 2532 + 2533 +.align 2 2534 +sha1_block_data_order: 2535 + stmdb sp!,{r4-r12,lr} 2536 + add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp 2537 + ldmia $ctx,{$a,$b,$c,$d,$e} 2538 +.Lloop: 2539 + ldr $K,.LK_00_19 2540 + mov $Xi,sp 2541 + sub sp,sp,#15*4 2542 + mov $c,$c,ror#30 2543 + mov $d,$d,ror#30 2544 + mov $e,$e,ror#30 @ [6] 2545 +.L_00_15: 2546 +___ 2547 +for($i=0;$i<5;$i++) { 2548 + &BODY_00_15(@V); unshift(@V,pop(@V)); 2549 +} 2550 +$code.=<<___; 2551 + teq $Xi,sp 2552 + bne .L_00_15 @ [((11+4)*5+2)*3] 2553 +___ 2554 + &BODY_00_15(@V); unshift(@V,pop(@V)); 2555 + &BODY_16_19(@V); unshift(@V,pop(@V)); 2556 + &BODY_16_19(@V); unshift(@V,pop(@V)); 2557 + &BODY_16_19(@V); unshift(@V,pop(@V)); 2558 + &BODY_16_19(@V); unshift(@V,pop(@V)); 2559 +$code.=<<___; 2560 + 2561 + ldr $K,.LK_20_39 @ [+15+16*4] 2562 + sub sp,sp,#25*4 2563 + cmn sp,#0 @ [+3], clear carry to denote 20_39 2564 
+.L_20_39_or_60_79: 2565 +___ 2566 +for($i=0;$i<5;$i++) { 2567 + &BODY_20_39(@V); unshift(@V,pop(@V)); 2568 +} 2569 +$code.=<<___; 2570 + teq $Xi,sp @ preserve carry 2571 + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] 2572 + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes 2573 + 2574 + ldr $K,.LK_40_59 2575 + sub sp,sp,#20*4 @ [+2] 2576 +.L_40_59: 2577 +___ 2578 +for($i=0;$i<5;$i++) { 2579 + &BODY_40_59(@V); unshift(@V,pop(@V)); 2580 +} 2581 +$code.=<<___; 2582 + teq $Xi,sp 2583 + bne .L_40_59 @ [+((12+5)*5+2)*4] 2584 + 2585 + ldr $K,.LK_60_79 2586 + sub sp,sp,#20*4 2587 + cmp sp,#0 @ set carry to denote 60_79 2588 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes 2589 +.L_done: 2590 + add sp,sp,#80*4 @ "deallocate" stack frame 2591 + ldmia $ctx,{$K,$t0,$t1,$t2,$Xi} 2592 + add $a,$K,$a 2593 + add $b,$t0,$b 2594 + add $c,$t1,$c,ror#2 2595 + add $d,$t2,$d,ror#2 2596 + add $e,$Xi,$e,ror#2 2597 + stmia $ctx,{$a,$b,$c,$d,$e} 2598 + teq $inp,$len 2599 + bne .Lloop @ [+18], total 1307 2600 + 2601 + ldmia sp!,{r4-r12,lr} 2602 + tst lr,#1 2603 + moveq pc,lr @ be binary compatible with V4, yet 2604 + bx lr @ interoperable with Thumb ISA:-) 2605 +.align 2 2606 +.LK_00_19: .word 0x5a827999 2607 +.LK_20_39: .word 0x6ed9eba1 2608 +.LK_40_59: .word 0x8f1bbcdc 2609 +.LK_60_79: .word 0xca62c1d6 2610 +.size sha1_block_data_order,.-sha1_block_data_order 2611 +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 2612 +___ 2613 + 2614 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 2615 +print $code; 2616 +close STDOUT; # enforce flush 2617 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2618 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha1-armv4-large.s 2009-09-03 15:42:39.000000000 -0700 2619 @@ -0,0 +1,376 @@ 2620 +.text 2621 + 2622 +.global sha1_block_data_order 2623 +.type sha1_block_data_order,%function 2624 + 2625 +.align 2 2626 +sha1_block_data_order: 2627 + stmdb sp!,{r4-r12,lr} 2628 + add r2,r1,r2,lsl#6 @ r2 to 
point at the end of r1 2629 + ldmia r0,{r3,r4,r5,r6,r7} 2630 +.Lloop: 2631 + ldr r8,.LK_00_19 2632 + mov r14,sp 2633 + sub sp,sp,#15*4 2634 + mov r5,r5,ror#30 2635 + mov r6,r6,ror#30 2636 + mov r7,r7,ror#30 @ [6] 2637 +.L_00_15: 2638 + ldrb r10,[r1],#4 2639 + ldrb r11,[r1,#-3] 2640 + ldrb r12,[r1,#-2] 2641 + add r7,r8,r7,ror#2 @ E+=K_00_19 2642 + orr r10,r11,r10,lsl#8 2643 + ldrb r11,[r1,#-1] 2644 + orr r10,r12,r10,lsl#8 2645 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) 2646 + orr r10,r11,r10,lsl#8 2647 + add r7,r7,r10 @ E+=X[i] 2648 + eor r11,r5,r6 @ F_xx_xx 2649 + str r10,[r14,#-4]! 2650 + and r11,r4,r11,ror#2 2651 + eor r11,r11,r6,ror#2 @ F_00_19(B,C,D) 2652 + add r7,r7,r11 @ E+=F_00_19(B,C,D) 2653 + ldrb r10,[r1],#4 2654 + ldrb r11,[r1,#-3] 2655 + ldrb r12,[r1,#-2] 2656 + add r6,r8,r6,ror#2 @ E+=K_00_19 2657 + orr r10,r11,r10,lsl#8 2658 + ldrb r11,[r1,#-1] 2659 + orr r10,r12,r10,lsl#8 2660 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) 2661 + orr r10,r11,r10,lsl#8 2662 + add r6,r6,r10 @ E+=X[i] 2663 + eor r11,r4,r5 @ F_xx_xx 2664 + str r10,[r14,#-4]! 2665 + and r11,r3,r11,ror#2 2666 + eor r11,r11,r5,ror#2 @ F_00_19(B,C,D) 2667 + add r6,r6,r11 @ E+=F_00_19(B,C,D) 2668 + ldrb r10,[r1],#4 2669 + ldrb r11,[r1,#-3] 2670 + ldrb r12,[r1,#-2] 2671 + add r5,r8,r5,ror#2 @ E+=K_00_19 2672 + orr r10,r11,r10,lsl#8 2673 + ldrb r11,[r1,#-1] 2674 + orr r10,r12,r10,lsl#8 2675 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) 2676 + orr r10,r11,r10,lsl#8 2677 + add r5,r5,r10 @ E+=X[i] 2678 + eor r11,r3,r4 @ F_xx_xx 2679 + str r10,[r14,#-4]! 2680 + and r11,r7,r11,ror#2 2681 + eor r11,r11,r4,ror#2 @ F_00_19(B,C,D) 2682 + add r5,r5,r11 @ E+=F_00_19(B,C,D) 2683 + ldrb r10,[r1],#4 2684 + ldrb r11,[r1,#-3] 2685 + ldrb r12,[r1,#-2] 2686 + add r4,r8,r4,ror#2 @ E+=K_00_19 2687 + orr r10,r11,r10,lsl#8 2688 + ldrb r11,[r1,#-1] 2689 + orr r10,r12,r10,lsl#8 2690 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) 2691 + orr r10,r11,r10,lsl#8 2692 + add r4,r4,r10 @ E+=X[i] 2693 + eor r11,r7,r3 @ F_xx_xx 2694 + str r10,[r14,#-4]! 
2695 + and r11,r6,r11,ror#2 2696 + eor r11,r11,r3,ror#2 @ F_00_19(B,C,D) 2697 + add r4,r4,r11 @ E+=F_00_19(B,C,D) 2698 + ldrb r10,[r1],#4 2699 + ldrb r11,[r1,#-3] 2700 + ldrb r12,[r1,#-2] 2701 + add r3,r8,r3,ror#2 @ E+=K_00_19 2702 + orr r10,r11,r10,lsl#8 2703 + ldrb r11,[r1,#-1] 2704 + orr r10,r12,r10,lsl#8 2705 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) 2706 + orr r10,r11,r10,lsl#8 2707 + add r3,r3,r10 @ E+=X[i] 2708 + eor r11,r6,r7 @ F_xx_xx 2709 + str r10,[r14,#-4]! 2710 + and r11,r5,r11,ror#2 2711 + eor r11,r11,r7,ror#2 @ F_00_19(B,C,D) 2712 + add r3,r3,r11 @ E+=F_00_19(B,C,D) 2713 + teq r14,sp 2714 + bne .L_00_15 @ [((11+4)*5+2)*3] 2715 + ldrb r10,[r1],#4 2716 + ldrb r11,[r1,#-3] 2717 + ldrb r12,[r1,#-2] 2718 + add r7,r8,r7,ror#2 @ E+=K_00_19 2719 + orr r10,r11,r10,lsl#8 2720 + ldrb r11,[r1,#-1] 2721 + orr r10,r12,r10,lsl#8 2722 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) 2723 + orr r10,r11,r10,lsl#8 2724 + add r7,r7,r10 @ E+=X[i] 2725 + eor r11,r5,r6 @ F_xx_xx 2726 + str r10,[r14,#-4]! 2727 + and r11,r4,r11,ror#2 2728 + eor r11,r11,r6,ror#2 @ F_00_19(B,C,D) 2729 + add r7,r7,r11 @ E+=F_00_19(B,C,D) 2730 + ldr r10,[r14,#15*4] 2731 + ldr r11,[r14,#13*4] 2732 + ldr r12,[r14,#7*4] 2733 + add r6,r8,r6,ror#2 @ E+=K_xx_xx 2734 + eor r10,r10,r11 2735 + ldr r11,[r14,#2*4] 2736 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) 2737 + eor r10,r10,r12 2738 + eor r10,r10,r11 2739 + eor r11,r4,r5 @ F_xx_xx, but not in 40_59 2740 + mov r10,r10,ror#31 2741 + add r6,r6,r10 @ E+=X[i] 2742 + str r10,[r14,#-4]! 2743 + and r11,r3,r11,ror#2 2744 + eor r11,r11,r5,ror#2 @ F_00_19(B,C,D) 2745 + add r6,r6,r11 @ E+=F_00_19(B,C,D) 2746 + ldr r10,[r14,#15*4] 2747 + ldr r11,[r14,#13*4] 2748 + ldr r12,[r14,#7*4] 2749 + add r5,r8,r5,ror#2 @ E+=K_xx_xx 2750 + eor r10,r10,r11 2751 + ldr r11,[r14,#2*4] 2752 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) 2753 + eor r10,r10,r12 2754 + eor r10,r10,r11 2755 + eor r11,r3,r4 @ F_xx_xx, but not in 40_59 2756 + mov r10,r10,ror#31 2757 + add r5,r5,r10 @ E+=X[i] 2758 + str r10,[r14,#-4]! 
2759 + and r11,r7,r11,ror#2 2760 + eor r11,r11,r4,ror#2 @ F_00_19(B,C,D) 2761 + add r5,r5,r11 @ E+=F_00_19(B,C,D) 2762 + ldr r10,[r14,#15*4] 2763 + ldr r11,[r14,#13*4] 2764 + ldr r12,[r14,#7*4] 2765 + add r4,r8,r4,ror#2 @ E+=K_xx_xx 2766 + eor r10,r10,r11 2767 + ldr r11,[r14,#2*4] 2768 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) 2769 + eor r10,r10,r12 2770 + eor r10,r10,r11 2771 + eor r11,r7,r3 @ F_xx_xx, but not in 40_59 2772 + mov r10,r10,ror#31 2773 + add r4,r4,r10 @ E+=X[i] 2774 + str r10,[r14,#-4]! 2775 + and r11,r6,r11,ror#2 2776 + eor r11,r11,r3,ror#2 @ F_00_19(B,C,D) 2777 + add r4,r4,r11 @ E+=F_00_19(B,C,D) 2778 + ldr r10,[r14,#15*4] 2779 + ldr r11,[r14,#13*4] 2780 + ldr r12,[r14,#7*4] 2781 + add r3,r8,r3,ror#2 @ E+=K_xx_xx 2782 + eor r10,r10,r11 2783 + ldr r11,[r14,#2*4] 2784 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) 2785 + eor r10,r10,r12 2786 + eor r10,r10,r11 2787 + eor r11,r6,r7 @ F_xx_xx, but not in 40_59 2788 + mov r10,r10,ror#31 2789 + add r3,r3,r10 @ E+=X[i] 2790 + str r10,[r14,#-4]! 2791 + and r11,r5,r11,ror#2 2792 + eor r11,r11,r7,ror#2 @ F_00_19(B,C,D) 2793 + add r3,r3,r11 @ E+=F_00_19(B,C,D) 2794 + 2795 + ldr r8,.LK_20_39 @ [+15+16*4] 2796 + sub sp,sp,#25*4 2797 + cmn sp,#0 @ [+3], clear carry to denote 20_39 2798 +.L_20_39_or_60_79: 2799 + ldr r10,[r14,#15*4] 2800 + ldr r11,[r14,#13*4] 2801 + ldr r12,[r14,#7*4] 2802 + add r7,r8,r7,ror#2 @ E+=K_xx_xx 2803 + eor r10,r10,r11 2804 + ldr r11,[r14,#2*4] 2805 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) 2806 + eor r10,r10,r12 2807 + eor r10,r10,r11 2808 + eor r11,r5,r6 @ F_xx_xx, but not in 40_59 2809 + mov r10,r10,ror#31 2810 + add r7,r7,r10 @ E+=X[i] 2811 + str r10,[r14,#-4]! 
2812 + eor r11,r4,r11,ror#2 @ F_20_39(B,C,D) 2813 + add r7,r7,r11 @ E+=F_20_39(B,C,D) 2814 + ldr r10,[r14,#15*4] 2815 + ldr r11,[r14,#13*4] 2816 + ldr r12,[r14,#7*4] 2817 + add r6,r8,r6,ror#2 @ E+=K_xx_xx 2818 + eor r10,r10,r11 2819 + ldr r11,[r14,#2*4] 2820 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) 2821 + eor r10,r10,r12 2822 + eor r10,r10,r11 2823 + eor r11,r4,r5 @ F_xx_xx, but not in 40_59 2824 + mov r10,r10,ror#31 2825 + add r6,r6,r10 @ E+=X[i] 2826 + str r10,[r14,#-4]! 2827 + eor r11,r3,r11,ror#2 @ F_20_39(B,C,D) 2828 + add r6,r6,r11 @ E+=F_20_39(B,C,D) 2829 + ldr r10,[r14,#15*4] 2830 + ldr r11,[r14,#13*4] 2831 + ldr r12,[r14,#7*4] 2832 + add r5,r8,r5,ror#2 @ E+=K_xx_xx 2833 + eor r10,r10,r11 2834 + ldr r11,[r14,#2*4] 2835 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) 2836 + eor r10,r10,r12 2837 + eor r10,r10,r11 2838 + eor r11,r3,r4 @ F_xx_xx, but not in 40_59 2839 + mov r10,r10,ror#31 2840 + add r5,r5,r10 @ E+=X[i] 2841 + str r10,[r14,#-4]! 2842 + eor r11,r7,r11,ror#2 @ F_20_39(B,C,D) 2843 + add r5,r5,r11 @ E+=F_20_39(B,C,D) 2844 + ldr r10,[r14,#15*4] 2845 + ldr r11,[r14,#13*4] 2846 + ldr r12,[r14,#7*4] 2847 + add r4,r8,r4,ror#2 @ E+=K_xx_xx 2848 + eor r10,r10,r11 2849 + ldr r11,[r14,#2*4] 2850 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) 2851 + eor r10,r10,r12 2852 + eor r10,r10,r11 2853 + eor r11,r7,r3 @ F_xx_xx, but not in 40_59 2854 + mov r10,r10,ror#31 2855 + add r4,r4,r10 @ E+=X[i] 2856 + str r10,[r14,#-4]! 2857 + eor r11,r6,r11,ror#2 @ F_20_39(B,C,D) 2858 + add r4,r4,r11 @ E+=F_20_39(B,C,D) 2859 + ldr r10,[r14,#15*4] 2860 + ldr r11,[r14,#13*4] 2861 + ldr r12,[r14,#7*4] 2862 + add r3,r8,r3,ror#2 @ E+=K_xx_xx 2863 + eor r10,r10,r11 2864 + ldr r11,[r14,#2*4] 2865 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) 2866 + eor r10,r10,r12 2867 + eor r10,r10,r11 2868 + eor r11,r6,r7 @ F_xx_xx, but not in 40_59 2869 + mov r10,r10,ror#31 2870 + add r3,r3,r10 @ E+=X[i] 2871 + str r10,[r14,#-4]! 
2872 + eor r11,r5,r11,ror#2 @ F_20_39(B,C,D) 2873 + add r3,r3,r11 @ E+=F_20_39(B,C,D) 2874 + teq r14,sp @ preserve carry 2875 + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] 2876 + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes 2877 + 2878 + ldr r8,.LK_40_59 2879 + sub sp,sp,#20*4 @ [+2] 2880 +.L_40_59: 2881 + ldr r10,[r14,#15*4] 2882 + ldr r11,[r14,#13*4] 2883 + ldr r12,[r14,#7*4] 2884 + add r7,r8,r7,ror#2 @ E+=K_xx_xx 2885 + eor r10,r10,r11 2886 + ldr r11,[r14,#2*4] 2887 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) 2888 + eor r10,r10,r12 2889 + eor r10,r10,r11 2890 + mov r10,r10,ror#31 2891 + add r7,r7,r10 @ E+=X[i] 2892 + str r10,[r14,#-4]! 2893 + and r11,r4,r5,ror#2 2894 + orr r12,r4,r5,ror#2 2895 + and r12,r12,r6,ror#2 2896 + orr r11,r11,r12 @ F_40_59(B,C,D) 2897 + add r7,r7,r11 @ E+=F_40_59(B,C,D) 2898 + ldr r10,[r14,#15*4] 2899 + ldr r11,[r14,#13*4] 2900 + ldr r12,[r14,#7*4] 2901 + add r6,r8,r6,ror#2 @ E+=K_xx_xx 2902 + eor r10,r10,r11 2903 + ldr r11,[r14,#2*4] 2904 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) 2905 + eor r10,r10,r12 2906 + eor r10,r10,r11 2907 + mov r10,r10,ror#31 2908 + add r6,r6,r10 @ E+=X[i] 2909 + str r10,[r14,#-4]! 2910 + and r11,r3,r4,ror#2 2911 + orr r12,r3,r4,ror#2 2912 + and r12,r12,r5,ror#2 2913 + orr r11,r11,r12 @ F_40_59(B,C,D) 2914 + add r6,r6,r11 @ E+=F_40_59(B,C,D) 2915 + ldr r10,[r14,#15*4] 2916 + ldr r11,[r14,#13*4] 2917 + ldr r12,[r14,#7*4] 2918 + add r5,r8,r5,ror#2 @ E+=K_xx_xx 2919 + eor r10,r10,r11 2920 + ldr r11,[r14,#2*4] 2921 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) 2922 + eor r10,r10,r12 2923 + eor r10,r10,r11 2924 + mov r10,r10,ror#31 2925 + add r5,r5,r10 @ E+=X[i] 2926 + str r10,[r14,#-4]! 
2927 + and r11,r7,r3,ror#2 2928 + orr r12,r7,r3,ror#2 2929 + and r12,r12,r4,ror#2 2930 + orr r11,r11,r12 @ F_40_59(B,C,D) 2931 + add r5,r5,r11 @ E+=F_40_59(B,C,D) 2932 + ldr r10,[r14,#15*4] 2933 + ldr r11,[r14,#13*4] 2934 + ldr r12,[r14,#7*4] 2935 + add r4,r8,r4,ror#2 @ E+=K_xx_xx 2936 + eor r10,r10,r11 2937 + ldr r11,[r14,#2*4] 2938 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) 2939 + eor r10,r10,r12 2940 + eor r10,r10,r11 2941 + mov r10,r10,ror#31 2942 + add r4,r4,r10 @ E+=X[i] 2943 + str r10,[r14,#-4]! 2944 + and r11,r6,r7,ror#2 2945 + orr r12,r6,r7,ror#2 2946 + and r12,r12,r3,ror#2 2947 + orr r11,r11,r12 @ F_40_59(B,C,D) 2948 + add r4,r4,r11 @ E+=F_40_59(B,C,D) 2949 + ldr r10,[r14,#15*4] 2950 + ldr r11,[r14,#13*4] 2951 + ldr r12,[r14,#7*4] 2952 + add r3,r8,r3,ror#2 @ E+=K_xx_xx 2953 + eor r10,r10,r11 2954 + ldr r11,[r14,#2*4] 2955 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) 2956 + eor r10,r10,r12 2957 + eor r10,r10,r11 2958 + mov r10,r10,ror#31 2959 + add r3,r3,r10 @ E+=X[i] 2960 + str r10,[r14,#-4]! 
2961 + and r11,r5,r6,ror#2 2962 + orr r12,r5,r6,ror#2 2963 + and r12,r12,r7,ror#2 2964 + orr r11,r11,r12 @ F_40_59(B,C,D) 2965 + add r3,r3,r11 @ E+=F_40_59(B,C,D) 2966 + teq r14,sp 2967 + bne .L_40_59 @ [+((12+5)*5+2)*4] 2968 + 2969 + ldr r8,.LK_60_79 2970 + sub sp,sp,#20*4 2971 + cmp sp,#0 @ set carry to denote 60_79 2972 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes 2973 +.L_done: 2974 + add sp,sp,#80*4 @ "deallocate" stack frame 2975 + ldmia r0,{r8,r10,r11,r12,r14} 2976 + add r3,r8,r3 2977 + add r4,r10,r4 2978 + add r5,r11,r5,ror#2 2979 + add r6,r12,r6,ror#2 2980 + add r7,r14,r7,ror#2 2981 + stmia r0,{r3,r4,r5,r6,r7} 2982 + teq r1,r2 2983 + bne .Lloop @ [+18], total 1307 2984 + 2985 + ldmia sp!,{r4-r12,lr} 2986 + tst lr,#1 2987 + moveq pc,lr @ be binary compatible with V4, yet 2988 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 2989 +.align 2 2990 +.LK_00_19: .word 0x5a827999 2991 +.LK_20_39: .word 0x6ed9eba1 2992 +.LK_40_59: .word 0x8f1bbcdc 2993 +.LK_60_79: .word 0xca62c1d6 2994 +.size sha1_block_data_order,.-sha1_block_data_order 2995 +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro (a] openssl.org>" 2996 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 2997 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha256-armv4.pl 2009-09-03 15:42:39.000000000 -0700 2998 @@ -0,0 +1,180 @@ 2999 +#!/usr/bin/env perl 3000 + 3001 +# ==================================================================== 3002 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 3003 +# project. The module is, however, dual licensed under OpenSSL and 3004 +# CRYPTOGAMS licenses depending on where you obtain it. For further 3005 +# details see http://www.openssl.org/~appro/cryptogams/. 3006 +# ==================================================================== 3007 + 3008 +# SHA256 block procedure for ARMv4. May 2007. 
3009 + 3010 +# Performance is ~2x better than gcc 3.4 generated code and in "abso- 3011 +# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per 3012 +# byte. 3013 + 3014 +$output=shift; # first argument: path of the assembly file to generate 3015 +open STDOUT,">$output" or die "can't open $output: $!"; # abort if the output file cannot be created 3016 +# Register roles in the generated code; $t0/$t1 alias $ctx/$len, which are saved on the stack at entry. 3017 +$ctx="r0"; $t0="r0"; 3018 +$inp="r1"; 3019 +$len="r2"; $t1="r2"; 3020 +$T1="r3"; 3021 +$A="r4"; 3022 +$B="r5"; 3023 +$C="r6"; 3024 +$D="r7"; 3025 +$E="r8"; 3026 +$F="r9"; 3027 +$G="r10"; 3028 +$H="r11"; 3029 +@V=($A,$B,$C,$D,$E,$F,$G,$H); 3030 +$t2="r12"; 3031 +$Ktbl="r14"; 3032 +# Rotate/shift amounts: Sigma0/Sigma1 entries are all ror#; the third sigma0/sigma1 entry is used as lsr#. 3033 +@Sigma0=( 2,13,22); 3034 +@Sigma1=( 6,11,25); 3035 +@sigma0=( 7,18, 3); 3036 +@sigma1=(17,19,10); 3037 +# BODY_00_15(i,a..h): append one round to $code; for i<16 it first assembles X[i] bytewise (big-endian) from $inp. 3038 +sub BODY_00_15 { 3039 +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 3040 + 3041 +$code.=<<___ if ($i<16); 3042 + ldrb $T1,[$inp,#3] @ $i 3043 + ldrb $t2,[$inp,#2] 3044 + ldrb $t1,[$inp,#1] 3045 + ldrb $t0,[$inp],#4 3046 + orr $T1,$T1,$t2,lsl#8 3047 + orr $T1,$T1,$t1,lsl#16 3048 + orr $T1,$T1,$t0,lsl#24 3049 + `"str $inp,[sp,#17*4]" if ($i==15)` 3050 +___ 3051 +$code.=<<___; 3052 + ldr $t2,[$Ktbl],#4 @ *K256++ 3053 + str $T1,[sp,#`$i%16`*4] 3054 + mov $t0,$e,ror#$Sigma1[0] 3055 + eor $t0,$t0,$e,ror#$Sigma1[1] 3056 + eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) 3057 + add $T1,$T1,$t0 3058 + eor $t1,$f,$g 3059 + and $t1,$t1,$e 3060 + eor $t1,$t1,$g @ Ch(e,f,g) 3061 + add $T1,$T1,$t1 3062 + add $T1,$T1,$h 3063 + add $T1,$T1,$t2 3064 + mov $h,$a,ror#$Sigma0[0] 3065 + eor $h,$h,$a,ror#$Sigma0[1] 3066 + eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) 3067 + orr $t0,$a,$b 3068 + and $t0,$t0,$c 3069 + and $t1,$a,$b 3070 + orr $t0,$t0,$t1 @ Maj(a,b,c) 3071 + add $h,$h,$t0 3072 + add $d,$d,$T1 3073 + add $h,$h,$T1 3074 +___ 3075 +} 3076 +# BODY_16_XX(i,a..h): X[i%16] += sigma0(X[(i+1)%16]) + sigma1(X[(i+14)%16]) + X[(i+9)%16], then the common round body. 3077 +sub BODY_16_XX { 3078 +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 3079 + 3080 +$code.=<<___; 3081 + ldr $t1,[sp,#`($i+1)%16`*4] @ $i 3082 + ldr $t2,[sp,#`($i+14)%16`*4] 3083 + ldr $T1,[sp,#`($i+0)%16`*4] 3084 + ldr $inp,[sp,#`($i+9)%16`*4] 3085 + mov $t0,$t1,ror#$sigma0[0] 3086 + eor $t0,$t0,$t1,ror#$sigma0[1] 3087 + eor 
$t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) 3088 + mov $t1,$t2,ror#$sigma1[0] 3089 + eor $t1,$t1,$t2,ror#$sigma1[1] 3090 + eor $t1,$t1,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) 3091 + add $T1,$T1,$t0 3092 + add $T1,$T1,$t1 3093 + add $T1,$T1,$inp 3094 +___ 3095 + &BODY_00_15(@_); 3096 +} 3097 +# Assembly template: the 256-byte K256 table sits immediately before the function, which locates it as (entry address - 256). 3098 +$code=<<___; 3099 +.text 3100 +.code 32 3101 + 3102 +.type K256,%object 3103 +.align 5 3104 +K256: 3105 +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 3106 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 3107 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 3108 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 3109 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 3110 +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 3111 +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 3112 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 3113 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 3114 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 3115 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 3116 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 3117 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 3118 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 3119 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 3120 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 3121 +.size K256,.-K256 3122 + 3123 +.global sha256_block_data_order 3124 +.type sha256_block_data_order,%function 3125 +sha256_block_data_order: 3126 + sub r3,pc,#8 @ sha256_block_data_order 3127 + add $len,$inp,$len,lsl#6 @ len to point at the end of inp 3128 + stmdb sp!,{$ctx,$inp,$len,r4-r12,lr} 3129 + ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} 3130 + sub $Ktbl,r3,#256 @ K256 3131 + sub sp,sp,#16*4 @ alloca(X[16]) 3132 +.Loop: 3133 +___ 3134 +for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } 3135 +$code.=".Lrounds_16_xx:\n"; 3136 +for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } 3137 +$code.=<<___; 3138 + and $t2,$t2,#0xff 3139 + cmp $t2,#0xf2 3140 + bne .Lrounds_16_xx 
3141 + 3142 + ldr $T1,[sp,#16*4] @ pull ctx 3143 + ldr $t0,[$T1,#0] 3144 + ldr $t1,[$T1,#4] 3145 + ldr $t2,[$T1,#8] 3146 + add $A,$A,$t0 3147 + ldr $t0,[$T1,#12] 3148 + add $B,$B,$t1 3149 + ldr $t1,[$T1,#16] 3150 + add $C,$C,$t2 3151 + ldr $t2,[$T1,#20] 3152 + add $D,$D,$t0 3153 + ldr $t0,[$T1,#24] 3154 + add $E,$E,$t1 3155 + ldr $t1,[$T1,#28] 3156 + add $F,$F,$t2 3157 + ldr $inp,[sp,#17*4] @ pull inp 3158 + ldr $t2,[sp,#18*4] @ pull inp+len 3159 + add $G,$G,$t0 3160 + add $H,$H,$t1 3161 + stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H} 3162 + cmp $inp,$t2 3163 + sub $Ktbl,$Ktbl,#256 @ rewind Ktbl 3164 + bne .Loop 3165 + 3166 + add sp,sp,#`16+3`*4 @ destroy frame 3167 + ldmia sp!,{r4-r12,lr} 3168 + tst lr,#1 3169 + moveq pc,lr @ be binary compatible with V4, yet 3170 + bx lr @ interoperable with Thumb ISA:-) 3171 +.size sha256_block_data_order,.-sha256_block_data_order 3172 +.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 3173 +___ 3174 +# Post-process: evaluate backquoted spans as Perl expressions, then encode "bx lr" as a raw instruction word. 3175 +$code =~ s/\`([^\`]*)\`/eval $1/gem; 3176 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 3177 +print $code; 3178 +close STDOUT or die "error closing STDOUT: $!"; # enforce flush and report write errors instead of leaving a truncated file 3179 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 3180 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha256-armv4.s 2009-09-03 15:42:39.000000000 -0700 3181 @@ -0,0 +1,1110 @@ 3182 +.text 3183 +.code 32 3184 + 3185 +.type K256,%object 3186 +.align 5 3187 +K256: 3188 +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 3189 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 3190 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 3191 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 3192 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 3193 +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 3194 +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 3195 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 3196 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 3197 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 3198 
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 3199 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 3200 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 3201 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 3202 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 3203 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 3204 +.size K256,.-K256 3205 + 3206 +.global sha256_block_data_order 3207 +.type sha256_block_data_order,%function 3208 +sha256_block_data_order: 3209 + sub r3,pc,#8 @ sha256_block_data_order 3210 + add r2,r1,r2,lsl#6 @ len to point at the end of inp 3211 + stmdb sp!,{r0,r1,r2,r4-r12,lr} 3212 + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} 3213 + sub r14,r3,#256 @ K256 3214 + sub sp,sp,#16*4 @ alloca(X[16]) 3215 +.Loop: 3216 + ldrb r3,[r1,#3] @ 0 3217 + ldrb r12,[r1,#2] 3218 + ldrb r2,[r1,#1] 3219 + ldrb r0,[r1],#4 3220 + orr r3,r3,r12,lsl#8 3221 + orr r3,r3,r2,lsl#16 3222 + orr r3,r3,r0,lsl#24 3223 + 3224 + ldr r12,[r14],#4 @ *K256++ 3225 + str r3,[sp,#0*4] 3226 + mov r0,r8,ror#6 3227 + eor r0,r0,r8,ror#11 3228 + eor r0,r0,r8,ror#25 @ Sigma1(e) 3229 + add r3,r3,r0 3230 + eor r2,r9,r10 3231 + and r2,r2,r8 3232 + eor r2,r2,r10 @ Ch(e,f,g) 3233 + add r3,r3,r2 3234 + add r3,r3,r11 3235 + add r3,r3,r12 3236 + mov r11,r4,ror#2 3237 + eor r11,r11,r4,ror#13 3238 + eor r11,r11,r4,ror#22 @ Sigma0(a) 3239 + orr r0,r4,r5 3240 + and r0,r0,r6 3241 + and r2,r4,r5 3242 + orr r0,r0,r2 @ Maj(a,b,c) 3243 + add r11,r11,r0 3244 + add r7,r7,r3 3245 + add r11,r11,r3 3246 + ldrb r3,[r1,#3] @ 1 3247 + ldrb r12,[r1,#2] 3248 + ldrb r2,[r1,#1] 3249 + ldrb r0,[r1],#4 3250 + orr r3,r3,r12,lsl#8 3251 + orr r3,r3,r2,lsl#16 3252 + orr r3,r3,r0,lsl#24 3253 + 3254 + ldr r12,[r14],#4 @ *K256++ 3255 + str r3,[sp,#1*4] 3256 + mov r0,r7,ror#6 3257 + eor r0,r0,r7,ror#11 3258 + eor r0,r0,r7,ror#25 @ Sigma1(e) 3259 + add r3,r3,r0 3260 + eor r2,r8,r9 3261 + and r2,r2,r7 3262 + eor r2,r2,r9 @ Ch(e,f,g) 3263 + add r3,r3,r2 3264 + add r3,r3,r10 3265 + add r3,r3,r12 3266 + mov r10,r11,ror#2 
3267 + eor r10,r10,r11,ror#13 3268 + eor r10,r10,r11,ror#22 @ Sigma0(a) 3269 + orr r0,r11,r4 3270 + and r0,r0,r5 3271 + and r2,r11,r4 3272 + orr r0,r0,r2 @ Maj(a,b,c) 3273 + add r10,r10,r0 3274 + add r6,r6,r3 3275 + add r10,r10,r3 3276 + ldrb r3,[r1,#3] @ 2 3277 + ldrb r12,[r1,#2] 3278 + ldrb r2,[r1,#1] 3279 + ldrb r0,[r1],#4 3280 + orr r3,r3,r12,lsl#8 3281 + orr r3,r3,r2,lsl#16 3282 + orr r3,r3,r0,lsl#24 3283 + 3284 + ldr r12,[r14],#4 @ *K256++ 3285 + str r3,[sp,#2*4] 3286 + mov r0,r6,ror#6 3287 + eor r0,r0,r6,ror#11 3288 + eor r0,r0,r6,ror#25 @ Sigma1(e) 3289 + add r3,r3,r0 3290 + eor r2,r7,r8 3291 + and r2,r2,r6 3292 + eor r2,r2,r8 @ Ch(e,f,g) 3293 + add r3,r3,r2 3294 + add r3,r3,r9 3295 + add r3,r3,r12 3296 + mov r9,r10,ror#2 3297 + eor r9,r9,r10,ror#13 3298 + eor r9,r9,r10,ror#22 @ Sigma0(a) 3299 + orr r0,r10,r11 3300 + and r0,r0,r4 3301 + and r2,r10,r11 3302 + orr r0,r0,r2 @ Maj(a,b,c) 3303 + add r9,r9,r0 3304 + add r5,r5,r3 3305 + add r9,r9,r3 3306 + ldrb r3,[r1,#3] @ 3 3307 + ldrb r12,[r1,#2] 3308 + ldrb r2,[r1,#1] 3309 + ldrb r0,[r1],#4 3310 + orr r3,r3,r12,lsl#8 3311 + orr r3,r3,r2,lsl#16 3312 + orr r3,r3,r0,lsl#24 3313 + 3314 + ldr r12,[r14],#4 @ *K256++ 3315 + str r3,[sp,#3*4] 3316 + mov r0,r5,ror#6 3317 + eor r0,r0,r5,ror#11 3318 + eor r0,r0,r5,ror#25 @ Sigma1(e) 3319 + add r3,r3,r0 3320 + eor r2,r6,r7 3321 + and r2,r2,r5 3322 + eor r2,r2,r7 @ Ch(e,f,g) 3323 + add r3,r3,r2 3324 + add r3,r3,r8 3325 + add r3,r3,r12 3326 + mov r8,r9,ror#2 3327 + eor r8,r8,r9,ror#13 3328 + eor r8,r8,r9,ror#22 @ Sigma0(a) 3329 + orr r0,r9,r10 3330 + and r0,r0,r11 3331 + and r2,r9,r10 3332 + orr r0,r0,r2 @ Maj(a,b,c) 3333 + add r8,r8,r0 3334 + add r4,r4,r3 3335 + add r8,r8,r3 3336 + ldrb r3,[r1,#3] @ 4 3337 + ldrb r12,[r1,#2] 3338 + ldrb r2,[r1,#1] 3339 + ldrb r0,[r1],#4 3340 + orr r3,r3,r12,lsl#8 3341 + orr r3,r3,r2,lsl#16 3342 + orr r3,r3,r0,lsl#24 3343 + 3344 + ldr r12,[r14],#4 @ *K256++ 3345 + str r3,[sp,#4*4] 3346 + mov r0,r4,ror#6 3347 + eor r0,r0,r4,ror#11 3348 + eor 
r0,r0,r4,ror#25 @ Sigma1(e) 3349 + add r3,r3,r0 3350 + eor r2,r5,r6 3351 + and r2,r2,r4 3352 + eor r2,r2,r6 @ Ch(e,f,g) 3353 + add r3,r3,r2 3354 + add r3,r3,r7 3355 + add r3,r3,r12 3356 + mov r7,r8,ror#2 3357 + eor r7,r7,r8,ror#13 3358 + eor r7,r7,r8,ror#22 @ Sigma0(a) 3359 + orr r0,r8,r9 3360 + and r0,r0,r10 3361 + and r2,r8,r9 3362 + orr r0,r0,r2 @ Maj(a,b,c) 3363 + add r7,r7,r0 3364 + add r11,r11,r3 3365 + add r7,r7,r3 3366 + ldrb r3,[r1,#3] @ 5 3367 + ldrb r12,[r1,#2] 3368 + ldrb r2,[r1,#1] 3369 + ldrb r0,[r1],#4 3370 + orr r3,r3,r12,lsl#8 3371 + orr r3,r3,r2,lsl#16 3372 + orr r3,r3,r0,lsl#24 3373 + 3374 + ldr r12,[r14],#4 @ *K256++ 3375 + str r3,[sp,#5*4] 3376 + mov r0,r11,ror#6 3377 + eor r0,r0,r11,ror#11 3378 + eor r0,r0,r11,ror#25 @ Sigma1(e) 3379 + add r3,r3,r0 3380 + eor r2,r4,r5 3381 + and r2,r2,r11 3382 + eor r2,r2,r5 @ Ch(e,f,g) 3383 + add r3,r3,r2 3384 + add r3,r3,r6 3385 + add r3,r3,r12 3386 + mov r6,r7,ror#2 3387 + eor r6,r6,r7,ror#13 3388 + eor r6,r6,r7,ror#22 @ Sigma0(a) 3389 + orr r0,r7,r8 3390 + and r0,r0,r9 3391 + and r2,r7,r8 3392 + orr r0,r0,r2 @ Maj(a,b,c) 3393 + add r6,r6,r0 3394 + add r10,r10,r3 3395 + add r6,r6,r3 3396 + ldrb r3,[r1,#3] @ 6 3397 + ldrb r12,[r1,#2] 3398 + ldrb r2,[r1,#1] 3399 + ldrb r0,[r1],#4 3400 + orr r3,r3,r12,lsl#8 3401 + orr r3,r3,r2,lsl#16 3402 + orr r3,r3,r0,lsl#24 3403 + 3404 + ldr r12,[r14],#4 @ *K256++ 3405 + str r3,[sp,#6*4] 3406 + mov r0,r10,ror#6 3407 + eor r0,r0,r10,ror#11 3408 + eor r0,r0,r10,ror#25 @ Sigma1(e) 3409 + add r3,r3,r0 3410 + eor r2,r11,r4 3411 + and r2,r2,r10 3412 + eor r2,r2,r4 @ Ch(e,f,g) 3413 + add r3,r3,r2 3414 + add r3,r3,r5 3415 + add r3,r3,r12 3416 + mov r5,r6,ror#2 3417 + eor r5,r5,r6,ror#13 3418 + eor r5,r5,r6,ror#22 @ Sigma0(a) 3419 + orr r0,r6,r7 3420 + and r0,r0,r8 3421 + and r2,r6,r7 3422 + orr r0,r0,r2 @ Maj(a,b,c) 3423 + add r5,r5,r0 3424 + add r9,r9,r3 3425 + add r5,r5,r3 3426 + ldrb r3,[r1,#3] @ 7 3427 + ldrb r12,[r1,#2] 3428 + ldrb r2,[r1,#1] 3429 + ldrb r0,[r1],#4 3430 + orr 
r3,r3,r12,lsl#8 3431 + orr r3,r3,r2,lsl#16 3432 + orr r3,r3,r0,lsl#24 3433 + 3434 + ldr r12,[r14],#4 @ *K256++ 3435 + str r3,[sp,#7*4] 3436 + mov r0,r9,ror#6 3437 + eor r0,r0,r9,ror#11 3438 + eor r0,r0,r9,ror#25 @ Sigma1(e) 3439 + add r3,r3,r0 3440 + eor r2,r10,r11 3441 + and r2,r2,r9 3442 + eor r2,r2,r11 @ Ch(e,f,g) 3443 + add r3,r3,r2 3444 + add r3,r3,r4 3445 + add r3,r3,r12 3446 + mov r4,r5,ror#2 3447 + eor r4,r4,r5,ror#13 3448 + eor r4,r4,r5,ror#22 @ Sigma0(a) 3449 + orr r0,r5,r6 3450 + and r0,r0,r7 3451 + and r2,r5,r6 3452 + orr r0,r0,r2 @ Maj(a,b,c) 3453 + add r4,r4,r0 3454 + add r8,r8,r3 3455 + add r4,r4,r3 3456 + ldrb r3,[r1,#3] @ 8 3457 + ldrb r12,[r1,#2] 3458 + ldrb r2,[r1,#1] 3459 + ldrb r0,[r1],#4 3460 + orr r3,r3,r12,lsl#8 3461 + orr r3,r3,r2,lsl#16 3462 + orr r3,r3,r0,lsl#24 3463 + 3464 + ldr r12,[r14],#4 @ *K256++ 3465 + str r3,[sp,#8*4] 3466 + mov r0,r8,ror#6 3467 + eor r0,r0,r8,ror#11 3468 + eor r0,r0,r8,ror#25 @ Sigma1(e) 3469 + add r3,r3,r0 3470 + eor r2,r9,r10 3471 + and r2,r2,r8 3472 + eor r2,r2,r10 @ Ch(e,f,g) 3473 + add r3,r3,r2 3474 + add r3,r3,r11 3475 + add r3,r3,r12 3476 + mov r11,r4,ror#2 3477 + eor r11,r11,r4,ror#13 3478 + eor r11,r11,r4,ror#22 @ Sigma0(a) 3479 + orr r0,r4,r5 3480 + and r0,r0,r6 3481 + and r2,r4,r5 3482 + orr r0,r0,r2 @ Maj(a,b,c) 3483 + add r11,r11,r0 3484 + add r7,r7,r3 3485 + add r11,r11,r3 3486 + ldrb r3,[r1,#3] @ 9 3487 + ldrb r12,[r1,#2] 3488 + ldrb r2,[r1,#1] 3489 + ldrb r0,[r1],#4 3490 + orr r3,r3,r12,lsl#8 3491 + orr r3,r3,r2,lsl#16 3492 + orr r3,r3,r0,lsl#24 3493 + 3494 + ldr r12,[r14],#4 @ *K256++ 3495 + str r3,[sp,#9*4] 3496 + mov r0,r7,ror#6 3497 + eor r0,r0,r7,ror#11 3498 + eor r0,r0,r7,ror#25 @ Sigma1(e) 3499 + add r3,r3,r0 3500 + eor r2,r8,r9 3501 + and r2,r2,r7 3502 + eor r2,r2,r9 @ Ch(e,f,g) 3503 + add r3,r3,r2 3504 + add r3,r3,r10 3505 + add r3,r3,r12 3506 + mov r10,r11,ror#2 3507 + eor r10,r10,r11,ror#13 3508 + eor r10,r10,r11,ror#22 @ Sigma0(a) 3509 + orr r0,r11,r4 3510 + and r0,r0,r5 3511 + and 
r2,r11,r4 3512 + orr r0,r0,r2 @ Maj(a,b,c) 3513 + add r10,r10,r0 3514 + add r6,r6,r3 3515 + add r10,r10,r3 3516 + ldrb r3,[r1,#3] @ 10 3517 + ldrb r12,[r1,#2] 3518 + ldrb r2,[r1,#1] 3519 + ldrb r0,[r1],#4 3520 + orr r3,r3,r12,lsl#8 3521 + orr r3,r3,r2,lsl#16 3522 + orr r3,r3,r0,lsl#24 3523 + 3524 + ldr r12,[r14],#4 @ *K256++ 3525 + str r3,[sp,#10*4] 3526 + mov r0,r6,ror#6 3527 + eor r0,r0,r6,ror#11 3528 + eor r0,r0,r6,ror#25 @ Sigma1(e) 3529 + add r3,r3,r0 3530 + eor r2,r7,r8 3531 + and r2,r2,r6 3532 + eor r2,r2,r8 @ Ch(e,f,g) 3533 + add r3,r3,r2 3534 + add r3,r3,r9 3535 + add r3,r3,r12 3536 + mov r9,r10,ror#2 3537 + eor r9,r9,r10,ror#13 3538 + eor r9,r9,r10,ror#22 @ Sigma0(a) 3539 + orr r0,r10,r11 3540 + and r0,r0,r4 3541 + and r2,r10,r11 3542 + orr r0,r0,r2 @ Maj(a,b,c) 3543 + add r9,r9,r0 3544 + add r5,r5,r3 3545 + add r9,r9,r3 3546 + ldrb r3,[r1,#3] @ 11 3547 + ldrb r12,[r1,#2] 3548 + ldrb r2,[r1,#1] 3549 + ldrb r0,[r1],#4 3550 + orr r3,r3,r12,lsl#8 3551 + orr r3,r3,r2,lsl#16 3552 + orr r3,r3,r0,lsl#24 3553 + 3554 + ldr r12,[r14],#4 @ *K256++ 3555 + str r3,[sp,#11*4] 3556 + mov r0,r5,ror#6 3557 + eor r0,r0,r5,ror#11 3558 + eor r0,r0,r5,ror#25 @ Sigma1(e) 3559 + add r3,r3,r0 3560 + eor r2,r6,r7 3561 + and r2,r2,r5 3562 + eor r2,r2,r7 @ Ch(e,f,g) 3563 + add r3,r3,r2 3564 + add r3,r3,r8 3565 + add r3,r3,r12 3566 + mov r8,r9,ror#2 3567 + eor r8,r8,r9,ror#13 3568 + eor r8,r8,r9,ror#22 @ Sigma0(a) 3569 + orr r0,r9,r10 3570 + and r0,r0,r11 3571 + and r2,r9,r10 3572 + orr r0,r0,r2 @ Maj(a,b,c) 3573 + add r8,r8,r0 3574 + add r4,r4,r3 3575 + add r8,r8,r3 3576 + ldrb r3,[r1,#3] @ 12 3577 + ldrb r12,[r1,#2] 3578 + ldrb r2,[r1,#1] 3579 + ldrb r0,[r1],#4 3580 + orr r3,r3,r12,lsl#8 3581 + orr r3,r3,r2,lsl#16 3582 + orr r3,r3,r0,lsl#24 3583 + 3584 + ldr r12,[r14],#4 @ *K256++ 3585 + str r3,[sp,#12*4] 3586 + mov r0,r4,ror#6 3587 + eor r0,r0,r4,ror#11 3588 + eor r0,r0,r4,ror#25 @ Sigma1(e) 3589 + add r3,r3,r0 3590 + eor r2,r5,r6 3591 + and r2,r2,r4 3592 + eor r2,r2,r6 @ 
Ch(e,f,g) 3593 + add r3,r3,r2 3594 + add r3,r3,r7 3595 + add r3,r3,r12 3596 + mov r7,r8,ror#2 3597 + eor r7,r7,r8,ror#13 3598 + eor r7,r7,r8,ror#22 @ Sigma0(a) 3599 + orr r0,r8,r9 3600 + and r0,r0,r10 3601 + and r2,r8,r9 3602 + orr r0,r0,r2 @ Maj(a,b,c) 3603 + add r7,r7,r0 3604 + add r11,r11,r3 3605 + add r7,r7,r3 3606 + ldrb r3,[r1,#3] @ 13 3607 + ldrb r12,[r1,#2] 3608 + ldrb r2,[r1,#1] 3609 + ldrb r0,[r1],#4 3610 + orr r3,r3,r12,lsl#8 3611 + orr r3,r3,r2,lsl#16 3612 + orr r3,r3,r0,lsl#24 3613 + 3614 + ldr r12,[r14],#4 @ *K256++ 3615 + str r3,[sp,#13*4] 3616 + mov r0,r11,ror#6 3617 + eor r0,r0,r11,ror#11 3618 + eor r0,r0,r11,ror#25 @ Sigma1(e) 3619 + add r3,r3,r0 3620 + eor r2,r4,r5 3621 + and r2,r2,r11 3622 + eor r2,r2,r5 @ Ch(e,f,g) 3623 + add r3,r3,r2 3624 + add r3,r3,r6 3625 + add r3,r3,r12 3626 + mov r6,r7,ror#2 3627 + eor r6,r6,r7,ror#13 3628 + eor r6,r6,r7,ror#22 @ Sigma0(a) 3629 + orr r0,r7,r8 3630 + and r0,r0,r9 3631 + and r2,r7,r8 3632 + orr r0,r0,r2 @ Maj(a,b,c) 3633 + add r6,r6,r0 3634 + add r10,r10,r3 3635 + add r6,r6,r3 3636 + ldrb r3,[r1,#3] @ 14 3637 + ldrb r12,[r1,#2] 3638 + ldrb r2,[r1,#1] 3639 + ldrb r0,[r1],#4 3640 + orr r3,r3,r12,lsl#8 3641 + orr r3,r3,r2,lsl#16 3642 + orr r3,r3,r0,lsl#24 3643 + 3644 + ldr r12,[r14],#4 @ *K256++ 3645 + str r3,[sp,#14*4] 3646 + mov r0,r10,ror#6 3647 + eor r0,r0,r10,ror#11 3648 + eor r0,r0,r10,ror#25 @ Sigma1(e) 3649 + add r3,r3,r0 3650 + eor r2,r11,r4 3651 + and r2,r2,r10 3652 + eor r2,r2,r4 @ Ch(e,f,g) 3653 + add r3,r3,r2 3654 + add r3,r3,r5 3655 + add r3,r3,r12 3656 + mov r5,r6,ror#2 3657 + eor r5,r5,r6,ror#13 3658 + eor r5,r5,r6,ror#22 @ Sigma0(a) 3659 + orr r0,r6,r7 3660 + and r0,r0,r8 3661 + and r2,r6,r7 3662 + orr r0,r0,r2 @ Maj(a,b,c) 3663 + add r5,r5,r0 3664 + add r9,r9,r3 3665 + add r5,r5,r3 3666 + ldrb r3,[r1,#3] @ 15 3667 + ldrb r12,[r1,#2] 3668 + ldrb r2,[r1,#1] 3669 + ldrb r0,[r1],#4 3670 + orr r3,r3,r12,lsl#8 3671 + orr r3,r3,r2,lsl#16 3672 + orr r3,r3,r0,lsl#24 3673 + str r1,[sp,#17*4] 3674 + ldr 
r12,[r14],#4 @ *K256++ 3675 + str r3,[sp,#15*4] 3676 + mov r0,r9,ror#6 3677 + eor r0,r0,r9,ror#11 3678 + eor r0,r0,r9,ror#25 @ Sigma1(e) 3679 + add r3,r3,r0 3680 + eor r2,r10,r11 3681 + and r2,r2,r9 3682 + eor r2,r2,r11 @ Ch(e,f,g) 3683 + add r3,r3,r2 3684 + add r3,r3,r4 3685 + add r3,r3,r12 3686 + mov r4,r5,ror#2 3687 + eor r4,r4,r5,ror#13 3688 + eor r4,r4,r5,ror#22 @ Sigma0(a) 3689 + orr r0,r5,r6 3690 + and r0,r0,r7 3691 + and r2,r5,r6 3692 + orr r0,r0,r2 @ Maj(a,b,c) 3693 + add r4,r4,r0 3694 + add r8,r8,r3 3695 + add r4,r4,r3 3696 +.Lrounds_16_xx: 3697 + ldr r2,[sp,#1*4] @ 16 3698 + ldr r12,[sp,#14*4] 3699 + ldr r3,[sp,#0*4] 3700 + ldr r1,[sp,#9*4] 3701 + mov r0,r2,ror#7 3702 + eor r0,r0,r2,ror#18 3703 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3704 + mov r2,r12,ror#17 3705 + eor r2,r2,r12,ror#19 3706 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3707 + add r3,r3,r0 3708 + add r3,r3,r2 3709 + add r3,r3,r1 3710 + ldr r12,[r14],#4 @ *K256++ 3711 + str r3,[sp,#0*4] 3712 + mov r0,r8,ror#6 3713 + eor r0,r0,r8,ror#11 3714 + eor r0,r0,r8,ror#25 @ Sigma1(e) 3715 + add r3,r3,r0 3716 + eor r2,r9,r10 3717 + and r2,r2,r8 3718 + eor r2,r2,r10 @ Ch(e,f,g) 3719 + add r3,r3,r2 3720 + add r3,r3,r11 3721 + add r3,r3,r12 3722 + mov r11,r4,ror#2 3723 + eor r11,r11,r4,ror#13 3724 + eor r11,r11,r4,ror#22 @ Sigma0(a) 3725 + orr r0,r4,r5 3726 + and r0,r0,r6 3727 + and r2,r4,r5 3728 + orr r0,r0,r2 @ Maj(a,b,c) 3729 + add r11,r11,r0 3730 + add r7,r7,r3 3731 + add r11,r11,r3 3732 + ldr r2,[sp,#2*4] @ 17 3733 + ldr r12,[sp,#15*4] 3734 + ldr r3,[sp,#1*4] 3735 + ldr r1,[sp,#10*4] 3736 + mov r0,r2,ror#7 3737 + eor r0,r0,r2,ror#18 3738 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3739 + mov r2,r12,ror#17 3740 + eor r2,r2,r12,ror#19 3741 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3742 + add r3,r3,r0 3743 + add r3,r3,r2 3744 + add r3,r3,r1 3745 + ldr r12,[r14],#4 @ *K256++ 3746 + str r3,[sp,#1*4] 3747 + mov r0,r7,ror#6 3748 + eor r0,r0,r7,ror#11 3749 + eor r0,r0,r7,ror#25 @ Sigma1(e) 3750 + add r3,r3,r0 3751 + eor 
r2,r8,r9 3752 + and r2,r2,r7 3753 + eor r2,r2,r9 @ Ch(e,f,g) 3754 + add r3,r3,r2 3755 + add r3,r3,r10 3756 + add r3,r3,r12 3757 + mov r10,r11,ror#2 3758 + eor r10,r10,r11,ror#13 3759 + eor r10,r10,r11,ror#22 @ Sigma0(a) 3760 + orr r0,r11,r4 3761 + and r0,r0,r5 3762 + and r2,r11,r4 3763 + orr r0,r0,r2 @ Maj(a,b,c) 3764 + add r10,r10,r0 3765 + add r6,r6,r3 3766 + add r10,r10,r3 3767 + ldr r2,[sp,#3*4] @ 18 3768 + ldr r12,[sp,#0*4] 3769 + ldr r3,[sp,#2*4] 3770 + ldr r1,[sp,#11*4] 3771 + mov r0,r2,ror#7 3772 + eor r0,r0,r2,ror#18 3773 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3774 + mov r2,r12,ror#17 3775 + eor r2,r2,r12,ror#19 3776 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3777 + add r3,r3,r0 3778 + add r3,r3,r2 3779 + add r3,r3,r1 3780 + ldr r12,[r14],#4 @ *K256++ 3781 + str r3,[sp,#2*4] 3782 + mov r0,r6,ror#6 3783 + eor r0,r0,r6,ror#11 3784 + eor r0,r0,r6,ror#25 @ Sigma1(e) 3785 + add r3,r3,r0 3786 + eor r2,r7,r8 3787 + and r2,r2,r6 3788 + eor r2,r2,r8 @ Ch(e,f,g) 3789 + add r3,r3,r2 3790 + add r3,r3,r9 3791 + add r3,r3,r12 3792 + mov r9,r10,ror#2 3793 + eor r9,r9,r10,ror#13 3794 + eor r9,r9,r10,ror#22 @ Sigma0(a) 3795 + orr r0,r10,r11 3796 + and r0,r0,r4 3797 + and r2,r10,r11 3798 + orr r0,r0,r2 @ Maj(a,b,c) 3799 + add r9,r9,r0 3800 + add r5,r5,r3 3801 + add r9,r9,r3 3802 + ldr r2,[sp,#4*4] @ 19 3803 + ldr r12,[sp,#1*4] 3804 + ldr r3,[sp,#3*4] 3805 + ldr r1,[sp,#12*4] 3806 + mov r0,r2,ror#7 3807 + eor r0,r0,r2,ror#18 3808 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3809 + mov r2,r12,ror#17 3810 + eor r2,r2,r12,ror#19 3811 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3812 + add r3,r3,r0 3813 + add r3,r3,r2 3814 + add r3,r3,r1 3815 + ldr r12,[r14],#4 @ *K256++ 3816 + str r3,[sp,#3*4] 3817 + mov r0,r5,ror#6 3818 + eor r0,r0,r5,ror#11 3819 + eor r0,r0,r5,ror#25 @ Sigma1(e) 3820 + add r3,r3,r0 3821 + eor r2,r6,r7 3822 + and r2,r2,r5 3823 + eor r2,r2,r7 @ Ch(e,f,g) 3824 + add r3,r3,r2 3825 + add r3,r3,r8 3826 + add r3,r3,r12 3827 + mov r8,r9,ror#2 3828 + eor r8,r8,r9,ror#13 3829 + eor 
r8,r8,r9,ror#22 @ Sigma0(a) 3830 + orr r0,r9,r10 3831 + and r0,r0,r11 3832 + and r2,r9,r10 3833 + orr r0,r0,r2 @ Maj(a,b,c) 3834 + add r8,r8,r0 3835 + add r4,r4,r3 3836 + add r8,r8,r3 3837 + ldr r2,[sp,#5*4] @ 20 3838 + ldr r12,[sp,#2*4] 3839 + ldr r3,[sp,#4*4] 3840 + ldr r1,[sp,#13*4] 3841 + mov r0,r2,ror#7 3842 + eor r0,r0,r2,ror#18 3843 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3844 + mov r2,r12,ror#17 3845 + eor r2,r2,r12,ror#19 3846 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3847 + add r3,r3,r0 3848 + add r3,r3,r2 3849 + add r3,r3,r1 3850 + ldr r12,[r14],#4 @ *K256++ 3851 + str r3,[sp,#4*4] 3852 + mov r0,r4,ror#6 3853 + eor r0,r0,r4,ror#11 3854 + eor r0,r0,r4,ror#25 @ Sigma1(e) 3855 + add r3,r3,r0 3856 + eor r2,r5,r6 3857 + and r2,r2,r4 3858 + eor r2,r2,r6 @ Ch(e,f,g) 3859 + add r3,r3,r2 3860 + add r3,r3,r7 3861 + add r3,r3,r12 3862 + mov r7,r8,ror#2 3863 + eor r7,r7,r8,ror#13 3864 + eor r7,r7,r8,ror#22 @ Sigma0(a) 3865 + orr r0,r8,r9 3866 + and r0,r0,r10 3867 + and r2,r8,r9 3868 + orr r0,r0,r2 @ Maj(a,b,c) 3869 + add r7,r7,r0 3870 + add r11,r11,r3 3871 + add r7,r7,r3 3872 + ldr r2,[sp,#6*4] @ 21 3873 + ldr r12,[sp,#3*4] 3874 + ldr r3,[sp,#5*4] 3875 + ldr r1,[sp,#14*4] 3876 + mov r0,r2,ror#7 3877 + eor r0,r0,r2,ror#18 3878 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3879 + mov r2,r12,ror#17 3880 + eor r2,r2,r12,ror#19 3881 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3882 + add r3,r3,r0 3883 + add r3,r3,r2 3884 + add r3,r3,r1 3885 + ldr r12,[r14],#4 @ *K256++ 3886 + str r3,[sp,#5*4] 3887 + mov r0,r11,ror#6 3888 + eor r0,r0,r11,ror#11 3889 + eor r0,r0,r11,ror#25 @ Sigma1(e) 3890 + add r3,r3,r0 3891 + eor r2,r4,r5 3892 + and r2,r2,r11 3893 + eor r2,r2,r5 @ Ch(e,f,g) 3894 + add r3,r3,r2 3895 + add r3,r3,r6 3896 + add r3,r3,r12 3897 + mov r6,r7,ror#2 3898 + eor r6,r6,r7,ror#13 3899 + eor r6,r6,r7,ror#22 @ Sigma0(a) 3900 + orr r0,r7,r8 3901 + and r0,r0,r9 3902 + and r2,r7,r8 3903 + orr r0,r0,r2 @ Maj(a,b,c) 3904 + add r6,r6,r0 3905 + add r10,r10,r3 3906 + add r6,r6,r3 3907 + ldr 
r2,[sp,#7*4] @ 22 3908 + ldr r12,[sp,#4*4] 3909 + ldr r3,[sp,#6*4] 3910 + ldr r1,[sp,#15*4] 3911 + mov r0,r2,ror#7 3912 + eor r0,r0,r2,ror#18 3913 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3914 + mov r2,r12,ror#17 3915 + eor r2,r2,r12,ror#19 3916 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3917 + add r3,r3,r0 3918 + add r3,r3,r2 3919 + add r3,r3,r1 3920 + ldr r12,[r14],#4 @ *K256++ 3921 + str r3,[sp,#6*4] 3922 + mov r0,r10,ror#6 3923 + eor r0,r0,r10,ror#11 3924 + eor r0,r0,r10,ror#25 @ Sigma1(e) 3925 + add r3,r3,r0 3926 + eor r2,r11,r4 3927 + and r2,r2,r10 3928 + eor r2,r2,r4 @ Ch(e,f,g) 3929 + add r3,r3,r2 3930 + add r3,r3,r5 3931 + add r3,r3,r12 3932 + mov r5,r6,ror#2 3933 + eor r5,r5,r6,ror#13 3934 + eor r5,r5,r6,ror#22 @ Sigma0(a) 3935 + orr r0,r6,r7 3936 + and r0,r0,r8 3937 + and r2,r6,r7 3938 + orr r0,r0,r2 @ Maj(a,b,c) 3939 + add r5,r5,r0 3940 + add r9,r9,r3 3941 + add r5,r5,r3 3942 + ldr r2,[sp,#8*4] @ 23 3943 + ldr r12,[sp,#5*4] 3944 + ldr r3,[sp,#7*4] 3945 + ldr r1,[sp,#0*4] 3946 + mov r0,r2,ror#7 3947 + eor r0,r0,r2,ror#18 3948 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3949 + mov r2,r12,ror#17 3950 + eor r2,r2,r12,ror#19 3951 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3952 + add r3,r3,r0 3953 + add r3,r3,r2 3954 + add r3,r3,r1 3955 + ldr r12,[r14],#4 @ *K256++ 3956 + str r3,[sp,#7*4] 3957 + mov r0,r9,ror#6 3958 + eor r0,r0,r9,ror#11 3959 + eor r0,r0,r9,ror#25 @ Sigma1(e) 3960 + add r3,r3,r0 3961 + eor r2,r10,r11 3962 + and r2,r2,r9 3963 + eor r2,r2,r11 @ Ch(e,f,g) 3964 + add r3,r3,r2 3965 + add r3,r3,r4 3966 + add r3,r3,r12 3967 + mov r4,r5,ror#2 3968 + eor r4,r4,r5,ror#13 3969 + eor r4,r4,r5,ror#22 @ Sigma0(a) 3970 + orr r0,r5,r6 3971 + and r0,r0,r7 3972 + and r2,r5,r6 3973 + orr r0,r0,r2 @ Maj(a,b,c) 3974 + add r4,r4,r0 3975 + add r8,r8,r3 3976 + add r4,r4,r3 3977 + ldr r2,[sp,#9*4] @ 24 3978 + ldr r12,[sp,#6*4] 3979 + ldr r3,[sp,#8*4] 3980 + ldr r1,[sp,#1*4] 3981 + mov r0,r2,ror#7 3982 + eor r0,r0,r2,ror#18 3983 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 3984 + mov 
r2,r12,ror#17 3985 + eor r2,r2,r12,ror#19 3986 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 3987 + add r3,r3,r0 3988 + add r3,r3,r2 3989 + add r3,r3,r1 3990 + ldr r12,[r14],#4 @ *K256++ 3991 + str r3,[sp,#8*4] 3992 + mov r0,r8,ror#6 3993 + eor r0,r0,r8,ror#11 3994 + eor r0,r0,r8,ror#25 @ Sigma1(e) 3995 + add r3,r3,r0 3996 + eor r2,r9,r10 3997 + and r2,r2,r8 3998 + eor r2,r2,r10 @ Ch(e,f,g) 3999 + add r3,r3,r2 4000 + add r3,r3,r11 4001 + add r3,r3,r12 4002 + mov r11,r4,ror#2 4003 + eor r11,r11,r4,ror#13 4004 + eor r11,r11,r4,ror#22 @ Sigma0(a) 4005 + orr r0,r4,r5 4006 + and r0,r0,r6 4007 + and r2,r4,r5 4008 + orr r0,r0,r2 @ Maj(a,b,c) 4009 + add r11,r11,r0 4010 + add r7,r7,r3 4011 + add r11,r11,r3 4012 + ldr r2,[sp,#10*4] @ 25 4013 + ldr r12,[sp,#7*4] 4014 + ldr r3,[sp,#9*4] 4015 + ldr r1,[sp,#2*4] 4016 + mov r0,r2,ror#7 4017 + eor r0,r0,r2,ror#18 4018 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4019 + mov r2,r12,ror#17 4020 + eor r2,r2,r12,ror#19 4021 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4022 + add r3,r3,r0 4023 + add r3,r3,r2 4024 + add r3,r3,r1 4025 + ldr r12,[r14],#4 @ *K256++ 4026 + str r3,[sp,#9*4] 4027 + mov r0,r7,ror#6 4028 + eor r0,r0,r7,ror#11 4029 + eor r0,r0,r7,ror#25 @ Sigma1(e) 4030 + add r3,r3,r0 4031 + eor r2,r8,r9 4032 + and r2,r2,r7 4033 + eor r2,r2,r9 @ Ch(e,f,g) 4034 + add r3,r3,r2 4035 + add r3,r3,r10 4036 + add r3,r3,r12 4037 + mov r10,r11,ror#2 4038 + eor r10,r10,r11,ror#13 4039 + eor r10,r10,r11,ror#22 @ Sigma0(a) 4040 + orr r0,r11,r4 4041 + and r0,r0,r5 4042 + and r2,r11,r4 4043 + orr r0,r0,r2 @ Maj(a,b,c) 4044 + add r10,r10,r0 4045 + add r6,r6,r3 4046 + add r10,r10,r3 4047 + ldr r2,[sp,#11*4] @ 26 4048 + ldr r12,[sp,#8*4] 4049 + ldr r3,[sp,#10*4] 4050 + ldr r1,[sp,#3*4] 4051 + mov r0,r2,ror#7 4052 + eor r0,r0,r2,ror#18 4053 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4054 + mov r2,r12,ror#17 4055 + eor r2,r2,r12,ror#19 4056 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4057 + add r3,r3,r0 4058 + add r3,r3,r2 4059 + add r3,r3,r1 4060 + ldr r12,[r14],#4 @ 
*K256++ 4061 + str r3,[sp,#10*4] 4062 + mov r0,r6,ror#6 4063 + eor r0,r0,r6,ror#11 4064 + eor r0,r0,r6,ror#25 @ Sigma1(e) 4065 + add r3,r3,r0 4066 + eor r2,r7,r8 4067 + and r2,r2,r6 4068 + eor r2,r2,r8 @ Ch(e,f,g) 4069 + add r3,r3,r2 4070 + add r3,r3,r9 4071 + add r3,r3,r12 4072 + mov r9,r10,ror#2 4073 + eor r9,r9,r10,ror#13 4074 + eor r9,r9,r10,ror#22 @ Sigma0(a) 4075 + orr r0,r10,r11 4076 + and r0,r0,r4 4077 + and r2,r10,r11 4078 + orr r0,r0,r2 @ Maj(a,b,c) 4079 + add r9,r9,r0 4080 + add r5,r5,r3 4081 + add r9,r9,r3 4082 + ldr r2,[sp,#12*4] @ 27 4083 + ldr r12,[sp,#9*4] 4084 + ldr r3,[sp,#11*4] 4085 + ldr r1,[sp,#4*4] 4086 + mov r0,r2,ror#7 4087 + eor r0,r0,r2,ror#18 4088 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4089 + mov r2,r12,ror#17 4090 + eor r2,r2,r12,ror#19 4091 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4092 + add r3,r3,r0 4093 + add r3,r3,r2 4094 + add r3,r3,r1 4095 + ldr r12,[r14],#4 @ *K256++ 4096 + str r3,[sp,#11*4] 4097 + mov r0,r5,ror#6 4098 + eor r0,r0,r5,ror#11 4099 + eor r0,r0,r5,ror#25 @ Sigma1(e) 4100 + add r3,r3,r0 4101 + eor r2,r6,r7 4102 + and r2,r2,r5 4103 + eor r2,r2,r7 @ Ch(e,f,g) 4104 + add r3,r3,r2 4105 + add r3,r3,r8 4106 + add r3,r3,r12 4107 + mov r8,r9,ror#2 4108 + eor r8,r8,r9,ror#13 4109 + eor r8,r8,r9,ror#22 @ Sigma0(a) 4110 + orr r0,r9,r10 4111 + and r0,r0,r11 4112 + and r2,r9,r10 4113 + orr r0,r0,r2 @ Maj(a,b,c) 4114 + add r8,r8,r0 4115 + add r4,r4,r3 4116 + add r8,r8,r3 4117 + ldr r2,[sp,#13*4] @ 28 4118 + ldr r12,[sp,#10*4] 4119 + ldr r3,[sp,#12*4] 4120 + ldr r1,[sp,#5*4] 4121 + mov r0,r2,ror#7 4122 + eor r0,r0,r2,ror#18 4123 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4124 + mov r2,r12,ror#17 4125 + eor r2,r2,r12,ror#19 4126 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4127 + add r3,r3,r0 4128 + add r3,r3,r2 4129 + add r3,r3,r1 4130 + ldr r12,[r14],#4 @ *K256++ 4131 + str r3,[sp,#12*4] 4132 + mov r0,r4,ror#6 4133 + eor r0,r0,r4,ror#11 4134 + eor r0,r0,r4,ror#25 @ Sigma1(e) 4135 + add r3,r3,r0 4136 + eor r2,r5,r6 4137 + and r2,r2,r4 4138 + 
eor r2,r2,r6 @ Ch(e,f,g) 4139 + add r3,r3,r2 4140 + add r3,r3,r7 4141 + add r3,r3,r12 4142 + mov r7,r8,ror#2 4143 + eor r7,r7,r8,ror#13 4144 + eor r7,r7,r8,ror#22 @ Sigma0(a) 4145 + orr r0,r8,r9 4146 + and r0,r0,r10 4147 + and r2,r8,r9 4148 + orr r0,r0,r2 @ Maj(a,b,c) 4149 + add r7,r7,r0 4150 + add r11,r11,r3 4151 + add r7,r7,r3 4152 + ldr r2,[sp,#14*4] @ 29 4153 + ldr r12,[sp,#11*4] 4154 + ldr r3,[sp,#13*4] 4155 + ldr r1,[sp,#6*4] 4156 + mov r0,r2,ror#7 4157 + eor r0,r0,r2,ror#18 4158 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4159 + mov r2,r12,ror#17 4160 + eor r2,r2,r12,ror#19 4161 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4162 + add r3,r3,r0 4163 + add r3,r3,r2 4164 + add r3,r3,r1 4165 + ldr r12,[r14],#4 @ *K256++ 4166 + str r3,[sp,#13*4] 4167 + mov r0,r11,ror#6 4168 + eor r0,r0,r11,ror#11 4169 + eor r0,r0,r11,ror#25 @ Sigma1(e) 4170 + add r3,r3,r0 4171 + eor r2,r4,r5 4172 + and r2,r2,r11 4173 + eor r2,r2,r5 @ Ch(e,f,g) 4174 + add r3,r3,r2 4175 + add r3,r3,r6 4176 + add r3,r3,r12 4177 + mov r6,r7,ror#2 4178 + eor r6,r6,r7,ror#13 4179 + eor r6,r6,r7,ror#22 @ Sigma0(a) 4180 + orr r0,r7,r8 4181 + and r0,r0,r9 4182 + and r2,r7,r8 4183 + orr r0,r0,r2 @ Maj(a,b,c) 4184 + add r6,r6,r0 4185 + add r10,r10,r3 4186 + add r6,r6,r3 4187 + ldr r2,[sp,#15*4] @ 30 4188 + ldr r12,[sp,#12*4] 4189 + ldr r3,[sp,#14*4] 4190 + ldr r1,[sp,#7*4] 4191 + mov r0,r2,ror#7 4192 + eor r0,r0,r2,ror#18 4193 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4194 + mov r2,r12,ror#17 4195 + eor r2,r2,r12,ror#19 4196 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4197 + add r3,r3,r0 4198 + add r3,r3,r2 4199 + add r3,r3,r1 4200 + ldr r12,[r14],#4 @ *K256++ 4201 + str r3,[sp,#14*4] 4202 + mov r0,r10,ror#6 4203 + eor r0,r0,r10,ror#11 4204 + eor r0,r0,r10,ror#25 @ Sigma1(e) 4205 + add r3,r3,r0 4206 + eor r2,r11,r4 4207 + and r2,r2,r10 4208 + eor r2,r2,r4 @ Ch(e,f,g) 4209 + add r3,r3,r2 4210 + add r3,r3,r5 4211 + add r3,r3,r12 4212 + mov r5,r6,ror#2 4213 + eor r5,r5,r6,ror#13 4214 + eor r5,r5,r6,ror#22 @ Sigma0(a) 4215 + orr 
r0,r6,r7 4216 + and r0,r0,r8 4217 + and r2,r6,r7 4218 + orr r0,r0,r2 @ Maj(a,b,c) 4219 + add r5,r5,r0 4220 + add r9,r9,r3 4221 + add r5,r5,r3 4222 + ldr r2,[sp,#0*4] @ 31 4223 + ldr r12,[sp,#13*4] 4224 + ldr r3,[sp,#15*4] 4225 + ldr r1,[sp,#8*4] 4226 + mov r0,r2,ror#7 4227 + eor r0,r0,r2,ror#18 4228 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) 4229 + mov r2,r12,ror#17 4230 + eor r2,r2,r12,ror#19 4231 + eor r2,r2,r12,lsr#10 @ sigma1(X[i+14]) 4232 + add r3,r3,r0 4233 + add r3,r3,r2 4234 + add r3,r3,r1 4235 + ldr r12,[r14],#4 @ *K256++ 4236 + str r3,[sp,#15*4] 4237 + mov r0,r9,ror#6 4238 + eor r0,r0,r9,ror#11 4239 + eor r0,r0,r9,ror#25 @ Sigma1(e) 4240 + add r3,r3,r0 4241 + eor r2,r10,r11 4242 + and r2,r2,r9 4243 + eor r2,r2,r11 @ Ch(e,f,g) 4244 + add r3,r3,r2 4245 + add r3,r3,r4 4246 + add r3,r3,r12 4247 + mov r4,r5,ror#2 4248 + eor r4,r4,r5,ror#13 4249 + eor r4,r4,r5,ror#22 @ Sigma0(a) 4250 + orr r0,r5,r6 4251 + and r0,r0,r7 4252 + and r2,r5,r6 4253 + orr r0,r0,r2 @ Maj(a,b,c) 4254 + add r4,r4,r0 4255 + add r8,r8,r3 4256 + add r4,r4,r3 4257 + and r12,r12,#0xff 4258 + cmp r12,#0xf2 4259 + bne .Lrounds_16_xx 4260 + 4261 + ldr r3,[sp,#16*4] @ pull ctx 4262 + ldr r0,[r3,#0] 4263 + ldr r2,[r3,#4] 4264 + ldr r12,[r3,#8] 4265 + add r4,r4,r0 4266 + ldr r0,[r3,#12] 4267 + add r5,r5,r2 4268 + ldr r2,[r3,#16] 4269 + add r6,r6,r12 4270 + ldr r12,[r3,#20] 4271 + add r7,r7,r0 4272 + ldr r0,[r3,#24] 4273 + add r8,r8,r2 4274 + ldr r2,[r3,#28] 4275 + add r9,r9,r12 4276 + ldr r1,[sp,#17*4] @ pull inp 4277 + ldr r12,[sp,#18*4] @ pull inp+len 4278 + add r10,r10,r0 4279 + add r11,r11,r2 4280 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} 4281 + cmp r1,r12 4282 + sub r14,r14,#256 @ rewind Ktbl 4283 + bne .Loop 4284 + 4285 + add sp,sp,#19*4 @ destroy frame 4286 + ldmia sp!,{r4-r12,lr} 4287 + tst lr,#1 4288 + moveq pc,lr @ be binary compatible with V4, yet 4289 + .word 0xe12fff1e @ interoperable with Thumb ISA:-) 4290 +.size sha256_block_data_order,.-sha256_block_data_order 4291 +.asciz "SHA256 block 
transform for ARMv4, CRYPTOGAMS by <appro (a] openssl.org>" 4292 --- /dev/null 2009-04-24 06:09:48.000000000 -0700 4293 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha512-armv4.pl 2009-09-03 15:42:39.000000000 -0700 4294 @@ -0,0 +1,398 @@ 4295 +#!/usr/bin/env perl 4296 + 4297 +# ==================================================================== 4298 +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 4299 +# project. The module is, however, dual licensed under OpenSSL and 4300 +# CRYPTOGAMS licenses depending on where you obtain it. For further 4301 +# details see http://www.openssl.org/~appro/cryptogams/. 4302 +# ==================================================================== 4303 + 4304 +# SHA512 block procedure for ARMv4. September 2007. 4305 + 4306 +# This code is ~4.5 (four and a half) times faster than code generated 4307 +# by gcc 3.4 and it spends ~72 clock cycles per byte. 4308 + 4309 +# Byte order [in]dependence. ========================================= 4310 +# 4311 +# The caller is expected to maintain a specific *dword* order in h[0-7], 4312 +# namely with the most significant dword at the *lower* address, which is 4313 +# reflected in the two parameters below. The *byte* order within these 4314 +# dwords is in turn whatever the *native* byte order of the platform is.
4315 +$hi=0; # byte offset of the most significant dword inside each 64-bit ctx word
4316 +$lo=4; # byte offset of the least significant dword inside each 64-bit ctx word
4317 +# ====================================================================
4318 +
4319 +$output=shift; # first command-line argument: name of the file to generate
4320 +open STDOUT,">$output"; # NOTE(review): result is not checked; presumably a failed open leaves the inherited STDOUT in use so that "script > file" keeps working -- confirm before adding "or die"
4321 +
4322 +$ctx="r0"; # in: hash state, eight 64-bit words (loaded at entry, updated after every block)
4323 +$inp="r1"; # in: input pointer, advanced by 8 for every message word read
4324 +$len="r2"; # in: number of 128-byte blocks; turned into the end-of-input pointer at entry
4325 +$Tlo="r3"; # T accumulator, low word (also holds X[i] while it is being built)
4326 +$Thi="r4"; # T accumulator, high word
4327 +$Alo="r5"; # working variable a, low word
4328 +$Ahi="r6"; # working variable a, high word
4329 +$Elo="r7"; # working variable e, low word
4330 +$Ehi="r8"; # working variable e, high word
4331 +$t0="r9"; # scratch
4332 +$t1="r10"; # scratch
4333 +$t2="r11"; # scratch
4334 +$t3="r12"; # scratch
4335 +############ r13 is stack pointer
4336 +$Ktbl="r14"; # pointer to the current K512 entry; bit 0 doubles as the "last round" flag
4337 +############ r15 is program counter
4338 +# Byte offsets of a..h (used for both ctx and the stack frame) and of the X[i] slot in the frame.
4339 +$Aoff=8*0;
4340 +$Boff=8*1;
4341 +$Coff=8*2;
4342 +$Doff=8*3;
4343 +$Eoff=8*4;
4344 +$Foff=8*5;
4345 +$Goff=8*6;
4346 +$Hoff=8*7;
4347 +$Xoff=8*8;
4348 +# Emit one round. $magic is the low byte of the K[i] low word that identifies the last round of the enclosing loop.
4349 +sub BODY_00_15() { # the empty prototype is bypassed because both call sites use the &-form
4350 +my $magic = shift;
4351 +$code.=<<___; # the emitted round ends with "sub sp,sp,#8", which shifts every frame slot by one 64-bit word
4352 + ldr $t2,[sp,#$Hoff+0] @ h.lo
4353 + ldr $t3,[sp,#$Hoff+4] @ h.hi
4354 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
4355 + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
4356 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
4357 + mov $t0,$Elo,lsr#14
4358 + mov $t1,$Ehi,lsr#14
4359 + eor $t0,$t0,$Ehi,lsl#18
4360 + eor $t1,$t1,$Elo,lsl#18
4361 + eor $t0,$t0,$Elo,lsr#18
4362 + eor $t1,$t1,$Ehi,lsr#18
4363 + eor $t0,$t0,$Ehi,lsl#14
4364 + eor $t1,$t1,$Elo,lsl#14
4365 + eor $t0,$t0,$Ehi,lsr#9
4366 + eor $t1,$t1,$Elo,lsr#9
4367 + eor $t0,$t0,$Elo,lsl#23
4368 + eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
4369 + adds $Tlo,$Tlo,$t0
4370 + adc $Thi,$Thi,$t1 @ T += Sigma1(e)
4371 + adds $Tlo,$Tlo,$t2
4372 + adc $Thi,$Thi,$t3 @ T += h
4373 +
4374 + ldr $t0,[sp,#$Foff+0] @ f.lo
4375 + ldr $t1,[sp,#$Foff+4] @ f.hi
4376 + ldr $t2,[sp,#$Goff+0] @ g.lo
4377 + ldr $t3,[sp,#$Goff+4] @ g.hi
4378 + str $Elo,[sp,#$Eoff+0]
4379 + str $Ehi,[sp,#$Eoff+4]
4380 + str $Alo,[sp,#$Aoff+0]
4381 + str $Ahi,[sp,#$Aoff+4]
4382 +
4383 + eor $t0,$t0,$t2
4384 + eor $t1,$t1,$t3
4385 + and $t0,$t0,$Elo
4386 + and $t1,$t1,$Ehi
4387 + eor $t0,$t0,$t2
4388 + eor $t1,$t1,$t3 @ Ch(e,f,g)
4389 +
4390 + ldr $t2,[$Ktbl,#4] @ K[i].lo
4391 + ldr $t3,[$Ktbl,#0] @ K[i].hi
4392 + ldr $Elo,[sp,#$Doff+0] @ d.lo
4393 + ldr $Ehi,[sp,#$Doff+4] @ d.hi
4394 +
4395 + adds $Tlo,$Tlo,$t0
4396 + adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
4397 + adds $Tlo,$Tlo,$t2
4398 + adc $Thi,$Thi,$t3 @ T += K[i]
4399 + adds $Elo,$Elo,$Tlo
4400 + adc $Ehi,$Ehi,$Thi @ d += T
4401 +
4402 + and $t0,$t2,#0xff
4403 + teq $t0,#$magic
4404 + orreq $Ktbl,$Ktbl,#1
4405 +
4406 + ldr $t2,[sp,#$Boff+0] @ b.lo
4407 + ldr $t3,[sp,#$Coff+0] @ c.lo
4408 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
4409 + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
4410 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
4411 + mov $t0,$Alo,lsr#28
4412 + mov $t1,$Ahi,lsr#28
4413 + eor $t0,$t0,$Ahi,lsl#4
4414 + eor $t1,$t1,$Alo,lsl#4
4415 + eor $t0,$t0,$Ahi,lsr#2
4416 + eor $t1,$t1,$Alo,lsr#2
4417 + eor $t0,$t0,$Alo,lsl#30
4418 + eor $t1,$t1,$Ahi,lsl#30
4419 + eor $t0,$t0,$Ahi,lsr#7
4420 + eor $t1,$t1,$Alo,lsr#7
4421 + eor $t0,$t0,$Alo,lsl#25
4422 + eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
4423 + adds $Tlo,$Tlo,$t0
4424 + adc $Thi,$Thi,$t1 @ T += Sigma0(a)
4425 +
4426 + and $t0,$Alo,$t2
4427 + orr $Alo,$Alo,$t2
4428 + ldr $t1,[sp,#$Boff+4] @ b.hi
4429 + ldr $t2,[sp,#$Coff+4] @ c.hi
4430 + and $Alo,$Alo,$t3
4431 + orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
4432 + and $t3,$Ahi,$t1
4433 + orr $Ahi,$Ahi,$t1
4434 + and $Ahi,$Ahi,$t2
4435 + orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
4436 + adds $Alo,$Alo,$Tlo
4437 + adc $Ahi,$Ahi,$Thi @ h += T
4438 +
4439 + sub sp,sp,#8
4440 + add $Ktbl,$Ktbl,#8
4441 +___
4442 +}
4443 +$code=<<___; # start of the output (plain assignment: the sub above has not been called yet): K512, prologue, loader for rounds 0..15
4444 +.text
4445 +.code 32
4446 +.type K512,%object
4447 +.align 5
4448 +K512:
4449 +.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
4450 +.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
4451 +.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
4452 +.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
4453 +.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
4454 +.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
4455 +.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
4456 +.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
4457 +.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
4458 +.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
4459 +.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
4460 +.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
4461 +.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
4462 +.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
4463 +.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
4464 +.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
4465 +.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
4466 +.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
4467 +.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
4468 +.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
4469 +.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
4470 +.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
4471 +.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
4472 +.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
4473 +.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
4474 +.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
4475 +.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
4476 +.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
4477 +.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
4478 +.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
4479 +.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
4480 +.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
4481 +.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
4482 +.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
4483 +.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
4484 +.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
4485 +.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
4486 +.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
4487 +.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
4488 +.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
4489 +.size K512,.-K512
4490 +
4491 +.global sha512_block_data_order
4492 +.type sha512_block_data_order,%function
4493 +sha512_block_data_order:
4494 + sub r3,pc,#8 @ sha512_block_data_order
4495 + add $len,$inp,$len,lsl#7 @ len to point at the end of inp
4496 + stmdb sp!,{r4-r12,lr}
4497 + sub $Ktbl,r3,#640 @ K512
4498 + sub sp,sp,#9*8
4499 +
4500 + ldr $Elo,[$ctx,#$Eoff+$lo]
4501 + ldr $Ehi,[$ctx,#$Eoff+$hi]
4502 + ldr $t0, [$ctx,#$Goff+$lo]
4503 + ldr $t1, [$ctx,#$Goff+$hi]
4504 + ldr $t2, [$ctx,#$Hoff+$lo]
4505 + ldr $t3, [$ctx,#$Hoff+$hi]
4506 +.Loop:
4507 + str $t0, [sp,#$Goff+0]
4508 + str $t1, [sp,#$Goff+4]
4509 + str $t2, [sp,#$Hoff+0]
4510 + str $t3, [sp,#$Hoff+4]
4511 + ldr $Alo,[$ctx,#$Aoff+$lo]
4512 + ldr $Ahi,[$ctx,#$Aoff+$hi]
4513 + ldr $Tlo,[$ctx,#$Boff+$lo]
4514 + ldr $Thi,[$ctx,#$Boff+$hi]
4515 + ldr $t0, [$ctx,#$Coff+$lo]
4516 + ldr $t1, [$ctx,#$Coff+$hi]
4517 + ldr $t2, [$ctx,#$Doff+$lo]
4518 + ldr $t3, [$ctx,#$Doff+$hi]
4519 + str $Tlo,[sp,#$Boff+0]
4520 + str $Thi,[sp,#$Boff+4]
4521 + str $t0, [sp,#$Coff+0]
4522 + str $t1, [sp,#$Coff+4]
4523 + str $t2, [sp,#$Doff+0]
4524 + str $t3, [sp,#$Doff+4]
4525 + ldr $Tlo,[$ctx,#$Foff+$lo]
4526 + ldr $Thi,[$ctx,#$Foff+$hi]
4527 + str $Tlo,[sp,#$Foff+0]
4528 + str $Thi,[sp,#$Foff+4]
4529 +
4530 +.L00_15:
4531 + ldrb $Tlo,[$inp,#7]
4532 + ldrb $t0, [$inp,#6]
4533 + ldrb $t1, [$inp,#5]
4534 + ldrb $t2, [$inp,#4]
4535 + ldrb $Thi,[$inp,#3]
4536 + ldrb $t3, [$inp,#2]
4537 + orr $Tlo,$Tlo,$t0,lsl#8
4538 + ldrb $t0, [$inp,#1]
4539 + orr $Tlo,$Tlo,$t1,lsl#16
4540 + ldrb $t1, [$inp],#8
4541 + orr $Tlo,$Tlo,$t2,lsl#24
4542 + orr $Thi,$Thi,$t3,lsl#8
4543 + orr $Thi,$Thi,$t0,lsl#16
4544 + orr $Thi,$Thi,$t1,lsl#24
4545 + str $Tlo,[sp,#$Xoff+0]
4546 + str $Thi,[sp,#$Xoff+4]
4547 +___
4548 + &BODY_00_15(0x94); # rounds 0..15: 0x94 is the low byte of K512[15]
4549 +$code.=<<___; # loop control for rounds 0..15, then the message schedule for rounds 16..79
4550 + tst $Ktbl,#1
4551 + beq .L00_15
4552 + bic $Ktbl,$Ktbl,#1
4553 +
4554 +.L16_79:
4555 + ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
4556 + ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
4557 + ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
4558 + ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
4559 +
4560 + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
4561 + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
4562 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
4563 + mov $Tlo,$t0,lsr#1
4564 + mov $Thi,$t1,lsr#1
4565 + eor $Tlo,$Tlo,$t1,lsl#31
4566 + eor $Thi,$Thi,$t0,lsl#31
4567 + eor $Tlo,$Tlo,$t0,lsr#8
4568 + eor $Thi,$Thi,$t1,lsr#8
4569 + eor $Tlo,$Tlo,$t1,lsl#24
4570 + eor $Thi,$Thi,$t0,lsl#24
4571 + eor $Tlo,$Tlo,$t0,lsr#7
4572 + eor $Thi,$Thi,$t1,lsr#7
4573 + eor $Tlo,$Tlo,$t1,lsl#25
4574 +
4575 + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
4576 + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
4577 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
4578 + mov $t0,$t2,lsr#19
4579 + mov $t1,$t3,lsr#19
4580 + eor $t0,$t0,$t3,lsl#13
4581 + eor $t1,$t1,$t2,lsl#13
4582 + eor $t0,$t0,$t3,lsr#29
4583 + eor $t1,$t1,$t2,lsr#29
4584 + eor $t0,$t0,$t2,lsl#3
4585 + eor $t1,$t1,$t3,lsl#3
4586 + eor $t0,$t0,$t2,lsr#6
4587 + eor $t1,$t1,$t3,lsr#6
4588 + eor $t0,$t0,$t3,lsl#26
4589 +
4590 + ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
4591 + ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
4592 + adds $Tlo,$Tlo,$t0
4593 + adc $Thi,$Thi,$t1
4594 +
4595 + ldr $t0,[sp,#`$Xoff+8*16`+0]
4596 + ldr $t1,[sp,#`$Xoff+8*16`+4]
4597 + adds $Tlo,$Tlo,$t2
4598 + adc $Thi,$Thi,$t3
4599 + adds $Tlo,$Tlo,$t0
4600 + adc $Thi,$Thi,$t1
4601 + str $Tlo,[sp,#$Xoff+0]
4602 + str $Thi,[sp,#$Xoff+4]
4603 +___
4604 + &BODY_00_15(0x17); # rounds 16..79: 0x17 is the low byte of K512[79]
4605 +$code.=<<___; # loop control for rounds 16..79, ctx update, block loop and epilogue
4606 + tst $Ktbl,#1
4607 + beq .L16_79
4608 + bic $Ktbl,$Ktbl,#1
4609 +
4610 + ldr $Tlo,[sp,#$Boff+0]
4611 + ldr $Thi,[sp,#$Boff+4]
4612 + ldr $t0, [$ctx,#$Aoff+$lo]
4613 + ldr $t1, [$ctx,#$Aoff+$hi]
4614 + ldr $t2, [$ctx,#$Boff+$lo]
4615 + ldr $t3, [$ctx,#$Boff+$hi]
4616 + adds $t0,$Alo,$t0
4617 + adc $t1,$Ahi,$t1
4618 + adds $t2,$Tlo,$t2
4619 + adc $t3,$Thi,$t3
4620 + str $t0, [$ctx,#$Aoff+$lo]
4621 + str $t1, [$ctx,#$Aoff+$hi]
4622 + str $t2, [$ctx,#$Boff+$lo]
4623 + str $t3, [$ctx,#$Boff+$hi]
4624 +
4625 + ldr $Alo,[sp,#$Coff+0]
4626 + ldr $Ahi,[sp,#$Coff+4]
4627 + ldr $Tlo,[sp,#$Doff+0]
4628 + ldr $Thi,[sp,#$Doff+4]
4629 + ldr $t0, [$ctx,#$Coff+$lo]
4630 + ldr $t1, [$ctx,#$Coff+$hi]
4631 + ldr $t2, [$ctx,#$Doff+$lo]
4632 + ldr $t3, [$ctx,#$Doff+$hi]
4633 + adds $t0,$Alo,$t0
4634 + adc $t1,$Ahi,$t1
4635 + adds $t2,$Tlo,$t2
4636 + adc $t3,$Thi,$t3
4637 + str $t0, [$ctx,#$Coff+$lo]
4638 + str $t1, [$ctx,#$Coff+$hi]
4639 + str $t2, [$ctx,#$Doff+$lo]
4640 + str $t3, [$ctx,#$Doff+$hi]
4641 +
4642 + ldr $Tlo,[sp,#$Foff+0]
4643 + ldr $Thi,[sp,#$Foff+4]
4644 + ldr $t0, [$ctx,#$Eoff+$lo]
4645 + ldr $t1, [$ctx,#$Eoff+$hi]
4646 + ldr $t2, [$ctx,#$Foff+$lo]
4647 + ldr $t3, [$ctx,#$Foff+$hi]
4648 + adds $Elo,$Elo,$t0
4649 + adc $Ehi,$Ehi,$t1
4650 + adds $t2,$Tlo,$t2
4651 + adc $t3,$Thi,$t3
4652 + str $Elo,[$ctx,#$Eoff+$lo]
4653 + str $Ehi,[$ctx,#$Eoff+$hi]
4654 + str $t2, [$ctx,#$Foff+$lo]
4655 + str $t3, [$ctx,#$Foff+$hi]
4656 +
4657 + ldr $Alo,[sp,#$Goff+0]
4658 + ldr $Ahi,[sp,#$Goff+4]
4659 + ldr $Tlo,[sp,#$Hoff+0]
4660 + ldr $Thi,[sp,#$Hoff+4]
4661 + ldr $t0, [$ctx,#$Goff+$lo]
4662 + ldr $t1, [$ctx,#$Goff+$hi]
4663 + ldr $t2, [$ctx,#$Hoff+$lo]
4664 + ldr $t3, [$ctx,#$Hoff+$hi]
4665 + adds $t0,$Alo,$t0
4666 + adc $t1,$Ahi,$t1
4667 + adds $t2,$Tlo,$t2
4668 + adc $t3,$Thi,$t3
4669 + str $t0, [$ctx,#$Goff+$lo]
4670 + str $t1, [$ctx,#$Goff+$hi]
4671 + str $t2, [$ctx,#$Hoff+$lo]
4672 + str $t3, [$ctx,#$Hoff+$hi]
4673 +
4674 + add sp,sp,#640
4675 + sub $Ktbl,$Ktbl,#640
4676 +
4677 + teq $inp,$len
4678 + bne .Loop
4679 +
4680 + add sp,sp,#8*9 @ destroy frame
4681 + ldmia sp!,{r4-r12,lr}
4682 + tst lr,#1
4683 + moveq pc,lr @ be binary compatible with V4, yet
4684 + bx lr @ interoperable with Thumb ISA:-)
4685 +.size sha512_block_data_order,.-sha512_block_data_order
4686 +.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
4687 +___
4688 +
4689 +$code =~ s/\`([^\`]*)\`/eval $1/gem; # replace each back-quoted expression in the template with its evaluated value
4690 +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
4691 +print $code;
4692 +close STDOUT or die "error closing STDOUT: $!"; # enforce flush, and fail loudly if the output could not be written completely
4693 --- /dev/null
2009-04-24 06:09:48.000000000 -0700
4694 +++ openssl-0.9.8h/crypto/0.9.9-dev/sha/sha512-armv4.s 2009-09-03 15:42:39.000000000 -0700
4695 @@ -0,0 +1,415 @@
4696 +.text
4697 +.code 32
4698 +.type K512,%object
4699 +.align 5
4700 +K512: @ SHA-512 round constants K[0..79]; each entry is stored hi word first, then lo word
4701 +.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
4702 +.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
4703 +.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
4704 +.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
4705 +.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
4706 +.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
4707 +.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
4708 +.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
4709 +.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
4710 +.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
4711 +.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
4712 +.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
4713 +.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
4714 +.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
4715 +.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
4716 +.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
4717 +.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
4718 +.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
4719 +.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
4720 +.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
4721 +.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
4722 +.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
4723 +.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
4724 +.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
4725 +.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
4726 +.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
4727 +.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
4728 +.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
4729 +.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
4730 +.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
4731 +.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
4732 +.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
4733 +.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
4734 +.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
4735 +.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
4736 +.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
4737 +.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
4738 +.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
4739 +.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
4740 +.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
4741 +.size K512,.-K512
4742 +@ sha512_block_data_order: r0 = hash state (8 x 64 bits, hi word at +0, lo word at +4), r1 = input, r2 = number of 128-byte blocks; the end-of-input test sits at the bottom of .Loop, so r2 = 0 is not handled
4743 +.global sha512_block_data_order
4744 +.type sha512_block_data_order,%function
4745 +sha512_block_data_order:
4746 + sub r3,pc,#8 @ sha512_block_data_order
4747 + add r2,r1,r2,lsl#7 @ len to point at the end of inp
4748 + stmdb sp!,{r4-r12,lr} @ save r4-r12 and lr; restored by the ldmia on exit
4749 + sub r14,r3,#640 @ K512
4750 + sub sp,sp,#9*8 @ frame: a..h at sp+0..sp+56, X[i] slot at sp+64; each slot holds lo word at +0, hi word at +4
4751 +
4752 + ldr r7,[r0,#32+4] @ e.lo
4753 + ldr r8,[r0,#32+0] @ e.hi
4754 + ldr r9, [r0,#48+4] @ g.lo
4755 + ldr r10, [r0,#48+0] @ g.hi
4756 + ldr r11, [r0,#56+4] @ h.lo
4757 + ldr r12, [r0,#56+0] @ h.hi
4758 +.Loop: @ once per 128-byte block; on entry e is in r7/r8 and g, h are in r9-r12
4759 + str r9, [sp,#48+0] @ g.lo
4760 + str r10, [sp,#48+4] @ g.hi
4761 + str r11, [sp,#56+0] @ h.lo
4762 + str r12, [sp,#56+4] @ h.hi
4763 + ldr r5,[r0,#0+4] @ a.lo
4764 + ldr r6,[r0,#0+0] @ a.hi
4765 + ldr r3,[r0,#8+4] @ b.lo
4766 + ldr r4,[r0,#8+0] @ b.hi
4767 + ldr r9, [r0,#16+4] @ c.lo
4768 + ldr r10, [r0,#16+0] @ c.hi
4769 + ldr r11, [r0,#24+4] @ d.lo
4770 + ldr r12, [r0,#24+0] @ d.hi
4771 + str r3,[sp,#8+0]
4772 + str r4,[sp,#8+4]
4773 + str r9, [sp,#16+0]
4774 + str r10, [sp,#16+4]
4775 + str r11, [sp,#24+0]
4776 + str r12, [sp,#24+4]
4777 + ldr r3,[r0,#40+4] @ f.lo
4778 + ldr r4,[r0,#40+0] @ f.hi
4779 + str r3,[sp,#40+0]
4780 + str r4,[sp,#40+4]
4781 +
4782 +.L00_15: @ rounds 0..15: X[i] is read from the input as a big-endian 64-bit word
4783 + ldrb r3,[r1,#7] @ least significant byte of X[i]
4784 + ldrb r9, [r1,#6]
4785 + ldrb r10, [r1,#5]
4786 + ldrb r11, [r1,#4]
4787 + ldrb r4,[r1,#3]
4788 + ldrb r12, [r1,#2]
4789 + orr r3,r3,r9,lsl#8
4790 + ldrb r9, [r1,#1]
4791 + orr r3,r3,r10,lsl#16
4792 + ldrb r10, [r1],#8 @ most significant byte of X[i]; inp += 8
4793 + orr r3,r3,r11,lsl#24
4794 + orr r4,r4,r12,lsl#8
4795 + orr r4,r4,r9,lsl#16
4796 + orr r4,r4,r10,lsl#24
4797 + str r3,[sp,#64+0] @ X[i].lo
4798 + str r4,[sp,#64+4] @ X[i].hi
4799 + ldr r11,[sp,#56+0] @ h.lo
4800 + ldr r12,[sp,#56+4] @ h.hi
4801 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
4802 + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
4803 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
4804 + mov r9,r7,lsr#14
4805 + mov r10,r8,lsr#14
4806 + eor r9,r9,r8,lsl#18
4807 + eor r10,r10,r7,lsl#18
4808 + eor r9,r9,r7,lsr#18
4809 + eor r10,r10,r8,lsr#18
4810 + eor r9,r9,r8,lsl#14
4811 + eor r10,r10,r7,lsl#14
4812 + eor r9,r9,r8,lsr#9
4813 + eor r10,r10,r7,lsr#9
4814 + eor r9,r9,r7,lsl#23
4815 + eor r10,r10,r8,lsl#23 @ Sigma1(e)
4816 + adds r3,r3,r9
4817 + adc r4,r4,r10 @ T += Sigma1(e)
4818 + adds r3,r3,r11
4819 + adc r4,r4,r12 @ T += h
4820 +
4821 + ldr r9,[sp,#40+0] @ f.lo
4822 + ldr r10,[sp,#40+4] @ f.hi
4823 + ldr r11,[sp,#48+0] @ g.lo
4824 + ldr r12,[sp,#48+4] @ g.hi
4825 + str r7,[sp,#32+0] @ spill e.lo
4826 + str r8,[sp,#32+4] @ spill e.hi
4827 + str r5,[sp,#0+0] @ spill a.lo
4828 + str r6,[sp,#0+4] @ spill a.hi
4829 +
4830 + eor r9,r9,r11
4831 + eor r10,r10,r12
4832 + and r9,r9,r7
4833 + and r10,r10,r8
4834 + eor r9,r9,r11
4835 + eor r10,r10,r12 @ Ch(e,f,g)
4836 +
4837 + ldr r11,[r14,#4] @ K[i].lo
4838 + ldr r12,[r14,#0] @ K[i].hi
4839 + ldr r7,[sp,#24+0] @ d.lo
4840 + ldr r8,[sp,#24+4] @ d.hi
4841 +
4842 + adds r3,r3,r9
4843 + adc r4,r4,r10 @ T += Ch(e,f,g)
4844 + adds r3,r3,r11
4845 + adc r4,r4,r12 @ T += K[i]
4846 + adds r7,r7,r3
4847 + adc r8,r8,r4 @ d += T
4848 +
4849 + and r9,r11,#0xff @ low byte of K[i].lo
4850 + teq r9,#148 @ 148 = 0x94 = low byte of K[15].lo: last round of this loop?
4851 + orreq r14,r14,#1 @ if so, flag it in bit 0 of the K512 pointer
4852 +
4853 + ldr r11,[sp,#8+0] @ b.lo
4854 + ldr r12,[sp,#16+0] @ c.lo
4855 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
4856 + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
4857 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
4858 + mov r9,r5,lsr#28
4859 + mov r10,r6,lsr#28
4860 + eor r9,r9,r6,lsl#4
4861 + eor r10,r10,r5,lsl#4
4862 + eor r9,r9,r6,lsr#2
4863 + eor r10,r10,r5,lsr#2
4864 + eor r9,r9,r5,lsl#30
4865 + eor r10,r10,r6,lsl#30
4866 + eor r9,r9,r6,lsr#7
4867 + eor r10,r10,r5,lsr#7
4868 + eor r9,r9,r5,lsl#25
4869 + eor r10,r10,r6,lsl#25 @ Sigma0(a)
4870 + adds r3,r3,r9
4871 + adc r4,r4,r10 @ T += Sigma0(a)
4872 +
4873 + and r9,r5,r11
4874 + orr r5,r5,r11
4875 + ldr r10,[sp,#8+4] @ b.hi
4876 + ldr r11,[sp,#16+4] @ c.hi
4877 + and r5,r5,r12
4878 + orr r5,r5,r9 @ Maj(a,b,c).lo
4879 + and r12,r6,r10
4880 + orr r6,r6,r10
4881 + and r6,r6,r11
4882 + orr r6,r6,r12 @ Maj(a,b,c).hi
4883 + adds r5,r5,r3
4884 + adc r6,r6,r4 @ h += T
4885 +
4886 + sub sp,sp,#8 @ slide the frame down one slot: every stored value moves up one position (a->b, ..., X[i]->X[i-1])
4887 + add r14,r14,#8 @ advance to K[i+1]
4888 + tst r14,#1 @ exit flag set?
4889 + beq .L00_15
4890 + bic r14,r14,#1 @ clear the flag; r14 now points at K[16]
4891 +
4892 +.L16_79: @ rounds 16..79: X[i] = sigma0(X[i-15]) + sigma1(X[i-2]) + X[i-7] + X[i-16]
4893 + ldr r9,[sp,#184+0] @ X[i-15].lo (184 = 64+8*15)
4894 + ldr r10,[sp,#184+4] @ X[i-15].hi
4895 + ldr r11,[sp,#80+0] @ X[i-2].lo (80 = 64+8*2)
4896 + ldr r12,[sp,#80+4] @ X[i-2].hi
4897 +
4898 + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
4899 + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
4900 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
4901 + mov r3,r9,lsr#1
4902 + mov r4,r10,lsr#1
4903 + eor r3,r3,r10,lsl#31
4904 + eor r4,r4,r9,lsl#31
4905 + eor r3,r3,r9,lsr#8
4906 + eor r4,r4,r10,lsr#8
4907 + eor r3,r3,r10,lsl#24
4908 + eor r4,r4,r9,lsl#24
4909 + eor r3,r3,r9,lsr#7
4910 + eor r4,r4,r10,lsr#7
4911 + eor r3,r3,r10,lsl#25
4912 +
4913 + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
4914 + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
4915 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
4916 + mov r9,r11,lsr#19
4917 + mov r10,r12,lsr#19
4918 + eor r9,r9,r12,lsl#13
4919 + eor r10,r10,r11,lsl#13
4920 + eor r9,r9,r12,lsr#29
4921 + eor r10,r10,r11,lsr#29
4922 + eor r9,r9,r11,lsl#3
4923 + eor r10,r10,r12,lsl#3
4924 + eor r9,r9,r11,lsr#6
4925 + eor r10,r10,r12,lsr#6
4926 + eor r9,r9,r12,lsl#26
4927 +
4928 + ldr r11,[sp,#120+0] @ X[i-7].lo (120 = 64+8*7)
4929 + ldr r12,[sp,#120+4] @ X[i-7].hi
4930 + adds r3,r3,r9
4931 + adc r4,r4,r10 @ sigma0 + sigma1
4932 +
4933 + ldr r9,[sp,#192+0] @ X[i-16].lo (192 = 64+8*16)
4934 + ldr r10,[sp,#192+4] @ X[i-16].hi
4935 + adds r3,r3,r11
4936 + adc r4,r4,r12 @ ... + X[i-7]
4937 + adds r3,r3,r9
4938 + adc r4,r4,r10 @ ... + X[i-16]
4939 + str r3,[sp,#64+0] @ X[i].lo
4940 + str r4,[sp,#64+4] @ X[i].hi
4941 + ldr r11,[sp,#56+0] @ h.lo
4942 + ldr r12,[sp,#56+4] @ h.hi
4943 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
4944 + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
4945 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
4946 + mov r9,r7,lsr#14
4947 + mov r10,r8,lsr#14
4948 + eor r9,r9,r8,lsl#18
4949 + eor r10,r10,r7,lsl#18
4950 + eor r9,r9,r7,lsr#18
4951 + eor r10,r10,r8,lsr#18
4952 + eor r9,r9,r8,lsl#14
4953 + eor r10,r10,r7,lsl#14
4954 + eor r9,r9,r8,lsr#9
4955 + eor r10,r10,r7,lsr#9
4956 + eor r9,r9,r7,lsl#23
4957 + eor r10,r10,r8,lsl#23 @ Sigma1(e)
4958 + adds r3,r3,r9
4959 + adc r4,r4,r10 @ T += Sigma1(e)
4960 + adds r3,r3,r11
4961 + adc r4,r4,r12 @ T += h
4962 +
4963 + ldr r9,[sp,#40+0] @ f.lo
4964 + ldr r10,[sp,#40+4] @ f.hi
4965 + ldr r11,[sp,#48+0] @ g.lo
4966 + ldr r12,[sp,#48+4] @ g.hi
4967 + str r7,[sp,#32+0] @ spill e.lo
4968 + str r8,[sp,#32+4] @ spill e.hi
4969 + str r5,[sp,#0+0] @ spill a.lo
4970 + str r6,[sp,#0+4] @ spill a.hi
4971 +
4972 + eor r9,r9,r11
4973 + eor r10,r10,r12
4974 + and r9,r9,r7
4975 + and r10,r10,r8
4976 + eor r9,r9,r11
4977 + eor r10,r10,r12 @ Ch(e,f,g)
4978 +
4979 + ldr r11,[r14,#4] @ K[i].lo
4980 + ldr r12,[r14,#0] @ K[i].hi
4981 + ldr r7,[sp,#24+0] @ d.lo
4982 + ldr r8,[sp,#24+4] @ d.hi
4983 +
4984 + adds r3,r3,r9
4985 + adc r4,r4,r10 @ T += Ch(e,f,g)
4986 + adds r3,r3,r11
4987 + adc r4,r4,r12 @ T += K[i]
4988 + adds r7,r7,r3
4989 + adc r8,r8,r4 @ d += T
4990 +
4991 + and r9,r11,#0xff @ low byte of K[i].lo
4992 + teq r9,#23 @ 23 = 0x17 = low byte of K[79].lo: final round?
4993 + orreq r14,r14,#1 @ if so, flag it in bit 0 of the K512 pointer
4994 +
4995 + ldr r11,[sp,#8+0] @ b.lo
4996 + ldr r12,[sp,#16+0] @ c.lo
4997 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
4998 + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
4999 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
5000 + mov r9,r5,lsr#28
5001 + mov r10,r6,lsr#28
5002 + eor r9,r9,r6,lsl#4
5003 + eor r10,r10,r5,lsl#4
5004 + eor r9,r9,r6,lsr#2
5005 + eor r10,r10,r5,lsr#2
5006 + eor r9,r9,r5,lsl#30
5007 + eor r10,r10,r6,lsl#30
5008 + eor r9,r9,r6,lsr#7
5009 + eor r10,r10,r5,lsr#7
5010 + eor r9,r9,r5,lsl#25
5011 + eor r10,r10,r6,lsl#25 @ Sigma0(a)
5012 + adds r3,r3,r9
5013 + adc r4,r4,r10 @ T += Sigma0(a)
5014 +
5015 + and r9,r5,r11
5016 + orr r5,r5,r11
5017 + ldr r10,[sp,#8+4] @ b.hi
5018 + ldr r11,[sp,#16+4] @ c.hi
5019 + and r5,r5,r12
5020 + orr r5,r5,r9 @ Maj(a,b,c).lo
5021 + and r12,r6,r10
5022 + orr r6,r6,r10
5023 + and r6,r6,r11
5024 + orr r6,r6,r12 @ Maj(a,b,c).hi
5025 + adds r5,r5,r3
5026 + adc r6,r6,r4 @ h += T
5027 +
5028 + sub sp,sp,#8 @ slide the frame down one slot, as in rounds 0..15
5029 + add r14,r14,#8 @ advance to K[i+1]
5030 + tst r14,#1 @ exit flag set?
5031 + beq .L16_79
5032 + bic r14,r14,#1 @ clear the flag; r14 now points just past K[79]
5033 +
5034 + ldr r3,[sp,#8+0] @ b.lo
5035 + ldr r4,[sp,#8+4] @ b.hi
5036 + ldr r9, [r0,#0+4]
5037 + ldr r10, [r0,#0+0]
5038 + ldr r11, [r0,#8+4]
5039 + ldr r12, [r0,#8+0]
5040 + adds r9,r5,r9
5041 + adc r10,r6,r10 @ h[0] + a
5042 + adds r11,r3,r11
5043 + adc r12,r4,r12 @ h[1] + b
5044 + str r9, [r0,#0+4]
5045 + str r10, [r0,#0+0]
5046 + str r11, [r0,#8+4]
5047 + str r12, [r0,#8+0]
5048 +
5049 + ldr r5,[sp,#16+0] @ c.lo
5050 + ldr r6,[sp,#16+4] @ c.hi
5051 + ldr r3,[sp,#24+0] @ d.lo
5052 + ldr r4,[sp,#24+4] @ d.hi
5053 + ldr r9, [r0,#16+4]
5054 + ldr r10, [r0,#16+0]
5055 + ldr r11, [r0,#24+4]
5056 + ldr r12, [r0,#24+0]
5057 + adds r9,r5,r9
5058 + adc r10,r6,r10 @ h[2] + c
5059 + adds r11,r3,r11
5060 + adc r12,r4,r12 @ h[3] + d
5061 + str r9, [r0,#16+4]
5062 + str r10, [r0,#16+0]
5063 + str r11, [r0,#24+4]
5064 + str r12, [r0,#24+0]
5065 +
5066 + ldr r3,[sp,#40+0] @ f.lo
5067 + ldr r4,[sp,#40+4] @ f.hi
5068 + ldr r9, [r0,#32+4]
5069 + ldr r10, [r0,#32+0]
5070 + ldr r11, [r0,#40+4]
5071 + ldr r12, [r0,#40+0]
5072 + adds r7,r7,r9
5073 + adc r8,r8,r10 @ h[4] + e, kept in r7/r8 for the next block
5074 + adds r11,r3,r11
5075 + adc r12,r4,r12 @ h[5] + f
5076 + str r7,[r0,#32+4]
5077 + str r8,[r0,#32+0]
5078 + str r11, [r0,#40+4]
5079 + str r12, [r0,#40+0]
5080 +
5081 + ldr r5,[sp,#48+0] @ g.lo
5082 + ldr r6,[sp,#48+4] @ g.hi
5083 + ldr r3,[sp,#56+0] @ h.lo
5084 + ldr r4,[sp,#56+4] @ h.hi
5085 + ldr r9, [r0,#48+4]
5086 + ldr r10, [r0,#48+0]
5087 + ldr r11, [r0,#56+4]
5088 + ldr r12, [r0,#56+0]
5089 + adds r9,r5,r9
5090 + adc r10,r6,r10 @ h[6] + g, kept in r9/r10 for the next block
5091 + adds r11,r3,r11
5092 + adc r12,r4,r12 @ h[7] + h, kept in r11/r12 for the next block
5093 + str r9, [r0,#48+4]
5094 + str r10, [r0,#48+0]
5095 + str r11, [r0,#56+4]
5096 + str r12, [r0,#56+0]
5097 +
5098 + add sp,sp,#640 @ undo the 80 per-round "sub sp,sp,#8"
5099 + sub r14,r14,#640 @ rewind to K512
5100 +
5101 + teq r1,r2 @ reached the end of the input?
5102 + bne .Loop
5103 +
5104 + add sp,sp,#8*9 @ destroy frame
5105 + ldmia sp!,{r4-r12,lr} @ restore the registers saved at entry
5106 + tst lr,#1 @ bit 0 of the return address set?
5107 + moveq pc,lr @ be binary compatible with V4, yet
5108 + .word 0xe12fff1e @ interoperable with Thumb ISA:-)
5109 +.size sha512_block_data_order,.-sha512_block_data_order
5110 +.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
5111