#include "arm_arch.h"

@ SHA-1 block transform, ARM-mode encodings, GNU as syntax.
@
@ Inputs, as the code below reads them:
@   r0   pointer to five 32-bit chaining words (loaded once on entry,
@        then read, added to and written back after every block)
@   r1   input pointer, advanced by 4 for each message word consumed
@   r2   number of 64-byte blocks; converted on entry into an end
@        pointer (r1 + r2*64) that terminates the outer loop
@ NOTE(review): presumably the C prototype is (state, data, nblocks);
@ confirm against the header that declares this symbol.
@ The outer loop tests for the end only after a block has been
@ processed, so a block count of zero is not a no-op.
@
@ Working registers:
@   r3..r7    A, B, C, D, E.  The register assignment rotates by one
@             position per round instead of moving values around.
@   r8        round constant K for the current group of 20 rounds
@   r9..r12   temporaries
@   r14       descending pointer into the message schedule kept on the
@             stack; it addresses the most recently stored word
@   sp        lowered in steps so that r14 == sp marks the end of the
@             current group of rounds
@ r4-r12 and lr are pushed on entry and restored on exit.
@
@ Deferred rotation: the registers standing for C, D and E hold their
@ value rotated left by two (ror#30 at the top of each block).  Every
@ read of them applies ror#2 through the operand shifter, so the
@ B -> ROR(B,2) step of each round costs no instruction of its own.

.text

@ ---------------------------------------------------------------------
@ Round building blocks.  Arguments va..ve name the registers playing
@ the roles A..E in that particular round.
@ ---------------------------------------------------------------------

@ Rounds 0..15: fetch one big-endian message word from [r1], push it
@ onto the schedule, and add K, ROR(A,27), X[i] and F_00_19(B,C,D) to E.
.macro	sha1_round_00_15	va,vb,vc,vd,ve
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]		@ assemble the word byte by byte
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	\ve,r8,\ve,ror#2	@ E += K
	ldrb	r12,[r1],#4		@ byte 0, then step to next word
	orr	r9,r9,r10,lsl#8
	eor	r10,\vc,\vd		@ C ^ D (still pre-rotated)
	orr	r9,r9,r11,lsl#16
	add	\ve,\ve,\va,ror#27	@ E += ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ word load, tolerates unaligned r1
	add	\ve,r8,\ve,ror#2	@ E += K
	eor	r10,\vc,\vd		@ C ^ D (still pre-rotated)
	add	\ve,\ve,\va,ror#27	@ E += ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ little-endian: swap to big-endian
#endif
#endif
	and	r10,\vb,r10,ror#2	@ B & (C ^ D)
	add	\ve,\ve,r9		@ E += X[i]
	eor	r10,r10,\vd,ror#2	@ F_00_19 = (B & (C ^ D)) ^ D
	str	r9,[r14,#-4]!		@ push X[i] onto the schedule
	add	\ve,\ve,r10		@ E += F_00_19(B,C,D)
.endm

@ Shared front half of rounds 16..79.  Computes
@   X[i] = ROL(X[i-16] ^ X[i-14] ^ X[i-8] ^ X[i-3], 1)
@ from the words at 15, 13, 7 and 2 slots above r14, pushes it, and
@ interleaves E += K and E += ROR(A,27).
@ Leaves r9 = X[i] and r10 = C ^ D (pre-rotated) for the caller.
.macro	sha1_xupdate	va,vc,vd,ve
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	\ve,r8,\ve,ror#2	@ E += K
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12
	eor	r10,\vc,\vd		@ C ^ D (still pre-rotated)
	mov	r9,r9,ror#31		@ rotate left by one ...
	add	\ve,\ve,\va,ror#27	@ E += ROR(A,27)
	eor	r9,r9,r11,ror#31	@ ... both halves, then combine
	str	r9,[r14,#-4]!		@ push X[i] onto the schedule
.endm

@ Rounds 16..19: scheduled word with F_00_19 = (B & (C ^ D)) ^ D.
.macro	sha1_round_16_19	va,vb,vc,vd,ve
	sha1_xupdate	\va,\vc,\vd,\ve
	and	r10,\vb,r10,ror#2	@ B & (C ^ D)
	add	\ve,\ve,r9		@ E += X[i]
	eor	r10,r10,\vd,ror#2	@ ... ^ D
	add	\ve,\ve,r10		@ E += F_00_19(B,C,D)
.endm

@ Rounds 20..39 and 60..79: F = B ^ C ^ D.
.macro	sha1_round_20_39	va,vb,vc,vd,ve
	sha1_xupdate	\va,\vc,\vd,\ve
	eor	r10,\vb,r10,ror#2	@ B ^ C ^ D
	add	\ve,\ve,r9		@ E += X[i]
	add	\ve,\ve,r10		@ E += F_20_39(B,C,D)
.endm

@ Rounds 40..59: F = (B & (C ^ D)) + (C & D).  The two terms never
@ have a bit in common, so they are added to E separately.
.macro	sha1_round_40_59	va,vb,vc,vd,ve
	sha1_xupdate	\va,\vc,\vd,\ve
	and	r10,\vb,r10,ror#2	@ B & (C ^ D)
	and	r11,\vc,\vd		@ C & D (pre-rotated)
	add	\ve,\ve,r9		@ E += X[i]
	add	\ve,\ve,r10		@ E += first term of F_40_59
	add	\ve,\ve,r11,ror#2	@ E += second term of F_40_59
.endm

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6		@ r2 = end of input (r1 + blocks*64)
	ldmia	r0,{r3,r4,r5,r6,r7}	@ A..E = chaining words

.Lloop:					@ ---- one 64-byte block ----
	ldr	r8,.LK_00_19
	mov	r14,sp			@ schedule grows down from here
	sub	sp,sp,#15*4		@ sentinel: 15 words below
	mov	r5,r5,ror#30		@ enter the pre-rotated form
	mov	r6,r6,ror#30		@ for C, D and E
	mov	r7,r7,ror#30

.L_00_15:				@ rounds 0..14, five per pass
	sha1_round_00_15	r3,r4,r5,r6,r7
	sha1_round_00_15	r7,r3,r4,r5,r6
	sha1_round_00_15	r6,r7,r3,r4,r5
	sha1_round_00_15	r5,r6,r7,r3,r4
	sha1_round_00_15	r4,r5,r6,r7,r3
	teq	r14,sp
	bne	.L_00_15

	sub	sp,sp,#25*4		@ sentinel for rounds 15..39
	sha1_round_00_15	r3,r4,r5,r6,r7	@ round 15: last input word
	sha1_round_16_19	r7,r3,r4,r5,r6	@ rounds 16..19
	sha1_round_16_19	r6,r7,r3,r4,r5
	sha1_round_16_19	r5,r6,r7,r3,r4
	sha1_round_16_19	r4,r5,r6,r7,r3

	ldr	r8,.LK_20_39
	cmn	sp,#0			@ carry clear: this pass is 20..39

.L_20_39_or_60_79:			@ shared by rounds 20..39 and 60..79
	sha1_round_20_39	r3,r4,r5,r6,r7
	sha1_round_20_39	r7,r3,r4,r5,r6
	sha1_round_20_39	r6,r7,r3,r4,r5
	sha1_round_20_39	r5,r6,r7,r3,r4
	sha1_round_20_39	r4,r5,r6,r7,r3
	teq	r14,sp			@ leaves the carry flag untouched
	bne	.L_20_39_or_60_79
	bcs	.L_done			@ carry set: that was 60..79

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ sentinel for rounds 40..59

.L_40_59:
	sha1_round_40_59	r3,r4,r5,r6,r7
	sha1_round_40_59	r7,r3,r4,r5,r6
	sha1_round_40_59	r6,r7,r3,r4,r5
	sha1_round_40_59	r5,r6,r7,r3,r4
	sha1_round_40_59	r4,r5,r6,r7,r3
	teq	r14,sp
	bne	.L_40_59

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4		@ sentinel for rounds 60..79
	cmp	sp,#0			@ carry set: next pass is 60..79
	b	.L_20_39_or_60_79

.L_done:
	add	sp,sp,#80*4		@ release the 80-word schedule
	ldmia	r0,{r8,r9,r10,r11,r12}	@ previous chaining words
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2		@ undo the deferred rotation
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2			@ reached the end pointer?
	bne	.Lloop

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1			@ bit 0 clear: plain ARM return
	moveq	pc,lr
	.word	0xe12fff1e		@ bx lr as a raw word, for Thumb callers
#endif

.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro (at) openssl.org>"
.align	2