#if defined(__arm__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
@ AES for 32-bit ARM using the ARMv8 Crypto Extensions (GNU as syntax,
@ ARM mode).  The AES instructions are emitted as raw .byte encodings so
@ that assemblers without crypto support can still build this file; the
@ comment on each .byte line gives the instruction it encodes.
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
@ Key-expansion constants, three 16-byte vectors:
@   +0   round constant 0x01 in every 32-bit lane (doubled per round)
@   +16  vtbl byte-index mask ("rotate-n-splat")
@   +32  round constant 0x1b in every 32-bit lane (AES-128 tail rounds)
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key
@ In:   r0 = pointer to the user key bytes
@       r1 = key length in bits (128, 192 or 256)
@       r2 = pointer to the output key schedule
@ Out:  r0 =  0 on success
@            -1 if r0 or r2 is NULL
@            -2 if r1 is outside [128,256] or not a multiple of 64
@ The round keys are stored from [r2]; the round count (10/12/14) is
@ stored at byte offset 240 of the schedule.
@ On the success path r2 is left pointing at schedule+240 and r12 holds
@ the round count; aes_hw_set_decrypt_key relies on both via .Lenc_key.
@ Modifies: r0-r3, r12, q0-q3, q8-q10, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags select the key size at the branches below

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ switch to the 0x1b round constant

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ r2 = schedule + 240

	mov	r12,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule + 240
	b	.Ldone

.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ r2 = schedule + 240 after the last store

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule + 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key
@ In:   r0, r1, r2 as for aes_hw_set_encrypt_key
@ Out:  r0 = 0 on success, otherwise the error code from .Lenc_key
@ Builds the encryption schedule through .Lenc_key, then reverses the
@ order of the round keys in place.  aesimc is applied to every round
@ key except the first and last ones.
@ Saves/restores r4 and lr on the stack.
@ Modifies: r0-r3, r12, q0-q3, q8-q10, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ swap the first and last round keys without aesimc
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ swap the remaining pairs, walking inwards from both ends
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ middle round key: transform in place
	vld1.32	{q0},[r2]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

@-----------------------------------------------------------------------
@ aes_hw_encrypt
@ In:   r0 = pointer to the 16-byte input block
@       r1 = pointer to the 16-byte output block
@       r2 = pointer to the key schedule (round count at offset 240)
@ Out:  16 bytes stored at [r1]
@ Leaf function.  Modifies: r2, r3, q0-q2, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ two rounds per iteration
.Loop_enc:
	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ xor with the last round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt

@-----------------------------------------------------------------------
@ aes_hw_decrypt
@ In:   r0 = pointer to the 16-byte input block
@       r1 = pointer to the 16-byte output block
@       r2 = pointer to the key schedule (round count at offset 240)
@ Out:  16 bytes stored at [r1]
@ Same structure as aes_hw_encrypt, using aesd/aesimc.
@ Leaf function.  Modifies: r2, r3, q0-q2, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ two rounds per iteration
.Loop_dec:
	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ xor with the last round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt

@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt
@ In:   r0     = input pointer
@       r1     = output pointer
@       r2     = length in bytes
@       r3     = pointer to the key schedule (round count at offset 240)
@       [sp]   = pointer to the 16-byte IV (loaded into r4)
@       [sp+4] = direction flag (loaded into r5): zero selects the
@                decrypt path, non-zero the encrypt path
@ A length below 16 returns without touching the output or the IV;
@ otherwise the length is rounded down to a multiple of 16.
@ On completion the chaining value is stored back to the IV buffer.
@ Saves/restores r4-r8, lr and d8-d15.
@ Modifies: r0-r3, r12, q0-q3, q8-q15, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ single block: do not advance the input pointer

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from "cmp r5,#0" above

	cmp	r5,#2
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

	@ three blocks (q0, q1, q10) are processed per pass
.Loop3x_cbc_dec:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags still from "subs r2,r2,#0x30"

	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

	@ one or two blocks left (q1, q10)
.Lcbc_dec_tail:
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one		@ flags still from "cmn r2,#0x20"
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV buffer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks
@ In:   r0   = input pointer
@       r1   = output pointer
@       r2   = number of 16-byte blocks
@       r3   = pointer to the key schedule (round count at offset 240)
@       [sp] = pointer to the 16-byte counter block (loaded into r4)
@ The last 32-bit word of the counter block is the block counter; it is
@ byte-reversed into r8 on little-endian builds and incremented per
@ block.  Nothing is stored back through r4 in this routine.
@ Saves/restores r4-r10, lr and d8-d15.
@ Modifies: r0-r2, r12, q0-q3, q8-q15, flags.
@-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10
	bls	.Lctr32_tail		@ flags still from "cmp r2,#2"
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12
	b	.Loop3x_ctr32

	@ three counter blocks (q0, q1, q10) are processed per pass
.align	4
.Loop3x_ctr32:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
	.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
	.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
	.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
	.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
	.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
	.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
	.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
	.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
	.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
	.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32		@ flags still from "subs r2,r2,#3"

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16			@ r12 was reused as a counter temporary above
	moveq	r12,#0

	@ one or two blocks left (q0, q1); r12 is the step between them
.Lctr32_tail:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif