#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
@ AES key setup, single-block, CBC and CTR32 routines built on the ARMv8
@ AES instructions, assembled as 32-bit ARM code (see .code 32 below).
@
@ The AES instructions are emitted as raw .byte words; the trailing comment
@ on each such line gives the mnemonic that the bytes encode.
@
@ Key schedule layout used throughout this file: the round keys, 16 bytes
@ each, followed by the round count (10, 12 or 14) as a 32-bit word at byte
@ offset 240.
.text


.code	32
#undef	__thumb2__
.align	5
@ Constants for key expansion, read by Lenc_key:
@   row 0 - initial round constant, loaded into q1 and doubled every round
@   row 1 - byte indices for vtbl, loaded into q2 (rotate-n-splat)
@   row 2 - 0x1b, reloaded into q1 for the last two rounds of the 128-bit path
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ _aes_hw_set_encrypt_key
@ In:	r0 = user key bytes
@	r1 = key length in bits
@	r2 = key schedule to fill
@ Out:	r0 = 0 on success,
@	     -1 if r0 or r2 is zero,
@	     -2 if r1 is not 128, 192 or 256
@ On success r2 is left at schedule+240 and r12 holds the round count;
@ _aes_hw_set_decrypt_key enters at Lenc_key and relies on both.
@ Writes r1, r3, r12, q0-q3, q8-q10.
@-----------------------------------------------------------------------
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_encrypt_key
#endif
.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
	mov	r3,#-1			@ r3 carries the return value
	cmp	r0,#0
	beq	Lenc_key_abort
	cmp	r2,#0
	beq	Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	Lenc_key_abort
	cmp	r1,#256
	bgt	Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	Lenc_key_abort

	adr	r3,Lrcon
	cmp	r1,#192			@ flags select the path at the branches below

	veor	q0,q0,q0		@ q0 = 0, used as zero key and zero filler
	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon, q2 = vtbl mask; r3 -> row 2

	blt	Loop128
	beq	L192
	b	L256

.align	4
@ 128-bit key: eight iterations (r1 counts down from 8). Each one stores
@ the current round key and derives the next one.
Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated and splatted last word
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12		@ q9 = q3 shifted up one word, zero filled
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by bne at the loop end

	veor	q3,q3,q9		@ running XOR of the previous key words
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ add the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ double the round constant
	veor	q3,q3,q10
	bne	Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b (third Lrcon row)

	@ Two more rounds, unrolled, using 0x1b and then its double.
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, at offset 160
	add	r2,r2,#0x50		@ r2 = schedule+240

	mov	r12,#10			@ round count
	b	Ldone

.align	4
@ 192-bit key: q3 holds the first 16 key bytes, d16 the remaining 8.
@ Eight iterations, each storing 24 bytes of schedule.
L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by bne at the loop end

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]		@ splat the top word of q3
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	Loop192

	mov	r12,#12			@ round count
	add	r2,r2,#0x20		@ 208 bytes written; r2 = schedule+240
	b	Ldone

.align	4
@ 256-bit key: q3 holds the first 16 key bytes, q8 the second 16.
@ Seven iterations, each storing 32 bytes of schedule; the loop leaves
@ through the beq in the middle once the final round key is stored.
L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14			@ round count
	vst1.32	{q3},[r2]!

Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1		@ flags consumed by beq Ldone below

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	Ldone			@ r2 = schedule+240 here

	@ Second half of the pair: no rotation and no round constant.
	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	Loop256

Ldone:
	str	r12,[r2]		@ round count at schedule+240
	mov	r3,#0

Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr


@-----------------------------------------------------------------------
@ _aes_hw_set_decrypt_key
@ Same arguments and return values as _aes_hw_set_encrypt_key.
@ Builds the encryption schedule through Lenc_key, then reverses the order
@ of the round keys in place, applying aesimc to every key except the two
@ outermost ones.
@-----------------------------------------------------------------------
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_decrypt_key
#endif
.align	5
_aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	Lenc_key

	cmp	r0,#0
	bne	Ldec_key_abort		@ pass the error code through

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ step for the downward pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys unchanged.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Walk inwards from both ends, swapping and transforming each pair.
Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	Loop_imc

	@ The pointers have met: transform the middle key in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
Ldec_key_abort:
	ldmia	sp!,{r4,pc}

@-----------------------------------------------------------------------
@ _aes_hw_encrypt
@ In:	r0 = 16-byte input block
@	r1 = 16-byte output block
@	r2 = key schedule (round count read from offset 240)
@ Writes r2, r3, q0-q2. No return value is set.
@-----------------------------------------------------------------------
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_encrypt
#endif
.align	5
_aes_hw_encrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration; the next two keys load while they run.
Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_enc

	@ Last two rounds: the final one has no aesmc and ends with an XOR
	@ of the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@-----------------------------------------------------------------------
@ _aes_hw_decrypt
@ In:	r0 = 16-byte input block
@	r1 = 16-byte output block
@	r2 = key schedule (round count read from offset 240)
@ Same structure as _aes_hw_encrypt, with aesd/aesimc in place of
@ aese/aesmc. Writes r2, r3, q0-q2. No return value is set.
@-----------------------------------------------------------------------
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_decrypt
#endif
.align	5
_aes_hw_decrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@-----------------------------------------------------------------------
@ _aes_hw_cbc_encrypt
@ In:	r0 = input
@	r1 = output
@	r2 = length in bytes (rounded down to a multiple of 16; a length
@	     below 16 returns at once without touching output or ivec)
@	r3 = key schedule
@	[sp]   = ivec pointer (loaded into r4); updated on exit
@	[sp,4] = direction flag (loaded into r5); zero selects decryption
@ Register roles after setup:
@	q8,q9   = rndkey[0], rndkey[1]
@	q10-q15 = the six round keys before the last, q7 = last round key
@	q6      = current chaining value
@	r8      = input stride: 16, or 0 once the last block has been loaded
@-----------------------------------------------------------------------
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_cbc_encrypt
#endif
.align	5
_aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	@ The flags from this cmp stay live until beq Lcbc_dec below; none of
	@ the instructions in between sets flags.
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	Lcbc_dec

	@ ---- encryption ----
	cmp	r5,#2			@ r5 = rounds-8, so 2 means a 128-bit key
	veor	q0,q0,q6		@ first block ^ ivec
	@ q5 = rndkey[0] ^ last round key. It is folded into each following
	@ input block so that the first aese of the next iteration applies
	@ both the chaining XOR and rndkey[0] to the not-yet-finalised state.
	veor	q5,q8,q7
	beq	Lcbc_enc128

	vld1.32	{q2,q3},[r7]		@ rndkey[2], rndkey[3]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds-8, so 4 means a 192-bit key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	Lcbc_enc192

	@ Two extra rounds, reached only with 256-bit keys.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and bhs below
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	Lcbc_done

.align	5
@ 128-bit key: every round key stays in registers
@ (q8, q9, q2, q3, q10-q15, q7).
Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and bhs below
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	Lcbc_done
.align	5
@ ---- decryption ----
@ Three blocks per iteration in q0, q1, q10. Copies of the ciphertext are
@ kept in q2, q3, q11 because they are the chaining values for the
@ following blocks. A tail handles a remainder of one or two blocks.
Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	Lcbc_dec_tail		@ fewer than three blocks in total

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key, block 0
	subs	r2,r2,#0x30		@ flags consumed by movlo and bhs below
	veor	q5,q2,q7		@ same for block 1
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7		@ same for block 2
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ last ciphertext block becomes the chaining value
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	Loop3x_cbc_dec

	cmn	r2,#0x30		@ eq when no blocks remain
	beq	Lcbc_done
	nop

	@ One or two blocks remain, in q1 and q10 (the same block twice when
	@ only one remains).
Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ eq when exactly one block remains
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	Lcbc_done

Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

Lcbc_done:
	vst1.8	{q6},[r4]		@ write the final chaining value back to ivec
Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}

@-----------------------------------------------------------------------
@ _aes_hw_ctr32_encrypt_blocks
@ In:	r0 = input
@	r1 = output
@	r2 = number of 16-byte blocks
@	r3 = key schedule
@	[sp] = pointer to the 16-byte counter block (loaded into r4)
@ The counter is the 32-bit word at offset 12 of the counter block. It is
@ byte-reversed with rev unless __ARMEB__ is defined, incremented in r8,
@ and reversed again before it is inserted into each block. The counter
@ block in memory is only read, never written back.
@ Register roles after setup:
@	q8,q9   = rndkey[0], rndkey[1]
@	q12-q15 = the four round keys before the last, q7 = last round key
@	q6      = copy of the initial counter block
@ Three blocks are processed per iteration, then a tail of one or two.
@-----------------------------------------------------------------------
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func	_aes_hw_ctr32_encrypt_blocks
#endif
.align	5
_aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: tail input stride is 0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10		@ q1 = counter+1
	bls	Lctr32_tail		@ one or two blocks in total
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12		@ q10 = counter+2
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_ctr32

	@ The running states move to q4, q5, q9 here so that q0, q1, q10
	@ can be rebuilt as the next three counter blocks while the
	@ remaining rounds execute.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input ^ last round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3		@ flags consumed by bhs at the loop end
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks remaining
	beq	Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ single block: tail input stride is 0

	@ One or two blocks remain; the counter blocks are in q0 and q1.
Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]		@ with stride 0 this rereads the same block
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	Lctr32_done		@ only one block was requested
	vst1.8	{q3},[r1]

Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}

#endif