@ AES routines for 32-bit ARM (A32) built on the AES instructions AESE,
@ AESMC, AESD and AESIMC. Those instructions are emitted as raw .byte
@ encodings; the mnemonic each encoding stands for is given in the comment
@ on the same line (presumably so that assemblers without the mnemonics
@ can still build the file - TODO confirm).
@
@ Syntax: GNU as, ARM mode, run through the C preprocessor.
@ Calling convention as used by the code below: the first four arguments
@ arrive in r0-r3, further arguments on the stack, result in r0; r4 and up
@ plus d8-d15 are saved and restored wherever they are used.
@
@ Key schedule layout shared by every routine in this file: round keys of
@ 16 bytes each starting at offset 0, and the number of rounds stored as a
@ 32-bit word at byte offset 240.

#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a
.fpu	neon
.code	32
.align	5
@ Constants for key expansion, loaded with vld1.32 relative to .Lrcon:
@   +0   initial round constant 0x01 in every 32-bit lane
@   +16  vtbl byte-index mask that picks bytes 13,14,15,12 of a vector,
@        i.e. the last 32-bit word rotated, replicated into every lane
@   +32  round constant 0x1b, loaded once 0x01 has been doubled 8 times
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ aes_v8_set_encrypt_key
@ In:    r0 = pointer to the user key bytes
@        r1 = key length in bits (128, 192 or 256)
@        r2 = pointer to the key schedule to fill in
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is zero
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@        r2 = schedule base + 240 and r12 = number of rounds on success;
@             aes_v8_set_decrypt_key depends on both through .Lenc_key
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@-----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ r3 carries the return value
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags are consumed by blt/beq below

	veor	q0,q0,q0		@ q0 = 0, the all-zero key for aese
	vld1.8	{q3},[r0]!		@ q3 = first 16 bytes of the user key
	mov	r1,#8	@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

@ 128-bit key: 8 loop iterations with the doubling round constant, then
@ two unrolled rounds that start from the 0x1b constant.
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated last word of q3, splatted
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12		@ q9 = q3 moved up one word, zero filled
	vst1.32	{q3},[r2]!		@ store the current round key
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9		@ three shift-and-xor steps build the
	vext.8	q9,q0,q9,#12		@ running xor of the four key words
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b constant (r3 = .Lrcon+32)

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no write-back
	add	r2,r2,#0x50		@ r2 = schedule base + 240

	mov	r12,#10			@ number of rounds
	b	.Ldone

@ 192-bit key: q3 holds four key words and d16 the remaining two; every
@ iteration stores 8 + 16 bytes of schedule.
.align	4
.L192:
	vld1.8	{d16},[r0]!		@ d16 = user key bytes 16..23
	vmov.i8	q10,#8	@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]		@ splat the last word of q3
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12			@ number of rounds
	add	r2,r2,#0x20		@ r2 = schedule base + 240
	b	.Ldone

@ 256-bit key: q3 and q8 hold the two halves of the key; the loop leaves
@ through the beq in the middle of the seventh iteration.
.align	4
.L256:
	vld1.8	{q8},[r0]		@ q8 = user key bytes 16..31
	mov	r1,#7
	mov	r12,#14			@ number of rounds
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags still come from the subs above

	vdup.32	q10,d7[1]		@ splat the last word of q3, no rotation
	vext.8	q9,q0,q8,#12
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ rounds go to schedule offset 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3	@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@-----------------------------------------------------------------------
@ aes_v8_set_decrypt_key
@ In:    r0, r1, r2 as for aes_v8_set_encrypt_key
@ Out:   r0 = 0 on success, otherwise the error code from .Lenc_key
@ Builds the encryption schedule through .Lenc_key, then reverses the
@ order of the round keys in place and applies aesimc to every round key
@ except the first and the last.
@ Clobb: r1-r3, r12, q0-q3, q8-q10, flags (r4 and lr are saved)
@-----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240	@ restore original r2
	mov	r4,#-16			@ post-index step for the upper pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	vld1.32	{q0},[r2]		@ swap first and last round key as they are
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]		@ swap the next pair, transforming both
	vld1.32	{q1},[r0]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	vld1.32	{q0},[r2]		@ middle round key, transformed in place
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0	@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

@-----------------------------------------------------------------------
@ aes_v8_encrypt
@ In:    r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (rounds at offset 240)
@ Out:   16 bytes stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@ Two rounds are done per loop iteration; the final two round keys are
@ applied after the loop.
@-----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt

@-----------------------------------------------------------------------
@ aes_v8_decrypt
@ In:    r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (rounds at offset 240); the
@             round keys are consumed in ascending address order, which
@             matches what aes_v8_set_decrypt_key writes
@ Out:   16 bytes stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt

@-----------------------------------------------------------------------
@ aes_v8_cbc_encrypt
@ In:    r0     = input pointer
@        r1     = output pointer
@        r2     = length in bytes; a length below 16 returns at once and
@                 any remainder after the last whole block is ignored
@        r3     = pointer to the key schedule (rounds at offset 240)
@        [sp]   = pointer to the 16-byte IV, read on entry and rewritten
@                 on exit with the last ciphertext block
@        [sp+4] = zero to decrypt, non-zero to encrypt
@ Out:   data stored at [r1], IV updated; no value is returned in r0
@ Saves: r4-r8, lr and d8-d15 on the stack
@ Clobb: r0-r3, r12, q0-q3, q8-q15, flags
@
@ Register roles: q6 = IV or previous ciphertext block, q7 = last round
@ key, q8/q9 = round keys 0 and 1 (reloaded while iterating), q10-q15 =
@ the six round keys in front of the last one, r8 = post-index step for
@ the input pointer (16, or 0 once the final block has been fetched so
@ the look-ahead load does not move past it).
@-----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	mov	ip,sp			@ ip = address of the stack arguments
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldmia	ip,{r4,r5}	@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort		@ fewer than 16 bytes: nothing to do
	moveq	r8,#0			@ exactly one block

	cmp	r5,#0	@ en- or decrypting?
	ldr	r5,[r3,#240]		@ r5 = number of rounds
	and	r2,r2,#-16
	vld1.8	{q6},[r4]		@ q6 = IV
	vld1.8	{q0},[r0],r8		@ q0 = first input block

	vld1.32	{q8,q9},[r3]	@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32		@ r7 = address of round key 2
	mov	r6,r5
	beq	.Lcbc_dec		@ flags from cmp r5,#0 above

	cmp	r5,#2			@ r5 = rounds - 8, so 2 means 10 rounds
	veor	q0,q0,q6		@ first block xor IV
	veor	q5,q8,q7		@ q5 = round key 0 xor last round key
	beq	.Lcbc_enc128

	@ 12- and 14-round path. q0 is carried between iterations without the
	@ last round key applied; xoring the next input with q5 makes the
	@ first aese of the next block apply both that key and round key 0.
	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ 4 means 12 rounds
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags are consumed by moveq and bhs
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ fetch the next input block
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]	@ re-pre-load rndkey[1]
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = finished ciphertext block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ 10-round path: the whole key schedule stays in registers.
.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc128:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags are consumed by moveq and bhs
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ fetch the next input block
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = finished ciphertext block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ Decryption handles three blocks per iteration in q0, q1 and q10.
	@ Copies of the ciphertext blocks are kept in q2, q3 and q11 because
	@ they are needed as chaining values after the AES rounds.
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32	@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail		@ fewer than three blocks in total

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value xor last round key
	subs	r2,r2,#0x30		@ flags are consumed by movlo and bhs
	veor	q5,q2,q7
	movlo	r6,r2	@ r6, r6, is zero at this point
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6	@ r0 is adjusted in such way that
				@ at exit from the loop q1-q10
				@ are loaded with last "words"
	vorr	q6,q11,q11		@ next chaining value
	mov	r7,r3
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30		@ r2 == -0x30: no blocks remain
	beq	.Lcbc_done
	nop

	@ One or two blocks remain, held in q1 and q10.
.Lcbc_dec_tail:
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ r2 == -0x20: exactly one block; see beq
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

@-----------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks
@ In:    r0   = input pointer
@        r1   = output pointer
@        r2   = number of 16-byte blocks
@        r3   = pointer to the key schedule (rounds at offset 240)
@        [sp] = pointer to the 16-byte counter block; the 32-bit word at
@               byte offset 12 is the counter that gets incremented
@ Out:   data stored at [r1]; the counter block in memory is only read
@ Saves: r4-r10, lr and d8-d15 on the stack
@ Clobb: r0-r3, r12, q0-q3, q8-q15, flags
@
@ Register roles: r8 = counter in host byte order, q6 = copy of the
@ counter block used as a template, q0/q1/q10 = the counter blocks being
@ encrypted, q7 = last round key, q12-q15 = the four round keys in front
@ of it, q8/q9 = round keys streamed from memory.
@ NOTE(review): no check for a block count of zero is visible here;
@ callers presumably always pass r2 >= 1 - confirm.
@-----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp			@ ip = address of the stack argument
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldr	r4, [ip]	@ load remaining arg
	ldr	r5,[r3,#240]		@ r5 = number of rounds

	ldr	r8, [r4, #12]		@ r8 = counter word as stored
	vld1.32	{q0},[r4]		@ q0 = whole counter block

	vld1.32	{q8,q9},[r3]	@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags are consumed by movlo and bls
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32		@ r7 = address of round key 2
	mov	r6,r5
	movlo	r12,#0			@ fewer than two blocks: do not advance r0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10		@ q1 = counter block + 1
	bls	.Lctr32_tail		@ two blocks or fewer: no 3x loop
	rev	r12, r8
	sub	r2,r2,#3	@ bias
	vmov.32	d21[1],r12		@ q10 = counter block + 2
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ Last rounds for the current three blocks. The results move into
	@ q4, q5 and q9 so that q0, q1 and q10 can already be refilled with
	@ the next three counter values.
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
	.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
	.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input xor last round key
	add	r10,r8,#2
	.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
	.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
	.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
	.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
	.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3		@ flags are consumed by bhs below
	.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
	.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
	.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks remaining
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: do not advance r0

	@ Two counter blocks are always encrypted here (q0 and q1); the
	@ second result is stored only when two blocks remain.
.Lctr32_tail:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif