#include "arm_arch.h"

// AES primitives for AArch64 built on the ARMv8 Crypto Extension
// instructions (AESE / AESD / AESMC / AESIMC).
//
// Syntax : GNU as, A64, run through the C preprocessor first.
// ABI    : AAPCS64. Arguments arrive in x0-x5 and a status, where the
//          routine has one, is returned in x0.
// Regs   : only caller-saved registers are touched (x0-x14, v0-v7 and
//          v16-v23), so nothing besides the frame record is spilled.
//
// Key schedule layout shared by every routine in this file:
//   offset   0 : round keys, 16 bytes each, rk[0] first
//   offset 240 : 32-bit round count (10, 12 or 14)

#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__)
.arch	armv8-a+crypto
#endif
.align	5
// Key expansion constants, three rows of four 32-bit words:
//   row 0 : initial round constant 0x01 in every lane
//   row 1 : TBL index vector; in little-endian byte order each word
//           selects source bytes 13,14,15,12, i.e. it rotates the last
//           32-bit word and replicates it into all four lanes
//   row 2 : round constant 0x1b, loaded once row 0 has been doubled
//           eight times
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key
// In : x0 = user key bytes, w1 = key length in bits, x2 = key schedule
// Out: x0 =  0 on success
//          -1 if x0 or x2 is NULL
//          -2 if w1 is not 128, 192 or 256
// On success the expanded round keys are written from [x2] and the
// round count is stored at [x2,#240].
// Internal contract relied upon by aes_v8_set_decrypt_key, which enters
// through .Lenc_key: on successful return x2 = schedule + 240 and
// w12 = round count.
//----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp
	mov	x3,#-1			// status for a NULL pointer
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// status for a bad key length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags consumed by the dispatch below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0 for the whole routine
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8			// w1 becomes the loop counter
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = TBL mask; x3 -> 0x1b row

	// None of the four instructions above alter the flags.
	b.lt	.Loop128
	b.eq	.L192
	b	.L256

// 128-bit key: eight rounds in the loop, then two unrolled rounds that
// use the 0x1b constant row.
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// rotate and splat the last word
	ext	v5.16b,v0.16b,v3.16b,#12	// v3 shifted up one word, zero filled
	st1	{v3.4s},[x2],#16	// emit the current round key
	aese	v6.16b,v0.16b		// zero key: used for its S-box step
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// running XOR across the four words
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b constant

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, no post-increment
	add	x2,x2,#0x50		// 160 + 80 = 240: x2 -> round count slot

	mov	w12,#10
	b	.Ldone

// 192-bit key: v3 holds key words 0-3, the low half of v4 words 4-5.
.align	4
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes
	movi	v6.16b,#8		// v6 borrowed as a temporary
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// retarget the mask at the low half of v4

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 208 + 32 = 240
	b	.Ldone

// 256-bit key: v3 and v4 hold the two key halves; seven passes, each
// emitting two round keys, with the exit taken mid-pass.
.align	4
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// 16 + 7*32 = 240 bytes written

	dup	v6.4s,v3.s[3]		// splat only, no rotation
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule + 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key
// In : x0 = user key bytes, w1 = key length in bits, x2 = key schedule
// Out: x0 = 0 on success, otherwise the status from the encrypt setup
// Builds the encryption schedule, then reverses the order of the round
// keys in place and applies AESIMC to every key except the first and
// the last.
//----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp
	bl	.Lenc_key		// same x0-x2; pushes its own frame

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// back to the start of the schedule
	mov	x4,#-16			// step for the descending pointer
	add	x0,x2,x12,lsl#4		// x0 -> last round key

	// Outermost pair is swapped untouched.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

	// Inner pairs are swapped with AESIMC applied, walking inwards.
.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	// Remaining middle key is transformed in place.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

//----------------------------------------------------------------------
// aes_v8_encrypt
// In : x0 = 16-byte input block, x1 = 16-byte output block,
//      x2 = key schedule (round count read from [x2,#240])
// Out: none. Leaf routine, no stack use.
//----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	w3,[x2,#240]		// w3 = rounds
	ld1	{v0.4s},[x2],#16	// v0 = rk[0]
	ld1	{v2.16b},[x0]		// v2 = state
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16	// v1 = rk[1]

	// Two rounds per pass, next two keys loaded while they run.
.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no AESMC
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt

//----------------------------------------------------------------------
// aes_v8_decrypt
// In : x0 = 16-byte input block, x1 = 16-byte output block,
//      x2 = key schedule (round count read from [x2,#240])
// Out: none. Same shape as aes_v8_encrypt with AESD/AESIMC.
//----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	w3,[x2,#240]		// w3 = rounds
	ld1	{v0.4s},[x2],#16	// v0 = rk[0]
	ld1	{v2.16b},[x0]		// v2 = state
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16	// v1 = rk[1]

.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no AESIMC
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt

//----------------------------------------------------------------------
// aes_v8_cbc_encrypt
// In : x0 = input, x1 = output, x2 = length in bytes,
//      x3 = key schedule, x4 = 16-byte IV (read, and updated on exit),
//      w5 = direction: non-zero encrypts, zero decrypts
// Out: none. A length below 16 returns without touching memory; any
//      other length is rounded down to a multiple of 16.
// Register roles:
//   x8        input post-increment: 16, or 0 once the last block has
//             been reached so that the look-ahead load stays in bounds
//   v16,v17   rolling pair of early round keys
//   v18-v23   six round keys preceding the last one
//   v7        last round key
//   v6        IV / most recent ciphertext block
//----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort		// fewer than 16 bytes
	csel	x8,xzr,x8,eq		// exactly one block: do not advance

	cmp	w5,#0			// direction; flags live until b.eq .Lcbc_dec
	ldr	w5,[x3,#240]		// w5 = rounds
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]	// rk[0], rk[1]
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// x7 -> the last 7 round keys
	sub	w5,w5,#2		// w5 = rounds - 8
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32		// x7 -> rk[2]
	mov	w6,w5
	b.eq	.Lcbc_dec

	// ---- encryption ----
	cmp	w5,#2			// rounds == 10
	eor	v0.16b,v0.16b,v6.16b	// first plaintext ^ IV
	eor	v5.16b,v16.16b,v7.16b	// v5 = rk[0] ^ last key
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]	// rk[2], rk[3]
	add	x7,x3,#16		// x7  -> rk[1]
	add	x6,x3,#16*4		// x6  -> rk[4]
	add	x12,x3,#16*5		// x12 -> rk[5]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6		// x14 -> rk[6]
	add	x3,x3,#16*7		// x3  -> rk[7]
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	// v0 still lacks the final key XOR and v16 already carries
	// next plaintext ^ rk[0] ^ last key, so this AESE applies the
	// chaining XOR and rk[0] at the same time.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// rounds == 12
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	// Two extra rounds, 256-bit keys only.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// stop advancing on the last block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// look-ahead plaintext
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// reload rk[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

	// 128-bit keys: all eleven round keys stay in registers.
.align	5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]	// rk[2], rk[3]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// stop advancing on the last block
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// look-ahead plaintext
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

	// ---- decryption, three blocks per pass ----
	// v0, v1, v18 are the blocks in flight; v2, v3, v19 keep the
	// untouched ciphertext needed as chaining values.
.align	5
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2		// round-pair counter
	mov	v3.16b,v0.16b
	mov	v1.16b,v0.16b
	mov	v19.16b,v18.16b
	b.lo	.Lcbc_dec_tail		// one or two blocks only

	mov	v1.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	mov	v2.16b,v0.16b
	mov	v3.16b,v1.16b
	mov	v19.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// IV ^ last key
	subs	x2,x2,#0x30		// flags consumed by b.hs at the loop end
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo		// x6 is zero here (w6 has counted down)
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// on the final pass x6 is negative, so
					// the three loads below are pulled back
					// to end on the last input blocks
	mov	v6.16b,v19.16b		// chaining value for the next pass
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// reload rk[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// reload rk[1]
	st1	{v4.16b},[x1],#16
	mov	v0.16b,v2.16b
	st1	{v5.16b},[x1],#16
	mov	v1.16b,v3.16b
	st1	{v18.16b},[x1],#16
	mov	v18.16b,v19.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30		// x2 == -48: nothing left over
	b.eq	.Lcbc_done
	nop

	// One or two blocks remain, held in v1 and v18.
.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// x2 == -32: exactly one block left
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b	// chaining value ^ last key
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	mov	v6.16b,v19.16b		// last ciphertext becomes the new IV
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	mov	v6.16b,v19.16b		// last ciphertext becomes the new IV
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the updated IV back
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified
	ret
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

//----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks
// In : x0 = input, x1 = output, x2 = number of 16-byte blocks,
//      x3 = key schedule, x4 = 16-byte counter block
// Out: none. The counter block at [x4] is read but not written back.
// Only the 32-bit word at [x4,#12] is incremented, and it is treated
// as big-endian.
// A block count of zero returns without reading or writing any data.
// Register roles:
//   w8        counter value in native byte order
//   v6        counter block template
//   v0,v1,v18 counter blocks in flight
//   v16,v17   rolling pair of early round keys
//   v20-v23   four round keys preceding the last one, v7 = last key
//----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp
	// Without this guard a zero count takes the tail path below and
	// stores two blocks, because its final cmp x2,#1 is not equal.
	cbz	x2,.Lctr32_done
	ldr	w5,[x3,#240]		// w5 = rounds

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]	// rk[0], rk[1]
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4		// x7 -> the last 5 round keys
	sub	w5,w5,#2		// w5 = rounds - 6
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32		// x7 -> rk[2]
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: tail must not advance x0
#ifndef __ARMEB__
	rev	w8, w8
#endif
	mov	v1.16b,v0.16b
	add	w10, w8, #1
	mov	v18.16b,v0.16b
	add	w8, w8, #2
	mov	v6.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	.Lctr32_tail		// one or two blocks
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	.Loop3x_ctr32

	// Three counter blocks per pass.
.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// The in-flight state moves to v4, v5, v17 so that v0, v1, v18
	// can be refilled with the next three counter blocks.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	mov	v0.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	mov	v1.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	mov	v18.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold the last key into the input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3		// flags consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// reload rk[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// reload rk[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: 0, 1 or 2 blocks left
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block: do not advance x0

	// One or two blocks remain; two are always computed and the
	// second store is skipped when only one is wanted.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// re-reads block 0 when x12 is zero
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified
	ret
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif