1 #if defined(__x86_64__) 2 .text 3 4 .globl _aesni_encrypt 5 .private_extern _aesni_encrypt 6 7 .p2align 4 8 _aesni_encrypt: 9 movups (%rdi),%xmm2 10 movl 240(%rdx),%eax 11 movups (%rdx),%xmm0 12 movups 16(%rdx),%xmm1 13 leaq 32(%rdx),%rdx 14 xorps %xmm0,%xmm2 15 L$oop_enc1_1: 16 .byte 102,15,56,220,209 17 decl %eax 18 movups (%rdx),%xmm1 19 leaq 16(%rdx),%rdx 20 jnz L$oop_enc1_1 21 .byte 102,15,56,221,209 22 movups %xmm2,(%rsi) 23 .byte 0xf3,0xc3 24 25 26 .globl _aesni_decrypt 27 .private_extern _aesni_decrypt 28 29 .p2align 4 30 _aesni_decrypt: 31 movups (%rdi),%xmm2 32 movl 240(%rdx),%eax 33 movups (%rdx),%xmm0 34 movups 16(%rdx),%xmm1 35 leaq 32(%rdx),%rdx 36 xorps %xmm0,%xmm2 37 L$oop_dec1_2: 38 .byte 102,15,56,222,209 39 decl %eax 40 movups (%rdx),%xmm1 41 leaq 16(%rdx),%rdx 42 jnz L$oop_dec1_2 43 .byte 102,15,56,223,209 44 movups %xmm2,(%rsi) 45 .byte 0xf3,0xc3 46 47 48 .p2align 4 49 _aesni_encrypt2: 50 movups (%rcx),%xmm0 51 shll $4,%eax 52 movups 16(%rcx),%xmm1 53 xorps %xmm0,%xmm2 54 xorps %xmm0,%xmm3 55 movups 32(%rcx),%xmm0 56 leaq 32(%rcx,%rax,1),%rcx 57 negq %rax 58 addq $16,%rax 59 60 L$enc_loop2: 61 .byte 102,15,56,220,209 62 .byte 102,15,56,220,217 63 movups (%rcx,%rax,1),%xmm1 64 addq $32,%rax 65 .byte 102,15,56,220,208 66 .byte 102,15,56,220,216 67 movups -16(%rcx,%rax,1),%xmm0 68 jnz L$enc_loop2 69 70 .byte 102,15,56,220,209 71 .byte 102,15,56,220,217 72 .byte 102,15,56,221,208 73 .byte 102,15,56,221,216 74 .byte 0xf3,0xc3 75 76 77 .p2align 4 78 _aesni_decrypt2: 79 movups (%rcx),%xmm0 80 shll $4,%eax 81 movups 16(%rcx),%xmm1 82 xorps %xmm0,%xmm2 83 xorps %xmm0,%xmm3 84 movups 32(%rcx),%xmm0 85 leaq 32(%rcx,%rax,1),%rcx 86 negq %rax 87 addq $16,%rax 88 89 L$dec_loop2: 90 .byte 102,15,56,222,209 91 .byte 102,15,56,222,217 92 movups (%rcx,%rax,1),%xmm1 93 addq $32,%rax 94 .byte 102,15,56,222,208 95 .byte 102,15,56,222,216 96 movups -16(%rcx,%rax,1),%xmm0 97 jnz L$dec_loop2 98 99 .byte 102,15,56,222,209 100 .byte 102,15,56,222,217 101 .byte 
102,15,56,223,208 102 .byte 102,15,56,223,216 103 .byte 0xf3,0xc3 104 105 106 .p2align 4 107 _aesni_encrypt3: 108 movups (%rcx),%xmm0 109 shll $4,%eax 110 movups 16(%rcx),%xmm1 111 xorps %xmm0,%xmm2 112 xorps %xmm0,%xmm3 113 xorps %xmm0,%xmm4 114 movups 32(%rcx),%xmm0 115 leaq 32(%rcx,%rax,1),%rcx 116 negq %rax 117 addq $16,%rax 118 119 L$enc_loop3: 120 .byte 102,15,56,220,209 121 .byte 102,15,56,220,217 122 .byte 102,15,56,220,225 123 movups (%rcx,%rax,1),%xmm1 124 addq $32,%rax 125 .byte 102,15,56,220,208 126 .byte 102,15,56,220,216 127 .byte 102,15,56,220,224 128 movups -16(%rcx,%rax,1),%xmm0 129 jnz L$enc_loop3 130 131 .byte 102,15,56,220,209 132 .byte 102,15,56,220,217 133 .byte 102,15,56,220,225 134 .byte 102,15,56,221,208 135 .byte 102,15,56,221,216 136 .byte 102,15,56,221,224 137 .byte 0xf3,0xc3 138 139 140 .p2align 4 141 _aesni_decrypt3: 142 movups (%rcx),%xmm0 143 shll $4,%eax 144 movups 16(%rcx),%xmm1 145 xorps %xmm0,%xmm2 146 xorps %xmm0,%xmm3 147 xorps %xmm0,%xmm4 148 movups 32(%rcx),%xmm0 149 leaq 32(%rcx,%rax,1),%rcx 150 negq %rax 151 addq $16,%rax 152 153 L$dec_loop3: 154 .byte 102,15,56,222,209 155 .byte 102,15,56,222,217 156 .byte 102,15,56,222,225 157 movups (%rcx,%rax,1),%xmm1 158 addq $32,%rax 159 .byte 102,15,56,222,208 160 .byte 102,15,56,222,216 161 .byte 102,15,56,222,224 162 movups -16(%rcx,%rax,1),%xmm0 163 jnz L$dec_loop3 164 165 .byte 102,15,56,222,209 166 .byte 102,15,56,222,217 167 .byte 102,15,56,222,225 168 .byte 102,15,56,223,208 169 .byte 102,15,56,223,216 170 .byte 102,15,56,223,224 171 .byte 0xf3,0xc3 172 173 174 .p2align 4 175 _aesni_encrypt4: 176 movups (%rcx),%xmm0 177 shll $4,%eax 178 movups 16(%rcx),%xmm1 179 xorps %xmm0,%xmm2 180 xorps %xmm0,%xmm3 181 xorps %xmm0,%xmm4 182 xorps %xmm0,%xmm5 183 movups 32(%rcx),%xmm0 184 leaq 32(%rcx,%rax,1),%rcx 185 negq %rax 186 .byte 0x0f,0x1f,0x00 187 addq $16,%rax 188 189 L$enc_loop4: 190 .byte 102,15,56,220,209 191 .byte 102,15,56,220,217 192 .byte 102,15,56,220,225 193 .byte 
102,15,56,220,233 194 movups (%rcx,%rax,1),%xmm1 195 addq $32,%rax 196 .byte 102,15,56,220,208 197 .byte 102,15,56,220,216 198 .byte 102,15,56,220,224 199 .byte 102,15,56,220,232 200 movups -16(%rcx,%rax,1),%xmm0 201 jnz L$enc_loop4 202 203 .byte 102,15,56,220,209 204 .byte 102,15,56,220,217 205 .byte 102,15,56,220,225 206 .byte 102,15,56,220,233 207 .byte 102,15,56,221,208 208 .byte 102,15,56,221,216 209 .byte 102,15,56,221,224 210 .byte 102,15,56,221,232 211 .byte 0xf3,0xc3 212 213 214 .p2align 4 215 _aesni_decrypt4: 216 movups (%rcx),%xmm0 217 shll $4,%eax 218 movups 16(%rcx),%xmm1 219 xorps %xmm0,%xmm2 220 xorps %xmm0,%xmm3 221 xorps %xmm0,%xmm4 222 xorps %xmm0,%xmm5 223 movups 32(%rcx),%xmm0 224 leaq 32(%rcx,%rax,1),%rcx 225 negq %rax 226 .byte 0x0f,0x1f,0x00 227 addq $16,%rax 228 229 L$dec_loop4: 230 .byte 102,15,56,222,209 231 .byte 102,15,56,222,217 232 .byte 102,15,56,222,225 233 .byte 102,15,56,222,233 234 movups (%rcx,%rax,1),%xmm1 235 addq $32,%rax 236 .byte 102,15,56,222,208 237 .byte 102,15,56,222,216 238 .byte 102,15,56,222,224 239 .byte 102,15,56,222,232 240 movups -16(%rcx,%rax,1),%xmm0 241 jnz L$dec_loop4 242 243 .byte 102,15,56,222,209 244 .byte 102,15,56,222,217 245 .byte 102,15,56,222,225 246 .byte 102,15,56,222,233 247 .byte 102,15,56,223,208 248 .byte 102,15,56,223,216 249 .byte 102,15,56,223,224 250 .byte 102,15,56,223,232 251 .byte 0xf3,0xc3 252 253 254 .p2align 4 255 _aesni_encrypt6: 256 movups (%rcx),%xmm0 257 shll $4,%eax 258 movups 16(%rcx),%xmm1 259 xorps %xmm0,%xmm2 260 pxor %xmm0,%xmm3 261 pxor %xmm0,%xmm4 262 .byte 102,15,56,220,209 263 leaq 32(%rcx,%rax,1),%rcx 264 negq %rax 265 .byte 102,15,56,220,217 266 pxor %xmm0,%xmm5 267 pxor %xmm0,%xmm6 268 .byte 102,15,56,220,225 269 pxor %xmm0,%xmm7 270 addq $16,%rax 271 .byte 102,15,56,220,233 272 .byte 102,15,56,220,241 273 .byte 102,15,56,220,249 274 movups -16(%rcx,%rax,1),%xmm0 275 jmp L$enc_loop6_enter 276 .p2align 4 277 L$enc_loop6: 278 .byte 102,15,56,220,209 279 .byte 
102,15,56,220,217 280 .byte 102,15,56,220,225 281 .byte 102,15,56,220,233 282 .byte 102,15,56,220,241 283 .byte 102,15,56,220,249 284 L$enc_loop6_enter: 285 movups (%rcx,%rax,1),%xmm1 286 addq $32,%rax 287 .byte 102,15,56,220,208 288 .byte 102,15,56,220,216 289 .byte 102,15,56,220,224 290 .byte 102,15,56,220,232 291 .byte 102,15,56,220,240 292 .byte 102,15,56,220,248 293 movups -16(%rcx,%rax,1),%xmm0 294 jnz L$enc_loop6 295 296 .byte 102,15,56,220,209 297 .byte 102,15,56,220,217 298 .byte 102,15,56,220,225 299 .byte 102,15,56,220,233 300 .byte 102,15,56,220,241 301 .byte 102,15,56,220,249 302 .byte 102,15,56,221,208 303 .byte 102,15,56,221,216 304 .byte 102,15,56,221,224 305 .byte 102,15,56,221,232 306 .byte 102,15,56,221,240 307 .byte 102,15,56,221,248 308 .byte 0xf3,0xc3 309 310 311 .p2align 4 312 _aesni_decrypt6: 313 movups (%rcx),%xmm0 314 shll $4,%eax 315 movups 16(%rcx),%xmm1 316 xorps %xmm0,%xmm2 317 pxor %xmm0,%xmm3 318 pxor %xmm0,%xmm4 319 .byte 102,15,56,222,209 320 leaq 32(%rcx,%rax,1),%rcx 321 negq %rax 322 .byte 102,15,56,222,217 323 pxor %xmm0,%xmm5 324 pxor %xmm0,%xmm6 325 .byte 102,15,56,222,225 326 pxor %xmm0,%xmm7 327 addq $16,%rax 328 .byte 102,15,56,222,233 329 .byte 102,15,56,222,241 330 .byte 102,15,56,222,249 331 movups -16(%rcx,%rax,1),%xmm0 332 jmp L$dec_loop6_enter 333 .p2align 4 334 L$dec_loop6: 335 .byte 102,15,56,222,209 336 .byte 102,15,56,222,217 337 .byte 102,15,56,222,225 338 .byte 102,15,56,222,233 339 .byte 102,15,56,222,241 340 .byte 102,15,56,222,249 341 L$dec_loop6_enter: 342 movups (%rcx,%rax,1),%xmm1 343 addq $32,%rax 344 .byte 102,15,56,222,208 345 .byte 102,15,56,222,216 346 .byte 102,15,56,222,224 347 .byte 102,15,56,222,232 348 .byte 102,15,56,222,240 349 .byte 102,15,56,222,248 350 movups -16(%rcx,%rax,1),%xmm0 351 jnz L$dec_loop6 352 353 .byte 102,15,56,222,209 354 .byte 102,15,56,222,217 355 .byte 102,15,56,222,225 356 .byte 102,15,56,222,233 357 .byte 102,15,56,222,241 358 .byte 102,15,56,222,249 359 .byte 
102,15,56,223,208 360 .byte 102,15,56,223,216 361 .byte 102,15,56,223,224 362 .byte 102,15,56,223,232 363 .byte 102,15,56,223,240 364 .byte 102,15,56,223,248 365 .byte 0xf3,0xc3 366 367 368 .p2align 4 369 _aesni_encrypt8: 370 movups (%rcx),%xmm0 371 shll $4,%eax 372 movups 16(%rcx),%xmm1 373 xorps %xmm0,%xmm2 374 xorps %xmm0,%xmm3 375 pxor %xmm0,%xmm4 376 pxor %xmm0,%xmm5 377 pxor %xmm0,%xmm6 378 leaq 32(%rcx,%rax,1),%rcx 379 negq %rax 380 .byte 102,15,56,220,209 381 addq $16,%rax 382 pxor %xmm0,%xmm7 383 .byte 102,15,56,220,217 384 pxor %xmm0,%xmm8 385 pxor %xmm0,%xmm9 386 .byte 102,15,56,220,225 387 .byte 102,15,56,220,233 388 .byte 102,15,56,220,241 389 .byte 102,15,56,220,249 390 .byte 102,68,15,56,220,193 391 .byte 102,68,15,56,220,201 392 movups -16(%rcx,%rax,1),%xmm0 393 jmp L$enc_loop8_enter 394 .p2align 4 395 L$enc_loop8: 396 .byte 102,15,56,220,209 397 .byte 102,15,56,220,217 398 .byte 102,15,56,220,225 399 .byte 102,15,56,220,233 400 .byte 102,15,56,220,241 401 .byte 102,15,56,220,249 402 .byte 102,68,15,56,220,193 403 .byte 102,68,15,56,220,201 404 L$enc_loop8_enter: 405 movups (%rcx,%rax,1),%xmm1 406 addq $32,%rax 407 .byte 102,15,56,220,208 408 .byte 102,15,56,220,216 409 .byte 102,15,56,220,224 410 .byte 102,15,56,220,232 411 .byte 102,15,56,220,240 412 .byte 102,15,56,220,248 413 .byte 102,68,15,56,220,192 414 .byte 102,68,15,56,220,200 415 movups -16(%rcx,%rax,1),%xmm0 416 jnz L$enc_loop8 417 418 .byte 102,15,56,220,209 419 .byte 102,15,56,220,217 420 .byte 102,15,56,220,225 421 .byte 102,15,56,220,233 422 .byte 102,15,56,220,241 423 .byte 102,15,56,220,249 424 .byte 102,68,15,56,220,193 425 .byte 102,68,15,56,220,201 426 .byte 102,15,56,221,208 427 .byte 102,15,56,221,216 428 .byte 102,15,56,221,224 429 .byte 102,15,56,221,232 430 .byte 102,15,56,221,240 431 .byte 102,15,56,221,248 432 .byte 102,68,15,56,221,192 433 .byte 102,68,15,56,221,200 434 .byte 0xf3,0xc3 435 436 437 .p2align 4 438 _aesni_decrypt8: 439 movups (%rcx),%xmm0 440 shll $4,%eax 
441 movups 16(%rcx),%xmm1 442 xorps %xmm0,%xmm2 443 xorps %xmm0,%xmm3 444 pxor %xmm0,%xmm4 445 pxor %xmm0,%xmm5 446 pxor %xmm0,%xmm6 447 leaq 32(%rcx,%rax,1),%rcx 448 negq %rax 449 .byte 102,15,56,222,209 450 addq $16,%rax 451 pxor %xmm0,%xmm7 452 .byte 102,15,56,222,217 453 pxor %xmm0,%xmm8 454 pxor %xmm0,%xmm9 455 .byte 102,15,56,222,225 456 .byte 102,15,56,222,233 457 .byte 102,15,56,222,241 458 .byte 102,15,56,222,249 459 .byte 102,68,15,56,222,193 460 .byte 102,68,15,56,222,201 461 movups -16(%rcx,%rax,1),%xmm0 462 jmp L$dec_loop8_enter 463 .p2align 4 464 L$dec_loop8: 465 .byte 102,15,56,222,209 466 .byte 102,15,56,222,217 467 .byte 102,15,56,222,225 468 .byte 102,15,56,222,233 469 .byte 102,15,56,222,241 470 .byte 102,15,56,222,249 471 .byte 102,68,15,56,222,193 472 .byte 102,68,15,56,222,201 473 L$dec_loop8_enter: 474 movups (%rcx,%rax,1),%xmm1 475 addq $32,%rax 476 .byte 102,15,56,222,208 477 .byte 102,15,56,222,216 478 .byte 102,15,56,222,224 479 .byte 102,15,56,222,232 480 .byte 102,15,56,222,240 481 .byte 102,15,56,222,248 482 .byte 102,68,15,56,222,192 483 .byte 102,68,15,56,222,200 484 movups -16(%rcx,%rax,1),%xmm0 485 jnz L$dec_loop8 486 487 .byte 102,15,56,222,209 488 .byte 102,15,56,222,217 489 .byte 102,15,56,222,225 490 .byte 102,15,56,222,233 491 .byte 102,15,56,222,241 492 .byte 102,15,56,222,249 493 .byte 102,68,15,56,222,193 494 .byte 102,68,15,56,222,201 495 .byte 102,15,56,223,208 496 .byte 102,15,56,223,216 497 .byte 102,15,56,223,224 498 .byte 102,15,56,223,232 499 .byte 102,15,56,223,240 500 .byte 102,15,56,223,248 501 .byte 102,68,15,56,223,192 502 .byte 102,68,15,56,223,200 503 .byte 0xf3,0xc3 504 505 .globl _aesni_ecb_encrypt 506 .private_extern _aesni_ecb_encrypt 507 508 .p2align 4 509 _aesni_ecb_encrypt: 510 andq $-16,%rdx 511 jz L$ecb_ret 512 513 movl 240(%rcx),%eax 514 movups (%rcx),%xmm0 515 movq %rcx,%r11 516 movl %eax,%r10d 517 testl %r8d,%r8d 518 jz L$ecb_decrypt 519 520 cmpq $128,%rdx 521 jb L$ecb_enc_tail 522 523 movdqu 
(%rdi),%xmm2 524 movdqu 16(%rdi),%xmm3 525 movdqu 32(%rdi),%xmm4 526 movdqu 48(%rdi),%xmm5 527 movdqu 64(%rdi),%xmm6 528 movdqu 80(%rdi),%xmm7 529 movdqu 96(%rdi),%xmm8 530 movdqu 112(%rdi),%xmm9 531 leaq 128(%rdi),%rdi 532 subq $128,%rdx 533 jmp L$ecb_enc_loop8_enter 534 .p2align 4 535 L$ecb_enc_loop8: 536 movups %xmm2,(%rsi) 537 movq %r11,%rcx 538 movdqu (%rdi),%xmm2 539 movl %r10d,%eax 540 movups %xmm3,16(%rsi) 541 movdqu 16(%rdi),%xmm3 542 movups %xmm4,32(%rsi) 543 movdqu 32(%rdi),%xmm4 544 movups %xmm5,48(%rsi) 545 movdqu 48(%rdi),%xmm5 546 movups %xmm6,64(%rsi) 547 movdqu 64(%rdi),%xmm6 548 movups %xmm7,80(%rsi) 549 movdqu 80(%rdi),%xmm7 550 movups %xmm8,96(%rsi) 551 movdqu 96(%rdi),%xmm8 552 movups %xmm9,112(%rsi) 553 leaq 128(%rsi),%rsi 554 movdqu 112(%rdi),%xmm9 555 leaq 128(%rdi),%rdi 556 L$ecb_enc_loop8_enter: 557 558 call _aesni_encrypt8 559 560 subq $128,%rdx 561 jnc L$ecb_enc_loop8 562 563 movups %xmm2,(%rsi) 564 movq %r11,%rcx 565 movups %xmm3,16(%rsi) 566 movl %r10d,%eax 567 movups %xmm4,32(%rsi) 568 movups %xmm5,48(%rsi) 569 movups %xmm6,64(%rsi) 570 movups %xmm7,80(%rsi) 571 movups %xmm8,96(%rsi) 572 movups %xmm9,112(%rsi) 573 leaq 128(%rsi),%rsi 574 addq $128,%rdx 575 jz L$ecb_ret 576 577 L$ecb_enc_tail: 578 movups (%rdi),%xmm2 579 cmpq $32,%rdx 580 jb L$ecb_enc_one 581 movups 16(%rdi),%xmm3 582 je L$ecb_enc_two 583 movups 32(%rdi),%xmm4 584 cmpq $64,%rdx 585 jb L$ecb_enc_three 586 movups 48(%rdi),%xmm5 587 je L$ecb_enc_four 588 movups 64(%rdi),%xmm6 589 cmpq $96,%rdx 590 jb L$ecb_enc_five 591 movups 80(%rdi),%xmm7 592 je L$ecb_enc_six 593 movdqu 96(%rdi),%xmm8 594 call _aesni_encrypt8 595 movups %xmm2,(%rsi) 596 movups %xmm3,16(%rsi) 597 movups %xmm4,32(%rsi) 598 movups %xmm5,48(%rsi) 599 movups %xmm6,64(%rsi) 600 movups %xmm7,80(%rsi) 601 movups %xmm8,96(%rsi) 602 jmp L$ecb_ret 603 .p2align 4 604 L$ecb_enc_one: 605 movups (%rcx),%xmm0 606 movups 16(%rcx),%xmm1 607 leaq 32(%rcx),%rcx 608 xorps %xmm0,%xmm2 609 L$oop_enc1_3: 610 .byte 
102,15,56,220,209 611 decl %eax 612 movups (%rcx),%xmm1 613 leaq 16(%rcx),%rcx 614 jnz L$oop_enc1_3 615 .byte 102,15,56,221,209 616 movups %xmm2,(%rsi) 617 jmp L$ecb_ret 618 .p2align 4 619 L$ecb_enc_two: 620 call _aesni_encrypt2 621 movups %xmm2,(%rsi) 622 movups %xmm3,16(%rsi) 623 jmp L$ecb_ret 624 .p2align 4 625 L$ecb_enc_three: 626 call _aesni_encrypt3 627 movups %xmm2,(%rsi) 628 movups %xmm3,16(%rsi) 629 movups %xmm4,32(%rsi) 630 jmp L$ecb_ret 631 .p2align 4 632 L$ecb_enc_four: 633 call _aesni_encrypt4 634 movups %xmm2,(%rsi) 635 movups %xmm3,16(%rsi) 636 movups %xmm4,32(%rsi) 637 movups %xmm5,48(%rsi) 638 jmp L$ecb_ret 639 .p2align 4 640 L$ecb_enc_five: 641 xorps %xmm7,%xmm7 642 call _aesni_encrypt6 643 movups %xmm2,(%rsi) 644 movups %xmm3,16(%rsi) 645 movups %xmm4,32(%rsi) 646 movups %xmm5,48(%rsi) 647 movups %xmm6,64(%rsi) 648 jmp L$ecb_ret 649 .p2align 4 650 L$ecb_enc_six: 651 call _aesni_encrypt6 652 movups %xmm2,(%rsi) 653 movups %xmm3,16(%rsi) 654 movups %xmm4,32(%rsi) 655 movups %xmm5,48(%rsi) 656 movups %xmm6,64(%rsi) 657 movups %xmm7,80(%rsi) 658 jmp L$ecb_ret 659 660 .p2align 4 661 L$ecb_decrypt: 662 cmpq $128,%rdx 663 jb L$ecb_dec_tail 664 665 movdqu (%rdi),%xmm2 666 movdqu 16(%rdi),%xmm3 667 movdqu 32(%rdi),%xmm4 668 movdqu 48(%rdi),%xmm5 669 movdqu 64(%rdi),%xmm6 670 movdqu 80(%rdi),%xmm7 671 movdqu 96(%rdi),%xmm8 672 movdqu 112(%rdi),%xmm9 673 leaq 128(%rdi),%rdi 674 subq $128,%rdx 675 jmp L$ecb_dec_loop8_enter 676 .p2align 4 677 L$ecb_dec_loop8: 678 movups %xmm2,(%rsi) 679 movq %r11,%rcx 680 movdqu (%rdi),%xmm2 681 movl %r10d,%eax 682 movups %xmm3,16(%rsi) 683 movdqu 16(%rdi),%xmm3 684 movups %xmm4,32(%rsi) 685 movdqu 32(%rdi),%xmm4 686 movups %xmm5,48(%rsi) 687 movdqu 48(%rdi),%xmm5 688 movups %xmm6,64(%rsi) 689 movdqu 64(%rdi),%xmm6 690 movups %xmm7,80(%rsi) 691 movdqu 80(%rdi),%xmm7 692 movups %xmm8,96(%rsi) 693 movdqu 96(%rdi),%xmm8 694 movups %xmm9,112(%rsi) 695 leaq 128(%rsi),%rsi 696 movdqu 112(%rdi),%xmm9 697 leaq 128(%rdi),%rdi 698 
L$ecb_dec_loop8_enter: 699 700 call _aesni_decrypt8 701 702 movups (%r11),%xmm0 703 subq $128,%rdx 704 jnc L$ecb_dec_loop8 705 706 movups %xmm2,(%rsi) 707 movq %r11,%rcx 708 movups %xmm3,16(%rsi) 709 movl %r10d,%eax 710 movups %xmm4,32(%rsi) 711 movups %xmm5,48(%rsi) 712 movups %xmm6,64(%rsi) 713 movups %xmm7,80(%rsi) 714 movups %xmm8,96(%rsi) 715 movups %xmm9,112(%rsi) 716 leaq 128(%rsi),%rsi 717 addq $128,%rdx 718 jz L$ecb_ret 719 720 L$ecb_dec_tail: 721 movups (%rdi),%xmm2 722 cmpq $32,%rdx 723 jb L$ecb_dec_one 724 movups 16(%rdi),%xmm3 725 je L$ecb_dec_two 726 movups 32(%rdi),%xmm4 727 cmpq $64,%rdx 728 jb L$ecb_dec_three 729 movups 48(%rdi),%xmm5 730 je L$ecb_dec_four 731 movups 64(%rdi),%xmm6 732 cmpq $96,%rdx 733 jb L$ecb_dec_five 734 movups 80(%rdi),%xmm7 735 je L$ecb_dec_six 736 movups 96(%rdi),%xmm8 737 movups (%rcx),%xmm0 738 call _aesni_decrypt8 739 movups %xmm2,(%rsi) 740 movups %xmm3,16(%rsi) 741 movups %xmm4,32(%rsi) 742 movups %xmm5,48(%rsi) 743 movups %xmm6,64(%rsi) 744 movups %xmm7,80(%rsi) 745 movups %xmm8,96(%rsi) 746 jmp L$ecb_ret 747 .p2align 4 748 L$ecb_dec_one: 749 movups (%rcx),%xmm0 750 movups 16(%rcx),%xmm1 751 leaq 32(%rcx),%rcx 752 xorps %xmm0,%xmm2 753 L$oop_dec1_4: 754 .byte 102,15,56,222,209 755 decl %eax 756 movups (%rcx),%xmm1 757 leaq 16(%rcx),%rcx 758 jnz L$oop_dec1_4 759 .byte 102,15,56,223,209 760 movups %xmm2,(%rsi) 761 jmp L$ecb_ret 762 .p2align 4 763 L$ecb_dec_two: 764 call _aesni_decrypt2 765 movups %xmm2,(%rsi) 766 movups %xmm3,16(%rsi) 767 jmp L$ecb_ret 768 .p2align 4 769 L$ecb_dec_three: 770 call _aesni_decrypt3 771 movups %xmm2,(%rsi) 772 movups %xmm3,16(%rsi) 773 movups %xmm4,32(%rsi) 774 jmp L$ecb_ret 775 .p2align 4 776 L$ecb_dec_four: 777 call _aesni_decrypt4 778 movups %xmm2,(%rsi) 779 movups %xmm3,16(%rsi) 780 movups %xmm4,32(%rsi) 781 movups %xmm5,48(%rsi) 782 jmp L$ecb_ret 783 .p2align 4 784 L$ecb_dec_five: 785 xorps %xmm7,%xmm7 786 call _aesni_decrypt6 787 movups %xmm2,(%rsi) 788 movups %xmm3,16(%rsi) 789 movups 
%xmm4,32(%rsi) 790 movups %xmm5,48(%rsi) 791 movups %xmm6,64(%rsi) 792 jmp L$ecb_ret 793 .p2align 4 794 L$ecb_dec_six: 795 call _aesni_decrypt6 796 movups %xmm2,(%rsi) 797 movups %xmm3,16(%rsi) 798 movups %xmm4,32(%rsi) 799 movups %xmm5,48(%rsi) 800 movups %xmm6,64(%rsi) 801 movups %xmm7,80(%rsi) 802 803 L$ecb_ret: 804 .byte 0xf3,0xc3 805 806 .globl _aesni_ccm64_encrypt_blocks 807 .private_extern _aesni_ccm64_encrypt_blocks 808 809 .p2align 4 810 _aesni_ccm64_encrypt_blocks: 811 movl 240(%rcx),%eax 812 movdqu (%r8),%xmm6 813 movdqa L$increment64(%rip),%xmm9 814 movdqa L$bswap_mask(%rip),%xmm7 815 816 shll $4,%eax 817 movl $16,%r10d 818 leaq 0(%rcx),%r11 819 movdqu (%r9),%xmm3 820 movdqa %xmm6,%xmm2 821 leaq 32(%rcx,%rax,1),%rcx 822 .byte 102,15,56,0,247 823 subq %rax,%r10 824 jmp L$ccm64_enc_outer 825 .p2align 4 826 L$ccm64_enc_outer: 827 movups (%r11),%xmm0 828 movq %r10,%rax 829 movups (%rdi),%xmm8 830 831 xorps %xmm0,%xmm2 832 movups 16(%r11),%xmm1 833 xorps %xmm8,%xmm0 834 xorps %xmm0,%xmm3 835 movups 32(%r11),%xmm0 836 837 L$ccm64_enc2_loop: 838 .byte 102,15,56,220,209 839 .byte 102,15,56,220,217 840 movups (%rcx,%rax,1),%xmm1 841 addq $32,%rax 842 .byte 102,15,56,220,208 843 .byte 102,15,56,220,216 844 movups -16(%rcx,%rax,1),%xmm0 845 jnz L$ccm64_enc2_loop 846 .byte 102,15,56,220,209 847 .byte 102,15,56,220,217 848 paddq %xmm9,%xmm6 849 decq %rdx 850 .byte 102,15,56,221,208 851 .byte 102,15,56,221,216 852 853 leaq 16(%rdi),%rdi 854 xorps %xmm2,%xmm8 855 movdqa %xmm6,%xmm2 856 movups %xmm8,(%rsi) 857 .byte 102,15,56,0,215 858 leaq 16(%rsi),%rsi 859 jnz L$ccm64_enc_outer 860 861 movups %xmm3,(%r9) 862 .byte 0xf3,0xc3 863 864 .globl _aesni_ccm64_decrypt_blocks 865 .private_extern _aesni_ccm64_decrypt_blocks 866 867 .p2align 4 868 _aesni_ccm64_decrypt_blocks: 869 movl 240(%rcx),%eax 870 movups (%r8),%xmm6 871 movdqu (%r9),%xmm3 872 movdqa L$increment64(%rip),%xmm9 873 movdqa L$bswap_mask(%rip),%xmm7 874 875 movaps %xmm6,%xmm2 876 movl %eax,%r10d 877 movq 
%rcx,%r11 878 .byte 102,15,56,0,247 879 movups (%rcx),%xmm0 880 movups 16(%rcx),%xmm1 881 leaq 32(%rcx),%rcx 882 xorps %xmm0,%xmm2 883 L$oop_enc1_5: 884 .byte 102,15,56,220,209 885 decl %eax 886 movups (%rcx),%xmm1 887 leaq 16(%rcx),%rcx 888 jnz L$oop_enc1_5 889 .byte 102,15,56,221,209 890 shll $4,%r10d 891 movl $16,%eax 892 movups (%rdi),%xmm8 893 paddq %xmm9,%xmm6 894 leaq 16(%rdi),%rdi 895 subq %r10,%rax 896 leaq 32(%r11,%r10,1),%rcx 897 movq %rax,%r10 898 jmp L$ccm64_dec_outer 899 .p2align 4 900 L$ccm64_dec_outer: 901 xorps %xmm2,%xmm8 902 movdqa %xmm6,%xmm2 903 movups %xmm8,(%rsi) 904 leaq 16(%rsi),%rsi 905 .byte 102,15,56,0,215 906 907 subq $1,%rdx 908 jz L$ccm64_dec_break 909 910 movups (%r11),%xmm0 911 movq %r10,%rax 912 movups 16(%r11),%xmm1 913 xorps %xmm0,%xmm8 914 xorps %xmm0,%xmm2 915 xorps %xmm8,%xmm3 916 movups 32(%r11),%xmm0 917 jmp L$ccm64_dec2_loop 918 .p2align 4 919 L$ccm64_dec2_loop: 920 .byte 102,15,56,220,209 921 .byte 102,15,56,220,217 922 movups (%rcx,%rax,1),%xmm1 923 addq $32,%rax 924 .byte 102,15,56,220,208 925 .byte 102,15,56,220,216 926 movups -16(%rcx,%rax,1),%xmm0 927 jnz L$ccm64_dec2_loop 928 movups (%rdi),%xmm8 929 paddq %xmm9,%xmm6 930 .byte 102,15,56,220,209 931 .byte 102,15,56,220,217 932 .byte 102,15,56,221,208 933 .byte 102,15,56,221,216 934 leaq 16(%rdi),%rdi 935 jmp L$ccm64_dec_outer 936 937 .p2align 4 938 L$ccm64_dec_break: 939 940 movl 240(%r11),%eax 941 movups (%r11),%xmm0 942 movups 16(%r11),%xmm1 943 xorps %xmm0,%xmm8 944 leaq 32(%r11),%r11 945 xorps %xmm8,%xmm3 946 L$oop_enc1_6: 947 .byte 102,15,56,220,217 948 decl %eax 949 movups (%r11),%xmm1 950 leaq 16(%r11),%r11 951 jnz L$oop_enc1_6 952 .byte 102,15,56,221,217 953 movups %xmm3,(%r9) 954 .byte 0xf3,0xc3 955 956 .globl _aesni_ctr32_encrypt_blocks 957 .private_extern _aesni_ctr32_encrypt_blocks 958 959 .p2align 4 960 _aesni_ctr32_encrypt_blocks: 961 leaq (%rsp),%rax 962 pushq %rbp 963 subq $128,%rsp 964 andq $-16,%rsp 965 leaq -8(%rax),%rbp 966 967 cmpq $1,%rdx 968 je 
L$ctr32_one_shortcut 969 970 movdqu (%r8),%xmm2 971 movdqu (%rcx),%xmm0 972 movl 12(%r8),%r8d 973 pxor %xmm0,%xmm2 974 movl 12(%rcx),%r11d 975 movdqa %xmm2,0(%rsp) 976 bswapl %r8d 977 movdqa %xmm2,%xmm3 978 movdqa %xmm2,%xmm4 979 movdqa %xmm2,%xmm5 980 movdqa %xmm2,64(%rsp) 981 movdqa %xmm2,80(%rsp) 982 movdqa %xmm2,96(%rsp) 983 movq %rdx,%r10 984 movdqa %xmm2,112(%rsp) 985 986 leaq 1(%r8),%rax 987 leaq 2(%r8),%rdx 988 bswapl %eax 989 bswapl %edx 990 xorl %r11d,%eax 991 xorl %r11d,%edx 992 .byte 102,15,58,34,216,3 993 leaq 3(%r8),%rax 994 movdqa %xmm3,16(%rsp) 995 .byte 102,15,58,34,226,3 996 bswapl %eax 997 movq %r10,%rdx 998 leaq 4(%r8),%r10 999 movdqa %xmm4,32(%rsp) 1000 xorl %r11d,%eax 1001 bswapl %r10d 1002 .byte 102,15,58,34,232,3 1003 xorl %r11d,%r10d 1004 movdqa %xmm5,48(%rsp) 1005 leaq 5(%r8),%r9 1006 movl %r10d,64+12(%rsp) 1007 bswapl %r9d 1008 leaq 6(%r8),%r10 1009 movl 240(%rcx),%eax 1010 xorl %r11d,%r9d 1011 bswapl %r10d 1012 movl %r9d,80+12(%rsp) 1013 xorl %r11d,%r10d 1014 leaq 7(%r8),%r9 1015 movl %r10d,96+12(%rsp) 1016 bswapl %r9d 1017 movl _OPENSSL_ia32cap_P+4(%rip),%r10d 1018 xorl %r11d,%r9d 1019 andl $71303168,%r10d 1020 movl %r9d,112+12(%rsp) 1021 1022 movups 16(%rcx),%xmm1 1023 1024 movdqa 64(%rsp),%xmm6 1025 movdqa 80(%rsp),%xmm7 1026 1027 cmpq $8,%rdx 1028 jb L$ctr32_tail 1029 1030 subq $6,%rdx 1031 cmpl $4194304,%r10d 1032 je L$ctr32_6x 1033 1034 leaq 128(%rcx),%rcx 1035 subq $2,%rdx 1036 jmp L$ctr32_loop8 1037 1038 .p2align 4 1039 L$ctr32_6x: 1040 shll $4,%eax 1041 movl $48,%r10d 1042 bswapl %r11d 1043 leaq 32(%rcx,%rax,1),%rcx 1044 subq %rax,%r10 1045 jmp L$ctr32_loop6 1046 1047 .p2align 4 1048 L$ctr32_loop6: 1049 addl $6,%r8d 1050 movups -48(%rcx,%r10,1),%xmm0 1051 .byte 102,15,56,220,209 1052 movl %r8d,%eax 1053 xorl %r11d,%eax 1054 .byte 102,15,56,220,217 1055 .byte 0x0f,0x38,0xf1,0x44,0x24,12 1056 leal 1(%r8),%eax 1057 .byte 102,15,56,220,225 1058 xorl %r11d,%eax 1059 .byte 0x0f,0x38,0xf1,0x44,0x24,28 1060 .byte 102,15,56,220,233 1061 
leal 2(%r8),%eax 1062 xorl %r11d,%eax 1063 .byte 102,15,56,220,241 1064 .byte 0x0f,0x38,0xf1,0x44,0x24,44 1065 leal 3(%r8),%eax 1066 .byte 102,15,56,220,249 1067 movups -32(%rcx,%r10,1),%xmm1 1068 xorl %r11d,%eax 1069 1070 .byte 102,15,56,220,208 1071 .byte 0x0f,0x38,0xf1,0x44,0x24,60 1072 leal 4(%r8),%eax 1073 .byte 102,15,56,220,216 1074 xorl %r11d,%eax 1075 .byte 0x0f,0x38,0xf1,0x44,0x24,76 1076 .byte 102,15,56,220,224 1077 leal 5(%r8),%eax 1078 xorl %r11d,%eax 1079 .byte 102,15,56,220,232 1080 .byte 0x0f,0x38,0xf1,0x44,0x24,92 1081 movq %r10,%rax 1082 .byte 102,15,56,220,240 1083 .byte 102,15,56,220,248 1084 movups -16(%rcx,%r10,1),%xmm0 1085 1086 call L$enc_loop6 1087 1088 movdqu (%rdi),%xmm8 1089 movdqu 16(%rdi),%xmm9 1090 movdqu 32(%rdi),%xmm10 1091 movdqu 48(%rdi),%xmm11 1092 movdqu 64(%rdi),%xmm12 1093 movdqu 80(%rdi),%xmm13 1094 leaq 96(%rdi),%rdi 1095 movups -64(%rcx,%r10,1),%xmm1 1096 pxor %xmm2,%xmm8 1097 movaps 0(%rsp),%xmm2 1098 pxor %xmm3,%xmm9 1099 movaps 16(%rsp),%xmm3 1100 pxor %xmm4,%xmm10 1101 movaps 32(%rsp),%xmm4 1102 pxor %xmm5,%xmm11 1103 movaps 48(%rsp),%xmm5 1104 pxor %xmm6,%xmm12 1105 movaps 64(%rsp),%xmm6 1106 pxor %xmm7,%xmm13 1107 movaps 80(%rsp),%xmm7 1108 movdqu %xmm8,(%rsi) 1109 movdqu %xmm9,16(%rsi) 1110 movdqu %xmm10,32(%rsi) 1111 movdqu %xmm11,48(%rsi) 1112 movdqu %xmm12,64(%rsi) 1113 movdqu %xmm13,80(%rsi) 1114 leaq 96(%rsi),%rsi 1115 1116 subq $6,%rdx 1117 jnc L$ctr32_loop6 1118 1119 addq $6,%rdx 1120 jz L$ctr32_done 1121 1122 leal -48(%r10),%eax 1123 leaq -80(%rcx,%r10,1),%rcx 1124 negl %eax 1125 shrl $4,%eax 1126 jmp L$ctr32_tail 1127 1128 .p2align 5 1129 L$ctr32_loop8: 1130 addl $8,%r8d 1131 movdqa 96(%rsp),%xmm8 1132 .byte 102,15,56,220,209 1133 movl %r8d,%r9d 1134 movdqa 112(%rsp),%xmm9 1135 .byte 102,15,56,220,217 1136 bswapl %r9d 1137 movups 32-128(%rcx),%xmm0 1138 .byte 102,15,56,220,225 1139 xorl %r11d,%r9d 1140 nop 1141 .byte 102,15,56,220,233 1142 movl %r9d,0+12(%rsp) 1143 leaq 1(%r8),%r9 1144 .byte 
102,15,56,220,241 1145 .byte 102,15,56,220,249 1146 .byte 102,68,15,56,220,193 1147 .byte 102,68,15,56,220,201 1148 movups 48-128(%rcx),%xmm1 1149 bswapl %r9d 1150 .byte 102,15,56,220,208 1151 .byte 102,15,56,220,216 1152 xorl %r11d,%r9d 1153 .byte 0x66,0x90 1154 .byte 102,15,56,220,224 1155 .byte 102,15,56,220,232 1156 movl %r9d,16+12(%rsp) 1157 leaq 2(%r8),%r9 1158 .byte 102,15,56,220,240 1159 .byte 102,15,56,220,248 1160 .byte 102,68,15,56,220,192 1161 .byte 102,68,15,56,220,200 1162 movups 64-128(%rcx),%xmm0 1163 bswapl %r9d 1164 .byte 102,15,56,220,209 1165 .byte 102,15,56,220,217 1166 xorl %r11d,%r9d 1167 .byte 0x66,0x90 1168 .byte 102,15,56,220,225 1169 .byte 102,15,56,220,233 1170 movl %r9d,32+12(%rsp) 1171 leaq 3(%r8),%r9 1172 .byte 102,15,56,220,241 1173 .byte 102,15,56,220,249 1174 .byte 102,68,15,56,220,193 1175 .byte 102,68,15,56,220,201 1176 movups 80-128(%rcx),%xmm1 1177 bswapl %r9d 1178 .byte 102,15,56,220,208 1179 .byte 102,15,56,220,216 1180 xorl %r11d,%r9d 1181 .byte 0x66,0x90 1182 .byte 102,15,56,220,224 1183 .byte 102,15,56,220,232 1184 movl %r9d,48+12(%rsp) 1185 leaq 4(%r8),%r9 1186 .byte 102,15,56,220,240 1187 .byte 102,15,56,220,248 1188 .byte 102,68,15,56,220,192 1189 .byte 102,68,15,56,220,200 1190 movups 96-128(%rcx),%xmm0 1191 bswapl %r9d 1192 .byte 102,15,56,220,209 1193 .byte 102,15,56,220,217 1194 xorl %r11d,%r9d 1195 .byte 0x66,0x90 1196 .byte 102,15,56,220,225 1197 .byte 102,15,56,220,233 1198 movl %r9d,64+12(%rsp) 1199 leaq 5(%r8),%r9 1200 .byte 102,15,56,220,241 1201 .byte 102,15,56,220,249 1202 .byte 102,68,15,56,220,193 1203 .byte 102,68,15,56,220,201 1204 movups 112-128(%rcx),%xmm1 1205 bswapl %r9d 1206 .byte 102,15,56,220,208 1207 .byte 102,15,56,220,216 1208 xorl %r11d,%r9d 1209 .byte 0x66,0x90 1210 .byte 102,15,56,220,224 1211 .byte 102,15,56,220,232 1212 movl %r9d,80+12(%rsp) 1213 leaq 6(%r8),%r9 1214 .byte 102,15,56,220,240 1215 .byte 102,15,56,220,248 1216 .byte 102,68,15,56,220,192 1217 .byte 102,68,15,56,220,200 1218 
movups 128-128(%rcx),%xmm0 1219 bswapl %r9d 1220 .byte 102,15,56,220,209 1221 .byte 102,15,56,220,217 1222 xorl %r11d,%r9d 1223 .byte 0x66,0x90 1224 .byte 102,15,56,220,225 1225 .byte 102,15,56,220,233 1226 movl %r9d,96+12(%rsp) 1227 leaq 7(%r8),%r9 1228 .byte 102,15,56,220,241 1229 .byte 102,15,56,220,249 1230 .byte 102,68,15,56,220,193 1231 .byte 102,68,15,56,220,201 1232 movups 144-128(%rcx),%xmm1 1233 bswapl %r9d 1234 .byte 102,15,56,220,208 1235 .byte 102,15,56,220,216 1236 .byte 102,15,56,220,224 1237 xorl %r11d,%r9d 1238 movdqu 0(%rdi),%xmm10 1239 .byte 102,15,56,220,232 1240 movl %r9d,112+12(%rsp) 1241 cmpl $11,%eax 1242 .byte 102,15,56,220,240 1243 .byte 102,15,56,220,248 1244 .byte 102,68,15,56,220,192 1245 .byte 102,68,15,56,220,200 1246 movups 160-128(%rcx),%xmm0 1247 1248 jb L$ctr32_enc_done 1249 1250 .byte 102,15,56,220,209 1251 .byte 102,15,56,220,217 1252 .byte 102,15,56,220,225 1253 .byte 102,15,56,220,233 1254 .byte 102,15,56,220,241 1255 .byte 102,15,56,220,249 1256 .byte 102,68,15,56,220,193 1257 .byte 102,68,15,56,220,201 1258 movups 176-128(%rcx),%xmm1 1259 1260 .byte 102,15,56,220,208 1261 .byte 102,15,56,220,216 1262 .byte 102,15,56,220,224 1263 .byte 102,15,56,220,232 1264 .byte 102,15,56,220,240 1265 .byte 102,15,56,220,248 1266 .byte 102,68,15,56,220,192 1267 .byte 102,68,15,56,220,200 1268 movups 192-128(%rcx),%xmm0 1269 je L$ctr32_enc_done 1270 1271 .byte 102,15,56,220,209 1272 .byte 102,15,56,220,217 1273 .byte 102,15,56,220,225 1274 .byte 102,15,56,220,233 1275 .byte 102,15,56,220,241 1276 .byte 102,15,56,220,249 1277 .byte 102,68,15,56,220,193 1278 .byte 102,68,15,56,220,201 1279 movups 208-128(%rcx),%xmm1 1280 1281 .byte 102,15,56,220,208 1282 .byte 102,15,56,220,216 1283 .byte 102,15,56,220,224 1284 .byte 102,15,56,220,232 1285 .byte 102,15,56,220,240 1286 .byte 102,15,56,220,248 1287 .byte 102,68,15,56,220,192 1288 .byte 102,68,15,56,220,200 1289 movups 224-128(%rcx),%xmm0 1290 jmp L$ctr32_enc_done 1291 1292 .p2align 4 1293 
L$ctr32_enc_done: 1294 movdqu 16(%rdi),%xmm11 1295 pxor %xmm0,%xmm10 1296 movdqu 32(%rdi),%xmm12 1297 pxor %xmm0,%xmm11 1298 movdqu 48(%rdi),%xmm13 1299 pxor %xmm0,%xmm12 1300 movdqu 64(%rdi),%xmm14 1301 pxor %xmm0,%xmm13 1302 movdqu 80(%rdi),%xmm15 1303 pxor %xmm0,%xmm14 1304 pxor %xmm0,%xmm15 1305 .byte 102,15,56,220,209 1306 .byte 102,15,56,220,217 1307 .byte 102,15,56,220,225 1308 .byte 102,15,56,220,233 1309 .byte 102,15,56,220,241 1310 .byte 102,15,56,220,249 1311 .byte 102,68,15,56,220,193 1312 .byte 102,68,15,56,220,201 1313 movdqu 96(%rdi),%xmm1 1314 leaq 128(%rdi),%rdi 1315 1316 .byte 102,65,15,56,221,210 1317 pxor %xmm0,%xmm1 1318 movdqu 112-128(%rdi),%xmm10 1319 .byte 102,65,15,56,221,219 1320 pxor %xmm0,%xmm10 1321 movdqa 0(%rsp),%xmm11 1322 .byte 102,65,15,56,221,228 1323 .byte 102,65,15,56,221,237 1324 movdqa 16(%rsp),%xmm12 1325 movdqa 32(%rsp),%xmm13 1326 .byte 102,65,15,56,221,246 1327 .byte 102,65,15,56,221,255 1328 movdqa 48(%rsp),%xmm14 1329 movdqa 64(%rsp),%xmm15 1330 .byte 102,68,15,56,221,193 1331 movdqa 80(%rsp),%xmm0 1332 movups 16-128(%rcx),%xmm1 1333 .byte 102,69,15,56,221,202 1334 1335 movups %xmm2,(%rsi) 1336 movdqa %xmm11,%xmm2 1337 movups %xmm3,16(%rsi) 1338 movdqa %xmm12,%xmm3 1339 movups %xmm4,32(%rsi) 1340 movdqa %xmm13,%xmm4 1341 movups %xmm5,48(%rsi) 1342 movdqa %xmm14,%xmm5 1343 movups %xmm6,64(%rsi) 1344 movdqa %xmm15,%xmm6 1345 movups %xmm7,80(%rsi) 1346 movdqa %xmm0,%xmm7 1347 movups %xmm8,96(%rsi) 1348 movups %xmm9,112(%rsi) 1349 leaq 128(%rsi),%rsi 1350 1351 subq $8,%rdx 1352 jnc L$ctr32_loop8 1353 1354 addq $8,%rdx 1355 jz L$ctr32_done 1356 leaq -128(%rcx),%rcx 1357 1358 L$ctr32_tail: 1359 leaq 16(%rcx),%rcx 1360 cmpq $4,%rdx 1361 jb L$ctr32_loop3 1362 je L$ctr32_loop4 1363 1364 shll $4,%eax 1365 movdqa 96(%rsp),%xmm8 1366 pxor %xmm9,%xmm9 1367 1368 movups 16(%rcx),%xmm0 1369 .byte 102,15,56,220,209 1370 .byte 102,15,56,220,217 1371 leaq 32-16(%rcx,%rax,1),%rcx 1372 negq %rax 1373 .byte 102,15,56,220,225 1374 addq 
$16,%rax 1375 movups (%rdi),%xmm10 1376 .byte 102,15,56,220,233 1377 .byte 102,15,56,220,241 1378 movups 16(%rdi),%xmm11 1379 movups 32(%rdi),%xmm12 1380 .byte 102,15,56,220,249 1381 .byte 102,68,15,56,220,193 1382 1383 call L$enc_loop8_enter 1384 1385 movdqu 48(%rdi),%xmm13 1386 pxor %xmm10,%xmm2 1387 movdqu 64(%rdi),%xmm10 1388 pxor %xmm11,%xmm3 1389 movdqu %xmm2,(%rsi) 1390 pxor %xmm12,%xmm4 1391 movdqu %xmm3,16(%rsi) 1392 pxor %xmm13,%xmm5 1393 movdqu %xmm4,32(%rsi) 1394 pxor %xmm10,%xmm6 1395 movdqu %xmm5,48(%rsi) 1396 movdqu %xmm6,64(%rsi) 1397 cmpq $6,%rdx 1398 jb L$ctr32_done 1399 1400 movups 80(%rdi),%xmm11 1401 xorps %xmm11,%xmm7 1402 movups %xmm7,80(%rsi) 1403 je L$ctr32_done 1404 1405 movups 96(%rdi),%xmm12 1406 xorps %xmm12,%xmm8 1407 movups %xmm8,96(%rsi) 1408 jmp L$ctr32_done 1409 1410 .p2align 5 1411 L$ctr32_loop4: 1412 .byte 102,15,56,220,209 1413 leaq 16(%rcx),%rcx 1414 decl %eax 1415 .byte 102,15,56,220,217 1416 .byte 102,15,56,220,225 1417 .byte 102,15,56,220,233 1418 movups (%rcx),%xmm1 1419 jnz L$ctr32_loop4 1420 .byte 102,15,56,221,209 1421 .byte 102,15,56,221,217 1422 movups (%rdi),%xmm10 1423 movups 16(%rdi),%xmm11 1424 .byte 102,15,56,221,225 1425 .byte 102,15,56,221,233 1426 movups 32(%rdi),%xmm12 1427 movups 48(%rdi),%xmm13 1428 1429 xorps %xmm10,%xmm2 1430 movups %xmm2,(%rsi) 1431 xorps %xmm11,%xmm3 1432 movups %xmm3,16(%rsi) 1433 pxor %xmm12,%xmm4 1434 movdqu %xmm4,32(%rsi) 1435 pxor %xmm13,%xmm5 1436 movdqu %xmm5,48(%rsi) 1437 jmp L$ctr32_done 1438 1439 .p2align 5 1440 L$ctr32_loop3: 1441 .byte 102,15,56,220,209 1442 leaq 16(%rcx),%rcx 1443 decl %eax 1444 .byte 102,15,56,220,217 1445 .byte 102,15,56,220,225 1446 movups (%rcx),%xmm1 1447 jnz L$ctr32_loop3 1448 .byte 102,15,56,221,209 1449 .byte 102,15,56,221,217 1450 .byte 102,15,56,221,225 1451 1452 movups (%rdi),%xmm10 1453 xorps %xmm10,%xmm2 1454 movups %xmm2,(%rsi) 1455 cmpq $2,%rdx 1456 jb L$ctr32_done 1457 1458 movups 16(%rdi),%xmm11 1459 xorps %xmm11,%xmm3 1460 movups 
%xmm3,16(%rsi) 1461 je L$ctr32_done 1462 1463 movups 32(%rdi),%xmm12 1464 xorps %xmm12,%xmm4 1465 movups %xmm4,32(%rsi) 1466 jmp L$ctr32_done 1467 1468 .p2align 4 1469 L$ctr32_one_shortcut: 1470 movups (%r8),%xmm2 1471 movups (%rdi),%xmm10 1472 movl 240(%rcx),%eax 1473 movups (%rcx),%xmm0 1474 movups 16(%rcx),%xmm1 1475 leaq 32(%rcx),%rcx 1476 xorps %xmm0,%xmm2 1477 L$oop_enc1_7: 1478 .byte 102,15,56,220,209 1479 decl %eax 1480 movups (%rcx),%xmm1 1481 leaq 16(%rcx),%rcx 1482 jnz L$oop_enc1_7 1483 .byte 102,15,56,221,209 1484 xorps %xmm10,%xmm2 1485 movups %xmm2,(%rsi) 1486 jmp L$ctr32_done 1487 1488 .p2align 4 1489 L$ctr32_done: 1490 leaq (%rbp),%rsp 1491 popq %rbp 1492 L$ctr32_epilogue: 1493 .byte 0xf3,0xc3 1494 1495 .globl _aesni_xts_encrypt 1496 .private_extern _aesni_xts_encrypt 1497 1498 .p2align 4 1499 _aesni_xts_encrypt: 1500 leaq (%rsp),%rax 1501 pushq %rbp 1502 subq $112,%rsp 1503 andq $-16,%rsp 1504 leaq -8(%rax),%rbp 1505 movups (%r9),%xmm2 1506 movl 240(%r8),%eax 1507 movl 240(%rcx),%r10d 1508 movups (%r8),%xmm0 1509 movups 16(%r8),%xmm1 1510 leaq 32(%r8),%r8 1511 xorps %xmm0,%xmm2 1512 L$oop_enc1_8: 1513 .byte 102,15,56,220,209 1514 decl %eax 1515 movups (%r8),%xmm1 1516 leaq 16(%r8),%r8 1517 jnz L$oop_enc1_8 1518 .byte 102,15,56,221,209 1519 movups (%rcx),%xmm0 1520 movq %rcx,%r11 1521 movl %r10d,%eax 1522 shll $4,%r10d 1523 movq %rdx,%r9 1524 andq $-16,%rdx 1525 1526 movups 16(%rcx,%r10,1),%xmm1 1527 1528 movdqa L$xts_magic(%rip),%xmm8 1529 movdqa %xmm2,%xmm15 1530 pshufd $95,%xmm2,%xmm9 1531 pxor %xmm0,%xmm1 1532 movdqa %xmm9,%xmm14 1533 paddd %xmm9,%xmm9 1534 movdqa %xmm15,%xmm10 1535 psrad $31,%xmm14 1536 paddq %xmm15,%xmm15 1537 pand %xmm8,%xmm14 1538 pxor %xmm0,%xmm10 1539 pxor %xmm14,%xmm15 1540 movdqa %xmm9,%xmm14 1541 paddd %xmm9,%xmm9 1542 movdqa %xmm15,%xmm11 1543 psrad $31,%xmm14 1544 paddq %xmm15,%xmm15 1545 pand %xmm8,%xmm14 1546 pxor %xmm0,%xmm11 1547 pxor %xmm14,%xmm15 1548 movdqa %xmm9,%xmm14 1549 paddd %xmm9,%xmm9 1550 movdqa
%xmm15,%xmm12 1551 psrad $31,%xmm14 1552 paddq %xmm15,%xmm15 1553 pand %xmm8,%xmm14 1554 pxor %xmm0,%xmm12 1555 pxor %xmm14,%xmm15 1556 movdqa %xmm9,%xmm14 1557 paddd %xmm9,%xmm9 1558 movdqa %xmm15,%xmm13 1559 psrad $31,%xmm14 1560 paddq %xmm15,%xmm15 1561 pand %xmm8,%xmm14 1562 pxor %xmm0,%xmm13 1563 pxor %xmm14,%xmm15 1564 movdqa %xmm15,%xmm14 1565 psrad $31,%xmm9 1566 paddq %xmm15,%xmm15 1567 pand %xmm8,%xmm9 1568 pxor %xmm0,%xmm14 1569 pxor %xmm9,%xmm15 1570 movaps %xmm1,96(%rsp) 1571 1572 subq $96,%rdx 1573 jc L$xts_enc_short 1574 1575 movl $16+96,%eax 1576 leaq 32(%r11,%r10,1),%rcx 1577 subq %r10,%rax 1578 movups 16(%r11),%xmm1 1579 movq %rax,%r10 1580 leaq L$xts_magic(%rip),%r8 1581 jmp L$xts_enc_grandloop 1582 1583 .p2align 5 1584 L$xts_enc_grandloop: 1585 movdqu 0(%rdi),%xmm2 1586 movdqa %xmm0,%xmm8 1587 movdqu 16(%rdi),%xmm3 1588 pxor %xmm10,%xmm2 1589 movdqu 32(%rdi),%xmm4 1590 pxor %xmm11,%xmm3 1591 .byte 102,15,56,220,209 1592 movdqu 48(%rdi),%xmm5 1593 pxor %xmm12,%xmm4 1594 .byte 102,15,56,220,217 1595 movdqu 64(%rdi),%xmm6 1596 pxor %xmm13,%xmm5 1597 .byte 102,15,56,220,225 1598 movdqu 80(%rdi),%xmm7 1599 pxor %xmm15,%xmm8 1600 movdqa 96(%rsp),%xmm9 1601 pxor %xmm14,%xmm6 1602 .byte 102,15,56,220,233 1603 movups 32(%r11),%xmm0 1604 leaq 96(%rdi),%rdi 1605 pxor %xmm8,%xmm7 1606 1607 pxor %xmm9,%xmm10 1608 .byte 102,15,56,220,241 1609 pxor %xmm9,%xmm11 1610 movdqa %xmm10,0(%rsp) 1611 .byte 102,15,56,220,249 1612 movups 48(%r11),%xmm1 1613 pxor %xmm9,%xmm12 1614 1615 .byte 102,15,56,220,208 1616 pxor %xmm9,%xmm13 1617 movdqa %xmm11,16(%rsp) 1618 .byte 102,15,56,220,216 1619 pxor %xmm9,%xmm14 1620 movdqa %xmm12,32(%rsp) 1621 .byte 102,15,56,220,224 1622 .byte 102,15,56,220,232 1623 pxor %xmm9,%xmm8 1624 movdqa %xmm14,64(%rsp) 1625 .byte 102,15,56,220,240 1626 .byte 102,15,56,220,248 1627 movups 64(%r11),%xmm0 1628 movdqa %xmm8,80(%rsp) 1629 pshufd $95,%xmm15,%xmm9 1630 jmp L$xts_enc_loop6 1631 .p2align 5 1632 L$xts_enc_loop6: 1633 .byte
102,15,56,220,209 1634 .byte 102,15,56,220,217 1635 .byte 102,15,56,220,225 1636 .byte 102,15,56,220,233 1637 .byte 102,15,56,220,241 1638 .byte 102,15,56,220,249 1639 movups -64(%rcx,%rax,1),%xmm1 1640 addq $32,%rax 1641 1642 .byte 102,15,56,220,208 1643 .byte 102,15,56,220,216 1644 .byte 102,15,56,220,224 1645 .byte 102,15,56,220,232 1646 .byte 102,15,56,220,240 1647 .byte 102,15,56,220,248 1648 movups -80(%rcx,%rax,1),%xmm0 1649 jnz L$xts_enc_loop6 1650 1651 movdqa (%r8),%xmm8 1652 movdqa %xmm9,%xmm14 1653 paddd %xmm9,%xmm9 1654 .byte 102,15,56,220,209 1655 paddq %xmm15,%xmm15 1656 psrad $31,%xmm14 1657 .byte 102,15,56,220,217 1658 pand %xmm8,%xmm14 1659 movups (%r11),%xmm10 1660 .byte 102,15,56,220,225 1661 .byte 102,15,56,220,233 1662 .byte 102,15,56,220,241 1663 pxor %xmm14,%xmm15 1664 movaps %xmm10,%xmm11 1665 .byte 102,15,56,220,249 1666 movups -64(%rcx),%xmm1 1667 1668 movdqa %xmm9,%xmm14 1669 .byte 102,15,56,220,208 1670 paddd %xmm9,%xmm9 1671 pxor %xmm15,%xmm10 1672 .byte 102,15,56,220,216 1673 psrad $31,%xmm14 1674 paddq %xmm15,%xmm15 1675 .byte 102,15,56,220,224 1676 .byte 102,15,56,220,232 1677 pand %xmm8,%xmm14 1678 movaps %xmm11,%xmm12 1679 .byte 102,15,56,220,240 1680 pxor %xmm14,%xmm15 1681 movdqa %xmm9,%xmm14 1682 .byte 102,15,56,220,248 1683 movups -48(%rcx),%xmm0 1684 1685 paddd %xmm9,%xmm9 1686 .byte 102,15,56,220,209 1687 pxor %xmm15,%xmm11 1688 psrad $31,%xmm14 1689 .byte 102,15,56,220,217 1690 paddq %xmm15,%xmm15 1691 pand %xmm8,%xmm14 1692 .byte 102,15,56,220,225 1693 .byte 102,15,56,220,233 1694 movdqa %xmm13,48(%rsp) 1695 pxor %xmm14,%xmm15 1696 .byte 102,15,56,220,241 1697 movaps %xmm12,%xmm13 1698 movdqa %xmm9,%xmm14 1699 .byte 102,15,56,220,249 1700 movups -32(%rcx),%xmm1 1701 1702 paddd %xmm9,%xmm9 1703 .byte 102,15,56,220,208 1704 pxor %xmm15,%xmm12 1705 psrad $31,%xmm14 1706 .byte 102,15,56,220,216 1707 paddq %xmm15,%xmm15 1708 pand %xmm8,%xmm14 1709 .byte 102,15,56,220,224 1710 .byte 102,15,56,220,232 1711 .byte 102,15,56,220,240
# NOTE(review): this region is machine-generated perlasm output (AES-NI,
# macOS Mach-O flavor: leading-underscore globals, L$ local labels), rendered
# here as a numbered listing collapsed onto long lines -- the interleaved
# decimal numbers are the listing's own line numbers, NOT operands. All code
# tokens are left byte-identical; do not hand-edit, regenerate from the perl
# source instead.
# Above: end of the CTR32 routine, then _aesni_xts_encrypt
# (in=%rdi, out=%rsi, len=%rdx, key1=%rcx, key2=%r8, tweak/iv=%r9):
# the IV is encrypted with key2 (L$oop_enc1_8; ".byte 102,15,56,220,xx"
# decodes to aesenc, "...,221,xx" to aesenclast -- opcodes are emitted as raw
# bytes for old assemblers), then six consecutive tweaks are derived by
# GF(2^128) doubling (psrad/pand with the L$xts_magic polynomial, paddq).
# The grand loop encrypts 6 blocks per iteration, interleaving AES rounds
# with the next round of tweak generation to hide instruction latency.
1712 pxor %xmm14,%xmm15 1713 movaps %xmm13,%xmm14 1714 .byte 102,15,56,220,248 1715 1716 movdqa %xmm9,%xmm0 1717 paddd %xmm9,%xmm9 1718 .byte 102,15,56,220,209 1719 pxor %xmm15,%xmm13 1720 psrad $31,%xmm0 1721 .byte 102,15,56,220,217 1722 paddq %xmm15,%xmm15 1723 pand %xmm8,%xmm0 1724 .byte 102,15,56,220,225 1725 .byte 102,15,56,220,233 1726 pxor %xmm0,%xmm15 1727 movups (%r11),%xmm0 1728 .byte 102,15,56,220,241 1729 .byte 102,15,56,220,249 1730 movups 16(%r11),%xmm1 1731 1732 pxor %xmm15,%xmm14 1733 .byte 102,15,56,221,84,36,0 1734 psrad $31,%xmm9 1735 paddq %xmm15,%xmm15 1736 .byte 102,15,56,221,92,36,16 1737 .byte 102,15,56,221,100,36,32 1738 pand %xmm8,%xmm9 1739 movq %r10,%rax 1740 .byte 102,15,56,221,108,36,48 1741 .byte 102,15,56,221,116,36,64 1742 .byte 102,15,56,221,124,36,80 1743 pxor %xmm9,%xmm15 1744 1745 leaq 96(%rsi),%rsi 1746 movups %xmm2,-96(%rsi) 1747 movups %xmm3,-80(%rsi) 1748 movups %xmm4,-64(%rsi) 1749 movups %xmm5,-48(%rsi) 1750 movups %xmm6,-32(%rsi) 1751 movups %xmm7,-16(%rsi) 1752 subq $96,%rdx 1753 jnc L$xts_enc_grandloop 1754 1755 movl $16+96,%eax 1756 subl %r10d,%eax 1757 movq %r11,%rcx 1758 shrl $4,%eax 1759 1760 L$xts_enc_short: 1761 movl %eax,%r10d 1762 pxor %xmm0,%xmm10 1763 addq $96,%rdx 1764 jz L$xts_enc_done 1765 1766 pxor %xmm0,%xmm11 1767 cmpq $32,%rdx 1768 jb L$xts_enc_one 1769 pxor %xmm0,%xmm12 1770 je L$xts_enc_two 1771 1772 pxor %xmm0,%xmm13 1773 cmpq $64,%rdx 1774 jb L$xts_enc_three 1775 pxor %xmm0,%xmm14 1776 je L$xts_enc_four 1777 1778 movdqu (%rdi),%xmm2 1779 movdqu 16(%rdi),%xmm3 1780 movdqu 32(%rdi),%xmm4 1781 pxor %xmm10,%xmm2 1782 movdqu 48(%rdi),%xmm5 1783 pxor %xmm11,%xmm3 1784 movdqu 64(%rdi),%xmm6 1785 leaq 80(%rdi),%rdi 1786 pxor %xmm12,%xmm4 1787 pxor %xmm13,%xmm5 1788 pxor %xmm14,%xmm6 1789 1790 call _aesni_encrypt6 1791 1792 xorps %xmm10,%xmm2 1793 movdqa %xmm15,%xmm10 1794 xorps %xmm11,%xmm3 1795 xorps %xmm12,%xmm4 1796 movdqu %xmm2,(%rsi) 1797 xorps %xmm13,%xmm5 1798 movdqu %xmm3,16(%rsi) 1799 xorps
%xmm14,%xmm6 1800 movdqu %xmm4,32(%rsi) 1801 movdqu %xmm5,48(%rsi) 1802 movdqu %xmm6,64(%rsi) 1803 leaq 80(%rsi),%rsi 1804 jmp L$xts_enc_done 1805 1806 .p2align 4 1807 L$xts_enc_one: 1808 movups (%rdi),%xmm2 1809 leaq 16(%rdi),%rdi 1810 xorps %xmm10,%xmm2 1811 movups (%rcx),%xmm0 1812 movups 16(%rcx),%xmm1 1813 leaq 32(%rcx),%rcx 1814 xorps %xmm0,%xmm2 1815 L$oop_enc1_9: 1816 .byte 102,15,56,220,209 1817 decl %eax 1818 movups (%rcx),%xmm1 1819 leaq 16(%rcx),%rcx 1820 jnz L$oop_enc1_9 1821 .byte 102,15,56,221,209 1822 xorps %xmm10,%xmm2 1823 movdqa %xmm11,%xmm10 1824 movups %xmm2,(%rsi) 1825 leaq 16(%rsi),%rsi 1826 jmp L$xts_enc_done 1827 1828 .p2align 4 1829 L$xts_enc_two: 1830 movups (%rdi),%xmm2 1831 movups 16(%rdi),%xmm3 1832 leaq 32(%rdi),%rdi 1833 xorps %xmm10,%xmm2 1834 xorps %xmm11,%xmm3 1835 1836 call _aesni_encrypt2 1837 1838 xorps %xmm10,%xmm2 1839 movdqa %xmm12,%xmm10 1840 xorps %xmm11,%xmm3 1841 movups %xmm2,(%rsi) 1842 movups %xmm3,16(%rsi) 1843 leaq 32(%rsi),%rsi 1844 jmp L$xts_enc_done 1845 1846 .p2align 4 1847 L$xts_enc_three: 1848 movups (%rdi),%xmm2 1849 movups 16(%rdi),%xmm3 1850 movups 32(%rdi),%xmm4 1851 leaq 48(%rdi),%rdi 1852 xorps %xmm10,%xmm2 1853 xorps %xmm11,%xmm3 1854 xorps %xmm12,%xmm4 1855 1856 call _aesni_encrypt3 1857 1858 xorps %xmm10,%xmm2 1859 movdqa %xmm13,%xmm10 1860 xorps %xmm11,%xmm3 1861 xorps %xmm12,%xmm4 1862 movups %xmm2,(%rsi) 1863 movups %xmm3,16(%rsi) 1864 movups %xmm4,32(%rsi) 1865 leaq 48(%rsi),%rsi 1866 jmp L$xts_enc_done 1867 1868 .p2align 4 1869 L$xts_enc_four: 1870 movups (%rdi),%xmm2 1871 movups 16(%rdi),%xmm3 1872 movups 32(%rdi),%xmm4 1873 xorps %xmm10,%xmm2 1874 movups 48(%rdi),%xmm5 1875 leaq 64(%rdi),%rdi 1876 xorps %xmm11,%xmm3 1877 xorps %xmm12,%xmm4 1878 xorps %xmm13,%xmm5 1879 1880 call _aesni_encrypt4 1881 1882 pxor %xmm10,%xmm2 1883 movdqa %xmm14,%xmm10 1884 pxor %xmm11,%xmm3 1885 pxor %xmm12,%xmm4 1886 movdqu %xmm2,(%rsi) 1887 pxor %xmm13,%xmm5 1888 movdqu %xmm3,16(%rsi) 1889 movdqu %xmm4,32(%rsi)
# Short-tail paths above handle 1..5 remaining whole blocks via the shared
# _aesni_encrypt2/3/4/6 helpers (tweak XOR before and after the cipher).
# The 4-block path's remaining stores, the ciphertext-stealing code
# (L$xts_enc_steal) and the epilogue continue in the listing below.
# --- _aesni_xts_encrypt: final partial-block handling (ciphertext stealing)
# and epilogue, then _aesni_xts_decrypt(in=%rdi, out=%rsi, len=%rdx,
# key1=%rcx, key2=%r8, tweak/iv=%r9). Note the decrypt routine still
# *encrypts* the tweak with key2 (L$oop_enc1_11 uses aesenc opcodes), as XTS
# requires; only the data blocks are decrypted.
# NOTE(review): machine-generated perlasm output rendered as a numbered
# listing -- interleaved decimal numbers are listing line numbers, not
# operands. Code tokens left byte-identical; regenerate rather than hand-edit.
1890 movdqu %xmm5,48(%rsi) 1891 leaq 64(%rsi),%rsi 1892 jmp L$xts_enc_done 1893 1894 .p2align 4 1895 L$xts_enc_done: 1896 andq $15,%r9 1897 jz L$xts_enc_ret 1898 movq %r9,%rdx 1899 1900 L$xts_enc_steal: 1901 movzbl (%rdi),%eax 1902 movzbl -16(%rsi),%ecx 1903 leaq 1(%rdi),%rdi 1904 movb %al,-16(%rsi) 1905 movb %cl,0(%rsi) 1906 leaq 1(%rsi),%rsi 1907 subq $1,%rdx 1908 jnz L$xts_enc_steal 1909 1910 subq %r9,%rsi 1911 movq %r11,%rcx 1912 movl %r10d,%eax 1913 1914 movups -16(%rsi),%xmm2 1915 xorps %xmm10,%xmm2 1916 movups (%rcx),%xmm0 1917 movups 16(%rcx),%xmm1 1918 leaq 32(%rcx),%rcx 1919 xorps %xmm0,%xmm2 1920 L$oop_enc1_10: 1921 .byte 102,15,56,220,209 1922 decl %eax 1923 movups (%rcx),%xmm1 1924 leaq 16(%rcx),%rcx 1925 jnz L$oop_enc1_10 1926 .byte 102,15,56,221,209 1927 xorps %xmm10,%xmm2 1928 movups %xmm2,-16(%rsi) 1929 1930 L$xts_enc_ret: 1931 leaq (%rbp),%rsp 1932 popq %rbp 1933 L$xts_enc_epilogue: 1934 .byte 0xf3,0xc3 1935 1936 .globl _aesni_xts_decrypt 1937 .private_extern _aesni_xts_decrypt 1938 1939 .p2align 4 1940 _aesni_xts_decrypt: 1941 leaq (%rsp),%rax 1942 pushq %rbp 1943 subq $112,%rsp 1944 andq $-16,%rsp 1945 leaq -8(%rax),%rbp 1946 movups (%r9),%xmm2 1947 movl 240(%r8),%eax 1948 movl 240(%rcx),%r10d 1949 movups (%r8),%xmm0 1950 movups 16(%r8),%xmm1 1951 leaq 32(%r8),%r8 1952 xorps %xmm0,%xmm2 1953 L$oop_enc1_11: 1954 .byte 102,15,56,220,209 1955 decl %eax 1956 movups (%r8),%xmm1 1957 leaq 16(%r8),%r8 1958 jnz L$oop_enc1_11 1959 .byte 102,15,56,221,209 1960 xorl %eax,%eax 1961 testq $15,%rdx 1962 setnz %al 1963 shlq $4,%rax 1964 subq %rax,%rdx 1965 1966 movups (%rcx),%xmm0 1967 movq %rcx,%r11 1968 movl %r10d,%eax 1969 shll $4,%r10d 1970 movq %rdx,%r9 1971 andq $-16,%rdx 1972 1973 movups 16(%rcx,%r10,1),%xmm1 1974 1975 movdqa L$xts_magic(%rip),%xmm8 1976 movdqa %xmm2,%xmm15 1977 pshufd $95,%xmm2,%xmm9 1978 pxor %xmm0,%xmm1 1979 movdqa %xmm9,%xmm14 1980 paddd %xmm9,%xmm9 1981 movdqa %xmm15,%xmm10 1982 psrad $31,%xmm14 1983 paddq %xmm15,%xmm15 1984 pand
%xmm8,%xmm14 1985 pxor %xmm0,%xmm10 1986 pxor %xmm14,%xmm15 1987 movdqa %xmm9,%xmm14 1988 paddd %xmm9,%xmm9 1989 movdqa %xmm15,%xmm11 1990 psrad $31,%xmm14 1991 paddq %xmm15,%xmm15 1992 pand %xmm8,%xmm14 1993 pxor %xmm0,%xmm11 1994 pxor %xmm14,%xmm15 1995 movdqa %xmm9,%xmm14 1996 paddd %xmm9,%xmm9 1997 movdqa %xmm15,%xmm12 1998 psrad $31,%xmm14 1999 paddq %xmm15,%xmm15 2000 pand %xmm8,%xmm14 2001 pxor %xmm0,%xmm12 2002 pxor %xmm14,%xmm15 2003 movdqa %xmm9,%xmm14 2004 paddd %xmm9,%xmm9 2005 movdqa %xmm15,%xmm13 2006 psrad $31,%xmm14 2007 paddq %xmm15,%xmm15 2008 pand %xmm8,%xmm14 2009 pxor %xmm0,%xmm13 2010 pxor %xmm14,%xmm15 2011 movdqa %xmm15,%xmm14 2012 psrad $31,%xmm9 2013 paddq %xmm15,%xmm15 2014 pand %xmm8,%xmm9 2015 pxor %xmm0,%xmm14 2016 pxor %xmm9,%xmm15 2017 movaps %xmm1,96(%rsp) 2018 2019 subq $96,%rdx 2020 jc L$xts_dec_short 2021 2022 movl $16+96,%eax 2023 leaq 32(%r11,%r10,1),%rcx 2024 subq %r10,%rax 2025 movups 16(%r11),%xmm1 2026 movq %rax,%r10 2027 leaq L$xts_magic(%rip),%r8 2028 jmp L$xts_dec_grandloop 2029 2030 .p2align 5 2031 L$xts_dec_grandloop: 2032 movdqu 0(%rdi),%xmm2 2033 movdqa %xmm0,%xmm8 2034 movdqu 16(%rdi),%xmm3 2035 pxor %xmm10,%xmm2 2036 movdqu 32(%rdi),%xmm4 2037 pxor %xmm11,%xmm3 2038 .byte 102,15,56,222,209 2039 movdqu 48(%rdi),%xmm5 2040 pxor %xmm12,%xmm4 2041 .byte 102,15,56,222,217 2042 movdqu 64(%rdi),%xmm6 2043 pxor %xmm13,%xmm5 2044 .byte 102,15,56,222,225 2045 movdqu 80(%rdi),%xmm7 2046 pxor %xmm15,%xmm8 2047 movdqa 96(%rsp),%xmm9 2048 pxor %xmm14,%xmm6 2049 .byte 102,15,56,222,233 2050 movups 32(%r11),%xmm0 2051 leaq 96(%rdi),%rdi 2052 pxor %xmm8,%xmm7 2053 2054 pxor %xmm9,%xmm10 2055 .byte 102,15,56,222,241 2056 pxor %xmm9,%xmm11 2057 movdqa %xmm10,0(%rsp) 2058 .byte 102,15,56,222,249 2059 movups 48(%r11),%xmm1 2060 pxor %xmm9,%xmm12 2061 2062 .byte 102,15,56,222,208 2063 pxor %xmm9,%xmm13 2064 movdqa %xmm11,16(%rsp) 2065 .byte 102,15,56,222,216 2066 pxor %xmm9,%xmm14 2067 movdqa %xmm12,32(%rsp) 2068 .byte 102,15,56,222,224
# Decrypt grand loop: 6 blocks per iteration. ".byte 102,15,56,222,xx"
# decodes to aesdec, "...,223,xx" to aesdeclast; tweak doubling
# (L$xts_magic polynomial) is interleaved with the rounds as in the
# encrypt path.
2069 .byte 102,15,56,222,232 2070 pxor %xmm9,%xmm8 2071 movdqa %xmm14,64(%rsp) 2072 .byte 102,15,56,222,240 2073 .byte 102,15,56,222,248 2074 movups 64(%r11),%xmm0 2075 movdqa %xmm8,80(%rsp) 2076 pshufd $95,%xmm15,%xmm9 2077 jmp L$xts_dec_loop6 2078 .p2align 5 2079 L$xts_dec_loop6: 2080 .byte 102,15,56,222,209 2081 .byte 102,15,56,222,217 2082 .byte 102,15,56,222,225 2083 .byte 102,15,56,222,233 2084 .byte 102,15,56,222,241 2085 .byte 102,15,56,222,249 2086 movups -64(%rcx,%rax,1),%xmm1 2087 addq $32,%rax 2088 2089 .byte 102,15,56,222,208 2090 .byte 102,15,56,222,216 2091 .byte 102,15,56,222,224 2092 .byte 102,15,56,222,232 2093 .byte 102,15,56,222,240 2094 .byte 102,15,56,222,248 2095 movups -80(%rcx,%rax,1),%xmm0 2096 jnz L$xts_dec_loop6 2097 2098 movdqa (%r8),%xmm8 2099 movdqa %xmm9,%xmm14 2100 paddd %xmm9,%xmm9 2101 .byte 102,15,56,222,209 2102 paddq %xmm15,%xmm15 2103 psrad $31,%xmm14 2104 .byte 102,15,56,222,217 2105 pand %xmm8,%xmm14 2106 movups (%r11),%xmm10 2107 .byte 102,15,56,222,225 2108 .byte 102,15,56,222,233 2109 .byte 102,15,56,222,241 2110 pxor %xmm14,%xmm15 2111 movaps %xmm10,%xmm11 2112 .byte 102,15,56,222,249 2113 movups -64(%rcx),%xmm1 2114 2115 movdqa %xmm9,%xmm14 2116 .byte 102,15,56,222,208 2117 paddd %xmm9,%xmm9 2118 pxor %xmm15,%xmm10 2119 .byte 102,15,56,222,216 2120 psrad $31,%xmm14 2121 paddq %xmm15,%xmm15 2122 .byte 102,15,56,222,224 2123 .byte 102,15,56,222,232 2124 pand %xmm8,%xmm14 2125 movaps %xmm11,%xmm12 2126 .byte 102,15,56,222,240 2127 pxor %xmm14,%xmm15 2128 movdqa %xmm9,%xmm14 2129 .byte 102,15,56,222,248 2130 movups -48(%rcx),%xmm0 2131 2132 paddd %xmm9,%xmm9 2133 .byte 102,15,56,222,209 2134 pxor %xmm15,%xmm11 2135 psrad $31,%xmm14 2136 .byte 102,15,56,222,217 2137 paddq %xmm15,%xmm15 2138 pand %xmm8,%xmm14 2139 .byte 102,15,56,222,225 2140 .byte 102,15,56,222,233 2141 movdqa %xmm13,48(%rsp) 2142 pxor %xmm14,%xmm15 2143 .byte 102,15,56,222,241 2144 movaps %xmm12,%xmm13 2145 movdqa %xmm9,%xmm14 2146 .byte 102,15,56,222,249
2147 movups -32(%rcx),%xmm1 2148 2149 paddd %xmm9,%xmm9 2150 .byte 102,15,56,222,208 2151 pxor %xmm15,%xmm12 2152 psrad $31,%xmm14 2153 .byte 102,15,56,222,216 2154 paddq %xmm15,%xmm15 2155 pand %xmm8,%xmm14 2156 .byte 102,15,56,222,224 2157 .byte 102,15,56,222,232 2158 .byte 102,15,56,222,240 2159 pxor %xmm14,%xmm15 2160 movaps %xmm13,%xmm14 2161 .byte 102,15,56,222,248 2162 2163 movdqa %xmm9,%xmm0 2164 paddd %xmm9,%xmm9 2165 .byte 102,15,56,222,209 2166 pxor %xmm15,%xmm13 2167 psrad $31,%xmm0 2168 .byte 102,15,56,222,217 2169 paddq %xmm15,%xmm15 2170 pand %xmm8,%xmm0 2171 .byte 102,15,56,222,225 2172 .byte 102,15,56,222,233 2173 pxor %xmm0,%xmm15 2174 movups (%r11),%xmm0 2175 .byte 102,15,56,222,241 2176 .byte 102,15,56,222,249 2177 movups 16(%r11),%xmm1 2178 2179 pxor %xmm15,%xmm14 2180 .byte 102,15,56,223,84,36,0 2181 psrad $31,%xmm9 2182 paddq %xmm15,%xmm15 2183 .byte 102,15,56,223,92,36,16 2184 .byte 102,15,56,223,100,36,32 2185 pand %xmm8,%xmm9 2186 movq %r10,%rax 2187 .byte 102,15,56,223,108,36,48 2188 .byte 102,15,56,223,116,36,64 2189 .byte 102,15,56,223,124,36,80 2190 pxor %xmm9,%xmm15 2191 2192 leaq 96(%rsi),%rsi 2193 movups %xmm2,-96(%rsi) 2194 movups %xmm3,-80(%rsi) 2195 movups %xmm4,-64(%rsi) 2196 movups %xmm5,-48(%rsi) 2197 movups %xmm6,-32(%rsi) 2198 movups %xmm7,-16(%rsi) 2199 subq $96,%rdx 2200 jnc L$xts_dec_grandloop 2201 2202 movl $16+96,%eax 2203 subl %r10d,%eax 2204 movq %r11,%rcx 2205 shrl $4,%eax 2206 2207 L$xts_dec_short: 2208 movl %eax,%r10d 2209 pxor %xmm0,%xmm10 2210 pxor %xmm0,%xmm11 2211 addq $96,%rdx 2212 jz L$xts_dec_done 2213 2214 pxor %xmm0,%xmm12 2215 cmpq $32,%rdx 2216 jb L$xts_dec_one 2217 pxor %xmm0,%xmm13 2218 je L$xts_dec_two 2219 2220 pxor %xmm0,%xmm14 2221 cmpq $64,%rdx 2222 jb L$xts_dec_three 2223 je L$xts_dec_four 2224 2225 movdqu (%rdi),%xmm2 2226 movdqu 16(%rdi),%xmm3 2227 movdqu 32(%rdi),%xmm4 2228 pxor %xmm10,%xmm2 2229 movdqu 48(%rdi),%xmm5 2230 pxor %xmm11,%xmm3 2231 movdqu 64(%rdi),%xmm6 2232 leaq 80(%rdi),%rdi
# L$xts_dec_short: fewer than 6 whole blocks remain; dispatch to the
# 1/2/3/4/5-block paths (shared _aesni_decrypt2/3/4/6 helpers).
2233 pxor %xmm12,%xmm4 2234 pxor %xmm13,%xmm5 2235 pxor %xmm14,%xmm6 2236 2237 call _aesni_decrypt6 2238 2239 xorps %xmm10,%xmm2 2240 xorps %xmm11,%xmm3 2241 xorps %xmm12,%xmm4 2242 movdqu %xmm2,(%rsi) 2243 xorps %xmm13,%xmm5 2244 movdqu %xmm3,16(%rsi) 2245 xorps %xmm14,%xmm6 2246 movdqu %xmm4,32(%rsi) 2247 pxor %xmm14,%xmm14 2248 movdqu %xmm5,48(%rsi) 2249 pcmpgtd %xmm15,%xmm14 2250 movdqu %xmm6,64(%rsi) 2251 leaq 80(%rsi),%rsi 2252 pshufd $19,%xmm14,%xmm11 2253 andq $15,%r9 2254 jz L$xts_dec_ret 2255 2256 movdqa %xmm15,%xmm10 2257 paddq %xmm15,%xmm15 2258 pand %xmm8,%xmm11 2259 pxor %xmm15,%xmm11 2260 jmp L$xts_dec_done2 2261 2262 .p2align 4 2263 L$xts_dec_one: 2264 movups (%rdi),%xmm2 2265 leaq 16(%rdi),%rdi 2266 xorps %xmm10,%xmm2 2267 movups (%rcx),%xmm0 2268 movups 16(%rcx),%xmm1 2269 leaq 32(%rcx),%rcx 2270 xorps %xmm0,%xmm2 2271 L$oop_dec1_12: 2272 .byte 102,15,56,222,209 2273 decl %eax 2274 movups (%rcx),%xmm1 2275 leaq 16(%rcx),%rcx 2276 jnz L$oop_dec1_12 2277 .byte 102,15,56,223,209 2278 xorps %xmm10,%xmm2 2279 movdqa %xmm11,%xmm10 2280 movups %xmm2,(%rsi) 2281 movdqa %xmm12,%xmm11 2282 leaq 16(%rsi),%rsi 2283 jmp L$xts_dec_done 2284 2285 .p2align 4 2286 L$xts_dec_two: 2287 movups (%rdi),%xmm2 2288 movups 16(%rdi),%xmm3 2289 leaq 32(%rdi),%rdi 2290 xorps %xmm10,%xmm2 2291 xorps %xmm11,%xmm3 2292 2293 call _aesni_decrypt2 2294 2295 xorps %xmm10,%xmm2 2296 movdqa %xmm12,%xmm10 2297 xorps %xmm11,%xmm3 2298 movdqa %xmm13,%xmm11 2299 movups %xmm2,(%rsi) 2300 movups %xmm3,16(%rsi) 2301 leaq 32(%rsi),%rsi 2302 jmp L$xts_dec_done 2303 2304 .p2align 4 2305 L$xts_dec_three: 2306 movups (%rdi),%xmm2 2307 movups 16(%rdi),%xmm3 2308 movups 32(%rdi),%xmm4 2309 leaq 48(%rdi),%rdi 2310 xorps %xmm10,%xmm2 2311 xorps %xmm11,%xmm3 2312 xorps %xmm12,%xmm4 2313 2314 call _aesni_decrypt3 2315 2316 xorps %xmm10,%xmm2 2317 movdqa %xmm13,%xmm10 2318 xorps %xmm11,%xmm3 2319 movdqa %xmm14,%xmm11 2320 xorps %xmm12,%xmm4 2321 movups %xmm2,(%rsi) 2322 movups %xmm3,16(%rsi) 2323
# The 3-block path's last store, the 4-block path, ciphertext stealing
# (L$xts_dec_steal) and the epilogue continue in the listing below.
# --- _aesni_xts_decrypt tail: 4-block path, ciphertext stealing, epilogue,
# then _aesni_cbc_encrypt(in=%rdi, out=%rsi, len=%rdx, key=%rcx, ivp=%r8,
# enc=%r9d). enc!=0 takes the serial CBC-encrypt loop (each block chained
# through %xmm2); enc==0 jumps to L$cbc_decrypt, which picks a 6- or 8-block
# interleaved path from _OPENSSL_ia32cap_P capability bits.
# ".long 0x9066A4F3" / ".long 0x9066AAF3" are byte-encoded
# "rep movsb; data16 nop" / "rep stosb; data16 nop", used to copy and
# zero-pad the short final block in L$cbc_enc_tail.
# NOTE(review): machine-generated perlasm output rendered as a numbered
# listing -- interleaved decimal numbers are listing line numbers, not
# operands. Code tokens left byte-identical; regenerate rather than hand-edit.
movups %xmm4,32(%rsi) 2324 leaq 48(%rsi),%rsi 2325 jmp L$xts_dec_done 2326 2327 .p2align 4 2328 L$xts_dec_four: 2329 movups (%rdi),%xmm2 2330 movups 16(%rdi),%xmm3 2331 movups 32(%rdi),%xmm4 2332 xorps %xmm10,%xmm2 2333 movups 48(%rdi),%xmm5 2334 leaq 64(%rdi),%rdi 2335 xorps %xmm11,%xmm3 2336 xorps %xmm12,%xmm4 2337 xorps %xmm13,%xmm5 2338 2339 call _aesni_decrypt4 2340 2341 pxor %xmm10,%xmm2 2342 movdqa %xmm14,%xmm10 2343 pxor %xmm11,%xmm3 2344 movdqa %xmm15,%xmm11 2345 pxor %xmm12,%xmm4 2346 movdqu %xmm2,(%rsi) 2347 pxor %xmm13,%xmm5 2348 movdqu %xmm3,16(%rsi) 2349 movdqu %xmm4,32(%rsi) 2350 movdqu %xmm5,48(%rsi) 2351 leaq 64(%rsi),%rsi 2352 jmp L$xts_dec_done 2353 2354 .p2align 4 2355 L$xts_dec_done: 2356 andq $15,%r9 2357 jz L$xts_dec_ret 2358 L$xts_dec_done2: 2359 movq %r9,%rdx 2360 movq %r11,%rcx 2361 movl %r10d,%eax 2362 2363 movups (%rdi),%xmm2 2364 xorps %xmm11,%xmm2 2365 movups (%rcx),%xmm0 2366 movups 16(%rcx),%xmm1 2367 leaq 32(%rcx),%rcx 2368 xorps %xmm0,%xmm2 2369 L$oop_dec1_13: 2370 .byte 102,15,56,222,209 2371 decl %eax 2372 movups (%rcx),%xmm1 2373 leaq 16(%rcx),%rcx 2374 jnz L$oop_dec1_13 2375 .byte 102,15,56,223,209 2376 xorps %xmm11,%xmm2 2377 movups %xmm2,(%rsi) 2378 2379 L$xts_dec_steal: 2380 movzbl 16(%rdi),%eax 2381 movzbl (%rsi),%ecx 2382 leaq 1(%rdi),%rdi 2383 movb %al,(%rsi) 2384 movb %cl,16(%rsi) 2385 leaq 1(%rsi),%rsi 2386 subq $1,%rdx 2387 jnz L$xts_dec_steal 2388 2389 subq %r9,%rsi 2390 movq %r11,%rcx 2391 movl %r10d,%eax 2392 2393 movups (%rsi),%xmm2 2394 xorps %xmm10,%xmm2 2395 movups (%rcx),%xmm0 2396 movups 16(%rcx),%xmm1 2397 leaq 32(%rcx),%rcx 2398 xorps %xmm0,%xmm2 2399 L$oop_dec1_14: 2400 .byte 102,15,56,222,209 2401 decl %eax 2402 movups (%rcx),%xmm1 2403 leaq 16(%rcx),%rcx 2404 jnz L$oop_dec1_14 2405 .byte 102,15,56,223,209 2406 xorps %xmm10,%xmm2 2407 movups %xmm2,(%rsi) 2408 2409 L$xts_dec_ret: 2410 leaq (%rbp),%rsp 2411 popq %rbp 2412 L$xts_dec_epilogue: 2413 .byte 0xf3,0xc3 2414 2415 .globl _aesni_cbc_encrypt 2416
.private_extern _aesni_cbc_encrypt 2417 2418 .p2align 4 2419 _aesni_cbc_encrypt: 2420 testq %rdx,%rdx 2421 jz L$cbc_ret 2422 2423 movl 240(%rcx),%r10d 2424 movq %rcx,%r11 2425 testl %r9d,%r9d 2426 jz L$cbc_decrypt 2427 2428 movups (%r8),%xmm2 2429 movl %r10d,%eax 2430 cmpq $16,%rdx 2431 jb L$cbc_enc_tail 2432 subq $16,%rdx 2433 jmp L$cbc_enc_loop 2434 .p2align 4 2435 L$cbc_enc_loop: 2436 movups (%rdi),%xmm3 2437 leaq 16(%rdi),%rdi 2438 2439 movups (%rcx),%xmm0 2440 movups 16(%rcx),%xmm1 2441 xorps %xmm0,%xmm3 2442 leaq 32(%rcx),%rcx 2443 xorps %xmm3,%xmm2 2444 L$oop_enc1_15: 2445 .byte 102,15,56,220,209 2446 decl %eax 2447 movups (%rcx),%xmm1 2448 leaq 16(%rcx),%rcx 2449 jnz L$oop_enc1_15 2450 .byte 102,15,56,221,209 2451 movl %r10d,%eax 2452 movq %r11,%rcx 2453 movups %xmm2,0(%rsi) 2454 leaq 16(%rsi),%rsi 2455 subq $16,%rdx 2456 jnc L$cbc_enc_loop 2457 addq $16,%rdx 2458 jnz L$cbc_enc_tail 2459 movups %xmm2,(%r8) 2460 jmp L$cbc_ret 2461 2462 L$cbc_enc_tail: 2463 movq %rdx,%rcx 2464 xchgq %rdi,%rsi 2465 .long 0x9066A4F3 2466 movl $16,%ecx 2467 subq %rdx,%rcx 2468 xorl %eax,%eax 2469 .long 0x9066AAF3 2470 leaq -16(%rdi),%rdi 2471 movl %r10d,%eax 2472 movq %rdi,%rsi 2473 movq %r11,%rcx 2474 xorq %rdx,%rdx 2475 jmp L$cbc_enc_loop 2476 2477 .p2align 4 2478 L$cbc_decrypt: 2479 leaq (%rsp),%rax 2480 pushq %rbp 2481 subq $16,%rsp 2482 andq $-16,%rsp 2483 leaq -8(%rax),%rbp 2484 movups (%r8),%xmm10 2485 movl %r10d,%eax 2486 cmpq $80,%rdx 2487 jbe L$cbc_dec_tail 2488 2489 movups (%rcx),%xmm0 2490 movdqu 0(%rdi),%xmm2 2491 movdqu 16(%rdi),%xmm3 2492 movdqa %xmm2,%xmm11 2493 movdqu 32(%rdi),%xmm4 2494 movdqa %xmm3,%xmm12 2495 movdqu 48(%rdi),%xmm5 2496 movdqa %xmm4,%xmm13 2497 movdqu 64(%rdi),%xmm6 2498 movdqa %xmm5,%xmm14 2499 movdqu 80(%rdi),%xmm7 2500 movdqa %xmm6,%xmm15 2501 movl _OPENSSL_ia32cap_P+4(%rip),%r9d 2502 cmpq $112,%rdx 2503 jbe L$cbc_dec_six_or_seven 2504 2505 andl $71303168,%r9d 2506 subq $80,%rdx 2507 cmpl $4194304,%r9d 2508 je L$cbc_dec_loop6_enter 2509
# 8-block CBC-decrypt loop: round keys streamed from %rcx (offsets biased by
# -112), aesdec rounds hand-encoded as ".byte 102,15,56,222,xx"
# (REX-prefixed forms carry the extra 68/65/69 bytes). The setnc/shlq on
# %r11 prepares the source pointer for reloading ciphertext as the next
# chaining values.
subq $32,%rdx 2510 leaq 112(%rcx),%rcx 2511 jmp L$cbc_dec_loop8_enter 2512 .p2align 4 2513 L$cbc_dec_loop8: 2514 movups %xmm9,(%rsi) 2515 leaq 16(%rsi),%rsi 2516 L$cbc_dec_loop8_enter: 2517 movdqu 96(%rdi),%xmm8 2518 pxor %xmm0,%xmm2 2519 movdqu 112(%rdi),%xmm9 2520 pxor %xmm0,%xmm3 2521 movups 16-112(%rcx),%xmm1 2522 pxor %xmm0,%xmm4 2523 xorq %r11,%r11 2524 cmpq $112,%rdx 2525 pxor %xmm0,%xmm5 2526 pxor %xmm0,%xmm6 2527 pxor %xmm0,%xmm7 2528 pxor %xmm0,%xmm8 2529 2530 .byte 102,15,56,222,209 2531 pxor %xmm0,%xmm9 2532 movups 32-112(%rcx),%xmm0 2533 .byte 102,15,56,222,217 2534 .byte 102,15,56,222,225 2535 .byte 102,15,56,222,233 2536 .byte 102,15,56,222,241 2537 .byte 102,15,56,222,249 2538 .byte 102,68,15,56,222,193 2539 setnc %r11b 2540 shlq $7,%r11 2541 .byte 102,68,15,56,222,201 2542 addq %rdi,%r11 2543 movups 48-112(%rcx),%xmm1 2544 .byte 102,15,56,222,208 2545 .byte 102,15,56,222,216 2546 .byte 102,15,56,222,224 2547 .byte 102,15,56,222,232 2548 .byte 102,15,56,222,240 2549 .byte 102,15,56,222,248 2550 .byte 102,68,15,56,222,192 2551 .byte 102,68,15,56,222,200 2552 movups 64-112(%rcx),%xmm0 2553 nop 2554 .byte 102,15,56,222,209 2555 .byte 102,15,56,222,217 2556 .byte 102,15,56,222,225 2557 .byte 102,15,56,222,233 2558 .byte 102,15,56,222,241 2559 .byte 102,15,56,222,249 2560 .byte 102,68,15,56,222,193 2561 .byte 102,68,15,56,222,201 2562 movups 80-112(%rcx),%xmm1 2563 nop 2564 .byte 102,15,56,222,208 2565 .byte 102,15,56,222,216 2566 .byte 102,15,56,222,224 2567 .byte 102,15,56,222,232 2568 .byte 102,15,56,222,240 2569 .byte 102,15,56,222,248 2570 .byte 102,68,15,56,222,192 2571 .byte 102,68,15,56,222,200 2572 movups 96-112(%rcx),%xmm0 2573 nop 2574 .byte 102,15,56,222,209 2575 .byte 102,15,56,222,217 2576 .byte 102,15,56,222,225 2577 .byte 102,15,56,222,233 2578 .byte 102,15,56,222,241 2579 .byte 102,15,56,222,249 2580 .byte 102,68,15,56,222,193 2581 .byte 102,68,15,56,222,201 2582 movups 112-112(%rcx),%xmm1 2583 nop 2584 .byte 102,15,56,222,208 2585 .byte
102,15,56,222,216 2586 .byte 102,15,56,222,224 2587 .byte 102,15,56,222,232 2588 .byte 102,15,56,222,240 2589 .byte 102,15,56,222,248 2590 .byte 102,68,15,56,222,192 2591 .byte 102,68,15,56,222,200 2592 movups 128-112(%rcx),%xmm0 2593 nop 2594 .byte 102,15,56,222,209 2595 .byte 102,15,56,222,217 2596 .byte 102,15,56,222,225 2597 .byte 102,15,56,222,233 2598 .byte 102,15,56,222,241 2599 .byte 102,15,56,222,249 2600 .byte 102,68,15,56,222,193 2601 .byte 102,68,15,56,222,201 2602 movups 144-112(%rcx),%xmm1 2603 cmpl $11,%eax 2604 .byte 102,15,56,222,208 2605 .byte 102,15,56,222,216 2606 .byte 102,15,56,222,224 2607 .byte 102,15,56,222,232 2608 .byte 102,15,56,222,240 2609 .byte 102,15,56,222,248 2610 .byte 102,68,15,56,222,192 2611 .byte 102,68,15,56,222,200 2612 movups 160-112(%rcx),%xmm0 2613 jb L$cbc_dec_done 2614 .byte 102,15,56,222,209 2615 .byte 102,15,56,222,217 2616 .byte 102,15,56,222,225 2617 .byte 102,15,56,222,233 2618 .byte 102,15,56,222,241 2619 .byte 102,15,56,222,249 2620 .byte 102,68,15,56,222,193 2621 .byte 102,68,15,56,222,201 2622 movups 176-112(%rcx),%xmm1 2623 nop 2624 .byte 102,15,56,222,208 2625 .byte 102,15,56,222,216 2626 .byte 102,15,56,222,224 2627 .byte 102,15,56,222,232 2628 .byte 102,15,56,222,240 2629 .byte 102,15,56,222,248 2630 .byte 102,68,15,56,222,192 2631 .byte 102,68,15,56,222,200 2632 movups 192-112(%rcx),%xmm0 2633 je L$cbc_dec_done 2634 .byte 102,15,56,222,209 2635 .byte 102,15,56,222,217 2636 .byte 102,15,56,222,225 2637 .byte 102,15,56,222,233 2638 .byte 102,15,56,222,241 2639 .byte 102,15,56,222,249 2640 .byte 102,68,15,56,222,193 2641 .byte 102,68,15,56,222,201 2642 movups 208-112(%rcx),%xmm1 2643 nop 2644 .byte 102,15,56,222,208 2645 .byte 102,15,56,222,216 2646 .byte 102,15,56,222,224 2647 .byte 102,15,56,222,232 2648 .byte 102,15,56,222,240 2649 .byte 102,15,56,222,248 2650 .byte 102,68,15,56,222,192 2651 .byte 102,68,15,56,222,200 2652 movups 224-112(%rcx),%xmm0 2653 jmp L$cbc_dec_done 2654 .p2align 4 2655
# L$cbc_dec_done: last round (aesdeclast = ".byte 102,...,223,xx"), XOR each
# result with the previous ciphertext block (CBC chaining values saved in
# %xmm10-%xmm15 and reloaded via %r11), store 8 outputs, loop while >128
# bytes remain.
L$cbc_dec_done: 2656 .byte 102,15,56,222,209 2657 .byte 102,15,56,222,217 2658 pxor %xmm0,%xmm10 2659 pxor %xmm0,%xmm11 2660 .byte 102,15,56,222,225 2661 .byte 102,15,56,222,233 2662 pxor %xmm0,%xmm12 2663 pxor %xmm0,%xmm13 2664 .byte 102,15,56,222,241 2665 .byte 102,15,56,222,249 2666 pxor %xmm0,%xmm14 2667 pxor %xmm0,%xmm15 2668 .byte 102,68,15,56,222,193 2669 .byte 102,68,15,56,222,201 2670 movdqu 80(%rdi),%xmm1 2671 2672 .byte 102,65,15,56,223,210 2673 movdqu 96(%rdi),%xmm10 2674 pxor %xmm0,%xmm1 2675 .byte 102,65,15,56,223,219 2676 pxor %xmm0,%xmm10 2677 movdqu 112(%rdi),%xmm0 2678 .byte 102,65,15,56,223,228 2679 leaq 128(%rdi),%rdi 2680 movdqu 0(%r11),%xmm11 2681 .byte 102,65,15,56,223,237 2682 .byte 102,65,15,56,223,246 2683 movdqu 16(%r11),%xmm12 2684 movdqu 32(%r11),%xmm13 2685 .byte 102,65,15,56,223,255 2686 .byte 102,68,15,56,223,193 2687 movdqu 48(%r11),%xmm14 2688 movdqu 64(%r11),%xmm15 2689 .byte 102,69,15,56,223,202 2690 movdqa %xmm0,%xmm10 2691 movdqu 80(%r11),%xmm1 2692 movups -112(%rcx),%xmm0 2693 2694 movups %xmm2,(%rsi) 2695 movdqa %xmm11,%xmm2 2696 movups %xmm3,16(%rsi) 2697 movdqa %xmm12,%xmm3 2698 movups %xmm4,32(%rsi) 2699 movdqa %xmm13,%xmm4 2700 movups %xmm5,48(%rsi) 2701 movdqa %xmm14,%xmm5 2702 movups %xmm6,64(%rsi) 2703 movdqa %xmm15,%xmm6 2704 movups %xmm7,80(%rsi) 2705 movdqa %xmm1,%xmm7 2706 movups %xmm8,96(%rsi) 2707 leaq 112(%rsi),%rsi 2708 2709 subq $128,%rdx 2710 ja L$cbc_dec_loop8 2711 2712 movaps %xmm9,%xmm2 2713 leaq -112(%rcx),%rcx 2714 addq $112,%rdx 2715 jle L$cbc_dec_tail_collected 2716 movups %xmm9,(%rsi) 2717 leaq 16(%rsi),%rsi 2718 cmpq $80,%rdx 2719 jbe L$cbc_dec_tail 2720 2721 movaps %xmm11,%xmm2 2722 L$cbc_dec_six_or_seven: 2723 cmpq $96,%rdx 2724 ja L$cbc_dec_seven 2725 2726 movaps %xmm7,%xmm8 2727 call _aesni_decrypt6 2728 pxor %xmm10,%xmm2 2729 movaps %xmm8,%xmm10 2730 pxor %xmm11,%xmm3 2731 movdqu %xmm2,(%rsi) 2732 pxor %xmm12,%xmm4 2733 movdqu %xmm3,16(%rsi) 2734 pxor %xmm13,%xmm5 2735 movdqu %xmm4,32(%rsi) 2736
pxor %xmm14,%xmm6 2737 movdqu %xmm5,48(%rsi) 2738 pxor %xmm15,%xmm7 2739 movdqu %xmm6,64(%rsi) 2740 leaq 80(%rsi),%rsi 2741 movdqa %xmm7,%xmm2 2742 jmp L$cbc_dec_tail_collected 2743 2744 .p2align 4 2745 L$cbc_dec_seven: 2746 movups 96(%rdi),%xmm8 2747 xorps %xmm9,%xmm9 2748 call _aesni_decrypt8 2749 movups 80(%rdi),%xmm9 2750 pxor %xmm10,%xmm2 2751 movups 96(%rdi),%xmm10 2752 pxor %xmm11,%xmm3 2753 movdqu %xmm2,(%rsi) 2754 pxor %xmm12,%xmm4 2755 movdqu %xmm3,16(%rsi) 2756 pxor %xmm13,%xmm5 2757 movdqu %xmm4,32(%rsi) 2758 pxor %xmm14,%xmm6 2759 movdqu %xmm5,48(%rsi) 2760 pxor %xmm15,%xmm7 2761 movdqu %xmm6,64(%rsi) 2762 pxor %xmm9,%xmm8 2763 movdqu %xmm7,80(%rsi) 2764 leaq 96(%rsi),%rsi 2765 movdqa %xmm8,%xmm2 2766 jmp L$cbc_dec_tail_collected 2767 2768 .p2align 4 2769 L$cbc_dec_loop6: 2770 movups %xmm7,(%rsi) 2771 leaq 16(%rsi),%rsi 2772 movdqu 0(%rdi),%xmm2 2773 movdqu 16(%rdi),%xmm3 2774 movdqa %xmm2,%xmm11 2775 movdqu 32(%rdi),%xmm4 2776 movdqa %xmm3,%xmm12 2777 movdqu 48(%rdi),%xmm5 2778 movdqa %xmm4,%xmm13 2779 movdqu 64(%rdi),%xmm6 2780 movdqa %xmm5,%xmm14 2781 movdqu 80(%rdi),%xmm7 2782 movdqa %xmm6,%xmm15 2783 L$cbc_dec_loop6_enter: 2784 leaq 96(%rdi),%rdi 2785 movdqa %xmm7,%xmm8 2786 2787 call _aesni_decrypt6 2788 2789 pxor %xmm10,%xmm2 2790 movdqa %xmm8,%xmm10 2791 pxor %xmm11,%xmm3 2792 movdqu %xmm2,(%rsi) 2793 pxor %xmm12,%xmm4 2794 movdqu %xmm3,16(%rsi) 2795 pxor %xmm13,%xmm5 2796 movdqu %xmm4,32(%rsi) 2797 pxor %xmm14,%xmm6 2798 movq %r11,%rcx 2799 movdqu %xmm5,48(%rsi) 2800 pxor %xmm15,%xmm7 2801 movl %r10d,%eax 2802 movdqu %xmm6,64(%rsi) 2803 leaq 80(%rsi),%rsi 2804 subq $96,%rdx 2805 ja L$cbc_dec_loop6 2806 2807 movdqa %xmm7,%xmm2 2808 addq $80,%rdx 2809 jle L$cbc_dec_tail_collected 2810 movups %xmm7,(%rsi) 2811 leaq 16(%rsi),%rsi 2812 2813 L$cbc_dec_tail: 2814 movups (%rdi),%xmm2 2815 subq $16,%rdx 2816 jbe L$cbc_dec_one 2817 2818 movups 16(%rdi),%xmm3 2819 movaps %xmm2,%xmm11 2820 subq $16,%rdx 2821 jbe L$cbc_dec_two 2822 2823 movups
32(%rdi),%xmm4 2824 movaps %xmm3,%xmm12 2825 subq $16,%rdx 2826 jbe L$cbc_dec_three 2827 2828 movups 48(%rdi),%xmm5 2829 movaps %xmm4,%xmm13 2830 subq $16,%rdx 2831 jbe L$cbc_dec_four 2832 2833 movups 64(%rdi),%xmm6 2834 movaps %xmm5,%xmm14 2835 movaps %xmm6,%xmm15 2836 xorps %xmm7,%xmm7 2837 call _aesni_decrypt6 2838 pxor %xmm10,%xmm2 2839 movaps %xmm15,%xmm10 2840 pxor %xmm11,%xmm3 2841 movdqu %xmm2,(%rsi) 2842 pxor %xmm12,%xmm4 2843 movdqu %xmm3,16(%rsi) 2844 pxor %xmm13,%xmm5 2845 movdqu %xmm4,32(%rsi) 2846 pxor %xmm14,%xmm6 2847 movdqu %xmm5,48(%rsi) 2848 leaq 64(%rsi),%rsi 2849 movdqa %xmm6,%xmm2 2850 subq $16,%rdx 2851 jmp L$cbc_dec_tail_collected 2852 2853 .p2align 4 2854 L$cbc_dec_one: 2855 movaps %xmm2,%xmm11 2856 movups (%rcx),%xmm0 2857 movups 16(%rcx),%xmm1 2858 leaq 32(%rcx),%rcx 2859 xorps %xmm0,%xmm2 2860 L$oop_dec1_16: 2861 .byte 102,15,56,222,209 2862 decl %eax 2863 movups (%rcx),%xmm1 2864 leaq 16(%rcx),%rcx 2865 jnz L$oop_dec1_16 2866 .byte 102,15,56,223,209 2867 xorps %xmm10,%xmm2 2868 movaps %xmm11,%xmm10 2869 jmp L$cbc_dec_tail_collected 2870 .p2align 4 2871 L$cbc_dec_two: 2872 movaps %xmm3,%xmm12 2873 call _aesni_decrypt2 2874 pxor %xmm10,%xmm2 2875 movaps %xmm12,%xmm10 2876 pxor %xmm11,%xmm3 2877 movdqu %xmm2,(%rsi) 2878 movdqa %xmm3,%xmm2 2879 leaq 16(%rsi),%rsi 2880 jmp L$cbc_dec_tail_collected 2881 .p2align 4 2882 L$cbc_dec_three: 2883 movaps %xmm4,%xmm13 2884 call _aesni_decrypt3 2885 pxor %xmm10,%xmm2 2886 movaps %xmm13,%xmm10 2887 pxor %xmm11,%xmm3 2888 movdqu %xmm2,(%rsi) 2889 pxor %xmm12,%xmm4 2890 movdqu %xmm3,16(%rsi) 2891 movdqa %xmm4,%xmm2 2892 leaq 32(%rsi),%rsi 2893 jmp L$cbc_dec_tail_collected 2894 .p2align 4 2895 L$cbc_dec_four: 2896 movaps %xmm5,%xmm14 2897 call _aesni_decrypt4 2898 pxor %xmm10,%xmm2 2899 movaps %xmm14,%xmm10 2900 pxor %xmm11,%xmm3 2901 movdqu %xmm2,(%rsi) 2902 pxor %xmm12,%xmm4 2903 movdqu %xmm3,16(%rsi) 2904 pxor %xmm13,%xmm5 2905 movdqu %xmm4,32(%rsi) 2906 movdqa %xmm5,%xmm2 2907 leaq 48(%rsi),%rsi 2908
# CBC-decrypt tail dispatch (1..5 remaining blocks) handled above; the
# tail-collected / partial-block store paths and the epilogue continue in
# the listing below.
jmp L$cbc_dec_tail_collected 2909 2910 .p2align 4 2911 L$cbc_dec_tail_collected: 2912 movups %xmm10,(%r8) 2913 andq $15,%rdx 2914 jnz L$cbc_dec_tail_partial 2915 movups %xmm2,(%rsi) 2916 jmp L$cbc_dec_ret 2917 .p2align 4 2918 L$cbc_dec_tail_partial: 2919 movaps %xmm2,(%rsp) 2920 movq $16,%rcx 2921 movq %rsi,%rdi 2922 subq %rdx,%rcx 2923 leaq (%rsp),%rsi 2924 .long 0x9066A4F3 2925 2926 L$cbc_dec_ret: 2927 leaq (%rbp),%rsp 2928 popq %rbp 2929 L$cbc_ret: 2930 .byte 0xf3,0xc3 2931 2932 .globl _aesni_set_decrypt_key 2933 .private_extern _aesni_set_decrypt_key 2934 2935 .p2align 4 2936 _aesni_set_decrypt_key: 2937 .byte 0x48,0x83,0xEC,0x08 2938 call __aesni_set_encrypt_key 2939 shll $4,%esi 2940 testl %eax,%eax 2941 jnz L$dec_key_ret 2942 leaq 16(%rdx,%rsi,1),%rdi 2943 2944 movups (%rdx),%xmm0 2945 movups (%rdi),%xmm1 2946 movups %xmm0,(%rdi) 2947 movups %xmm1,(%rdx) 2948 leaq 16(%rdx),%rdx 2949 leaq -16(%rdi),%rdi 2950 2951 L$dec_key_inverse: 2952 movups (%rdx),%xmm0 2953 movups (%rdi),%xmm1 2954 .byte 102,15,56,219,192 2955 .byte 102,15,56,219,201 2956 leaq 16(%rdx),%rdx 2957 leaq -16(%rdi),%rdi 2958 movups %xmm0,16(%rdi) 2959 movups %xmm1,-16(%rdx) 2960 cmpq %rdx,%rdi 2961 ja L$dec_key_inverse 2962 2963 movups (%rdx),%xmm0 2964 .byte 102,15,56,219,192 2965 movups %xmm0,(%rdi) 2966 L$dec_key_ret: 2967 addq $8,%rsp 2968 .byte 0xf3,0xc3 2969 L$SEH_end_set_decrypt_key: 2970 2971 .globl _aesni_set_encrypt_key 2972 .private_extern _aesni_set_encrypt_key 2973 2974 .p2align 4 2975 _aesni_set_encrypt_key: 2976 __aesni_set_encrypt_key: 2977 .byte 0x48,0x83,0xEC,0x08 2978 movq $-1,%rax 2979 testq %rdi,%rdi 2980 jz L$enc_key_ret 2981 testq %rdx,%rdx 2982 jz L$enc_key_ret 2983 2984 movups (%rdi),%xmm0 2985 xorps %xmm4,%xmm4 2986 leaq 16(%rdx),%rax 2987 cmpl $256,%esi 2988 je L$14rounds 2989 cmpl $192,%esi 2990 je L$12rounds 2991 cmpl $128,%esi 2992 jne L$bad_keybits 2993 2994 L$10rounds: 2995 movl $9,%esi 2996 movups %xmm0,(%rdx) 2997 .byte 102,15,58,223,200,1 2998 call 
L$key_expansion_128_cold 2999 .byte 102,15,58,223,200,2 3000 call L$key_expansion_128 3001 .byte 102,15,58,223,200,4 3002 call L$key_expansion_128 3003 .byte 102,15,58,223,200,8 3004 call L$key_expansion_128 3005 .byte 102,15,58,223,200,16 3006 call L$key_expansion_128 3007 .byte 102,15,58,223,200,32 3008 call L$key_expansion_128 3009 .byte 102,15,58,223,200,64 3010 call L$key_expansion_128 3011 .byte 102,15,58,223,200,128 3012 call L$key_expansion_128 3013 .byte 102,15,58,223,200,27 3014 call L$key_expansion_128 3015 .byte 102,15,58,223,200,54 3016 call L$key_expansion_128 3017 movups %xmm0,(%rax) 3018 movl %esi,80(%rax) 3019 xorl %eax,%eax 3020 jmp L$enc_key_ret 3021 3022 .p2align 4 3023 L$12rounds: 3024 movq 16(%rdi),%xmm2 3025 movl $11,%esi 3026 movups %xmm0,(%rdx) 3027 .byte 102,15,58,223,202,1 3028 call L$key_expansion_192a_cold 3029 .byte 102,15,58,223,202,2 3030 call L$key_expansion_192b 3031 .byte 102,15,58,223,202,4 3032 call L$key_expansion_192a 3033 .byte 102,15,58,223,202,8 3034 call L$key_expansion_192b 3035 .byte 102,15,58,223,202,16 3036 call L$key_expansion_192a 3037 .byte 102,15,58,223,202,32 3038 call L$key_expansion_192b 3039 .byte 102,15,58,223,202,64 3040 call L$key_expansion_192a 3041 .byte 102,15,58,223,202,128 3042 call L$key_expansion_192b 3043 movups %xmm0,(%rax) 3044 movl %esi,48(%rax) 3045 xorq %rax,%rax 3046 jmp L$enc_key_ret 3047 3048 .p2align 4 3049 L$14rounds: 3050 movups 16(%rdi),%xmm2 3051 movl $13,%esi 3052 leaq 16(%rax),%rax 3053 movups %xmm0,(%rdx) 3054 movups %xmm2,16(%rdx) 3055 .byte 102,15,58,223,202,1 3056 call L$key_expansion_256a_cold 3057 .byte 102,15,58,223,200,1 3058 call L$key_expansion_256b 3059 .byte 102,15,58,223,202,2 3060 call L$key_expansion_256a 3061 .byte 102,15,58,223,200,2 3062 call L$key_expansion_256b 3063 .byte 102,15,58,223,202,4 3064 call L$key_expansion_256a 3065 .byte 102,15,58,223,200,4 3066 call L$key_expansion_256b 3067 .byte 102,15,58,223,202,8 3068 call L$key_expansion_256a 3069 .byte 
102,15,58,223,200,8 3070 call L$key_expansion_256b 3071 .byte 102,15,58,223,202,16 3072 call L$key_expansion_256a 3073 .byte 102,15,58,223,200,16 3074 call L$key_expansion_256b 3075 .byte 102,15,58,223,202,32 3076 call L$key_expansion_256a 3077 .byte 102,15,58,223,200,32 3078 call L$key_expansion_256b 3079 .byte 102,15,58,223,202,64 3080 call L$key_expansion_256a 3081 movups %xmm0,(%rax) 3082 movl %esi,16(%rax) 3083 xorq %rax,%rax 3084 jmp L$enc_key_ret 3085 3086 .p2align 4 3087 L$bad_keybits: 3088 movq $-2,%rax 3089 L$enc_key_ret: 3090 addq $8,%rsp 3091 .byte 0xf3,0xc3 3092 L$SEH_end_set_encrypt_key: 3093 3094 .p2align 4 3095 L$key_expansion_128: 3096 movups %xmm0,(%rax) 3097 leaq 16(%rax),%rax 3098 L$key_expansion_128_cold: 3099 shufps $16,%xmm0,%xmm4 3100 xorps %xmm4,%xmm0 3101 shufps $140,%xmm0,%xmm4 3102 xorps %xmm4,%xmm0 3103 shufps $255,%xmm1,%xmm1 3104 xorps %xmm1,%xmm0 3105 .byte 0xf3,0xc3 3106 3107 .p2align 4 3108 L$key_expansion_192a: 3109 movups %xmm0,(%rax) 3110 leaq 16(%rax),%rax 3111 L$key_expansion_192a_cold: 3112 movaps %xmm2,%xmm5 3113 L$key_expansion_192b_warm: 3114 shufps $16,%xmm0,%xmm4 3115 movdqa %xmm2,%xmm3 3116 xorps %xmm4,%xmm0 3117 shufps $140,%xmm0,%xmm4 3118 pslldq $4,%xmm3 3119 xorps %xmm4,%xmm0 3120 pshufd $85,%xmm1,%xmm1 3121 pxor %xmm3,%xmm2 3122 pxor %xmm1,%xmm0 3123 pshufd $255,%xmm0,%xmm3 3124 pxor %xmm3,%xmm2 3125 .byte 0xf3,0xc3 3126 3127 .p2align 4 3128 L$key_expansion_192b: 3129 movaps %xmm0,%xmm3 3130 shufps $68,%xmm0,%xmm5 3131 movups %xmm5,(%rax) 3132 shufps $78,%xmm2,%xmm3 3133 movups %xmm3,16(%rax) 3134 leaq 32(%rax),%rax 3135 jmp L$key_expansion_192b_warm 3136 3137 .p2align 4 3138 L$key_expansion_256a: 3139 movups %xmm2,(%rax) 3140 leaq 16(%rax),%rax 3141 L$key_expansion_256a_cold: 3142 shufps $16,%xmm0,%xmm4 3143 xorps %xmm4,%xmm0 3144 shufps $140,%xmm0,%xmm4 3145 xorps %xmm4,%xmm0 3146 shufps $255,%xmm1,%xmm1 3147 xorps %xmm1,%xmm0 3148 .byte 0xf3,0xc3 3149 3150 .p2align 4 3151 L$key_expansion_256b: 3152 movups 
%xmm0,(%rax) 3153 leaq 16(%rax),%rax 3154 3155 shufps $16,%xmm2,%xmm4 3156 xorps %xmm4,%xmm2 3157 shufps $140,%xmm2,%xmm4 3158 xorps %xmm4,%xmm2 3159 shufps $170,%xmm1,%xmm1 3160 xorps %xmm1,%xmm2 3161 .byte 0xf3,0xc3 3162 3163 3164 .p2align 6 3165 L$bswap_mask: 3166 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 3167 L$increment32: 3168 .long 6,6,6,0 3169 L$increment64: 3170 .long 1,0,0,0 3171 L$xts_magic: 3172 .long 0x87,0,1,0 3173 L$increment1: 3174 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 3175 3176 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3177 .p2align 6 3178 #endif 3179