1 #if defined(__x86_64__) 2 .text 3 4 .globl _aesni_encrypt 5 .private_extern _aesni_encrypt 6 7 .p2align 4 8 _aesni_encrypt: 9 movups (%rdi),%xmm2 10 movl 240(%rdx),%eax 11 movups (%rdx),%xmm0 12 movups 16(%rdx),%xmm1 13 leaq 32(%rdx),%rdx 14 xorps %xmm0,%xmm2 15 L$oop_enc1_1: 16 .byte 102,15,56,220,209 17 decl %eax 18 movups (%rdx),%xmm1 19 leaq 16(%rdx),%rdx 20 jnz L$oop_enc1_1 21 .byte 102,15,56,221,209 22 pxor %xmm0,%xmm0 23 pxor %xmm1,%xmm1 24 movups %xmm2,(%rsi) 25 pxor %xmm2,%xmm2 26 .byte 0xf3,0xc3 27 28 29 .globl _aesni_decrypt 30 .private_extern _aesni_decrypt 31 32 .p2align 4 33 _aesni_decrypt: 34 movups (%rdi),%xmm2 35 movl 240(%rdx),%eax 36 movups (%rdx),%xmm0 37 movups 16(%rdx),%xmm1 38 leaq 32(%rdx),%rdx 39 xorps %xmm0,%xmm2 40 L$oop_dec1_2: 41 .byte 102,15,56,222,209 42 decl %eax 43 movups (%rdx),%xmm1 44 leaq 16(%rdx),%rdx 45 jnz L$oop_dec1_2 46 .byte 102,15,56,223,209 47 pxor %xmm0,%xmm0 48 pxor %xmm1,%xmm1 49 movups %xmm2,(%rsi) 50 pxor %xmm2,%xmm2 51 .byte 0xf3,0xc3 52 53 54 .p2align 4 55 _aesni_encrypt2: 56 movups (%rcx),%xmm0 57 shll $4,%eax 58 movups 16(%rcx),%xmm1 59 xorps %xmm0,%xmm2 60 xorps %xmm0,%xmm3 61 movups 32(%rcx),%xmm0 62 leaq 32(%rcx,%rax,1),%rcx 63 negq %rax 64 addq $16,%rax 65 66 L$enc_loop2: 67 .byte 102,15,56,220,209 68 .byte 102,15,56,220,217 69 movups (%rcx,%rax,1),%xmm1 70 addq $32,%rax 71 .byte 102,15,56,220,208 72 .byte 102,15,56,220,216 73 movups -16(%rcx,%rax,1),%xmm0 74 jnz L$enc_loop2 75 76 .byte 102,15,56,220,209 77 .byte 102,15,56,220,217 78 .byte 102,15,56,221,208 79 .byte 102,15,56,221,216 80 .byte 0xf3,0xc3 81 82 83 .p2align 4 84 _aesni_decrypt2: 85 movups (%rcx),%xmm0 86 shll $4,%eax 87 movups 16(%rcx),%xmm1 88 xorps %xmm0,%xmm2 89 xorps %xmm0,%xmm3 90 movups 32(%rcx),%xmm0 91 leaq 32(%rcx,%rax,1),%rcx 92 negq %rax 93 addq $16,%rax 94 95 L$dec_loop2: 96 .byte 102,15,56,222,209 97 .byte 102,15,56,222,217 98 movups (%rcx,%rax,1),%xmm1 99 addq $32,%rax 100 .byte 102,15,56,222,208 101 .byte 102,15,56,222,216 102 
movups -16(%rcx,%rax,1),%xmm0 103 jnz L$dec_loop2 104 105 .byte 102,15,56,222,209 106 .byte 102,15,56,222,217 107 .byte 102,15,56,223,208 108 .byte 102,15,56,223,216 109 .byte 0xf3,0xc3 110 111 112 .p2align 4 113 _aesni_encrypt3: 114 movups (%rcx),%xmm0 115 shll $4,%eax 116 movups 16(%rcx),%xmm1 117 xorps %xmm0,%xmm2 118 xorps %xmm0,%xmm3 119 xorps %xmm0,%xmm4 120 movups 32(%rcx),%xmm0 121 leaq 32(%rcx,%rax,1),%rcx 122 negq %rax 123 addq $16,%rax 124 125 L$enc_loop3: 126 .byte 102,15,56,220,209 127 .byte 102,15,56,220,217 128 .byte 102,15,56,220,225 129 movups (%rcx,%rax,1),%xmm1 130 addq $32,%rax 131 .byte 102,15,56,220,208 132 .byte 102,15,56,220,216 133 .byte 102,15,56,220,224 134 movups -16(%rcx,%rax,1),%xmm0 135 jnz L$enc_loop3 136 137 .byte 102,15,56,220,209 138 .byte 102,15,56,220,217 139 .byte 102,15,56,220,225 140 .byte 102,15,56,221,208 141 .byte 102,15,56,221,216 142 .byte 102,15,56,221,224 143 .byte 0xf3,0xc3 144 145 146 .p2align 4 147 _aesni_decrypt3: 148 movups (%rcx),%xmm0 149 shll $4,%eax 150 movups 16(%rcx),%xmm1 151 xorps %xmm0,%xmm2 152 xorps %xmm0,%xmm3 153 xorps %xmm0,%xmm4 154 movups 32(%rcx),%xmm0 155 leaq 32(%rcx,%rax,1),%rcx 156 negq %rax 157 addq $16,%rax 158 159 L$dec_loop3: 160 .byte 102,15,56,222,209 161 .byte 102,15,56,222,217 162 .byte 102,15,56,222,225 163 movups (%rcx,%rax,1),%xmm1 164 addq $32,%rax 165 .byte 102,15,56,222,208 166 .byte 102,15,56,222,216 167 .byte 102,15,56,222,224 168 movups -16(%rcx,%rax,1),%xmm0 169 jnz L$dec_loop3 170 171 .byte 102,15,56,222,209 172 .byte 102,15,56,222,217 173 .byte 102,15,56,222,225 174 .byte 102,15,56,223,208 175 .byte 102,15,56,223,216 176 .byte 102,15,56,223,224 177 .byte 0xf3,0xc3 178 179 180 .p2align 4 181 _aesni_encrypt4: 182 movups (%rcx),%xmm0 183 shll $4,%eax 184 movups 16(%rcx),%xmm1 185 xorps %xmm0,%xmm2 186 xorps %xmm0,%xmm3 187 xorps %xmm0,%xmm4 188 xorps %xmm0,%xmm5 189 movups 32(%rcx),%xmm0 190 leaq 32(%rcx,%rax,1),%rcx 191 negq %rax 192 .byte 0x0f,0x1f,0x00 193 addq $16,%rax 194 
195 L$enc_loop4: 196 .byte 102,15,56,220,209 197 .byte 102,15,56,220,217 198 .byte 102,15,56,220,225 199 .byte 102,15,56,220,233 200 movups (%rcx,%rax,1),%xmm1 201 addq $32,%rax 202 .byte 102,15,56,220,208 203 .byte 102,15,56,220,216 204 .byte 102,15,56,220,224 205 .byte 102,15,56,220,232 206 movups -16(%rcx,%rax,1),%xmm0 207 jnz L$enc_loop4 208 209 .byte 102,15,56,220,209 210 .byte 102,15,56,220,217 211 .byte 102,15,56,220,225 212 .byte 102,15,56,220,233 213 .byte 102,15,56,221,208 214 .byte 102,15,56,221,216 215 .byte 102,15,56,221,224 216 .byte 102,15,56,221,232 217 .byte 0xf3,0xc3 218 219 220 .p2align 4 221 _aesni_decrypt4: 222 movups (%rcx),%xmm0 223 shll $4,%eax 224 movups 16(%rcx),%xmm1 225 xorps %xmm0,%xmm2 226 xorps %xmm0,%xmm3 227 xorps %xmm0,%xmm4 228 xorps %xmm0,%xmm5 229 movups 32(%rcx),%xmm0 230 leaq 32(%rcx,%rax,1),%rcx 231 negq %rax 232 .byte 0x0f,0x1f,0x00 233 addq $16,%rax 234 235 L$dec_loop4: 236 .byte 102,15,56,222,209 237 .byte 102,15,56,222,217 238 .byte 102,15,56,222,225 239 .byte 102,15,56,222,233 240 movups (%rcx,%rax,1),%xmm1 241 addq $32,%rax 242 .byte 102,15,56,222,208 243 .byte 102,15,56,222,216 244 .byte 102,15,56,222,224 245 .byte 102,15,56,222,232 246 movups -16(%rcx,%rax,1),%xmm0 247 jnz L$dec_loop4 248 249 .byte 102,15,56,222,209 250 .byte 102,15,56,222,217 251 .byte 102,15,56,222,225 252 .byte 102,15,56,222,233 253 .byte 102,15,56,223,208 254 .byte 102,15,56,223,216 255 .byte 102,15,56,223,224 256 .byte 102,15,56,223,232 257 .byte 0xf3,0xc3 258 259 260 .p2align 4 261 _aesni_encrypt6: 262 movups (%rcx),%xmm0 263 shll $4,%eax 264 movups 16(%rcx),%xmm1 265 xorps %xmm0,%xmm2 266 pxor %xmm0,%xmm3 267 pxor %xmm0,%xmm4 268 .byte 102,15,56,220,209 269 leaq 32(%rcx,%rax,1),%rcx 270 negq %rax 271 .byte 102,15,56,220,217 272 pxor %xmm0,%xmm5 273 pxor %xmm0,%xmm6 274 .byte 102,15,56,220,225 275 pxor %xmm0,%xmm7 276 movups (%rcx,%rax,1),%xmm0 277 addq $16,%rax 278 jmp L$enc_loop6_enter 279 .p2align 4 280 L$enc_loop6: 281 .byte 
102,15,56,220,209 282 .byte 102,15,56,220,217 283 .byte 102,15,56,220,225 284 L$enc_loop6_enter: 285 .byte 102,15,56,220,233 286 .byte 102,15,56,220,241 287 .byte 102,15,56,220,249 288 movups (%rcx,%rax,1),%xmm1 289 addq $32,%rax 290 .byte 102,15,56,220,208 291 .byte 102,15,56,220,216 292 .byte 102,15,56,220,224 293 .byte 102,15,56,220,232 294 .byte 102,15,56,220,240 295 .byte 102,15,56,220,248 296 movups -16(%rcx,%rax,1),%xmm0 297 jnz L$enc_loop6 298 299 .byte 102,15,56,220,209 300 .byte 102,15,56,220,217 301 .byte 102,15,56,220,225 302 .byte 102,15,56,220,233 303 .byte 102,15,56,220,241 304 .byte 102,15,56,220,249 305 .byte 102,15,56,221,208 306 .byte 102,15,56,221,216 307 .byte 102,15,56,221,224 308 .byte 102,15,56,221,232 309 .byte 102,15,56,221,240 310 .byte 102,15,56,221,248 311 .byte 0xf3,0xc3 312 313 314 .p2align 4 315 _aesni_decrypt6: 316 movups (%rcx),%xmm0 317 shll $4,%eax 318 movups 16(%rcx),%xmm1 319 xorps %xmm0,%xmm2 320 pxor %xmm0,%xmm3 321 pxor %xmm0,%xmm4 322 .byte 102,15,56,222,209 323 leaq 32(%rcx,%rax,1),%rcx 324 negq %rax 325 .byte 102,15,56,222,217 326 pxor %xmm0,%xmm5 327 pxor %xmm0,%xmm6 328 .byte 102,15,56,222,225 329 pxor %xmm0,%xmm7 330 movups (%rcx,%rax,1),%xmm0 331 addq $16,%rax 332 jmp L$dec_loop6_enter 333 .p2align 4 334 L$dec_loop6: 335 .byte 102,15,56,222,209 336 .byte 102,15,56,222,217 337 .byte 102,15,56,222,225 338 L$dec_loop6_enter: 339 .byte 102,15,56,222,233 340 .byte 102,15,56,222,241 341 .byte 102,15,56,222,249 342 movups (%rcx,%rax,1),%xmm1 343 addq $32,%rax 344 .byte 102,15,56,222,208 345 .byte 102,15,56,222,216 346 .byte 102,15,56,222,224 347 .byte 102,15,56,222,232 348 .byte 102,15,56,222,240 349 .byte 102,15,56,222,248 350 movups -16(%rcx,%rax,1),%xmm0 351 jnz L$dec_loop6 352 353 .byte 102,15,56,222,209 354 .byte 102,15,56,222,217 355 .byte 102,15,56,222,225 356 .byte 102,15,56,222,233 357 .byte 102,15,56,222,241 358 .byte 102,15,56,222,249 359 .byte 102,15,56,223,208 360 .byte 102,15,56,223,216 361 .byte 
102,15,56,223,224 362 .byte 102,15,56,223,232 363 .byte 102,15,56,223,240 364 .byte 102,15,56,223,248 365 .byte 0xf3,0xc3 366 367 368 .p2align 4 369 _aesni_encrypt8: 370 movups (%rcx),%xmm0 371 shll $4,%eax 372 movups 16(%rcx),%xmm1 373 xorps %xmm0,%xmm2 374 xorps %xmm0,%xmm3 375 pxor %xmm0,%xmm4 376 pxor %xmm0,%xmm5 377 pxor %xmm0,%xmm6 378 leaq 32(%rcx,%rax,1),%rcx 379 negq %rax 380 .byte 102,15,56,220,209 381 pxor %xmm0,%xmm7 382 pxor %xmm0,%xmm8 383 .byte 102,15,56,220,217 384 pxor %xmm0,%xmm9 385 movups (%rcx,%rax,1),%xmm0 386 addq $16,%rax 387 jmp L$enc_loop8_inner 388 .p2align 4 389 L$enc_loop8: 390 .byte 102,15,56,220,209 391 .byte 102,15,56,220,217 392 L$enc_loop8_inner: 393 .byte 102,15,56,220,225 394 .byte 102,15,56,220,233 395 .byte 102,15,56,220,241 396 .byte 102,15,56,220,249 397 .byte 102,68,15,56,220,193 398 .byte 102,68,15,56,220,201 399 L$enc_loop8_enter: 400 movups (%rcx,%rax,1),%xmm1 401 addq $32,%rax 402 .byte 102,15,56,220,208 403 .byte 102,15,56,220,216 404 .byte 102,15,56,220,224 405 .byte 102,15,56,220,232 406 .byte 102,15,56,220,240 407 .byte 102,15,56,220,248 408 .byte 102,68,15,56,220,192 409 .byte 102,68,15,56,220,200 410 movups -16(%rcx,%rax,1),%xmm0 411 jnz L$enc_loop8 412 413 .byte 102,15,56,220,209 414 .byte 102,15,56,220,217 415 .byte 102,15,56,220,225 416 .byte 102,15,56,220,233 417 .byte 102,15,56,220,241 418 .byte 102,15,56,220,249 419 .byte 102,68,15,56,220,193 420 .byte 102,68,15,56,220,201 421 .byte 102,15,56,221,208 422 .byte 102,15,56,221,216 423 .byte 102,15,56,221,224 424 .byte 102,15,56,221,232 425 .byte 102,15,56,221,240 426 .byte 102,15,56,221,248 427 .byte 102,68,15,56,221,192 428 .byte 102,68,15,56,221,200 429 .byte 0xf3,0xc3 430 431 432 .p2align 4 433 _aesni_decrypt8: 434 movups (%rcx),%xmm0 435 shll $4,%eax 436 movups 16(%rcx),%xmm1 437 xorps %xmm0,%xmm2 438 xorps %xmm0,%xmm3 439 pxor %xmm0,%xmm4 440 pxor %xmm0,%xmm5 441 pxor %xmm0,%xmm6 442 leaq 32(%rcx,%rax,1),%rcx 443 negq %rax 444 .byte 102,15,56,222,209 445 
pxor %xmm0,%xmm7 446 pxor %xmm0,%xmm8 447 .byte 102,15,56,222,217 448 pxor %xmm0,%xmm9 449 movups (%rcx,%rax,1),%xmm0 450 addq $16,%rax 451 jmp L$dec_loop8_inner 452 .p2align 4 453 L$dec_loop8: 454 .byte 102,15,56,222,209 455 .byte 102,15,56,222,217 456 L$dec_loop8_inner: 457 .byte 102,15,56,222,225 458 .byte 102,15,56,222,233 459 .byte 102,15,56,222,241 460 .byte 102,15,56,222,249 461 .byte 102,68,15,56,222,193 462 .byte 102,68,15,56,222,201 463 L$dec_loop8_enter: 464 movups (%rcx,%rax,1),%xmm1 465 addq $32,%rax 466 .byte 102,15,56,222,208 467 .byte 102,15,56,222,216 468 .byte 102,15,56,222,224 469 .byte 102,15,56,222,232 470 .byte 102,15,56,222,240 471 .byte 102,15,56,222,248 472 .byte 102,68,15,56,222,192 473 .byte 102,68,15,56,222,200 474 movups -16(%rcx,%rax,1),%xmm0 475 jnz L$dec_loop8 476 477 .byte 102,15,56,222,209 478 .byte 102,15,56,222,217 479 .byte 102,15,56,222,225 480 .byte 102,15,56,222,233 481 .byte 102,15,56,222,241 482 .byte 102,15,56,222,249 483 .byte 102,68,15,56,222,193 484 .byte 102,68,15,56,222,201 485 .byte 102,15,56,223,208 486 .byte 102,15,56,223,216 487 .byte 102,15,56,223,224 488 .byte 102,15,56,223,232 489 .byte 102,15,56,223,240 490 .byte 102,15,56,223,248 491 .byte 102,68,15,56,223,192 492 .byte 102,68,15,56,223,200 493 .byte 0xf3,0xc3 494 495 .globl _aesni_ecb_encrypt 496 .private_extern _aesni_ecb_encrypt 497 498 .p2align 4 499 _aesni_ecb_encrypt: 500 andq $-16,%rdx 501 jz L$ecb_ret 502 503 movl 240(%rcx),%eax 504 movups (%rcx),%xmm0 505 movq %rcx,%r11 506 movl %eax,%r10d 507 testl %r8d,%r8d 508 jz L$ecb_decrypt 509 510 cmpq $0x80,%rdx 511 jb L$ecb_enc_tail 512 513 movdqu (%rdi),%xmm2 514 movdqu 16(%rdi),%xmm3 515 movdqu 32(%rdi),%xmm4 516 movdqu 48(%rdi),%xmm5 517 movdqu 64(%rdi),%xmm6 518 movdqu 80(%rdi),%xmm7 519 movdqu 96(%rdi),%xmm8 520 movdqu 112(%rdi),%xmm9 521 leaq 128(%rdi),%rdi 522 subq $0x80,%rdx 523 jmp L$ecb_enc_loop8_enter 524 .p2align 4 525 L$ecb_enc_loop8: 526 movups %xmm2,(%rsi) 527 movq %r11,%rcx 528 movdqu 
(%rdi),%xmm2 529 movl %r10d,%eax 530 movups %xmm3,16(%rsi) 531 movdqu 16(%rdi),%xmm3 532 movups %xmm4,32(%rsi) 533 movdqu 32(%rdi),%xmm4 534 movups %xmm5,48(%rsi) 535 movdqu 48(%rdi),%xmm5 536 movups %xmm6,64(%rsi) 537 movdqu 64(%rdi),%xmm6 538 movups %xmm7,80(%rsi) 539 movdqu 80(%rdi),%xmm7 540 movups %xmm8,96(%rsi) 541 movdqu 96(%rdi),%xmm8 542 movups %xmm9,112(%rsi) 543 leaq 128(%rsi),%rsi 544 movdqu 112(%rdi),%xmm9 545 leaq 128(%rdi),%rdi 546 L$ecb_enc_loop8_enter: 547 548 call _aesni_encrypt8 549 550 subq $0x80,%rdx 551 jnc L$ecb_enc_loop8 552 553 movups %xmm2,(%rsi) 554 movq %r11,%rcx 555 movups %xmm3,16(%rsi) 556 movl %r10d,%eax 557 movups %xmm4,32(%rsi) 558 movups %xmm5,48(%rsi) 559 movups %xmm6,64(%rsi) 560 movups %xmm7,80(%rsi) 561 movups %xmm8,96(%rsi) 562 movups %xmm9,112(%rsi) 563 leaq 128(%rsi),%rsi 564 addq $0x80,%rdx 565 jz L$ecb_ret 566 567 L$ecb_enc_tail: 568 movups (%rdi),%xmm2 569 cmpq $0x20,%rdx 570 jb L$ecb_enc_one 571 movups 16(%rdi),%xmm3 572 je L$ecb_enc_two 573 movups 32(%rdi),%xmm4 574 cmpq $0x40,%rdx 575 jb L$ecb_enc_three 576 movups 48(%rdi),%xmm5 577 je L$ecb_enc_four 578 movups 64(%rdi),%xmm6 579 cmpq $0x60,%rdx 580 jb L$ecb_enc_five 581 movups 80(%rdi),%xmm7 582 je L$ecb_enc_six 583 movdqu 96(%rdi),%xmm8 584 xorps %xmm9,%xmm9 585 call _aesni_encrypt8 586 movups %xmm2,(%rsi) 587 movups %xmm3,16(%rsi) 588 movups %xmm4,32(%rsi) 589 movups %xmm5,48(%rsi) 590 movups %xmm6,64(%rsi) 591 movups %xmm7,80(%rsi) 592 movups %xmm8,96(%rsi) 593 jmp L$ecb_ret 594 .p2align 4 595 L$ecb_enc_one: 596 movups (%rcx),%xmm0 597 movups 16(%rcx),%xmm1 598 leaq 32(%rcx),%rcx 599 xorps %xmm0,%xmm2 600 L$oop_enc1_3: 601 .byte 102,15,56,220,209 602 decl %eax 603 movups (%rcx),%xmm1 604 leaq 16(%rcx),%rcx 605 jnz L$oop_enc1_3 606 .byte 102,15,56,221,209 607 movups %xmm2,(%rsi) 608 jmp L$ecb_ret 609 .p2align 4 610 L$ecb_enc_two: 611 call _aesni_encrypt2 612 movups %xmm2,(%rsi) 613 movups %xmm3,16(%rsi) 614 jmp L$ecb_ret 615 .p2align 4 616 L$ecb_enc_three: 617 call 
_aesni_encrypt3 618 movups %xmm2,(%rsi) 619 movups %xmm3,16(%rsi) 620 movups %xmm4,32(%rsi) 621 jmp L$ecb_ret 622 .p2align 4 623 L$ecb_enc_four: 624 call _aesni_encrypt4 625 movups %xmm2,(%rsi) 626 movups %xmm3,16(%rsi) 627 movups %xmm4,32(%rsi) 628 movups %xmm5,48(%rsi) 629 jmp L$ecb_ret 630 .p2align 4 631 L$ecb_enc_five: 632 xorps %xmm7,%xmm7 633 call _aesni_encrypt6 634 movups %xmm2,(%rsi) 635 movups %xmm3,16(%rsi) 636 movups %xmm4,32(%rsi) 637 movups %xmm5,48(%rsi) 638 movups %xmm6,64(%rsi) 639 jmp L$ecb_ret 640 .p2align 4 641 L$ecb_enc_six: 642 call _aesni_encrypt6 643 movups %xmm2,(%rsi) 644 movups %xmm3,16(%rsi) 645 movups %xmm4,32(%rsi) 646 movups %xmm5,48(%rsi) 647 movups %xmm6,64(%rsi) 648 movups %xmm7,80(%rsi) 649 jmp L$ecb_ret 650 651 .p2align 4 652 L$ecb_decrypt: 653 cmpq $0x80,%rdx 654 jb L$ecb_dec_tail 655 656 movdqu (%rdi),%xmm2 657 movdqu 16(%rdi),%xmm3 658 movdqu 32(%rdi),%xmm4 659 movdqu 48(%rdi),%xmm5 660 movdqu 64(%rdi),%xmm6 661 movdqu 80(%rdi),%xmm7 662 movdqu 96(%rdi),%xmm8 663 movdqu 112(%rdi),%xmm9 664 leaq 128(%rdi),%rdi 665 subq $0x80,%rdx 666 jmp L$ecb_dec_loop8_enter 667 .p2align 4 668 L$ecb_dec_loop8: 669 movups %xmm2,(%rsi) 670 movq %r11,%rcx 671 movdqu (%rdi),%xmm2 672 movl %r10d,%eax 673 movups %xmm3,16(%rsi) 674 movdqu 16(%rdi),%xmm3 675 movups %xmm4,32(%rsi) 676 movdqu 32(%rdi),%xmm4 677 movups %xmm5,48(%rsi) 678 movdqu 48(%rdi),%xmm5 679 movups %xmm6,64(%rsi) 680 movdqu 64(%rdi),%xmm6 681 movups %xmm7,80(%rsi) 682 movdqu 80(%rdi),%xmm7 683 movups %xmm8,96(%rsi) 684 movdqu 96(%rdi),%xmm8 685 movups %xmm9,112(%rsi) 686 leaq 128(%rsi),%rsi 687 movdqu 112(%rdi),%xmm9 688 leaq 128(%rdi),%rdi 689 L$ecb_dec_loop8_enter: 690 691 call _aesni_decrypt8 692 693 movups (%r11),%xmm0 694 subq $0x80,%rdx 695 jnc L$ecb_dec_loop8 696 697 movups %xmm2,(%rsi) 698 pxor %xmm2,%xmm2 699 movq %r11,%rcx 700 movups %xmm3,16(%rsi) 701 pxor %xmm3,%xmm3 702 movl %r10d,%eax 703 movups %xmm4,32(%rsi) 704 pxor %xmm4,%xmm4 705 movups %xmm5,48(%rsi) 706 pxor 
%xmm5,%xmm5 707 movups %xmm6,64(%rsi) 708 pxor %xmm6,%xmm6 709 movups %xmm7,80(%rsi) 710 pxor %xmm7,%xmm7 711 movups %xmm8,96(%rsi) 712 pxor %xmm8,%xmm8 713 movups %xmm9,112(%rsi) 714 pxor %xmm9,%xmm9 715 leaq 128(%rsi),%rsi 716 addq $0x80,%rdx 717 jz L$ecb_ret 718 719 L$ecb_dec_tail: 720 movups (%rdi),%xmm2 721 cmpq $0x20,%rdx 722 jb L$ecb_dec_one 723 movups 16(%rdi),%xmm3 724 je L$ecb_dec_two 725 movups 32(%rdi),%xmm4 726 cmpq $0x40,%rdx 727 jb L$ecb_dec_three 728 movups 48(%rdi),%xmm5 729 je L$ecb_dec_four 730 movups 64(%rdi),%xmm6 731 cmpq $0x60,%rdx 732 jb L$ecb_dec_five 733 movups 80(%rdi),%xmm7 734 je L$ecb_dec_six 735 movups 96(%rdi),%xmm8 736 movups (%rcx),%xmm0 737 xorps %xmm9,%xmm9 738 call _aesni_decrypt8 739 movups %xmm2,(%rsi) 740 pxor %xmm2,%xmm2 741 movups %xmm3,16(%rsi) 742 pxor %xmm3,%xmm3 743 movups %xmm4,32(%rsi) 744 pxor %xmm4,%xmm4 745 movups %xmm5,48(%rsi) 746 pxor %xmm5,%xmm5 747 movups %xmm6,64(%rsi) 748 pxor %xmm6,%xmm6 749 movups %xmm7,80(%rsi) 750 pxor %xmm7,%xmm7 751 movups %xmm8,96(%rsi) 752 pxor %xmm8,%xmm8 753 pxor %xmm9,%xmm9 754 jmp L$ecb_ret 755 .p2align 4 756 L$ecb_dec_one: 757 movups (%rcx),%xmm0 758 movups 16(%rcx),%xmm1 759 leaq 32(%rcx),%rcx 760 xorps %xmm0,%xmm2 761 L$oop_dec1_4: 762 .byte 102,15,56,222,209 763 decl %eax 764 movups (%rcx),%xmm1 765 leaq 16(%rcx),%rcx 766 jnz L$oop_dec1_4 767 .byte 102,15,56,223,209 768 movups %xmm2,(%rsi) 769 pxor %xmm2,%xmm2 770 jmp L$ecb_ret 771 .p2align 4 772 L$ecb_dec_two: 773 call _aesni_decrypt2 774 movups %xmm2,(%rsi) 775 pxor %xmm2,%xmm2 776 movups %xmm3,16(%rsi) 777 pxor %xmm3,%xmm3 778 jmp L$ecb_ret 779 .p2align 4 780 L$ecb_dec_three: 781 call _aesni_decrypt3 782 movups %xmm2,(%rsi) 783 pxor %xmm2,%xmm2 784 movups %xmm3,16(%rsi) 785 pxor %xmm3,%xmm3 786 movups %xmm4,32(%rsi) 787 pxor %xmm4,%xmm4 788 jmp L$ecb_ret 789 .p2align 4 790 L$ecb_dec_four: 791 call _aesni_decrypt4 792 movups %xmm2,(%rsi) 793 pxor %xmm2,%xmm2 794 movups %xmm3,16(%rsi) 795 pxor %xmm3,%xmm3 796 movups 
%xmm4,32(%rsi) 797 pxor %xmm4,%xmm4 798 movups %xmm5,48(%rsi) 799 pxor %xmm5,%xmm5 800 jmp L$ecb_ret 801 .p2align 4 802 L$ecb_dec_five: 803 xorps %xmm7,%xmm7 804 call _aesni_decrypt6 805 movups %xmm2,(%rsi) 806 pxor %xmm2,%xmm2 807 movups %xmm3,16(%rsi) 808 pxor %xmm3,%xmm3 809 movups %xmm4,32(%rsi) 810 pxor %xmm4,%xmm4 811 movups %xmm5,48(%rsi) 812 pxor %xmm5,%xmm5 813 movups %xmm6,64(%rsi) 814 pxor %xmm6,%xmm6 815 pxor %xmm7,%xmm7 816 jmp L$ecb_ret 817 .p2align 4 818 L$ecb_dec_six: 819 call _aesni_decrypt6 820 movups %xmm2,(%rsi) 821 pxor %xmm2,%xmm2 822 movups %xmm3,16(%rsi) 823 pxor %xmm3,%xmm3 824 movups %xmm4,32(%rsi) 825 pxor %xmm4,%xmm4 826 movups %xmm5,48(%rsi) 827 pxor %xmm5,%xmm5 828 movups %xmm6,64(%rsi) 829 pxor %xmm6,%xmm6 830 movups %xmm7,80(%rsi) 831 pxor %xmm7,%xmm7 832 833 L$ecb_ret: 834 xorps %xmm0,%xmm0 835 pxor %xmm1,%xmm1 836 .byte 0xf3,0xc3 837 838 .globl _aesni_ccm64_encrypt_blocks 839 .private_extern _aesni_ccm64_encrypt_blocks 840 841 .p2align 4 842 _aesni_ccm64_encrypt_blocks: 843 movl 240(%rcx),%eax 844 movdqu (%r8),%xmm6 845 movdqa L$increment64(%rip),%xmm9 846 movdqa L$bswap_mask(%rip),%xmm7 847 848 shll $4,%eax 849 movl $16,%r10d 850 leaq 0(%rcx),%r11 851 movdqu (%r9),%xmm3 852 movdqa %xmm6,%xmm2 853 leaq 32(%rcx,%rax,1),%rcx 854 .byte 102,15,56,0,247 855 subq %rax,%r10 856 jmp L$ccm64_enc_outer 857 .p2align 4 858 L$ccm64_enc_outer: 859 movups (%r11),%xmm0 860 movq %r10,%rax 861 movups (%rdi),%xmm8 862 863 xorps %xmm0,%xmm2 864 movups 16(%r11),%xmm1 865 xorps %xmm8,%xmm0 866 xorps %xmm0,%xmm3 867 movups 32(%r11),%xmm0 868 869 L$ccm64_enc2_loop: 870 .byte 102,15,56,220,209 871 .byte 102,15,56,220,217 872 movups (%rcx,%rax,1),%xmm1 873 addq $32,%rax 874 .byte 102,15,56,220,208 875 .byte 102,15,56,220,216 876 movups -16(%rcx,%rax,1),%xmm0 877 jnz L$ccm64_enc2_loop 878 .byte 102,15,56,220,209 879 .byte 102,15,56,220,217 880 paddq %xmm9,%xmm6 881 decq %rdx 882 .byte 102,15,56,221,208 883 .byte 102,15,56,221,216 884 885 leaq 16(%rdi),%rdi 
886 xorps %xmm2,%xmm8 887 movdqa %xmm6,%xmm2 888 movups %xmm8,(%rsi) 889 .byte 102,15,56,0,215 890 leaq 16(%rsi),%rsi 891 jnz L$ccm64_enc_outer 892 893 pxor %xmm0,%xmm0 894 pxor %xmm1,%xmm1 895 pxor %xmm2,%xmm2 896 movups %xmm3,(%r9) 897 pxor %xmm3,%xmm3 898 pxor %xmm8,%xmm8 899 pxor %xmm6,%xmm6 900 .byte 0xf3,0xc3 901 902 .globl _aesni_ccm64_decrypt_blocks 903 .private_extern _aesni_ccm64_decrypt_blocks 904 905 .p2align 4 906 _aesni_ccm64_decrypt_blocks: 907 movl 240(%rcx),%eax 908 movups (%r8),%xmm6 909 movdqu (%r9),%xmm3 910 movdqa L$increment64(%rip),%xmm9 911 movdqa L$bswap_mask(%rip),%xmm7 912 913 movaps %xmm6,%xmm2 914 movl %eax,%r10d 915 movq %rcx,%r11 916 .byte 102,15,56,0,247 917 movups (%rcx),%xmm0 918 movups 16(%rcx),%xmm1 919 leaq 32(%rcx),%rcx 920 xorps %xmm0,%xmm2 921 L$oop_enc1_5: 922 .byte 102,15,56,220,209 923 decl %eax 924 movups (%rcx),%xmm1 925 leaq 16(%rcx),%rcx 926 jnz L$oop_enc1_5 927 .byte 102,15,56,221,209 928 shll $4,%r10d 929 movl $16,%eax 930 movups (%rdi),%xmm8 931 paddq %xmm9,%xmm6 932 leaq 16(%rdi),%rdi 933 subq %r10,%rax 934 leaq 32(%r11,%r10,1),%rcx 935 movq %rax,%r10 936 jmp L$ccm64_dec_outer 937 .p2align 4 938 L$ccm64_dec_outer: 939 xorps %xmm2,%xmm8 940 movdqa %xmm6,%xmm2 941 movups %xmm8,(%rsi) 942 leaq 16(%rsi),%rsi 943 .byte 102,15,56,0,215 944 945 subq $1,%rdx 946 jz L$ccm64_dec_break 947 948 movups (%r11),%xmm0 949 movq %r10,%rax 950 movups 16(%r11),%xmm1 951 xorps %xmm0,%xmm8 952 xorps %xmm0,%xmm2 953 xorps %xmm8,%xmm3 954 movups 32(%r11),%xmm0 955 jmp L$ccm64_dec2_loop 956 .p2align 4 957 L$ccm64_dec2_loop: 958 .byte 102,15,56,220,209 959 .byte 102,15,56,220,217 960 movups (%rcx,%rax,1),%xmm1 961 addq $32,%rax 962 .byte 102,15,56,220,208 963 .byte 102,15,56,220,216 964 movups -16(%rcx,%rax,1),%xmm0 965 jnz L$ccm64_dec2_loop 966 movups (%rdi),%xmm8 967 paddq %xmm9,%xmm6 968 .byte 102,15,56,220,209 969 .byte 102,15,56,220,217 970 .byte 102,15,56,221,208 971 .byte 102,15,56,221,216 972 leaq 16(%rdi),%rdi 973 jmp 
L$ccm64_dec_outer 974 975 .p2align 4 976 L$ccm64_dec_break: 977 978 movl 240(%r11),%eax 979 movups (%r11),%xmm0 980 movups 16(%r11),%xmm1 981 xorps %xmm0,%xmm8 982 leaq 32(%r11),%r11 983 xorps %xmm8,%xmm3 984 L$oop_enc1_6: 985 .byte 102,15,56,220,217 986 decl %eax 987 movups (%r11),%xmm1 988 leaq 16(%r11),%r11 989 jnz L$oop_enc1_6 990 .byte 102,15,56,221,217 991 pxor %xmm0,%xmm0 992 pxor %xmm1,%xmm1 993 pxor %xmm2,%xmm2 994 movups %xmm3,(%r9) 995 pxor %xmm3,%xmm3 996 pxor %xmm8,%xmm8 997 pxor %xmm6,%xmm6 998 .byte 0xf3,0xc3 999 1000 .globl _aesni_ctr32_encrypt_blocks 1001 .private_extern _aesni_ctr32_encrypt_blocks 1002 1003 .p2align 4 1004 _aesni_ctr32_encrypt_blocks: 1005 cmpq $1,%rdx 1006 jne L$ctr32_bulk 1007 1008 1009 1010 movups (%r8),%xmm2 1011 movups (%rdi),%xmm3 1012 movl 240(%rcx),%edx 1013 movups (%rcx),%xmm0 1014 movups 16(%rcx),%xmm1 1015 leaq 32(%rcx),%rcx 1016 xorps %xmm0,%xmm2 1017 L$oop_enc1_7: 1018 .byte 102,15,56,220,209 1019 decl %edx 1020 movups (%rcx),%xmm1 1021 leaq 16(%rcx),%rcx 1022 jnz L$oop_enc1_7 1023 .byte 102,15,56,221,209 1024 pxor %xmm0,%xmm0 1025 pxor %xmm1,%xmm1 1026 xorps %xmm3,%xmm2 1027 pxor %xmm3,%xmm3 1028 movups %xmm2,(%rsi) 1029 xorps %xmm2,%xmm2 1030 jmp L$ctr32_epilogue 1031 1032 .p2align 4 1033 L$ctr32_bulk: 1034 leaq (%rsp),%r11 1035 pushq %rbp 1036 subq $128,%rsp 1037 andq $-16,%rsp 1038 1039 1040 1041 1042 movdqu (%r8),%xmm2 1043 movdqu (%rcx),%xmm0 1044 movl 12(%r8),%r8d 1045 pxor %xmm0,%xmm2 1046 movl 12(%rcx),%ebp 1047 movdqa %xmm2,0(%rsp) 1048 bswapl %r8d 1049 movdqa %xmm2,%xmm3 1050 movdqa %xmm2,%xmm4 1051 movdqa %xmm2,%xmm5 1052 movdqa %xmm2,64(%rsp) 1053 movdqa %xmm2,80(%rsp) 1054 movdqa %xmm2,96(%rsp) 1055 movq %rdx,%r10 1056 movdqa %xmm2,112(%rsp) 1057 1058 leaq 1(%r8),%rax 1059 leaq 2(%r8),%rdx 1060 bswapl %eax 1061 bswapl %edx 1062 xorl %ebp,%eax 1063 xorl %ebp,%edx 1064 .byte 102,15,58,34,216,3 1065 leaq 3(%r8),%rax 1066 movdqa %xmm3,16(%rsp) 1067 .byte 102,15,58,34,226,3 1068 bswapl %eax 1069 movq 
%r10,%rdx 1070 leaq 4(%r8),%r10 1071 movdqa %xmm4,32(%rsp) 1072 xorl %ebp,%eax 1073 bswapl %r10d 1074 .byte 102,15,58,34,232,3 1075 xorl %ebp,%r10d 1076 movdqa %xmm5,48(%rsp) 1077 leaq 5(%r8),%r9 1078 movl %r10d,64+12(%rsp) 1079 bswapl %r9d 1080 leaq 6(%r8),%r10 1081 movl 240(%rcx),%eax 1082 xorl %ebp,%r9d 1083 bswapl %r10d 1084 movl %r9d,80+12(%rsp) 1085 xorl %ebp,%r10d 1086 leaq 7(%r8),%r9 1087 movl %r10d,96+12(%rsp) 1088 bswapl %r9d 1089 movl _OPENSSL_ia32cap_P+4(%rip),%r10d 1090 xorl %ebp,%r9d 1091 andl $71303168,%r10d 1092 movl %r9d,112+12(%rsp) 1093 1094 movups 16(%rcx),%xmm1 1095 1096 movdqa 64(%rsp),%xmm6 1097 movdqa 80(%rsp),%xmm7 1098 1099 cmpq $8,%rdx 1100 jb L$ctr32_tail 1101 1102 subq $6,%rdx 1103 cmpl $4194304,%r10d 1104 je L$ctr32_6x 1105 1106 leaq 128(%rcx),%rcx 1107 subq $2,%rdx 1108 jmp L$ctr32_loop8 1109 1110 .p2align 4 1111 L$ctr32_6x: 1112 shll $4,%eax 1113 movl $48,%r10d 1114 bswapl %ebp 1115 leaq 32(%rcx,%rax,1),%rcx 1116 subq %rax,%r10 1117 jmp L$ctr32_loop6 1118 1119 .p2align 4 1120 L$ctr32_loop6: 1121 addl $6,%r8d 1122 movups -48(%rcx,%r10,1),%xmm0 1123 .byte 102,15,56,220,209 1124 movl %r8d,%eax 1125 xorl %ebp,%eax 1126 .byte 102,15,56,220,217 1127 .byte 0x0f,0x38,0xf1,0x44,0x24,12 1128 leal 1(%r8),%eax 1129 .byte 102,15,56,220,225 1130 xorl %ebp,%eax 1131 .byte 0x0f,0x38,0xf1,0x44,0x24,28 1132 .byte 102,15,56,220,233 1133 leal 2(%r8),%eax 1134 xorl %ebp,%eax 1135 .byte 102,15,56,220,241 1136 .byte 0x0f,0x38,0xf1,0x44,0x24,44 1137 leal 3(%r8),%eax 1138 .byte 102,15,56,220,249 1139 movups -32(%rcx,%r10,1),%xmm1 1140 xorl %ebp,%eax 1141 1142 .byte 102,15,56,220,208 1143 .byte 0x0f,0x38,0xf1,0x44,0x24,60 1144 leal 4(%r8),%eax 1145 .byte 102,15,56,220,216 1146 xorl %ebp,%eax 1147 .byte 0x0f,0x38,0xf1,0x44,0x24,76 1148 .byte 102,15,56,220,224 1149 leal 5(%r8),%eax 1150 xorl %ebp,%eax 1151 .byte 102,15,56,220,232 1152 .byte 0x0f,0x38,0xf1,0x44,0x24,92 1153 movq %r10,%rax 1154 .byte 102,15,56,220,240 1155 .byte 102,15,56,220,248 1156 movups 
-16(%rcx,%r10,1),%xmm0 1157 1158 call L$enc_loop6 1159 1160 movdqu (%rdi),%xmm8 1161 movdqu 16(%rdi),%xmm9 1162 movdqu 32(%rdi),%xmm10 1163 movdqu 48(%rdi),%xmm11 1164 movdqu 64(%rdi),%xmm12 1165 movdqu 80(%rdi),%xmm13 1166 leaq 96(%rdi),%rdi 1167 movups -64(%rcx,%r10,1),%xmm1 1168 pxor %xmm2,%xmm8 1169 movaps 0(%rsp),%xmm2 1170 pxor %xmm3,%xmm9 1171 movaps 16(%rsp),%xmm3 1172 pxor %xmm4,%xmm10 1173 movaps 32(%rsp),%xmm4 1174 pxor %xmm5,%xmm11 1175 movaps 48(%rsp),%xmm5 1176 pxor %xmm6,%xmm12 1177 movaps 64(%rsp),%xmm6 1178 pxor %xmm7,%xmm13 1179 movaps 80(%rsp),%xmm7 1180 movdqu %xmm8,(%rsi) 1181 movdqu %xmm9,16(%rsi) 1182 movdqu %xmm10,32(%rsi) 1183 movdqu %xmm11,48(%rsi) 1184 movdqu %xmm12,64(%rsi) 1185 movdqu %xmm13,80(%rsi) 1186 leaq 96(%rsi),%rsi 1187 1188 subq $6,%rdx 1189 jnc L$ctr32_loop6 1190 1191 addq $6,%rdx 1192 jz L$ctr32_done 1193 1194 leal -48(%r10),%eax 1195 leaq -80(%rcx,%r10,1),%rcx 1196 negl %eax 1197 shrl $4,%eax 1198 jmp L$ctr32_tail 1199 1200 .p2align 5 1201 L$ctr32_loop8: 1202 addl $8,%r8d 1203 movdqa 96(%rsp),%xmm8 1204 .byte 102,15,56,220,209 1205 movl %r8d,%r9d 1206 movdqa 112(%rsp),%xmm9 1207 .byte 102,15,56,220,217 1208 bswapl %r9d 1209 movups 32-128(%rcx),%xmm0 1210 .byte 102,15,56,220,225 1211 xorl %ebp,%r9d 1212 nop 1213 .byte 102,15,56,220,233 1214 movl %r9d,0+12(%rsp) 1215 leaq 1(%r8),%r9 1216 .byte 102,15,56,220,241 1217 .byte 102,15,56,220,249 1218 .byte 102,68,15,56,220,193 1219 .byte 102,68,15,56,220,201 1220 movups 48-128(%rcx),%xmm1 1221 bswapl %r9d 1222 .byte 102,15,56,220,208 1223 .byte 102,15,56,220,216 1224 xorl %ebp,%r9d 1225 .byte 0x66,0x90 1226 .byte 102,15,56,220,224 1227 .byte 102,15,56,220,232 1228 movl %r9d,16+12(%rsp) 1229 leaq 2(%r8),%r9 1230 .byte 102,15,56,220,240 1231 .byte 102,15,56,220,248 1232 .byte 102,68,15,56,220,192 1233 .byte 102,68,15,56,220,200 1234 movups 64-128(%rcx),%xmm0 1235 bswapl %r9d 1236 .byte 102,15,56,220,209 1237 .byte 102,15,56,220,217 1238 xorl %ebp,%r9d 1239 .byte 0x66,0x90 1240 .byte 
102,15,56,220,225 1241 .byte 102,15,56,220,233 1242 movl %r9d,32+12(%rsp) 1243 leaq 3(%r8),%r9 1244 .byte 102,15,56,220,241 1245 .byte 102,15,56,220,249 1246 .byte 102,68,15,56,220,193 1247 .byte 102,68,15,56,220,201 1248 movups 80-128(%rcx),%xmm1 1249 bswapl %r9d 1250 .byte 102,15,56,220,208 1251 .byte 102,15,56,220,216 1252 xorl %ebp,%r9d 1253 .byte 0x66,0x90 1254 .byte 102,15,56,220,224 1255 .byte 102,15,56,220,232 1256 movl %r9d,48+12(%rsp) 1257 leaq 4(%r8),%r9 1258 .byte 102,15,56,220,240 1259 .byte 102,15,56,220,248 1260 .byte 102,68,15,56,220,192 1261 .byte 102,68,15,56,220,200 1262 movups 96-128(%rcx),%xmm0 1263 bswapl %r9d 1264 .byte 102,15,56,220,209 1265 .byte 102,15,56,220,217 1266 xorl %ebp,%r9d 1267 .byte 0x66,0x90 1268 .byte 102,15,56,220,225 1269 .byte 102,15,56,220,233 1270 movl %r9d,64+12(%rsp) 1271 leaq 5(%r8),%r9 1272 .byte 102,15,56,220,241 1273 .byte 102,15,56,220,249 1274 .byte 102,68,15,56,220,193 1275 .byte 102,68,15,56,220,201 1276 movups 112-128(%rcx),%xmm1 1277 bswapl %r9d 1278 .byte 102,15,56,220,208 1279 .byte 102,15,56,220,216 1280 xorl %ebp,%r9d 1281 .byte 0x66,0x90 1282 .byte 102,15,56,220,224 1283 .byte 102,15,56,220,232 1284 movl %r9d,80+12(%rsp) 1285 leaq 6(%r8),%r9 1286 .byte 102,15,56,220,240 1287 .byte 102,15,56,220,248 1288 .byte 102,68,15,56,220,192 1289 .byte 102,68,15,56,220,200 1290 movups 128-128(%rcx),%xmm0 1291 bswapl %r9d 1292 .byte 102,15,56,220,209 1293 .byte 102,15,56,220,217 1294 xorl %ebp,%r9d 1295 .byte 0x66,0x90 1296 .byte 102,15,56,220,225 1297 .byte 102,15,56,220,233 1298 movl %r9d,96+12(%rsp) 1299 leaq 7(%r8),%r9 1300 .byte 102,15,56,220,241 1301 .byte 102,15,56,220,249 1302 .byte 102,68,15,56,220,193 1303 .byte 102,68,15,56,220,201 1304 movups 144-128(%rcx),%xmm1 1305 bswapl %r9d 1306 .byte 102,15,56,220,208 1307 .byte 102,15,56,220,216 1308 .byte 102,15,56,220,224 1309 xorl %ebp,%r9d 1310 movdqu 0(%rdi),%xmm10 1311 .byte 102,15,56,220,232 1312 movl %r9d,112+12(%rsp) 1313 cmpl $11,%eax 1314 .byte 
102,15,56,220,240 1315 .byte 102,15,56,220,248 1316 .byte 102,68,15,56,220,192 1317 .byte 102,68,15,56,220,200 1318 movups 160-128(%rcx),%xmm0 1319 1320 jb L$ctr32_enc_done 1321 1322 .byte 102,15,56,220,209 1323 .byte 102,15,56,220,217 1324 .byte 102,15,56,220,225 1325 .byte 102,15,56,220,233 1326 .byte 102,15,56,220,241 1327 .byte 102,15,56,220,249 1328 .byte 102,68,15,56,220,193 1329 .byte 102,68,15,56,220,201 1330 movups 176-128(%rcx),%xmm1 1331 1332 .byte 102,15,56,220,208 1333 .byte 102,15,56,220,216 1334 .byte 102,15,56,220,224 1335 .byte 102,15,56,220,232 1336 .byte 102,15,56,220,240 1337 .byte 102,15,56,220,248 1338 .byte 102,68,15,56,220,192 1339 .byte 102,68,15,56,220,200 1340 movups 192-128(%rcx),%xmm0 1341 je L$ctr32_enc_done 1342 1343 .byte 102,15,56,220,209 1344 .byte 102,15,56,220,217 1345 .byte 102,15,56,220,225 1346 .byte 102,15,56,220,233 1347 .byte 102,15,56,220,241 1348 .byte 102,15,56,220,249 1349 .byte 102,68,15,56,220,193 1350 .byte 102,68,15,56,220,201 1351 movups 208-128(%rcx),%xmm1 1352 1353 .byte 102,15,56,220,208 1354 .byte 102,15,56,220,216 1355 .byte 102,15,56,220,224 1356 .byte 102,15,56,220,232 1357 .byte 102,15,56,220,240 1358 .byte 102,15,56,220,248 1359 .byte 102,68,15,56,220,192 1360 .byte 102,68,15,56,220,200 1361 movups 224-128(%rcx),%xmm0 1362 jmp L$ctr32_enc_done 1363 1364 .p2align 4 1365 L$ctr32_enc_done: 1366 movdqu 16(%rdi),%xmm11 1367 pxor %xmm0,%xmm10 1368 movdqu 32(%rdi),%xmm12 1369 pxor %xmm0,%xmm11 1370 movdqu 48(%rdi),%xmm13 1371 pxor %xmm0,%xmm12 1372 movdqu 64(%rdi),%xmm14 1373 pxor %xmm0,%xmm13 1374 movdqu 80(%rdi),%xmm15 1375 pxor %xmm0,%xmm14 1376 pxor %xmm0,%xmm15 1377 .byte 102,15,56,220,209 1378 .byte 102,15,56,220,217 1379 .byte 102,15,56,220,225 1380 .byte 102,15,56,220,233 1381 .byte 102,15,56,220,241 1382 .byte 102,15,56,220,249 1383 .byte 102,68,15,56,220,193 1384 .byte 102,68,15,56,220,201 1385 movdqu 96(%rdi),%xmm1 1386 leaq 128(%rdi),%rdi 1387 1388 .byte 102,65,15,56,221,210 1389 pxor %xmm0,%xmm1 1390 
movdqu 112-128(%rdi),%xmm10 1391 .byte 102,65,15,56,221,219 1392 pxor %xmm0,%xmm10 1393 movdqa 0(%rsp),%xmm11 1394 .byte 102,65,15,56,221,228 1395 .byte 102,65,15,56,221,237 1396 movdqa 16(%rsp),%xmm12 1397 movdqa 32(%rsp),%xmm13 1398 .byte 102,65,15,56,221,246 1399 .byte 102,65,15,56,221,255 1400 movdqa 48(%rsp),%xmm14 1401 movdqa 64(%rsp),%xmm15 1402 .byte 102,68,15,56,221,193 1403 movdqa 80(%rsp),%xmm0 1404 movups 16-128(%rcx),%xmm1 1405 .byte 102,69,15,56,221,202 1406 1407 movups %xmm2,(%rsi) 1408 movdqa %xmm11,%xmm2 1409 movups %xmm3,16(%rsi) 1410 movdqa %xmm12,%xmm3 1411 movups %xmm4,32(%rsi) 1412 movdqa %xmm13,%xmm4 1413 movups %xmm5,48(%rsi) 1414 movdqa %xmm14,%xmm5 1415 movups %xmm6,64(%rsi) 1416 movdqa %xmm15,%xmm6 1417 movups %xmm7,80(%rsi) 1418 movdqa %xmm0,%xmm7 1419 movups %xmm8,96(%rsi) 1420 movups %xmm9,112(%rsi) 1421 leaq 128(%rsi),%rsi 1422 1423 subq $8,%rdx 1424 jnc L$ctr32_loop8 1425 1426 addq $8,%rdx 1427 jz L$ctr32_done 1428 leaq -128(%rcx),%rcx 1429 1430 L$ctr32_tail: 1431 1432 1433 leaq 16(%rcx),%rcx 1434 cmpq $4,%rdx 1435 jb L$ctr32_loop3 1436 je L$ctr32_loop4 1437 1438 1439 shll $4,%eax 1440 movdqa 96(%rsp),%xmm8 1441 pxor %xmm9,%xmm9 1442 1443 movups 16(%rcx),%xmm0 1444 .byte 102,15,56,220,209 1445 .byte 102,15,56,220,217 1446 leaq 32-16(%rcx,%rax,1),%rcx 1447 negq %rax 1448 .byte 102,15,56,220,225 1449 addq $16,%rax 1450 movups (%rdi),%xmm10 1451 .byte 102,15,56,220,233 1452 .byte 102,15,56,220,241 1453 movups 16(%rdi),%xmm11 1454 movups 32(%rdi),%xmm12 1455 .byte 102,15,56,220,249 1456 .byte 102,68,15,56,220,193 1457 1458 call L$enc_loop8_enter 1459 1460 movdqu 48(%rdi),%xmm13 1461 pxor %xmm10,%xmm2 1462 movdqu 64(%rdi),%xmm10 1463 pxor %xmm11,%xmm3 1464 movdqu %xmm2,(%rsi) 1465 pxor %xmm12,%xmm4 1466 movdqu %xmm3,16(%rsi) 1467 pxor %xmm13,%xmm5 1468 movdqu %xmm4,32(%rsi) 1469 pxor %xmm10,%xmm6 1470 movdqu %xmm5,48(%rsi) 1471 movdqu %xmm6,64(%rsi) 1472 cmpq $6,%rdx 1473 jb L$ctr32_done 1474 1475 movups 80(%rdi),%xmm11 1476 xorps 
%xmm11,%xmm7 1477 movups %xmm7,80(%rsi) 1478 je L$ctr32_done 1479 1480 movups 96(%rdi),%xmm12 1481 xorps %xmm12,%xmm8 1482 movups %xmm8,96(%rsi) 1483 jmp L$ctr32_done 1484 1485 .p2align 5 1486 L$ctr32_loop4: 1487 .byte 102,15,56,220,209 1488 leaq 16(%rcx),%rcx 1489 decl %eax 1490 .byte 102,15,56,220,217 1491 .byte 102,15,56,220,225 1492 .byte 102,15,56,220,233 1493 movups (%rcx),%xmm1 1494 jnz L$ctr32_loop4 1495 .byte 102,15,56,221,209 1496 .byte 102,15,56,221,217 1497 movups (%rdi),%xmm10 1498 movups 16(%rdi),%xmm11 1499 .byte 102,15,56,221,225 1500 .byte 102,15,56,221,233 1501 movups 32(%rdi),%xmm12 1502 movups 48(%rdi),%xmm13 1503 1504 xorps %xmm10,%xmm2 1505 movups %xmm2,(%rsi) 1506 xorps %xmm11,%xmm3 1507 movups %xmm3,16(%rsi) 1508 pxor %xmm12,%xmm4 1509 movdqu %xmm4,32(%rsi) 1510 pxor %xmm13,%xmm5 1511 movdqu %xmm5,48(%rsi) 1512 jmp L$ctr32_done 1513 1514 .p2align 5 1515 L$ctr32_loop3: 1516 .byte 102,15,56,220,209 1517 leaq 16(%rcx),%rcx 1518 decl %eax 1519 .byte 102,15,56,220,217 1520 .byte 102,15,56,220,225 1521 movups (%rcx),%xmm1 1522 jnz L$ctr32_loop3 1523 .byte 102,15,56,221,209 1524 .byte 102,15,56,221,217 1525 .byte 102,15,56,221,225 1526 1527 movups (%rdi),%xmm10 1528 xorps %xmm10,%xmm2 1529 movups %xmm2,(%rsi) 1530 cmpq $2,%rdx 1531 jb L$ctr32_done 1532 1533 movups 16(%rdi),%xmm11 1534 xorps %xmm11,%xmm3 1535 movups %xmm3,16(%rsi) 1536 je L$ctr32_done 1537 1538 movups 32(%rdi),%xmm12 1539 xorps %xmm12,%xmm4 1540 movups %xmm4,32(%rsi) 1541 1542 L$ctr32_done: 1543 xorps %xmm0,%xmm0 1544 xorl %ebp,%ebp 1545 pxor %xmm1,%xmm1 1546 pxor %xmm2,%xmm2 1547 pxor %xmm3,%xmm3 1548 pxor %xmm4,%xmm4 1549 pxor %xmm5,%xmm5 1550 pxor %xmm6,%xmm6 1551 pxor %xmm7,%xmm7 1552 movaps %xmm0,0(%rsp) 1553 pxor %xmm8,%xmm8 1554 movaps %xmm0,16(%rsp) 1555 pxor %xmm9,%xmm9 1556 movaps %xmm0,32(%rsp) 1557 pxor %xmm10,%xmm10 1558 movaps %xmm0,48(%rsp) 1559 pxor %xmm11,%xmm11 1560 movaps %xmm0,64(%rsp) 1561 pxor %xmm12,%xmm12 1562 movaps %xmm0,80(%rsp) 1563 pxor %xmm13,%xmm13 
# ---- tail of _aesni_ctr32_encrypt_blocks (function body begins earlier in the file) ----
# Scrub the stack scratch slots with the zeroed %xmm0, clear the last XMM
# registers, then restore %rbp/%rsp (caller frame base was kept in %r11).
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
movaps %xmm0,112(%rsp)
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
leaq (%r11),%rsp
L$ctr32_epilogue:
.byte 0xf3,0xc3                        # rep ret

# ---- _aesni_xts_encrypt ----
# AES-XTS encryption. Register usage visible in the code below:
#   %rdi = input, %rsi = output, %rdx = byte length,
#   %rcx = data ("key1") schedule, round count at 240(%rcx),
#   %r8  = tweak ("key2") schedule, round count at 240(%r8),
#   %r9  = pointer to the 16-byte tweak/IV block.
# NOTE(review): argument meaning inferred from register reads; confirm against
# the C prototype.
# The .byte sequences 102,15,56,220,xx / 102,15,56,221,xx encode
# aesenc / aesenclast (66 0F 38 DC / DD) for assemblers without AES-NI support.
.globl _aesni_xts_encrypt
.private_extern _aesni_xts_encrypt

.p2align 4
_aesni_xts_encrypt:
# Prologue: remember caller %rsp in %r11, save %rbp, carve out a 112-byte,
# 16-byte-aligned scratch area (seven 16-byte slots) on the stack.
leaq (%rsp),%r11
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
# Encrypt the IV with the tweak key (%r8 schedule) to get the initial tweak
# into %xmm2.
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
L$oop_enc1_8:
.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz L$oop_enc1_8
.byte 102,15,56,221,209                # aesenclast %xmm1,%xmm2
movups (%rcx),%xmm0
movq %rcx,%rbp                         # keep key1 pointer in %rbp
movl %r10d,%eax                        # %eax = key1 round count
shll $4,%r10d                          # %r10d = rounds*16
movq %rdx,%r9                          # remember raw length (tail handling)
andq $-16,%rdx                         # whole 16-byte blocks only

movups 16(%rcx,%r10,1),%xmm1

# Derive tweaks for blocks 0..5 into %xmm10..%xmm14/%xmm15 by repeated
# GF(2^128) doubling; L$xts_magic is the reduction constant and %xmm9 carries
# the replicated sign words deciding when it is folded in. Each tweak is
# pre-XORed with round-key 0 (%xmm0) so the data XOR below also keys the block.
movdqa L$xts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)

subq $96,%rdx
jc L$xts_enc_short                     # fewer than six whole blocks

movl $16+96,%eax
leaq 32(%rbp,%r10,1),%rcx              # %rcx = end of key1 schedule
subq %r10,%rax
movups 16(%rbp),%xmm1
movq %rax,%r10                         # schedule offset, reloaded per loop
leaq L$xts_magic(%rip),%r8
jmp L$xts_enc_grandloop

# Main loop: six blocks per iteration, AES rounds interleaved with the
# generation of the next six tweaks.
.p2align 5
L$xts_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,220,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,220,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,220,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,220,233
movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

# Stash tweak^last-round-key values in the stack slots for the
# aesenclast-with-memory-operand finish below.
pxor %xmm9,%xmm10
.byte 102,15,56,220,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,220,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp L$xts_enc_loop6

# Inner round loop over the remaining key schedule (two rounds per pass).
.p2align 5
L$xts_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -80(%rcx,%rax,1),%xmm0
jnz L$xts_enc_loop6

# Last four rounds, interleaved with computing the next six tweaks.
movdqa (%r8),%xmm8                     # reload L$xts_magic
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,220,217
pand %xmm8,%xmm14
movups (%rbp),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,220,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,220,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,220,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,220,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,220,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,220,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,220,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,220,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,220,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm0,%xmm15
movups (%rbp),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups 16(%rbp),%xmm1

# aesenclast with the tweak^round-key values saved on the stack.
pxor %xmm15,%xmm14
.byte 102,15,56,221,84,36,0            # aesenclast 0(%rsp),%xmm2
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,221,92,36,16           # aesenclast 16(%rsp),%xmm3
.byte 102,15,56,221,100,36,32          # aesenclast 32(%rsp),%xmm4
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,221,108,36,48          # aesenclast 48(%rsp),%xmm5
.byte 102,15,56,221,116,36,64          # aesenclast 64(%rsp),%xmm6
.byte 102,15,56,221,124,36,80          # aesenclast 80(%rsp),%xmm7
pxor %xmm9,%xmm15

leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc L$xts_enc_grandloop

movl $16+96,%eax
subl %r10d,%eax
movq %rbp,%rcx
shrl $4,%eax                           # recover round count in %eax

# Handle the remaining 0-5 whole blocks. The pxor's below strip the
# round-key-0 masking from the tweaks actually needed.
L$xts_enc_short:

movl %eax,%r10d
pxor %xmm0,%xmm10
addq $96,%rdx
jz L$xts_enc_done

pxor %xmm0,%xmm11
cmpq $0x20,%rdx
jb L$xts_enc_one
pxor %xmm0,%xmm12
je L$xts_enc_two

pxor %xmm0,%xmm13
cmpq $0x40,%rdx
jb L$xts_enc_three
pxor %xmm0,%xmm14
je L$xts_enc_four

# Five blocks.
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm7,%xmm7

call _aesni_encrypt6

xorps %xmm10,%xmm2
movdqa %xmm15,%xmm10                   # next tweak
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_9:
.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_9
.byte 102,15,56,221,209                # aesenclast %xmm1,%xmm2
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_encrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_encrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_encrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp L$xts_enc_done

# Ciphertext stealing for a trailing partial block (%r9 holds raw length).
.p2align 4
L$xts_enc_done:
andq $15,%r9
jz L$xts_enc_ret
movq %r9,%rdx

L$xts_enc_steal:
movzbl (%rdi),%eax
movzbl -16(%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,-16(%rsi)
movb %cl,0(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz L$xts_enc_steal

subq %r9,%rsi
movq %rbp,%rcx
movl %r10d,%eax

# Re-encrypt the stolen block in place.
movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_10:
.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_10
.byte 102,15,56,221,209                # aesenclast %xmm1,%xmm2
xorps %xmm10,%xmm2
movups %xmm2,-16(%rsi)

# Epilogue: clear all XMM registers and the stack scratch area (key/tweak
# material), restore %rbp/%rsp.
L$xts_enc_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
leaq (%r11),%rsp
L$xts_enc_epilogue:
.byte 0xf3,0xc3                        # rep ret

# ---- _aesni_xts_decrypt ----
# AES-XTS decryption; same argument registers as _aesni_xts_encrypt.
# .byte 102,15,56,222,xx / 102,15,56,223,xx encode aesdec / aesdeclast
# (66 0F 38 DE / DF). Function continues past this edit range.
.globl _aesni_xts_decrypt
.private_extern _aesni_xts_decrypt

.p2align 4
_aesni_xts_decrypt:
leaq (%rsp),%r11
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
# Encrypt the IV with the tweak key to get the initial tweak in %xmm2.
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
L$oop_enc1_11:
.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz L$oop_enc1_11
.byte 102,15,56,221,209                # aesenclast %xmm1,%xmm2
# If the length is not a multiple of 16, hold one block back for
# ciphertext stealing: %rax = 16 iff (len & 15) != 0.
xorl %eax,%eax
testq $15,%rdx
setnz %al
shlq $4,%rax
subq %rax,%rdx

movups (%rcx),%xmm0
movq %rcx,%rbp
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
andq $-16,%rdx

movups 16(%rcx,%r10,1),%xmm1

# Derive tweaks for blocks 0..5 (same GF(2^128) doubling as the encrypt path).
movdqa L$xts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand
%xmm8,%xmm14
# ---- continuation of _aesni_xts_decrypt: finish deriving tweaks 0..5 ----
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)

subq $96,%rdx
jc L$xts_dec_short                     # fewer than six whole blocks

movl $16+96,%eax
leaq 32(%rbp,%r10,1),%rcx              # end of key schedule
subq %r10,%rax
movups 16(%rbp),%xmm1
movq %rax,%r10                         # schedule offset, reloaded per loop
leaq L$xts_magic(%rip),%r8
jmp L$xts_dec_grandloop

# Main loop: six blocks per iteration; aesdec rounds interleaved with
# generation of the next six tweaks (mirror of the encrypt grandloop).
.p2align 5
L$xts_dec_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,222,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,222,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,222,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,222,233
movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

pxor %xmm9,%xmm10
.byte 102,15,56,222,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,222,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp L$xts_dec_loop6

# Inner round loop (two rounds per pass).
.p2align 5
L$xts_dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups -80(%rcx,%rax,1),%xmm0
jnz L$xts_dec_loop6

# Last four rounds, interleaved with computing the next six tweaks.
movdqa (%r8),%xmm8                     # reload L$xts_magic
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,222,217
pand %xmm8,%xmm14
movups (%rbp),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,222,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,222,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,222,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,222,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,225
.byte 102,15,56,222,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,222,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,222,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,222,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,222,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,222,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm15
movups (%rbp),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups 16(%rbp),%xmm1

# aesdeclast with the tweak^round-key values saved on the stack.
pxor %xmm15,%xmm14
.byte 102,15,56,223,84,36,0            # aesdeclast 0(%rsp),%xmm2
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,223,92,36,16           # aesdeclast 16(%rsp),%xmm3
.byte 102,15,56,223,100,36,32          # aesdeclast 32(%rsp),%xmm4
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,223,108,36,48          # aesdeclast 48(%rsp),%xmm5
.byte 102,15,56,223,116,36,64          # aesdeclast 64(%rsp),%xmm6
.byte 102,15,56,223,124,36,80          # aesdeclast 80(%rsp),%xmm7
pxor %xmm9,%xmm15

leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc L$xts_dec_grandloop

movl $16+96,%eax
subl %r10d,%eax
movq %rbp,%rcx
shrl $4,%eax                           # recover round count

# Handle the remaining 0-5 whole blocks.
L$xts_dec_short:

movl %eax,%r10d
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
addq $96,%rdx
jz L$xts_dec_done

pxor %xmm0,%xmm12
cmpq $0x20,%rdx
jb L$xts_dec_one
pxor %xmm0,%xmm13
je L$xts_dec_two

pxor %xmm0,%xmm14
cmpq $0x40,%rdx
jb L$xts_dec_three
je L$xts_dec_four

# Five blocks.
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6

call _aesni_decrypt6

xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm14
movdqu %xmm5,48(%rsi)
pcmpgtd %xmm15,%xmm14
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $0x13,%xmm14,%xmm11
andq $15,%r9
jz L$xts_dec_ret

# Compute one more doubled tweak for the stealing path.
movdqa %xmm15,%xmm10
paddq %xmm15,%xmm15
pand %xmm8,%xmm11
pxor %xmm15,%xmm11
jmp L$xts_dec_done2

.p2align 4
L$xts_dec_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_12:
.byte 102,15,56,222,209                # aesdec %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_12
.byte 102,15,56,223,209                # aesdeclast %xmm1,%xmm2
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
movdqa %xmm12,%xmm11
leaq 16(%rsi),%rsi
jmp L$xts_dec_done

.p2align 4
L$xts_dec_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_decrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm13,%xmm11
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp L$xts_dec_done

.p2align 4
L$xts_dec_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_decrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm14,%xmm11
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp L$xts_dec_done

.p2align 4
L$xts_dec_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_decrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqa %xmm15,%xmm11
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp L$xts_dec_done

# Ciphertext stealing: decrypt the next-to-last block with the last tweak,
# swap tail bytes, then re-decrypt with the previous tweak.
.p2align 4
L$xts_dec_done:
andq $15,%r9
jz L$xts_dec_ret
L$xts_dec_done2:
movq %r9,%rdx
movq %rbp,%rcx
movl %r10d,%eax

movups (%rdi),%xmm2
xorps %xmm11,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_13:
.byte 102,15,56,222,209                # aesdec %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_13
.byte 102,15,56,223,209                # aesdeclast %xmm1,%xmm2
xorps %xmm11,%xmm2
movups %xmm2,(%rsi)

L$xts_dec_steal:
movzbl 16(%rdi),%eax
movzbl (%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,(%rsi)
movb %cl,16(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz L$xts_dec_steal

subq %r9,%rsi
movq %rbp,%rcx
movl %r10d,%eax

movups (%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_14:
.byte 102,15,56,222,209                # aesdec %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_14
.byte 102,15,56,223,209                # aesdeclast %xmm1,%xmm2
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)

# Epilogue: clear registers and stack scratch, restore %rbp/%rsp.
L$xts_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
leaq (%r11),%rsp
L$xts_dec_epilogue:
.byte 0xf3,0xc3                        # rep ret

# ---- _aesni_ocb_encrypt ----
# AES-OCB encryption. Register usage visible below:
#   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#   %rcx = key schedule (rounds at 240(%rcx)), %r8 = block counter,
#   %r9  = pointer to the running offset block (read here, written back at
#          L$ocb_enc_done), 7th stack arg -> %rbx (table of 16-byte L_i
#          values indexed via bsf of the counter), 8th stack arg -> %rbp
#          (running checksum block, updated by the __ocb_encrypt* helpers).
# NOTE(review): argument meaning inferred from register/stack reads; confirm
# against the C prototype. Function continues past this edit range.
.globl _aesni_ocb_encrypt
.private_extern _aesni_ocb_encrypt

.p2align 5
_aesni_ocb_encrypt:
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
movq 8(%rax),%rbx                      # 7th argument (above return address)
movq 8+8(%rax),%rbp                    # 8th argument

movl 240(%rcx),%r10d
movq %rcx,%r11
shll $4,%r10d
movups (%rcx),%xmm9
movups 16(%rcx,%r10,1),%xmm1           # last round key

movdqu (%r9),%xmm15                    # load offset
pxor %xmm1,%xmm9                       # round0 ^ last round key
pxor %xmm1,%xmm15

movl $16+32,%eax
leaq 32(%r11,%r10,1),%rcx
movups 16(%r11),%xmm1
subq %r10,%rax
movq %rax,%r10

movdqu (%rbx),%xmm10                   # L_0
movdqu (%rbp),%xmm8                    # checksum

# If the starting block number is even, process one block to make it odd.
testq $1,%r8
jnz L$ocb_enc_odd

bsfq %r8,%r12
addq $1,%r8
shlq $4,%r12
movdqu (%rbx,%r12,1),%xmm7
movdqu (%rdi),%xmm2
leaq 16(%rdi),%rdi

call __ocb_encrypt1

movdqa %xmm7,%xmm15
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
subq $1,%rdx
jz L$ocb_enc_done

L$ocb_enc_odd:
# Precompute bsf-based table indices for the next six blocks.
leaq 1(%r8),%r12
leaq 3(%r8),%r13
leaq 5(%r8),%r14
leaq 6(%r8),%r8
bsfq %r12,%r12
bsfq %r13,%r13
bsfq %r14,%r14
shlq $4,%r12
shlq $4,%r13
shlq $4,%r14

subq $6,%rdx
jc L$ocb_enc_short
jmp L$ocb_enc_grandloop

# Main loop: six blocks per iteration.
.p2align 5
L$ocb_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
# ---- continuation of the _aesni_ocb_encrypt six-block grandloop ----
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
leaq 96(%rdi),%rdi

call __ocb_encrypt6

movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc L$ocb_enc_grandloop

# Handle the remaining 0-5 blocks.
L$ocb_enc_short:
addq $6,%rdx
jz L$ocb_enc_done

movdqu 0(%rdi),%xmm2
cmpq $2,%rdx
jb L$ocb_enc_one
movdqu 16(%rdi),%xmm3
je L$ocb_enc_two

movdqu 32(%rdi),%xmm4
cmpq $4,%rdx
jb L$ocb_enc_three
movdqu 48(%rdi),%xmm5
je L$ocb_enc_four

# Five blocks: run the six-wide helper with a zeroed sixth block.
movdqu 64(%rdi),%xmm6
pxor %xmm7,%xmm7

call __ocb_encrypt6

movdqa %xmm14,%xmm15                   # carry forward the offset
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)

jmp L$ocb_enc_done

.p2align 4
L$ocb_enc_one:
movdqa %xmm10,%xmm7                    # use L_0 as the offset delta

call __ocb_encrypt1

movdqa %xmm7,%xmm15
movups %xmm2,0(%rsi)
jmp L$ocb_enc_done

.p2align 4
L$ocb_enc_two:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5

call __ocb_encrypt4

movdqa %xmm11,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)

jmp L$ocb_enc_done

.p2align 4
L$ocb_enc_three:
pxor %xmm5,%xmm5

call __ocb_encrypt4

movdqa %xmm12,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)

jmp L$ocb_enc_done

.p2align 4
L$ocb_enc_four:
call __ocb_encrypt4

movdqa %xmm13,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)

# Write back the checksum (%rbp) and the unmasked offset (%r9), clear all
# XMM registers, restore callee-saved registers, and return.
L$ocb_enc_done:
pxor %xmm0,%xmm15                      # strip round-key masking from offset
movdqu %xmm8,(%rbp)
movdqu %xmm15,(%r9)

xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
leaq 40(%rsp),%rax
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$ocb_enc_epilogue:
.byte 0xf3,0xc3                        # rep ret


# ---- __ocb_encrypt6 (local helper) ----
# Encrypts six blocks %xmm2..%xmm7 under OCB offsets, folds the plaintexts
# into the checksum %xmm8, and computes offsets %xmm10..%xmm15 for the next
# group. Uses %rbx = L_i table, %r8 = block counter, %r12/%r13/%r14 =
# precomputed table byte-offsets, %r11 = key, %rax/%r10 = schedule offset.
# .byte 102,15,56,220,xx = aesenc; 102,(65,)15,56,221,xx = aesenclast.
.p2align 5
__ocb_encrypt6:
pxor %xmm9,%xmm15                      # offset ^= round0^lastkey mask
movdqu (%rbx,%r12,1),%xmm11
movdqa %xmm10,%xmm12
movdqu (%rbx,%r13,1),%xmm13
movdqa %xmm10,%xmm14
pxor %xmm15,%xmm10
movdqu (%rbx,%r14,1),%xmm15
pxor %xmm10,%xmm11                     # chain offsets: offset_i = offset_{i-1} ^ L_*
pxor %xmm2,%xmm8                       # checksum ^= plaintext
pxor %xmm10,%xmm2
pxor %xmm11,%xmm12
pxor %xmm3,%xmm8
pxor %xmm11,%xmm3
pxor %xmm12,%xmm13
pxor %xmm4,%xmm8
pxor %xmm12,%xmm4
pxor %xmm13,%xmm14
pxor %xmm5,%xmm8
pxor %xmm13,%xmm5
pxor %xmm14,%xmm15
pxor %xmm6,%xmm8
pxor %xmm14,%xmm6
pxor %xmm7,%xmm8
pxor %xmm15,%xmm7
movups 32(%r11),%xmm0

# Prepare table indices for the following six blocks.
leaq 1(%r8),%r12
leaq 3(%r8),%r13
leaq 5(%r8),%r14
addq $6,%r8
pxor %xmm9,%xmm10                      # unmask the offsets for the caller
bsfq %r12,%r12
bsfq %r13,%r13
bsfq %r14,%r14

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm9,%xmm11
pxor %xmm9,%xmm12
.byte 102,15,56,220,241
pxor %xmm9,%xmm13
pxor %xmm9,%xmm14
.byte 102,15,56,220,249
movups 48(%r11),%xmm1
pxor %xmm9,%xmm15

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups 64(%r11),%xmm0
shlq $4,%r12
shlq $4,%r13
jmp L$ocb_enc_loop6

# Round loop over the remaining key schedule (two rounds per pass).
.p2align 5
L$ocb_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte
102,15,56,220,208
# ---- continuation of L$ocb_enc_loop4 inside __ocb_encrypt4 ----
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movups -16(%rcx,%rax,1),%xmm0
jnz L$ocb_enc_loop4

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups 16(%r11),%xmm1
movq %r10,%rax                         # restore schedule offset for next call

# aesenclast with the per-block offsets (%xmm10..%xmm13).
.byte 102,65,15,56,221,210             # aesenclast %xmm10,%xmm2
.byte 102,65,15,56,221,219             # aesenclast %xmm11,%xmm3
.byte 102,65,15,56,221,228             # aesenclast %xmm12,%xmm4
.byte 102,65,15,56,221,237             # aesenclast %xmm13,%xmm5
.byte 0xf3,0xc3                        # rep ret


# ---- __ocb_encrypt1 (local helper) ----
# Encrypts the single block in %xmm2 with OCB offset delta %xmm7 and folds
# the plaintext into the checksum %xmm8. %r11 = key, %rcx/%rax/%r10 index the
# key schedule; the caller's running offset is in %xmm15.
.p2align 5
__ocb_encrypt1:
pxor %xmm15,%xmm7                      # offset ^= delta
pxor %xmm9,%xmm7                       # apply round0^lastkey mask
pxor %xmm2,%xmm8                       # checksum ^= plaintext
pxor %xmm7,%xmm2                       # whiten the block
movups 32(%r11),%xmm0

.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
movups 48(%r11),%xmm1
pxor %xmm9,%xmm7                       # unmask the offset for the caller

.byte 102,15,56,220,208                # aesenc %xmm0,%xmm2
movups 64(%r11),%xmm0
jmp L$ocb_enc_loop1

.p2align 5
L$ocb_enc_loop1:
.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
movups (%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,220,208                # aesenc %xmm0,%xmm2
movups -16(%rcx,%rax,1),%xmm0
jnz L$ocb_enc_loop1

.byte 102,15,56,220,209                # aesenc %xmm1,%xmm2
movups 16(%r11),%xmm1
movq %r10,%rax                         # restore schedule offset

.byte 102,15,56,221,215                # aesenclast %xmm7,%xmm2
.byte 0xf3,0xc3                        # rep ret


# ---- _aesni_ocb_decrypt ----
# AES-OCB decryption; same argument registers as _aesni_ocb_encrypt, but the
# checksum is accumulated over the decrypted plaintexts. The function body
# continues beyond this chunk of the file.
.globl _aesni_ocb_decrypt
.private_extern _aesni_ocb_decrypt

.p2align 5
_aesni_ocb_decrypt:
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
movq 8(%rax),%rbx                      # 7th argument (L_i table)
movq 8+8(%rax),%rbp                    # 8th argument (checksum)

movl 240(%rcx),%r10d
movq %rcx,%r11
shll $4,%r10d
movups (%rcx),%xmm9
movups 16(%rcx,%r10,1),%xmm1           # last round key

movdqu (%r9),%xmm15                    # load offset
pxor %xmm1,%xmm9
pxor %xmm1,%xmm15

movl $16+32,%eax
leaq 32(%r11,%r10,1),%rcx
movups 16(%r11),%xmm1
subq %r10,%rax
movq %rax,%r10

movdqu (%rbx),%xmm10                   # L_0
movdqu (%rbp),%xmm8                    # checksum

# If the starting block number is even, process one block to make it odd.
testq $1,%r8
jnz L$ocb_dec_odd

bsfq %r8,%r12
addq $1,%r8
shlq $4,%r12
movdqu (%rbx,%r12,1),%xmm7
movdqu (%rdi),%xmm2
leaq 16(%rdi),%rdi

call __ocb_decrypt1

movdqa %xmm7,%xmm15
movups %xmm2,(%rsi)
xorps %xmm2,%xmm8                      # checksum ^= plaintext
leaq 16(%rsi),%rsi
subq $1,%rdx
jz L$ocb_dec_done

L$ocb_dec_odd:
leaq 1(%r8),%r12
leaq 3(%r8),%r13
leaq 5(%r8),%r14
leaq 6(%r8),%r8
bsfq %r12,%r12
bsfq %r13,%r13
bsfq %r14,%r14
shlq $4,%r12
shlq $4,%r13
shlq $4,%r14

subq $6,%rdx
jc L$ocb_dec_short
jmp L$ocb_dec_grandloop

# Main loop: six blocks per iteration.
.p2align 5
L$ocb_dec_grandloop:
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
leaq 96(%rdi),%rdi

call __ocb_decrypt6

movups %xmm2,0(%rsi)
pxor %xmm2,%xmm8
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm8
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm8
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm8
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm8
movups %xmm7,80(%rsi)
pxor %xmm7,%xmm8
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc L$ocb_dec_grandloop

# Handle the remaining 0-5 blocks.
L$ocb_dec_short:
addq $6,%rdx
jz L$ocb_dec_done

movdqu 0(%rdi),%xmm2
cmpq $2,%rdx
jb L$ocb_dec_one
movdqu 16(%rdi),%xmm3
je L$ocb_dec_two

movdqu 32(%rdi),%xmm4
cmpq $4,%rdx
jb L$ocb_dec_three
movdqu 48(%rdi),%xmm5
je L$ocb_dec_four

# Five blocks: six-wide helper with a zeroed sixth block.
movdqu 64(%rdi),%xmm6
pxor %xmm7,%xmm7

call __ocb_decrypt6

movdqa %xmm14,%xmm15
movups %xmm2,0(%rsi)
pxor %xmm2,%xmm8
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm8
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm8
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm8
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm8

jmp L$ocb_dec_done

.p2align 4
L$ocb_dec_one:
movdqa %xmm10,%xmm7                    # use L_0 as the offset delta

call __ocb_decrypt1

movdqa %xmm7,%xmm15
movups %xmm2,0(%rsi)
xorps %xmm2,%xmm8 3083 jmp L$ocb_dec_done 3084 3085 .p2align 4 3086 L$ocb_dec_two: 3087 pxor %xmm4,%xmm4 3088 pxor %xmm5,%xmm5 3089 3090 call __ocb_decrypt4 3091 3092 movdqa %xmm11,%xmm15 3093 movups %xmm2,0(%rsi) 3094 xorps %xmm2,%xmm8 3095 movups %xmm3,16(%rsi) 3096 xorps %xmm3,%xmm8 3097 3098 jmp L$ocb_dec_done 3099 3100 .p2align 4 3101 L$ocb_dec_three: 3102 pxor %xmm5,%xmm5 3103 3104 call __ocb_decrypt4 3105 3106 movdqa %xmm12,%xmm15 3107 movups %xmm2,0(%rsi) 3108 xorps %xmm2,%xmm8 3109 movups %xmm3,16(%rsi) 3110 xorps %xmm3,%xmm8 3111 movups %xmm4,32(%rsi) 3112 xorps %xmm4,%xmm8 3113 3114 jmp L$ocb_dec_done 3115 3116 .p2align 4 3117 L$ocb_dec_four: 3118 call __ocb_decrypt4 3119 3120 movdqa %xmm13,%xmm15 3121 movups %xmm2,0(%rsi) 3122 pxor %xmm2,%xmm8 3123 movups %xmm3,16(%rsi) 3124 pxor %xmm3,%xmm8 3125 movups %xmm4,32(%rsi) 3126 pxor %xmm4,%xmm8 3127 movups %xmm5,48(%rsi) 3128 pxor %xmm5,%xmm8 3129 3130 L$ocb_dec_done: 3131 pxor %xmm0,%xmm15 3132 movdqu %xmm8,(%rbp) 3133 movdqu %xmm15,(%r9) 3134 3135 xorps %xmm0,%xmm0 3136 pxor %xmm1,%xmm1 3137 pxor %xmm2,%xmm2 3138 pxor %xmm3,%xmm3 3139 pxor %xmm4,%xmm4 3140 pxor %xmm5,%xmm5 3141 pxor %xmm6,%xmm6 3142 pxor %xmm7,%xmm7 3143 pxor %xmm8,%xmm8 3144 pxor %xmm9,%xmm9 3145 pxor %xmm10,%xmm10 3146 pxor %xmm11,%xmm11 3147 pxor %xmm12,%xmm12 3148 pxor %xmm13,%xmm13 3149 pxor %xmm14,%xmm14 3150 pxor %xmm15,%xmm15 3151 leaq 40(%rsp),%rax 3152 movq -40(%rax),%r14 3153 movq -32(%rax),%r13 3154 movq -24(%rax),%r12 3155 movq -16(%rax),%rbp 3156 movq -8(%rax),%rbx 3157 leaq (%rax),%rsp 3158 L$ocb_dec_epilogue: 3159 .byte 0xf3,0xc3 3160 3161 3162 3163 .p2align 5 3164 __ocb_decrypt6: 3165 pxor %xmm9,%xmm15 3166 movdqu (%rbx,%r12,1),%xmm11 3167 movdqa %xmm10,%xmm12 3168 movdqu (%rbx,%r13,1),%xmm13 3169 movdqa %xmm10,%xmm14 3170 pxor %xmm15,%xmm10 3171 movdqu (%rbx,%r14,1),%xmm15 3172 pxor %xmm10,%xmm11 3173 pxor %xmm10,%xmm2 3174 pxor %xmm11,%xmm12 3175 pxor %xmm11,%xmm3 3176 pxor %xmm12,%xmm13 3177 pxor %xmm12,%xmm4 3178 pxor 
%xmm13,%xmm14 3179 pxor %xmm13,%xmm5 3180 pxor %xmm14,%xmm15 3181 pxor %xmm14,%xmm6 3182 pxor %xmm15,%xmm7 3183 movups 32(%r11),%xmm0 3184 3185 leaq 1(%r8),%r12 3186 leaq 3(%r8),%r13 3187 leaq 5(%r8),%r14 3188 addq $6,%r8 3189 pxor %xmm9,%xmm10 3190 bsfq %r12,%r12 3191 bsfq %r13,%r13 3192 bsfq %r14,%r14 3193 3194 .byte 102,15,56,222,209 3195 .byte 102,15,56,222,217 3196 .byte 102,15,56,222,225 3197 .byte 102,15,56,222,233 3198 pxor %xmm9,%xmm11 3199 pxor %xmm9,%xmm12 3200 .byte 102,15,56,222,241 3201 pxor %xmm9,%xmm13 3202 pxor %xmm9,%xmm14 3203 .byte 102,15,56,222,249 3204 movups 48(%r11),%xmm1 3205 pxor %xmm9,%xmm15 3206 3207 .byte 102,15,56,222,208 3208 .byte 102,15,56,222,216 3209 .byte 102,15,56,222,224 3210 .byte 102,15,56,222,232 3211 .byte 102,15,56,222,240 3212 .byte 102,15,56,222,248 3213 movups 64(%r11),%xmm0 3214 shlq $4,%r12 3215 shlq $4,%r13 3216 jmp L$ocb_dec_loop6 3217 3218 .p2align 5 3219 L$ocb_dec_loop6: 3220 .byte 102,15,56,222,209 3221 .byte 102,15,56,222,217 3222 .byte 102,15,56,222,225 3223 .byte 102,15,56,222,233 3224 .byte 102,15,56,222,241 3225 .byte 102,15,56,222,249 3226 movups (%rcx,%rax,1),%xmm1 3227 addq $32,%rax 3228 3229 .byte 102,15,56,222,208 3230 .byte 102,15,56,222,216 3231 .byte 102,15,56,222,224 3232 .byte 102,15,56,222,232 3233 .byte 102,15,56,222,240 3234 .byte 102,15,56,222,248 3235 movups -16(%rcx,%rax,1),%xmm0 3236 jnz L$ocb_dec_loop6 3237 3238 .byte 102,15,56,222,209 3239 .byte 102,15,56,222,217 3240 .byte 102,15,56,222,225 3241 .byte 102,15,56,222,233 3242 .byte 102,15,56,222,241 3243 .byte 102,15,56,222,249 3244 movups 16(%r11),%xmm1 3245 shlq $4,%r14 3246 3247 .byte 102,65,15,56,223,210 3248 movdqu (%rbx),%xmm10 3249 movq %r10,%rax 3250 .byte 102,65,15,56,223,219 3251 .byte 102,65,15,56,223,228 3252 .byte 102,65,15,56,223,237 3253 .byte 102,65,15,56,223,246 3254 .byte 102,65,15,56,223,255 3255 .byte 0xf3,0xc3 3256 3257 3258 3259 .p2align 5 3260 __ocb_decrypt4: 3261 pxor %xmm9,%xmm15 3262 movdqu (%rbx,%r12,1),%xmm11 
3263 movdqa %xmm10,%xmm12 3264 movdqu (%rbx,%r13,1),%xmm13 3265 pxor %xmm15,%xmm10 3266 pxor %xmm10,%xmm11 3267 pxor %xmm10,%xmm2 3268 pxor %xmm11,%xmm12 3269 pxor %xmm11,%xmm3 3270 pxor %xmm12,%xmm13 3271 pxor %xmm12,%xmm4 3272 pxor %xmm13,%xmm5 3273 movups 32(%r11),%xmm0 3274 3275 pxor %xmm9,%xmm10 3276 pxor %xmm9,%xmm11 3277 pxor %xmm9,%xmm12 3278 pxor %xmm9,%xmm13 3279 3280 .byte 102,15,56,222,209 3281 .byte 102,15,56,222,217 3282 .byte 102,15,56,222,225 3283 .byte 102,15,56,222,233 3284 movups 48(%r11),%xmm1 3285 3286 .byte 102,15,56,222,208 3287 .byte 102,15,56,222,216 3288 .byte 102,15,56,222,224 3289 .byte 102,15,56,222,232 3290 movups 64(%r11),%xmm0 3291 jmp L$ocb_dec_loop4 3292 3293 .p2align 5 3294 L$ocb_dec_loop4: 3295 .byte 102,15,56,222,209 3296 .byte 102,15,56,222,217 3297 .byte 102,15,56,222,225 3298 .byte 102,15,56,222,233 3299 movups (%rcx,%rax,1),%xmm1 3300 addq $32,%rax 3301 3302 .byte 102,15,56,222,208 3303 .byte 102,15,56,222,216 3304 .byte 102,15,56,222,224 3305 .byte 102,15,56,222,232 3306 movups -16(%rcx,%rax,1),%xmm0 3307 jnz L$ocb_dec_loop4 3308 3309 .byte 102,15,56,222,209 3310 .byte 102,15,56,222,217 3311 .byte 102,15,56,222,225 3312 .byte 102,15,56,222,233 3313 movups 16(%r11),%xmm1 3314 movq %r10,%rax 3315 3316 .byte 102,65,15,56,223,210 3317 .byte 102,65,15,56,223,219 3318 .byte 102,65,15,56,223,228 3319 .byte 102,65,15,56,223,237 3320 .byte 0xf3,0xc3 3321 3322 3323 3324 .p2align 5 3325 __ocb_decrypt1: 3326 pxor %xmm15,%xmm7 3327 pxor %xmm9,%xmm7 3328 pxor %xmm7,%xmm2 3329 movups 32(%r11),%xmm0 3330 3331 .byte 102,15,56,222,209 3332 movups 48(%r11),%xmm1 3333 pxor %xmm9,%xmm7 3334 3335 .byte 102,15,56,222,208 3336 movups 64(%r11),%xmm0 3337 jmp L$ocb_dec_loop1 3338 3339 .p2align 5 3340 L$ocb_dec_loop1: 3341 .byte 102,15,56,222,209 3342 movups (%rcx,%rax,1),%xmm1 3343 addq $32,%rax 3344 3345 .byte 102,15,56,222,208 3346 movups -16(%rcx,%rax,1),%xmm0 3347 jnz L$ocb_dec_loop1 3348 3349 .byte 102,15,56,222,209 3350 movups 16(%r11),%xmm1 
3351 movq %r10,%rax 3352 3353 .byte 102,15,56,223,215 3354 .byte 0xf3,0xc3 3355 3356 .globl _aesni_cbc_encrypt 3357 .private_extern _aesni_cbc_encrypt 3358 3359 .p2align 4 3360 _aesni_cbc_encrypt: 3361 testq %rdx,%rdx 3362 jz L$cbc_ret 3363 3364 movl 240(%rcx),%r10d 3365 movq %rcx,%r11 3366 testl %r9d,%r9d 3367 jz L$cbc_decrypt 3368 3369 movups (%r8),%xmm2 3370 movl %r10d,%eax 3371 cmpq $16,%rdx 3372 jb L$cbc_enc_tail 3373 subq $16,%rdx 3374 jmp L$cbc_enc_loop 3375 .p2align 4 3376 L$cbc_enc_loop: 3377 movups (%rdi),%xmm3 3378 leaq 16(%rdi),%rdi 3379 3380 movups (%rcx),%xmm0 3381 movups 16(%rcx),%xmm1 3382 xorps %xmm0,%xmm3 3383 leaq 32(%rcx),%rcx 3384 xorps %xmm3,%xmm2 3385 L$oop_enc1_15: 3386 .byte 102,15,56,220,209 3387 decl %eax 3388 movups (%rcx),%xmm1 3389 leaq 16(%rcx),%rcx 3390 jnz L$oop_enc1_15 3391 .byte 102,15,56,221,209 3392 movl %r10d,%eax 3393 movq %r11,%rcx 3394 movups %xmm2,0(%rsi) 3395 leaq 16(%rsi),%rsi 3396 subq $16,%rdx 3397 jnc L$cbc_enc_loop 3398 addq $16,%rdx 3399 jnz L$cbc_enc_tail 3400 pxor %xmm0,%xmm0 3401 pxor %xmm1,%xmm1 3402 movups %xmm2,(%r8) 3403 pxor %xmm2,%xmm2 3404 pxor %xmm3,%xmm3 3405 jmp L$cbc_ret 3406 3407 L$cbc_enc_tail: 3408 movq %rdx,%rcx 3409 xchgq %rdi,%rsi 3410 .long 0x9066A4F3 3411 movl $16,%ecx 3412 subq %rdx,%rcx 3413 xorl %eax,%eax 3414 .long 0x9066AAF3 3415 leaq -16(%rdi),%rdi 3416 movl %r10d,%eax 3417 movq %rdi,%rsi 3418 movq %r11,%rcx 3419 xorq %rdx,%rdx 3420 jmp L$cbc_enc_loop 3421 3422 .p2align 4 3423 L$cbc_decrypt: 3424 cmpq $16,%rdx 3425 jne L$cbc_decrypt_bulk 3426 3427 3428 3429 movdqu (%rdi),%xmm2 3430 movdqu (%r8),%xmm3 3431 movdqa %xmm2,%xmm4 3432 movups (%rcx),%xmm0 3433 movups 16(%rcx),%xmm1 3434 leaq 32(%rcx),%rcx 3435 xorps %xmm0,%xmm2 3436 L$oop_dec1_16: 3437 .byte 102,15,56,222,209 3438 decl %r10d 3439 movups (%rcx),%xmm1 3440 leaq 16(%rcx),%rcx 3441 jnz L$oop_dec1_16 3442 .byte 102,15,56,223,209 3443 pxor %xmm0,%xmm0 3444 pxor %xmm1,%xmm1 3445 movdqu %xmm4,(%r8) 3446 xorps %xmm3,%xmm2 3447 pxor 
%xmm3,%xmm3 3448 movups %xmm2,(%rsi) 3449 pxor %xmm2,%xmm2 3450 jmp L$cbc_ret 3451 .p2align 4 3452 L$cbc_decrypt_bulk: 3453 leaq (%rsp),%r11 3454 pushq %rbp 3455 subq $16,%rsp 3456 andq $-16,%rsp 3457 movq %rcx,%rbp 3458 movups (%r8),%xmm10 3459 movl %r10d,%eax 3460 cmpq $0x50,%rdx 3461 jbe L$cbc_dec_tail 3462 3463 movups (%rcx),%xmm0 3464 movdqu 0(%rdi),%xmm2 3465 movdqu 16(%rdi),%xmm3 3466 movdqa %xmm2,%xmm11 3467 movdqu 32(%rdi),%xmm4 3468 movdqa %xmm3,%xmm12 3469 movdqu 48(%rdi),%xmm5 3470 movdqa %xmm4,%xmm13 3471 movdqu 64(%rdi),%xmm6 3472 movdqa %xmm5,%xmm14 3473 movdqu 80(%rdi),%xmm7 3474 movdqa %xmm6,%xmm15 3475 movl _OPENSSL_ia32cap_P+4(%rip),%r9d 3476 cmpq $0x70,%rdx 3477 jbe L$cbc_dec_six_or_seven 3478 3479 andl $71303168,%r9d 3480 subq $0x50,%rdx 3481 cmpl $4194304,%r9d 3482 je L$cbc_dec_loop6_enter 3483 subq $0x20,%rdx 3484 leaq 112(%rcx),%rcx 3485 jmp L$cbc_dec_loop8_enter 3486 .p2align 4 3487 L$cbc_dec_loop8: 3488 movups %xmm9,(%rsi) 3489 leaq 16(%rsi),%rsi 3490 L$cbc_dec_loop8_enter: 3491 movdqu 96(%rdi),%xmm8 3492 pxor %xmm0,%xmm2 3493 movdqu 112(%rdi),%xmm9 3494 pxor %xmm0,%xmm3 3495 movups 16-112(%rcx),%xmm1 3496 pxor %xmm0,%xmm4 3497 movq $-1,%rbp 3498 cmpq $0x70,%rdx 3499 pxor %xmm0,%xmm5 3500 pxor %xmm0,%xmm6 3501 pxor %xmm0,%xmm7 3502 pxor %xmm0,%xmm8 3503 3504 .byte 102,15,56,222,209 3505 pxor %xmm0,%xmm9 3506 movups 32-112(%rcx),%xmm0 3507 .byte 102,15,56,222,217 3508 .byte 102,15,56,222,225 3509 .byte 102,15,56,222,233 3510 .byte 102,15,56,222,241 3511 .byte 102,15,56,222,249 3512 .byte 102,68,15,56,222,193 3513 adcq $0,%rbp 3514 andq $128,%rbp 3515 .byte 102,68,15,56,222,201 3516 addq %rdi,%rbp 3517 movups 48-112(%rcx),%xmm1 3518 .byte 102,15,56,222,208 3519 .byte 102,15,56,222,216 3520 .byte 102,15,56,222,224 3521 .byte 102,15,56,222,232 3522 .byte 102,15,56,222,240 3523 .byte 102,15,56,222,248 3524 .byte 102,68,15,56,222,192 3525 .byte 102,68,15,56,222,200 3526 movups 64-112(%rcx),%xmm0 3527 nop 3528 .byte 102,15,56,222,209 3529 .byte 
102,15,56,222,217 3530 .byte 102,15,56,222,225 3531 .byte 102,15,56,222,233 3532 .byte 102,15,56,222,241 3533 .byte 102,15,56,222,249 3534 .byte 102,68,15,56,222,193 3535 .byte 102,68,15,56,222,201 3536 movups 80-112(%rcx),%xmm1 3537 nop 3538 .byte 102,15,56,222,208 3539 .byte 102,15,56,222,216 3540 .byte 102,15,56,222,224 3541 .byte 102,15,56,222,232 3542 .byte 102,15,56,222,240 3543 .byte 102,15,56,222,248 3544 .byte 102,68,15,56,222,192 3545 .byte 102,68,15,56,222,200 3546 movups 96-112(%rcx),%xmm0 3547 nop 3548 .byte 102,15,56,222,209 3549 .byte 102,15,56,222,217 3550 .byte 102,15,56,222,225 3551 .byte 102,15,56,222,233 3552 .byte 102,15,56,222,241 3553 .byte 102,15,56,222,249 3554 .byte 102,68,15,56,222,193 3555 .byte 102,68,15,56,222,201 3556 movups 112-112(%rcx),%xmm1 3557 nop 3558 .byte 102,15,56,222,208 3559 .byte 102,15,56,222,216 3560 .byte 102,15,56,222,224 3561 .byte 102,15,56,222,232 3562 .byte 102,15,56,222,240 3563 .byte 102,15,56,222,248 3564 .byte 102,68,15,56,222,192 3565 .byte 102,68,15,56,222,200 3566 movups 128-112(%rcx),%xmm0 3567 nop 3568 .byte 102,15,56,222,209 3569 .byte 102,15,56,222,217 3570 .byte 102,15,56,222,225 3571 .byte 102,15,56,222,233 3572 .byte 102,15,56,222,241 3573 .byte 102,15,56,222,249 3574 .byte 102,68,15,56,222,193 3575 .byte 102,68,15,56,222,201 3576 movups 144-112(%rcx),%xmm1 3577 cmpl $11,%eax 3578 .byte 102,15,56,222,208 3579 .byte 102,15,56,222,216 3580 .byte 102,15,56,222,224 3581 .byte 102,15,56,222,232 3582 .byte 102,15,56,222,240 3583 .byte 102,15,56,222,248 3584 .byte 102,68,15,56,222,192 3585 .byte 102,68,15,56,222,200 3586 movups 160-112(%rcx),%xmm0 3587 jb L$cbc_dec_done 3588 .byte 102,15,56,222,209 3589 .byte 102,15,56,222,217 3590 .byte 102,15,56,222,225 3591 .byte 102,15,56,222,233 3592 .byte 102,15,56,222,241 3593 .byte 102,15,56,222,249 3594 .byte 102,68,15,56,222,193 3595 .byte 102,68,15,56,222,201 3596 movups 176-112(%rcx),%xmm1 3597 nop 3598 .byte 102,15,56,222,208 3599 .byte 102,15,56,222,216 3600 
.byte 102,15,56,222,224 3601 .byte 102,15,56,222,232 3602 .byte 102,15,56,222,240 3603 .byte 102,15,56,222,248 3604 .byte 102,68,15,56,222,192 3605 .byte 102,68,15,56,222,200 3606 movups 192-112(%rcx),%xmm0 3607 je L$cbc_dec_done 3608 .byte 102,15,56,222,209 3609 .byte 102,15,56,222,217 3610 .byte 102,15,56,222,225 3611 .byte 102,15,56,222,233 3612 .byte 102,15,56,222,241 3613 .byte 102,15,56,222,249 3614 .byte 102,68,15,56,222,193 3615 .byte 102,68,15,56,222,201 3616 movups 208-112(%rcx),%xmm1 3617 nop 3618 .byte 102,15,56,222,208 3619 .byte 102,15,56,222,216 3620 .byte 102,15,56,222,224 3621 .byte 102,15,56,222,232 3622 .byte 102,15,56,222,240 3623 .byte 102,15,56,222,248 3624 .byte 102,68,15,56,222,192 3625 .byte 102,68,15,56,222,200 3626 movups 224-112(%rcx),%xmm0 3627 jmp L$cbc_dec_done 3628 .p2align 4 3629 L$cbc_dec_done: 3630 .byte 102,15,56,222,209 3631 .byte 102,15,56,222,217 3632 pxor %xmm0,%xmm10 3633 pxor %xmm0,%xmm11 3634 .byte 102,15,56,222,225 3635 .byte 102,15,56,222,233 3636 pxor %xmm0,%xmm12 3637 pxor %xmm0,%xmm13 3638 .byte 102,15,56,222,241 3639 .byte 102,15,56,222,249 3640 pxor %xmm0,%xmm14 3641 pxor %xmm0,%xmm15 3642 .byte 102,68,15,56,222,193 3643 .byte 102,68,15,56,222,201 3644 movdqu 80(%rdi),%xmm1 3645 3646 .byte 102,65,15,56,223,210 3647 movdqu 96(%rdi),%xmm10 3648 pxor %xmm0,%xmm1 3649 .byte 102,65,15,56,223,219 3650 pxor %xmm0,%xmm10 3651 movdqu 112(%rdi),%xmm0 3652 .byte 102,65,15,56,223,228 3653 leaq 128(%rdi),%rdi 3654 movdqu 0(%rbp),%xmm11 3655 .byte 102,65,15,56,223,237 3656 .byte 102,65,15,56,223,246 3657 movdqu 16(%rbp),%xmm12 3658 movdqu 32(%rbp),%xmm13 3659 .byte 102,65,15,56,223,255 3660 .byte 102,68,15,56,223,193 3661 movdqu 48(%rbp),%xmm14 3662 movdqu 64(%rbp),%xmm15 3663 .byte 102,69,15,56,223,202 3664 movdqa %xmm0,%xmm10 3665 movdqu 80(%rbp),%xmm1 3666 movups -112(%rcx),%xmm0 3667 3668 movups %xmm2,(%rsi) 3669 movdqa %xmm11,%xmm2 3670 movups %xmm3,16(%rsi) 3671 movdqa %xmm12,%xmm3 3672 movups %xmm4,32(%rsi) 3673 movdqa 
%xmm13,%xmm4 3674 movups %xmm5,48(%rsi) 3675 movdqa %xmm14,%xmm5 3676 movups %xmm6,64(%rsi) 3677 movdqa %xmm15,%xmm6 3678 movups %xmm7,80(%rsi) 3679 movdqa %xmm1,%xmm7 3680 movups %xmm8,96(%rsi) 3681 leaq 112(%rsi),%rsi 3682 3683 subq $0x80,%rdx 3684 ja L$cbc_dec_loop8 3685 3686 movaps %xmm9,%xmm2 3687 leaq -112(%rcx),%rcx 3688 addq $0x70,%rdx 3689 jle L$cbc_dec_clear_tail_collected 3690 movups %xmm9,(%rsi) 3691 leaq 16(%rsi),%rsi 3692 cmpq $0x50,%rdx 3693 jbe L$cbc_dec_tail 3694 3695 movaps %xmm11,%xmm2 3696 L$cbc_dec_six_or_seven: 3697 cmpq $0x60,%rdx 3698 ja L$cbc_dec_seven 3699 3700 movaps %xmm7,%xmm8 3701 call _aesni_decrypt6 3702 pxor %xmm10,%xmm2 3703 movaps %xmm8,%xmm10 3704 pxor %xmm11,%xmm3 3705 movdqu %xmm2,(%rsi) 3706 pxor %xmm12,%xmm4 3707 movdqu %xmm3,16(%rsi) 3708 pxor %xmm3,%xmm3 3709 pxor %xmm13,%xmm5 3710 movdqu %xmm4,32(%rsi) 3711 pxor %xmm4,%xmm4 3712 pxor %xmm14,%xmm6 3713 movdqu %xmm5,48(%rsi) 3714 pxor %xmm5,%xmm5 3715 pxor %xmm15,%xmm7 3716 movdqu %xmm6,64(%rsi) 3717 pxor %xmm6,%xmm6 3718 leaq 80(%rsi),%rsi 3719 movdqa %xmm7,%xmm2 3720 pxor %xmm7,%xmm7 3721 jmp L$cbc_dec_tail_collected 3722 3723 .p2align 4 3724 L$cbc_dec_seven: 3725 movups 96(%rdi),%xmm8 3726 xorps %xmm9,%xmm9 3727 call _aesni_decrypt8 3728 movups 80(%rdi),%xmm9 3729 pxor %xmm10,%xmm2 3730 movups 96(%rdi),%xmm10 3731 pxor %xmm11,%xmm3 3732 movdqu %xmm2,(%rsi) 3733 pxor %xmm12,%xmm4 3734 movdqu %xmm3,16(%rsi) 3735 pxor %xmm3,%xmm3 3736 pxor %xmm13,%xmm5 3737 movdqu %xmm4,32(%rsi) 3738 pxor %xmm4,%xmm4 3739 pxor %xmm14,%xmm6 3740 movdqu %xmm5,48(%rsi) 3741 pxor %xmm5,%xmm5 3742 pxor %xmm15,%xmm7 3743 movdqu %xmm6,64(%rsi) 3744 pxor %xmm6,%xmm6 3745 pxor %xmm9,%xmm8 3746 movdqu %xmm7,80(%rsi) 3747 pxor %xmm7,%xmm7 3748 leaq 96(%rsi),%rsi 3749 movdqa %xmm8,%xmm2 3750 pxor %xmm8,%xmm8 3751 pxor %xmm9,%xmm9 3752 jmp L$cbc_dec_tail_collected 3753 3754 .p2align 4 3755 L$cbc_dec_loop6: 3756 movups %xmm7,(%rsi) 3757 leaq 16(%rsi),%rsi 3758 movdqu 0(%rdi),%xmm2 3759 movdqu 
16(%rdi),%xmm3 3760 movdqa %xmm2,%xmm11 3761 movdqu 32(%rdi),%xmm4 3762 movdqa %xmm3,%xmm12 3763 movdqu 48(%rdi),%xmm5 3764 movdqa %xmm4,%xmm13 3765 movdqu 64(%rdi),%xmm6 3766 movdqa %xmm5,%xmm14 3767 movdqu 80(%rdi),%xmm7 3768 movdqa %xmm6,%xmm15 3769 L$cbc_dec_loop6_enter: 3770 leaq 96(%rdi),%rdi 3771 movdqa %xmm7,%xmm8 3772 3773 call _aesni_decrypt6 3774 3775 pxor %xmm10,%xmm2 3776 movdqa %xmm8,%xmm10 3777 pxor %xmm11,%xmm3 3778 movdqu %xmm2,(%rsi) 3779 pxor %xmm12,%xmm4 3780 movdqu %xmm3,16(%rsi) 3781 pxor %xmm13,%xmm5 3782 movdqu %xmm4,32(%rsi) 3783 pxor %xmm14,%xmm6 3784 movq %rbp,%rcx 3785 movdqu %xmm5,48(%rsi) 3786 pxor %xmm15,%xmm7 3787 movl %r10d,%eax 3788 movdqu %xmm6,64(%rsi) 3789 leaq 80(%rsi),%rsi 3790 subq $0x60,%rdx 3791 ja L$cbc_dec_loop6 3792 3793 movdqa %xmm7,%xmm2 3794 addq $0x50,%rdx 3795 jle L$cbc_dec_clear_tail_collected 3796 movups %xmm7,(%rsi) 3797 leaq 16(%rsi),%rsi 3798 3799 L$cbc_dec_tail: 3800 movups (%rdi),%xmm2 3801 subq $0x10,%rdx 3802 jbe L$cbc_dec_one 3803 3804 movups 16(%rdi),%xmm3 3805 movaps %xmm2,%xmm11 3806 subq $0x10,%rdx 3807 jbe L$cbc_dec_two 3808 3809 movups 32(%rdi),%xmm4 3810 movaps %xmm3,%xmm12 3811 subq $0x10,%rdx 3812 jbe L$cbc_dec_three 3813 3814 movups 48(%rdi),%xmm5 3815 movaps %xmm4,%xmm13 3816 subq $0x10,%rdx 3817 jbe L$cbc_dec_four 3818 3819 movups 64(%rdi),%xmm6 3820 movaps %xmm5,%xmm14 3821 movaps %xmm6,%xmm15 3822 xorps %xmm7,%xmm7 3823 call _aesni_decrypt6 3824 pxor %xmm10,%xmm2 3825 movaps %xmm15,%xmm10 3826 pxor %xmm11,%xmm3 3827 movdqu %xmm2,(%rsi) 3828 pxor %xmm12,%xmm4 3829 movdqu %xmm3,16(%rsi) 3830 pxor %xmm3,%xmm3 3831 pxor %xmm13,%xmm5 3832 movdqu %xmm4,32(%rsi) 3833 pxor %xmm4,%xmm4 3834 pxor %xmm14,%xmm6 3835 movdqu %xmm5,48(%rsi) 3836 pxor %xmm5,%xmm5 3837 leaq 64(%rsi),%rsi 3838 movdqa %xmm6,%xmm2 3839 pxor %xmm6,%xmm6 3840 pxor %xmm7,%xmm7 3841 subq $0x10,%rdx 3842 jmp L$cbc_dec_tail_collected 3843 3844 .p2align 4 3845 L$cbc_dec_one: 3846 movaps %xmm2,%xmm11 3847 movups (%rcx),%xmm0 3848 movups 
16(%rcx),%xmm1 3849 leaq 32(%rcx),%rcx 3850 xorps %xmm0,%xmm2 3851 L$oop_dec1_17: 3852 .byte 102,15,56,222,209 3853 decl %eax 3854 movups (%rcx),%xmm1 3855 leaq 16(%rcx),%rcx 3856 jnz L$oop_dec1_17 3857 .byte 102,15,56,223,209 3858 xorps %xmm10,%xmm2 3859 movaps %xmm11,%xmm10 3860 jmp L$cbc_dec_tail_collected 3861 .p2align 4 3862 L$cbc_dec_two: 3863 movaps %xmm3,%xmm12 3864 call _aesni_decrypt2 3865 pxor %xmm10,%xmm2 3866 movaps %xmm12,%xmm10 3867 pxor %xmm11,%xmm3 3868 movdqu %xmm2,(%rsi) 3869 movdqa %xmm3,%xmm2 3870 pxor %xmm3,%xmm3 3871 leaq 16(%rsi),%rsi 3872 jmp L$cbc_dec_tail_collected 3873 .p2align 4 3874 L$cbc_dec_three: 3875 movaps %xmm4,%xmm13 3876 call _aesni_decrypt3 3877 pxor %xmm10,%xmm2 3878 movaps %xmm13,%xmm10 3879 pxor %xmm11,%xmm3 3880 movdqu %xmm2,(%rsi) 3881 pxor %xmm12,%xmm4 3882 movdqu %xmm3,16(%rsi) 3883 pxor %xmm3,%xmm3 3884 movdqa %xmm4,%xmm2 3885 pxor %xmm4,%xmm4 3886 leaq 32(%rsi),%rsi 3887 jmp L$cbc_dec_tail_collected 3888 .p2align 4 3889 L$cbc_dec_four: 3890 movaps %xmm5,%xmm14 3891 call _aesni_decrypt4 3892 pxor %xmm10,%xmm2 3893 movaps %xmm14,%xmm10 3894 pxor %xmm11,%xmm3 3895 movdqu %xmm2,(%rsi) 3896 pxor %xmm12,%xmm4 3897 movdqu %xmm3,16(%rsi) 3898 pxor %xmm3,%xmm3 3899 pxor %xmm13,%xmm5 3900 movdqu %xmm4,32(%rsi) 3901 pxor %xmm4,%xmm4 3902 movdqa %xmm5,%xmm2 3903 pxor %xmm5,%xmm5 3904 leaq 48(%rsi),%rsi 3905 jmp L$cbc_dec_tail_collected 3906 3907 .p2align 4 3908 L$cbc_dec_clear_tail_collected: 3909 pxor %xmm3,%xmm3 3910 pxor %xmm4,%xmm4 3911 pxor %xmm5,%xmm5 3912 pxor %xmm6,%xmm6 3913 pxor %xmm7,%xmm7 3914 pxor %xmm8,%xmm8 3915 pxor %xmm9,%xmm9 3916 L$cbc_dec_tail_collected: 3917 movups %xmm10,(%r8) 3918 andq $15,%rdx 3919 jnz L$cbc_dec_tail_partial 3920 movups %xmm2,(%rsi) 3921 pxor %xmm2,%xmm2 3922 jmp L$cbc_dec_ret 3923 .p2align 4 3924 L$cbc_dec_tail_partial: 3925 movaps %xmm2,(%rsp) 3926 pxor %xmm2,%xmm2 3927 movq $16,%rcx 3928 movq %rsi,%rdi 3929 subq %rdx,%rcx 3930 leaq (%rsp),%rsi 3931 .long 0x9066A4F3 3932 movdqa 
%xmm2,(%rsp) 3933 3934 L$cbc_dec_ret: 3935 xorps %xmm0,%xmm0 3936 pxor %xmm1,%xmm1 3937 movq -8(%r11),%rbp 3938 leaq (%r11),%rsp 3939 L$cbc_ret: 3940 .byte 0xf3,0xc3 3941 3942 .globl _aesni_set_decrypt_key 3943 .private_extern _aesni_set_decrypt_key 3944 3945 .p2align 4 3946 _aesni_set_decrypt_key: 3947 .byte 0x48,0x83,0xEC,0x08 3948 call __aesni_set_encrypt_key 3949 shll $4,%esi 3950 testl %eax,%eax 3951 jnz L$dec_key_ret 3952 leaq 16(%rdx,%rsi,1),%rdi 3953 3954 movups (%rdx),%xmm0 3955 movups (%rdi),%xmm1 3956 movups %xmm0,(%rdi) 3957 movups %xmm1,(%rdx) 3958 leaq 16(%rdx),%rdx 3959 leaq -16(%rdi),%rdi 3960 3961 L$dec_key_inverse: 3962 movups (%rdx),%xmm0 3963 movups (%rdi),%xmm1 3964 .byte 102,15,56,219,192 3965 .byte 102,15,56,219,201 3966 leaq 16(%rdx),%rdx 3967 leaq -16(%rdi),%rdi 3968 movups %xmm0,16(%rdi) 3969 movups %xmm1,-16(%rdx) 3970 cmpq %rdx,%rdi 3971 ja L$dec_key_inverse 3972 3973 movups (%rdx),%xmm0 3974 .byte 102,15,56,219,192 3975 pxor %xmm1,%xmm1 3976 movups %xmm0,(%rdi) 3977 pxor %xmm0,%xmm0 3978 L$dec_key_ret: 3979 addq $8,%rsp 3980 .byte 0xf3,0xc3 3981 L$SEH_end_set_decrypt_key: 3982 3983 .globl _aesni_set_encrypt_key 3984 .private_extern _aesni_set_encrypt_key 3985 3986 .p2align 4 3987 _aesni_set_encrypt_key: 3988 __aesni_set_encrypt_key: 3989 .byte 0x48,0x83,0xEC,0x08 3990 movq $-1,%rax 3991 testq %rdi,%rdi 3992 jz L$enc_key_ret 3993 testq %rdx,%rdx 3994 jz L$enc_key_ret 3995 3996 movl $268437504,%r10d 3997 movups (%rdi),%xmm0 3998 xorps %xmm4,%xmm4 3999 andl _OPENSSL_ia32cap_P+4(%rip),%r10d 4000 leaq 16(%rdx),%rax 4001 cmpl $256,%esi 4002 je L$14rounds 4003 cmpl $192,%esi 4004 je L$12rounds 4005 cmpl $128,%esi 4006 jne L$bad_keybits 4007 4008 L$10rounds: 4009 movl $9,%esi 4010 cmpl $268435456,%r10d 4011 je L$10rounds_alt 4012 4013 movups %xmm0,(%rdx) 4014 .byte 102,15,58,223,200,1 4015 call L$key_expansion_128_cold 4016 .byte 102,15,58,223,200,2 4017 call L$key_expansion_128 4018 .byte 102,15,58,223,200,4 4019 call L$key_expansion_128 4020 
.byte 102,15,58,223,200,8 4021 call L$key_expansion_128 4022 .byte 102,15,58,223,200,16 4023 call L$key_expansion_128 4024 .byte 102,15,58,223,200,32 4025 call L$key_expansion_128 4026 .byte 102,15,58,223,200,64 4027 call L$key_expansion_128 4028 .byte 102,15,58,223,200,128 4029 call L$key_expansion_128 4030 .byte 102,15,58,223,200,27 4031 call L$key_expansion_128 4032 .byte 102,15,58,223,200,54 4033 call L$key_expansion_128 4034 movups %xmm0,(%rax) 4035 movl %esi,80(%rax) 4036 xorl %eax,%eax 4037 jmp L$enc_key_ret 4038 4039 .p2align 4 4040 L$10rounds_alt: 4041 movdqa L$key_rotate(%rip),%xmm5 4042 movl $8,%r10d 4043 movdqa L$key_rcon1(%rip),%xmm4 4044 movdqa %xmm0,%xmm2 4045 movdqu %xmm0,(%rdx) 4046 jmp L$oop_key128 4047 4048 .p2align 4 4049 L$oop_key128: 4050 .byte 102,15,56,0,197 4051 .byte 102,15,56,221,196 4052 pslld $1,%xmm4 4053 leaq 16(%rax),%rax 4054 4055 movdqa %xmm2,%xmm3 4056 pslldq $4,%xmm2 4057 pxor %xmm2,%xmm3 4058 pslldq $4,%xmm2 4059 pxor %xmm2,%xmm3 4060 pslldq $4,%xmm2 4061 pxor %xmm3,%xmm2 4062 4063 pxor %xmm2,%xmm0 4064 movdqu %xmm0,-16(%rax) 4065 movdqa %xmm0,%xmm2 4066 4067 decl %r10d 4068 jnz L$oop_key128 4069 4070 movdqa L$key_rcon1b(%rip),%xmm4 4071 4072 .byte 102,15,56,0,197 4073 .byte 102,15,56,221,196 4074 pslld $1,%xmm4 4075 4076 movdqa %xmm2,%xmm3 4077 pslldq $4,%xmm2 4078 pxor %xmm2,%xmm3 4079 pslldq $4,%xmm2 4080 pxor %xmm2,%xmm3 4081 pslldq $4,%xmm2 4082 pxor %xmm3,%xmm2 4083 4084 pxor %xmm2,%xmm0 4085 movdqu %xmm0,(%rax) 4086 4087 movdqa %xmm0,%xmm2 4088 .byte 102,15,56,0,197 4089 .byte 102,15,56,221,196 4090 4091 movdqa %xmm2,%xmm3 4092 pslldq $4,%xmm2 4093 pxor %xmm2,%xmm3 4094 pslldq $4,%xmm2 4095 pxor %xmm2,%xmm3 4096 pslldq $4,%xmm2 4097 pxor %xmm3,%xmm2 4098 4099 pxor %xmm2,%xmm0 4100 movdqu %xmm0,16(%rax) 4101 4102 movl %esi,96(%rax) 4103 xorl %eax,%eax 4104 jmp L$enc_key_ret 4105 4106 .p2align 4 4107 L$12rounds: 4108 movq 16(%rdi),%xmm2 4109 movl $11,%esi 4110 cmpl $268435456,%r10d 4111 je L$12rounds_alt 4112 4113 movups 
%xmm0,(%rdx) 4114 .byte 102,15,58,223,202,1 4115 call L$key_expansion_192a_cold 4116 .byte 102,15,58,223,202,2 4117 call L$key_expansion_192b 4118 .byte 102,15,58,223,202,4 4119 call L$key_expansion_192a 4120 .byte 102,15,58,223,202,8 4121 call L$key_expansion_192b 4122 .byte 102,15,58,223,202,16 4123 call L$key_expansion_192a 4124 .byte 102,15,58,223,202,32 4125 call L$key_expansion_192b 4126 .byte 102,15,58,223,202,64 4127 call L$key_expansion_192a 4128 .byte 102,15,58,223,202,128 4129 call L$key_expansion_192b 4130 movups %xmm0,(%rax) 4131 movl %esi,48(%rax) 4132 xorq %rax,%rax 4133 jmp L$enc_key_ret 4134 4135 .p2align 4 4136 L$12rounds_alt: 4137 movdqa L$key_rotate192(%rip),%xmm5 4138 movdqa L$key_rcon1(%rip),%xmm4 4139 movl $8,%r10d 4140 movdqu %xmm0,(%rdx) 4141 jmp L$oop_key192 4142 4143 .p2align 4 4144 L$oop_key192: 4145 movq %xmm2,0(%rax) 4146 movdqa %xmm2,%xmm1 4147 .byte 102,15,56,0,213 4148 .byte 102,15,56,221,212 4149 pslld $1,%xmm4 4150 leaq 24(%rax),%rax 4151 4152 movdqa %xmm0,%xmm3 4153 pslldq $4,%xmm0 4154 pxor %xmm0,%xmm3 4155 pslldq $4,%xmm0 4156 pxor %xmm0,%xmm3 4157 pslldq $4,%xmm0 4158 pxor %xmm3,%xmm0 4159 4160 pshufd $0xff,%xmm0,%xmm3 4161 pxor %xmm1,%xmm3 4162 pslldq $4,%xmm1 4163 pxor %xmm1,%xmm3 4164 4165 pxor %xmm2,%xmm0 4166 pxor %xmm3,%xmm2 4167 movdqu %xmm0,-16(%rax) 4168 4169 decl %r10d 4170 jnz L$oop_key192 4171 4172 movl %esi,32(%rax) 4173 xorl %eax,%eax 4174 jmp L$enc_key_ret 4175 4176 .p2align 4 4177 L$14rounds: 4178 movups 16(%rdi),%xmm2 4179 movl $13,%esi 4180 leaq 16(%rax),%rax 4181 cmpl $268435456,%r10d 4182 je L$14rounds_alt 4183 4184 movups %xmm0,(%rdx) 4185 movups %xmm2,16(%rdx) 4186 .byte 102,15,58,223,202,1 4187 call L$key_expansion_256a_cold 4188 .byte 102,15,58,223,200,1 4189 call L$key_expansion_256b 4190 .byte 102,15,58,223,202,2 4191 call L$key_expansion_256a 4192 .byte 102,15,58,223,200,2 4193 call L$key_expansion_256b 4194 .byte 102,15,58,223,202,4 4195 call L$key_expansion_256a 4196 .byte 102,15,58,223,200,4 4197 
call L$key_expansion_256b 4198 .byte 102,15,58,223,202,8 4199 call L$key_expansion_256a 4200 .byte 102,15,58,223,200,8 4201 call L$key_expansion_256b 4202 .byte 102,15,58,223,202,16 4203 call L$key_expansion_256a 4204 .byte 102,15,58,223,200,16 4205 call L$key_expansion_256b 4206 .byte 102,15,58,223,202,32 4207 call L$key_expansion_256a 4208 .byte 102,15,58,223,200,32 4209 call L$key_expansion_256b 4210 .byte 102,15,58,223,202,64 4211 call L$key_expansion_256a 4212 movups %xmm0,(%rax) 4213 movl %esi,16(%rax) 4214 xorq %rax,%rax 4215 jmp L$enc_key_ret 4216 4217 .p2align 4 4218 L$14rounds_alt: 4219 movdqa L$key_rotate(%rip),%xmm5 4220 movdqa L$key_rcon1(%rip),%xmm4 4221 movl $7,%r10d 4222 movdqu %xmm0,0(%rdx) 4223 movdqa %xmm2,%xmm1 4224 movdqu %xmm2,16(%rdx) 4225 jmp L$oop_key256 4226 4227 .p2align 4 4228 L$oop_key256: 4229 .byte 102,15,56,0,213 4230 .byte 102,15,56,221,212 4231 4232 movdqa %xmm0,%xmm3 4233 pslldq $4,%xmm0 4234 pxor %xmm0,%xmm3 4235 pslldq $4,%xmm0 4236 pxor %xmm0,%xmm3 4237 pslldq $4,%xmm0 4238 pxor %xmm3,%xmm0 4239 pslld $1,%xmm4 4240 4241 pxor %xmm2,%xmm0 4242 movdqu %xmm0,(%rax) 4243 4244 decl %r10d 4245 jz L$done_key256 4246 4247 pshufd $0xff,%xmm0,%xmm2 4248 pxor %xmm3,%xmm3 4249 .byte 102,15,56,221,211 4250 4251 movdqa %xmm1,%xmm3 4252 pslldq $4,%xmm1 4253 pxor %xmm1,%xmm3 4254 pslldq $4,%xmm1 4255 pxor %xmm1,%xmm3 4256 pslldq $4,%xmm1 4257 pxor %xmm3,%xmm1 4258 4259 pxor %xmm1,%xmm2 4260 movdqu %xmm2,16(%rax) 4261 leaq 32(%rax),%rax 4262 movdqa %xmm2,%xmm1 4263 4264 jmp L$oop_key256 4265 4266 L$done_key256: 4267 movl %esi,16(%rax) 4268 xorl %eax,%eax 4269 jmp L$enc_key_ret 4270 4271 .p2align 4 4272 L$bad_keybits: 4273 movq $-2,%rax 4274 L$enc_key_ret: 4275 pxor %xmm0,%xmm0 4276 pxor %xmm1,%xmm1 4277 pxor %xmm2,%xmm2 4278 pxor %xmm3,%xmm3 4279 pxor %xmm4,%xmm4 4280 pxor %xmm5,%xmm5 4281 addq $8,%rsp 4282 .byte 0xf3,0xc3 4283 L$SEH_end_set_encrypt_key: 4284 4285 .p2align 4 4286 L$key_expansion_128: 4287 movups %xmm0,(%rax) 4288 leaq 
16(%rax),%rax 4289 L$key_expansion_128_cold: 4290 shufps $16,%xmm0,%xmm4 4291 xorps %xmm4,%xmm0 4292 shufps $140,%xmm0,%xmm4 4293 xorps %xmm4,%xmm0 4294 shufps $255,%xmm1,%xmm1 4295 xorps %xmm1,%xmm0 4296 .byte 0xf3,0xc3 4297 4298 .p2align 4 4299 L$key_expansion_192a: 4300 movups %xmm0,(%rax) 4301 leaq 16(%rax),%rax 4302 L$key_expansion_192a_cold: 4303 movaps %xmm2,%xmm5 4304 L$key_expansion_192b_warm: 4305 shufps $16,%xmm0,%xmm4 4306 movdqa %xmm2,%xmm3 4307 xorps %xmm4,%xmm0 4308 shufps $140,%xmm0,%xmm4 4309 pslldq $4,%xmm3 4310 xorps %xmm4,%xmm0 4311 pshufd $85,%xmm1,%xmm1 4312 pxor %xmm3,%xmm2 4313 pxor %xmm1,%xmm0 4314 pshufd $255,%xmm0,%xmm3 4315 pxor %xmm3,%xmm2 4316 .byte 0xf3,0xc3 4317 4318 .p2align 4 4319 L$key_expansion_192b: 4320 movaps %xmm0,%xmm3 4321 shufps $68,%xmm0,%xmm5 4322 movups %xmm5,(%rax) 4323 shufps $78,%xmm2,%xmm3 4324 movups %xmm3,16(%rax) 4325 leaq 32(%rax),%rax 4326 jmp L$key_expansion_192b_warm 4327 4328 .p2align 4 4329 L$key_expansion_256a: 4330 movups %xmm2,(%rax) 4331 leaq 16(%rax),%rax 4332 L$key_expansion_256a_cold: 4333 shufps $16,%xmm0,%xmm4 4334 xorps %xmm4,%xmm0 4335 shufps $140,%xmm0,%xmm4 4336 xorps %xmm4,%xmm0 4337 shufps $255,%xmm1,%xmm1 4338 xorps %xmm1,%xmm0 4339 .byte 0xf3,0xc3 4340 4341 .p2align 4 4342 L$key_expansion_256b: 4343 movups %xmm0,(%rax) 4344 leaq 16(%rax),%rax 4345 4346 shufps $16,%xmm2,%xmm4 4347 xorps %xmm4,%xmm2 4348 shufps $140,%xmm2,%xmm4 4349 xorps %xmm4,%xmm2 4350 shufps $170,%xmm1,%xmm1 4351 xorps %xmm1,%xmm2 4352 .byte 0xf3,0xc3 4353 4354 4355 .p2align 6 4356 L$bswap_mask: 4357 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 4358 L$increment32: 4359 .long 6,6,6,0 4360 L$increment64: 4361 .long 1,0,0,0 4362 L$xts_magic: 4363 .long 0x87,0,1,0 4364 L$increment1: 4365 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 4366 L$key_rotate: 4367 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 4368 L$key_rotate192: 4369 .long 0x04070605,0x04070605,0x04070605,0x04070605 4370 L$key_rcon1: 4371 .long 1,1,1,1 4372 
L$key_rcon1b: 4373 .long 0x1b,0x1b,0x1b,0x1b 4374 4375 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 4376 .p2align 6 4377 #endif 4378