.text
.globl aesni_encrypt
.type aesni_encrypt,@function
.align 16
aesni_encrypt:
	movups (%rdi),%xmm2
	movl 240(%rdx),%eax
	movups (%rdx),%xmm0
	movups 16(%rdx),%xmm1
	leaq 32(%rdx),%rdx
	xorps %xmm0,%xmm2
.Loop_enc1_1:
.byte 102,15,56,220,209
	decl %eax
	movups (%rdx),%xmm1
	leaq 16(%rdx),%rdx
	jnz .Loop_enc1_1
.byte 102,15,56,221,209
	movups %xmm2,(%rsi)
.byte 0xf3,0xc3
.size aesni_encrypt,.-aesni_encrypt

.globl aesni_decrypt
.type aesni_decrypt,@function
.align 16
aesni_decrypt:
	movups (%rdi),%xmm2
	movl 240(%rdx),%eax
	movups (%rdx),%xmm0
	movups 16(%rdx),%xmm1
	leaq 32(%rdx),%rdx
	xorps %xmm0,%xmm2
.Loop_dec1_2:
.byte 102,15,56,222,209
	decl %eax
	movups (%rdx),%xmm1
	leaq 16(%rdx),%rdx
	jnz .Loop_dec1_2
.byte 102,15,56,223,209
	movups %xmm2,(%rsi)
.byte 0xf3,0xc3
.size aesni_decrypt,.-aesni_decrypt
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	movups (%rcx),%xmm0

.Lenc_loop3:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
	movups (%rcx),%xmm0
	jnz .Lenc_loop3

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 0xf3,0xc3
.size _aesni_encrypt3,.-_aesni_encrypt3
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	movups (%rcx),%xmm0

.Ldec_loop3:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
	decl %eax
.byte 102,15,56,222,225
	movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
	movups (%rcx),%xmm0
	jnz .Ldec_loop3

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 0xf3,0xc3
.size _aesni_decrypt3,.-_aesni_decrypt3
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	xorps %xmm0,%xmm5
	movups (%rcx),%xmm0

.Lenc_loop4:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups (%rcx),%xmm0
	jnz .Lenc_loop4

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 0xf3,0xc3
.size _aesni_encrypt4,.-_aesni_encrypt4
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	xorps %xmm0,%xmm5
	movups (%rcx),%xmm0

.Ldec_loop4:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
	decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	movups (%rcx),%xmm0
	jnz .Ldec_loop4

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 0xf3,0xc3
.size _aesni_decrypt4,.-_aesni_decrypt4
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	pxor %xmm0,%xmm3
.byte 102,15,56,220,209
	pxor %xmm0,%xmm4
.byte 102,15,56,220,217
	pxor %xmm0,%xmm5
.byte 102,15,56,220,225
	pxor %xmm0,%xmm6
.byte 102,15,56,220,233
	pxor %xmm0,%xmm7
	decl %eax
.byte 102,15,56,220,241
	movups (%rcx),%xmm0
.byte 102,15,56,220,249
	jmp .Lenc_loop6_enter
.align 16
.Lenc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lenc_loop6_enter:
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups (%rcx),%xmm0
	jnz .Lenc_loop6

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 0xf3,0xc3
.size _aesni_encrypt6,.-_aesni_encrypt6
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	pxor %xmm0,%xmm3
.byte 102,15,56,222,209
	pxor %xmm0,%xmm4
.byte 102,15,56,222,217
	pxor %xmm0,%xmm5
.byte 102,15,56,222,225
	pxor %xmm0,%xmm6
.byte 102,15,56,222,233
	pxor %xmm0,%xmm7
	decl %eax
.byte 102,15,56,222,241
	movups (%rcx),%xmm0
.byte 102,15,56,222,249
	jmp .Ldec_loop6_enter
.align 16
.Ldec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
	decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.Ldec_loop6_enter:
	movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
	movups (%rcx),%xmm0
	jnz .Ldec_loop6

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 0xf3,0xc3
.size _aesni_decrypt6,.-_aesni_decrypt6
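# _aesni_encrypt8/_aesni_decrypt8: process eight blocks held in %xmm2..%xmm9 in
# parallel. %rcx points at the key schedule and %eax carries the round count
# (halved internally, since each loop iteration performs two AES rounds).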
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
.byte 102,15,56,220,209
	pxor %xmm0,%xmm4
.byte 102,15,56,220,217
	pxor %xmm0,%xmm5
.byte 102,15,56,220,225
	pxor %xmm0,%xmm6
.byte 102,15,56,220,233
	pxor %xmm0,%xmm7
	decl %eax
.byte 102,15,56,220,241
	pxor %xmm0,%xmm8
.byte 102,15,56,220,249
	pxor %xmm0,%xmm9
	movups (%rcx),%xmm0
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 16(%rcx),%xmm1
	jmp .Lenc_loop8_enter
.align 16
.Lenc_loop8:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 16(%rcx),%xmm1
.Lenc_loop8_enter:
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups (%rcx),%xmm0
	jnz .Lenc_loop8

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 102,68,15,56,221,192
.byte 102,68,15,56,221,200
.byte 0xf3,0xc3
.size _aesni_encrypt8,.-_aesni_encrypt8
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
	movups (%rcx),%xmm0
	shrl $1,%eax
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
.byte 102,15,56,222,209
	pxor %xmm0,%xmm4
.byte 102,15,56,222,217
	pxor %xmm0,%xmm5
.byte 102,15,56,222,225
	pxor %xmm0,%xmm6
.byte 102,15,56,222,233
	pxor %xmm0,%xmm7
	decl %eax
.byte 102,15,56,222,241
	pxor %xmm0,%xmm8
.byte 102,15,56,222,249
	pxor %xmm0,%xmm9
	movups (%rcx),%xmm0
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
	movups 16(%rcx),%xmm1
	jmp .Ldec_loop8_enter
.align 16
.Ldec_loop8:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
	decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
	movups 16(%rcx),%xmm1
.Ldec_loop8_enter:
.byte 102,15,56,222,208
.byte 102,15,56,222,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
	movups (%rcx),%xmm0
	jnz .Ldec_loop8

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 102,68,15,56,223,192
.byte 102,68,15,56,223,200
.byte 0xf3,0xc3
.size _aesni_decrypt8,.-_aesni_decrypt8
.globl aesni_ecb_encrypt
.type aesni_ecb_encrypt,@function
.align 16
aesni_ecb_encrypt:
	andq $-16,%rdx
	jz .Lecb_ret

	movl 240(%rcx),%eax
	movups (%rcx),%xmm0
	movq %rcx,%r11
	movl %eax,%r10d
	testl %r8d,%r8d
	jz .Lecb_decrypt

	cmpq $128,%rdx
	jb .Lecb_enc_tail

	movdqu (%rdi),%xmm2
	movdqu 16(%rdi),%xmm3
	movdqu 32(%rdi),%xmm4
	movdqu 48(%rdi),%xmm5
	movdqu 64(%rdi),%xmm6
	movdqu 80(%rdi),%xmm7
	movdqu 96(%rdi),%xmm8
	movdqu 112(%rdi),%xmm9
	leaq 128(%rdi),%rdi
	subq $128,%rdx
	jmp .Lecb_enc_loop8_enter
.align 16
.Lecb_enc_loop8:
	movups %xmm2,(%rsi)
	movq %r11,%rcx
	movdqu (%rdi),%xmm2
	movl %r10d,%eax
	movups %xmm3,16(%rsi)
	movdqu 16(%rdi),%xmm3
	movups %xmm4,32(%rsi)
	movdqu 32(%rdi),%xmm4
	movups %xmm5,48(%rsi)
	movdqu 48(%rdi),%xmm5
	movups %xmm6,64(%rsi)
	movdqu 64(%rdi),%xmm6
	movups %xmm7,80(%rsi)
	movdqu 80(%rdi),%xmm7
	movups %xmm8,96(%rsi)
	movdqu 96(%rdi),%xmm8
	movups %xmm9,112(%rsi)
	leaq 128(%rsi),%rsi
	movdqu 112(%rdi),%xmm9
	leaq 128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call _aesni_encrypt8

	subq $128,%rdx
	jnc .Lecb_enc_loop8

	movups %xmm2,(%rsi)
	movq %r11,%rcx
	movups %xmm3,16(%rsi)
	movl %r10d,%eax
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	movups %xmm8,96(%rsi)
	movups %xmm9,112(%rsi)
	leaq 128(%rsi),%rsi
	addq $128,%rdx
	jz .Lecb_ret

.Lecb_enc_tail:
	movups (%rdi),%xmm2
	cmpq $32,%rdx
	jb .Lecb_enc_one
	movups 16(%rdi),%xmm3
	je .Lecb_enc_two
	movups 32(%rdi),%xmm4
	cmpq $64,%rdx
	jb .Lecb_enc_three
	movups 48(%rdi),%xmm5
	je .Lecb_enc_four
	movups 64(%rdi),%xmm6
	cmpq $96,%rdx
	jb .Lecb_enc_five
	movups 80(%rdi),%xmm7
	je .Lecb_enc_six
	movdqu 96(%rdi),%xmm8
	call _aesni_encrypt8
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	movups %xmm8,96(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_one:
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_3:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_3
.byte 102,15,56,221,209
	movups %xmm2,(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_two:
	xorps %xmm4,%xmm4
	call _aesni_encrypt3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_three:
	call _aesni_encrypt3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_four:
	call _aesni_encrypt4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_five:
	xorps %xmm7,%xmm7
	call _aesni_encrypt6
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_enc_six:
	call _aesni_encrypt6
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	jmp .Lecb_ret

.align 16
.Lecb_decrypt:
	cmpq $128,%rdx
	jb .Lecb_dec_tail

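# Bulk ECB decryption: load eight 16-byte blocks, decrypt them with
# _aesni_decrypt8, and loop while at least 128 bytes of input remain.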
	movdqu (%rdi),%xmm2
	movdqu 16(%rdi),%xmm3
	movdqu 32(%rdi),%xmm4
	movdqu 48(%rdi),%xmm5
	movdqu 64(%rdi),%xmm6
	movdqu 80(%rdi),%xmm7
	movdqu 96(%rdi),%xmm8
	movdqu 112(%rdi),%xmm9
	leaq 128(%rdi),%rdi
	subq $128,%rdx
	jmp .Lecb_dec_loop8_enter
.align 16
.Lecb_dec_loop8:
	movups %xmm2,(%rsi)
	movq %r11,%rcx
	movdqu (%rdi),%xmm2
	movl %r10d,%eax
	movups %xmm3,16(%rsi)
	movdqu 16(%rdi),%xmm3
	movups %xmm4,32(%rsi)
	movdqu 32(%rdi),%xmm4
	movups %xmm5,48(%rsi)
	movdqu 48(%rdi),%xmm5
	movups %xmm6,64(%rsi)
	movdqu 64(%rdi),%xmm6
	movups %xmm7,80(%rsi)
	movdqu 80(%rdi),%xmm7
	movups %xmm8,96(%rsi)
	movdqu 96(%rdi),%xmm8
	movups %xmm9,112(%rsi)
	leaq 128(%rsi),%rsi
	movdqu 112(%rdi),%xmm9
	leaq 128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call _aesni_decrypt8

	movups (%r11),%xmm0
	subq $128,%rdx
	jnc .Lecb_dec_loop8

	movups %xmm2,(%rsi)
	movq %r11,%rcx
	movups %xmm3,16(%rsi)
	movl %r10d,%eax
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	movups %xmm8,96(%rsi)
	movups %xmm9,112(%rsi)
	leaq 128(%rsi),%rsi
	addq $128,%rdx
	jz .Lecb_ret

.Lecb_dec_tail:
	movups (%rdi),%xmm2
	cmpq $32,%rdx
	jb .Lecb_dec_one
	movups 16(%rdi),%xmm3
	je .Lecb_dec_two
	movups 32(%rdi),%xmm4
	cmpq $64,%rdx
	jb .Lecb_dec_three
	movups 48(%rdi),%xmm5
	je .Lecb_dec_four
	movups 64(%rdi),%xmm6
	cmpq $96,%rdx
	jb .Lecb_dec_five
	movups 80(%rdi),%xmm7
	je .Lecb_dec_six
	movups 96(%rdi),%xmm8
	movups (%rcx),%xmm0
	call _aesni_decrypt8
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	movups %xmm8,96(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_one:
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_4:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_4
.byte 102,15,56,223,209
	movups %xmm2,(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_two:
	xorps %xmm4,%xmm4
	call _aesni_decrypt3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_three:
	call _aesni_decrypt3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_four:
	call _aesni_decrypt4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_five:
	xorps %xmm7,%xmm7
	call _aesni_decrypt6
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	jmp .Lecb_ret
.align 16
.Lecb_dec_six:
	call _aesni_decrypt6
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)

.Lecb_ret:
.byte 0xf3,0xc3
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
.globl aesni_ccm64_encrypt_blocks
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
	movl 240(%rcx),%eax
	movdqu (%r8),%xmm9
	movdqa .Lincrement64(%rip),%xmm6
	movdqa .Lbswap_mask(%rip),%xmm7

	shrl $1,%eax
	leaq 0(%rcx),%r11
	movdqu (%r9),%xmm3
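# CCM64 encrypt: %xmm9 holds the counter block (loaded from %r8), %xmm3 the
# running CMAC (loaded from %r9); %xmm6/%xmm7 are the 64-bit increment and
# byte-swap constants.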
	movdqa %xmm9,%xmm2
	movl %eax,%r10d
.byte 102,68,15,56,0,207
	jmp .Lccm64_enc_outer
.align 16
.Lccm64_enc_outer:
	movups (%r11),%xmm0
	movl %r10d,%eax
	movups (%rdi),%xmm8

	xorps %xmm0,%xmm2
	movups 16(%r11),%xmm1
	xorps %xmm8,%xmm0
	leaq 32(%r11),%rcx
	xorps %xmm0,%xmm3
	movups (%rcx),%xmm0

.Lccm64_enc2_loop:
.byte 102,15,56,220,209
	decl %eax
.byte 102,15,56,220,217
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
	movups 0(%rcx),%xmm0
	jnz .Lccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	paddq %xmm6,%xmm9
.byte 102,15,56,221,208
.byte 102,15,56,221,216

	decq %rdx
	leaq 16(%rdi),%rdi
	xorps %xmm2,%xmm8
	movdqa %xmm9,%xmm2
	movups %xmm8,(%rsi)
	leaq 16(%rsi),%rsi
.byte 102,15,56,0,215
	jnz .Lccm64_enc_outer

	movups %xmm3,(%r9)
.byte 0xf3,0xc3
.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
.globl aesni_ccm64_decrypt_blocks
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
	movl 240(%rcx),%eax
	movups (%r8),%xmm9
	movdqu (%r9),%xmm3
	movdqa .Lincrement64(%rip),%xmm6
	movdqa .Lbswap_mask(%rip),%xmm7

	movaps %xmm9,%xmm2
	movl %eax,%r10d
	movq %rcx,%r11
.byte 102,68,15,56,0,207
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_5:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_5
.byte 102,15,56,221,209
	movups (%rdi),%xmm8
	paddq %xmm6,%xmm9
	leaq 16(%rdi),%rdi
	jmp .Lccm64_dec_outer
.align 16
.Lccm64_dec_outer:
	xorps %xmm2,%xmm8
	movdqa %xmm9,%xmm2
	movl %r10d,%eax
	movups %xmm8,(%rsi)
	leaq 16(%rsi),%rsi
.byte 102,15,56,0,215

	subq $1,%rdx
	jz .Lccm64_dec_break

	movups (%r11),%xmm0
	shrl $1,%eax
	movups 16(%r11),%xmm1
	xorps %xmm0,%xmm8
	leaq 32(%r11),%rcx
	xorps %xmm0,%xmm2
	xorps %xmm8,%xmm3
	movups (%rcx),%xmm0

.Lccm64_dec2_loop:
.byte 102,15,56,220,209
	decl %eax
.byte 102,15,56,220,217
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
	movups 0(%rcx),%xmm0
	jnz .Lccm64_dec2_loop
	movups (%rdi),%xmm8
	paddq %xmm6,%xmm9
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	leaq 16(%rdi),%rdi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
	jmp .Lccm64_dec_outer

.align 16
.Lccm64_dec_break:

	movups (%r11),%xmm0
	movups 16(%r11),%xmm1
	xorps %xmm0,%xmm8
	leaq 32(%r11),%r11
	xorps %xmm8,%xmm3
.Loop_enc1_6:
.byte 102,15,56,220,217
	decl %eax
	movups (%r11),%xmm1
	leaq 16(%r11),%r11
	jnz .Loop_enc1_6
.byte 102,15,56,221,217
	movups %xmm3,(%r9)
.byte 0xf3,0xc3
.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
.globl aesni_ctr32_encrypt_blocks
.type aesni_ctr32_encrypt_blocks,@function
.align 16
aesni_ctr32_encrypt_blocks:
	cmpq $1,%rdx
	je .Lctr32_one_shortcut

	movdqu (%r8),%xmm14
	movdqa .Lbswap_mask(%rip),%xmm15
	xorl %eax,%eax
.byte 102,69,15,58,22,242,3
.byte 102,68,15,58,34,240,3

	movl 240(%rcx),%eax
	bswapl %r10d
	pxor %xmm12,%xmm12
	pxor %xmm13,%xmm13
.byte 102,69,15,58,34,226,0
	leaq 3(%r10),%r11
.byte 102,69,15,58,34,235,0
	incl %r10d
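# The .byte runs below are hand-encoded SSE4.1 instructions (pextrd/pinsrd/
# pshufb), emitted as raw opcodes rather than mnemonics, a common workaround
# for assemblers that lack these instructions.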
.byte 102,69,15,58,34,226,1
	incq %r11
.byte 102,69,15,58,34,235,1
	incl %r10d
.byte 102,69,15,58,34,226,2
	incq %r11
.byte 102,69,15,58,34,235,2
	movdqa %xmm12,-40(%rsp)
.byte 102,69,15,56,0,231
	movdqa %xmm13,-24(%rsp)
.byte 102,69,15,56,0,239

	pshufd $192,%xmm12,%xmm2
	pshufd $128,%xmm12,%xmm3
	pshufd $64,%xmm12,%xmm4
	cmpq $6,%rdx
	jb .Lctr32_tail
	shrl $1,%eax
	movq %rcx,%r11
	movl %eax,%r10d
	subq $6,%rdx
	jmp .Lctr32_loop6

.align 16
.Lctr32_loop6:
	pshufd $192,%xmm13,%xmm5
	por %xmm14,%xmm2
	movups (%r11),%xmm0
	pshufd $128,%xmm13,%xmm6
	por %xmm14,%xmm3
	movups 16(%r11),%xmm1
	pshufd $64,%xmm13,%xmm7
	por %xmm14,%xmm4
	por %xmm14,%xmm5
	xorps %xmm0,%xmm2
	por %xmm14,%xmm6
	por %xmm14,%xmm7

	pxor %xmm0,%xmm3
.byte 102,15,56,220,209
	leaq 32(%r11),%rcx
	pxor %xmm0,%xmm4
.byte 102,15,56,220,217
	movdqa .Lincrement32(%rip),%xmm13
	pxor %xmm0,%xmm5
.byte 102,15,56,220,225
	movdqa -40(%rsp),%xmm12
	pxor %xmm0,%xmm6
.byte 102,15,56,220,233
	pxor %xmm0,%xmm7
	movups (%rcx),%xmm0
	decl %eax
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	jmp .Lctr32_enc_loop6_enter
.align 16
.Lctr32_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lctr32_enc_loop6_enter:
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups (%rcx),%xmm0
	jnz .Lctr32_enc_loop6

.byte 102,15,56,220,209
	paddd %xmm13,%xmm12
.byte 102,15,56,220,217
	paddd -24(%rsp),%xmm13
.byte 102,15,56,220,225
	movdqa %xmm12,-40(%rsp)
.byte 102,15,56,220,233
	movdqa %xmm13,-24(%rsp)
.byte 102,15,56,220,241
.byte 102,69,15,56,0,231
.byte 102,15,56,220,249
.byte 102,69,15,56,0,239

.byte 102,15,56,221,208
	movups (%rdi),%xmm8
.byte 102,15,56,221,216
	movups 16(%rdi),%xmm9
.byte 102,15,56,221,224
	movups 32(%rdi),%xmm10
.byte 102,15,56,221,232
	movups 48(%rdi),%xmm11
.byte 102,15,56,221,240
	movups 64(%rdi),%xmm1
.byte 102,15,56,221,248
	movups 80(%rdi),%xmm0
	leaq 96(%rdi),%rdi

	xorps %xmm2,%xmm8
	pshufd $192,%xmm12,%xmm2
	xorps %xmm3,%xmm9
	pshufd $128,%xmm12,%xmm3
	movups %xmm8,(%rsi)
	xorps %xmm4,%xmm10
	pshufd $64,%xmm12,%xmm4
	movups %xmm9,16(%rsi)
	xorps %xmm5,%xmm11
	movups %xmm10,32(%rsi)
	xorps %xmm6,%xmm1
	movups %xmm11,48(%rsi)
	xorps %xmm7,%xmm0
	movups %xmm1,64(%rsi)
	movups %xmm0,80(%rsi)
	leaq 96(%rsi),%rsi
	movl %r10d,%eax
	subq $6,%rdx
	jnc .Lctr32_loop6

	addq $6,%rdx
	jz .Lctr32_done
	movq %r11,%rcx
	leal 1(%rax,%rax,1),%eax

.Lctr32_tail:
	por %xmm14,%xmm2
	movups (%rdi),%xmm8
	cmpq $2,%rdx
	jb .Lctr32_one

	por %xmm14,%xmm3
	movups 16(%rdi),%xmm9
	je .Lctr32_two

	pshufd $192,%xmm13,%xmm5
	por %xmm14,%xmm4
	movups 32(%rdi),%xmm10
	cmpq $4,%rdx
	jb .Lctr32_three

	pshufd $128,%xmm13,%xmm6
	por %xmm14,%xmm5
	movups 48(%rdi),%xmm11
	je .Lctr32_four

	por %xmm14,%xmm6
	xorps %xmm7,%xmm7

	call _aesni_encrypt6
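# Five-block CTR tail: XOR the encrypted counter blocks with the input and
# store the result.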

	movups 64(%rdi),%xmm1
	xorps %xmm2,%xmm8
	xorps %xmm3,%xmm9
	movups %xmm8,(%rsi)
	xorps %xmm4,%xmm10
	movups %xmm9,16(%rsi)
	xorps %xmm5,%xmm11
	movups %xmm10,32(%rsi)
	xorps %xmm6,%xmm1
	movups %xmm11,48(%rsi)
	movups %xmm1,64(%rsi)
	jmp .Lctr32_done

.align 16
.Lctr32_one_shortcut:
	movups (%r8),%xmm2
	movups (%rdi),%xmm8
	movl 240(%rcx),%eax
.Lctr32_one:
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_7:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_7
.byte 102,15,56,221,209
	xorps %xmm2,%xmm8
	movups %xmm8,(%rsi)
	jmp .Lctr32_done

.align 16
.Lctr32_two:
	xorps %xmm4,%xmm4
	call _aesni_encrypt3
	xorps %xmm2,%xmm8
	xorps %xmm3,%xmm9
	movups %xmm8,(%rsi)
	movups %xmm9,16(%rsi)
	jmp .Lctr32_done

.align 16
.Lctr32_three:
	call _aesni_encrypt3
	xorps %xmm2,%xmm8
	xorps %xmm3,%xmm9
	movups %xmm8,(%rsi)
	xorps %xmm4,%xmm10
	movups %xmm9,16(%rsi)
	movups %xmm10,32(%rsi)
	jmp .Lctr32_done

.align 16
.Lctr32_four:
	call _aesni_encrypt4
	xorps %xmm2,%xmm8
	xorps %xmm3,%xmm9
	movups %xmm8,(%rsi)
	xorps %xmm4,%xmm10
	movups %xmm9,16(%rsi)
	xorps %xmm5,%xmm11
	movups %xmm10,32(%rsi)
	movups %xmm11,48(%rsi)

.Lctr32_done:
.byte 0xf3,0xc3
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
.globl aesni_xts_encrypt
.type aesni_xts_encrypt,@function
.align 16
aesni_xts_encrypt:
	leaq -104(%rsp),%rsp
	movups (%r9),%xmm15
	movl 240(%r8),%eax
	movl 240(%rcx),%r10d
	movups (%r8),%xmm0
	movups 16(%r8),%xmm1
	leaq 32(%r8),%r8
	xorps %xmm0,%xmm15
.Loop_enc1_8:
.byte 102,68,15,56,220,249
	decl %eax
	movups (%r8),%xmm1
	leaq 16(%r8),%r8
	jnz .Loop_enc1_8
.byte 102,68,15,56,221,249
	movq %rcx,%r11
	movl %r10d,%eax
	movq %rdx,%r9
	andq $-16,%rdx

	movdqa .Lxts_magic(%rip),%xmm8
	pxor %xmm14,%xmm14
	pcmpgtd %xmm15,%xmm14
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm11
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm12
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm13
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	subq $96,%rdx
	jc .Lxts_enc_short

	shrl $1,%eax
	subl $1,%eax
	movl %eax,%r10d
	jmp .Lxts_enc_grandloop

.align 16
.Lxts_enc_grandloop:
	pshufd $19,%xmm14,%xmm9
	movdqa %xmm15,%xmm14
	paddq %xmm15,%xmm15
	movdqu 0(%rdi),%xmm2
	pand %xmm8,%xmm9
	movdqu 16(%rdi),%xmm3
	pxor %xmm9,%xmm15

	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	pxor %xmm12,%xmm4
	movdqu 80(%rdi),%xmm7
	leaq 96(%rdi),%rdi
	pxor %xmm13,%xmm5
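# XTS encrypt, bulk loop: %xmm10..%xmm15 hold six consecutive tweaks; they are
# XORed into the plaintext before encryption and into the ciphertext after it.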
	movups (%r11),%xmm0
	pxor %xmm14,%xmm6
	pxor %xmm15,%xmm7

	movups 16(%r11),%xmm1
	pxor %xmm0,%xmm2
	pxor %xmm0,%xmm3
	movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,209
	leaq 32(%r11),%rcx
	pxor %xmm0,%xmm4
	movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,217
	pxor %xmm0,%xmm5
	movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,225
	pxor %xmm0,%xmm6
	movdqa %xmm13,48(%rsp)
.byte 102,15,56,220,233
	pxor %xmm0,%xmm7
	movups (%rcx),%xmm0
	decl %eax
	movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,241
	movdqa %xmm15,80(%rsp)
.byte 102,15,56,220,249
	pxor %xmm14,%xmm14
	pcmpgtd %xmm15,%xmm14
	jmp .Lxts_enc_loop6_enter

.align 16
.Lxts_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lxts_enc_loop6_enter:
	movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups (%rcx),%xmm0
	jnz .Lxts_enc_loop6

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	paddq %xmm15,%xmm15
.byte 102,15,56,220,209
	pand %xmm8,%xmm9
.byte 102,15,56,220,217
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,220,225
	pxor %xmm9,%xmm15
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups 16(%rcx),%xmm1

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
.byte 102,15,56,220,208
	pand %xmm8,%xmm9
.byte 102,15,56,220,216
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,220,224
	pxor %xmm9,%xmm15
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups 32(%rcx),%xmm0

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm11
	paddq %xmm15,%xmm15
.byte 102,15,56,220,209
	pand %xmm8,%xmm9
.byte 102,15,56,220,217
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,220,225
	pxor %xmm9,%xmm15
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm12
	paddq %xmm15,%xmm15
.byte 102,15,56,221,208
	pand %xmm8,%xmm9
.byte 102,15,56,221,216
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,221,224
	pxor %xmm9,%xmm15
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm13
	paddq %xmm15,%xmm15
	xorps 0(%rsp),%xmm2
	pand %xmm8,%xmm9
	xorps 16(%rsp),%xmm3
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15

	xorps 32(%rsp),%xmm4
	movups %xmm2,0(%rsi)
	xorps 48(%rsp),%xmm5
	movups %xmm3,16(%rsi)
	xorps 64(%rsp),%xmm6
	movups %xmm4,32(%rsi)
	xorps 80(%rsp),%xmm7
	movups %xmm5,48(%rsi)
	movl %r10d,%eax
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	leaq 96(%rsi),%rsi
	subq $96,%rdx
	jnc .Lxts_enc_grandloop

	leal 3(%rax,%rax,1),%eax
	movq %r11,%rcx
	movl %eax,%r10d

.Lxts_enc_short:
	addq $96,%rdx
	jz .Lxts_enc_done

	cmpq $32,%rdx
	jb .Lxts_enc_one
	je .Lxts_enc_two

	cmpq $64,%rdx
	jb .Lxts_enc_three
	je .Lxts_enc_four

	pshufd $19,%xmm14,%xmm9
	movdqa %xmm15,%xmm14
	paddq %xmm15,%xmm15
	movdqu (%rdi),%xmm2
	pand %xmm8,%xmm9
	movdqu 16(%rdi),%xmm3
	pxor %xmm9,%xmm15

	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	leaq 80(%rdi),%rdi
	pxor %xmm12,%xmm4
	pxor %xmm13,%xmm5
	pxor %xmm14,%xmm6

	call _aesni_encrypt6

	xorps %xmm10,%xmm2
	movdqa %xmm15,%xmm10
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	xorps %xmm14,%xmm6
	movdqu %xmm4,32(%rsi)
	movdqu %xmm5,48(%rsi)
	movdqu %xmm6,64(%rsi)
	leaq 80(%rsi),%rsi
	jmp .Lxts_enc_done

.align 16
.Lxts_enc_one:
	movups (%rdi),%xmm2
	leaq 16(%rdi),%rdi
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_9:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_9
.byte 102,15,56,221,209
	xorps %xmm10,%xmm2
	movdqa %xmm11,%xmm10
	movups %xmm2,(%rsi)
	leaq 16(%rsi),%rsi
	jmp .Lxts_enc_done

.align 16
.Lxts_enc_two:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	leaq 32(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3

	call _aesni_encrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm12,%xmm10
	xorps %xmm11,%xmm3
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	leaq 32(%rsi),%rsi
	jmp .Lxts_enc_done

.align 16
.Lxts_enc_three:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	leaq 48(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4

	call _aesni_encrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm13,%xmm10
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	leaq 48(%rsi),%rsi
	jmp .Lxts_enc_done

.align 16
.Lxts_enc_four:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	xorps %xmm10,%xmm2
	movups 48(%rdi),%xmm5
	leaq 64(%rdi),%rdi
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	xorps %xmm13,%xmm5

	call _aesni_encrypt4

	xorps %xmm10,%xmm2
	movdqa %xmm15,%xmm10
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	leaq 64(%rsi),%rsi
	jmp .Lxts_enc_done

.align 16
.Lxts_enc_done:
	andq $15,%r9
	jz .Lxts_enc_ret
	movq %r9,%rdx

.Lxts_enc_steal:
	movzbl (%rdi),%eax
	movzbl -16(%rsi),%ecx
	leaq 1(%rdi),%rdi
	movb %al,-16(%rsi)
	movb %cl,0(%rsi)
	leaq 1(%rsi),%rsi
	subq $1,%rdx
	jnz .Lxts_enc_steal

	subq %r9,%rsi
	movq %r11,%rcx
	movl %r10d,%eax

	movups -16(%rsi),%xmm2
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_10
.byte 102,15,56,221,209
	xorps %xmm10,%xmm2
	movups %xmm2,-16(%rsi)

.Lxts_enc_ret:
	leaq 104(%rsp),%rsp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3
.size aesni_xts_encrypt,.-aesni_xts_encrypt
.globl aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
aesni_xts_decrypt:
	leaq -104(%rsp),%rsp
	movups (%r9),%xmm15
	movl 240(%r8),%eax
	movl 240(%rcx),%r10d
	movups (%r8),%xmm0
	movups 16(%r8),%xmm1
	leaq 32(%r8),%r8
	xorps %xmm0,%xmm15
.Loop_enc1_11:
.byte 102,68,15,56,220,249
	decl %eax
	movups (%r8),%xmm1
	leaq 16(%r8),%r8
	jnz .Loop_enc1_11
.byte 102,68,15,56,221,249
	xorl %eax,%eax
	testq $15,%rdx
	setnz %al
	shlq $4,%rax
	subq %rax,%rdx

	movq %rcx,%r11
	movl %r10d,%eax
	movq %rdx,%r9
	andq $-16,%rdx

	movdqa .Lxts_magic(%rip),%xmm8
	pxor %xmm14,%xmm14
	pcmpgtd %xmm15,%xmm14
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm11
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm12
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm13
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm9
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15
	subq $96,%rdx
	jc .Lxts_dec_short

	shrl $1,%eax
	subl $1,%eax
	movl %eax,%r10d
	jmp .Lxts_dec_grandloop

.align 16
.Lxts_dec_grandloop:
	pshufd $19,%xmm14,%xmm9
	movdqa %xmm15,%xmm14
	paddq %xmm15,%xmm15
	movdqu 0(%rdi),%xmm2
	pand %xmm8,%xmm9
	movdqu 16(%rdi),%xmm3
	pxor %xmm9,%xmm15

	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	pxor %xmm12,%xmm4
	movdqu 80(%rdi),%xmm7
	leaq 96(%rdi),%rdi
	pxor %xmm13,%xmm5
	movups (%r11),%xmm0
	pxor %xmm14,%xmm6
	pxor %xmm15,%xmm7

	movups 16(%r11),%xmm1
	pxor %xmm0,%xmm2
	pxor %xmm0,%xmm3
	movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,209
	leaq 32(%r11),%rcx
	pxor %xmm0,%xmm4
	movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,217
	pxor %xmm0,%xmm5
	movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,225
	pxor %xmm0,%xmm6
	movdqa %xmm13,48(%rsp)
.byte 102,15,56,222,233
	pxor %xmm0,%xmm7
	movups (%rcx),%xmm0
	decl %eax
	movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,241
	movdqa %xmm15,80(%rsp)
.byte 102,15,56,222,249
	pxor %xmm14,%xmm14
	pcmpgtd %xmm15,%xmm14
	jmp .Lxts_dec_loop6_enter

.align 16
.Lxts_dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
	decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.Lxts_dec_loop6_enter:
	movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
	leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
	movups (%rcx),%xmm0
	jnz .Lxts_dec_loop6

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	paddq %xmm15,%xmm15
.byte 102,15,56,222,209
	pand %xmm8,%xmm9
.byte 102,15,56,222,217
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,225
	pxor %xmm9,%xmm15
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
	movups 16(%rcx),%xmm1

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
.byte 102,15,56,222,208
	pand %xmm8,%xmm9
.byte 102,15,56,222,216
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,224
	pxor %xmm9,%xmm15
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
	movups 32(%rcx),%xmm0

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm11
	paddq %xmm15,%xmm15
.byte 102,15,56,222,209
	pand %xmm8,%xmm9
.byte 102,15,56,222,217
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,222,225
	pxor %xmm9,%xmm15
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm12
	paddq %xmm15,%xmm15
.byte 102,15,56,223,208
	pand %xmm8,%xmm9
.byte 102,15,56,223,216
	pcmpgtd %xmm15,%xmm14
.byte 102,15,56,223,224
	pxor %xmm9,%xmm15
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248

	pshufd $19,%xmm14,%xmm9
	pxor %xmm14,%xmm14
	movdqa %xmm15,%xmm13
	paddq %xmm15,%xmm15
	xorps 0(%rsp),%xmm2
	pand %xmm8,%xmm9
	xorps 16(%rsp),%xmm3
	pcmpgtd %xmm15,%xmm14
	pxor %xmm9,%xmm15

	xorps 32(%rsp),%xmm4
	movups %xmm2,0(%rsi)
	xorps 48(%rsp),%xmm5
	movups %xmm3,16(%rsi)
	xorps 64(%rsp),%xmm6
	movups %xmm4,32(%rsi)
	xorps 80(%rsp),%xmm7
	movups %xmm5,48(%rsi)
	movl %r10d,%eax
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	leaq 96(%rsi),%rsi
	subq $96,%rdx
	jnc .Lxts_dec_grandloop

	leal 3(%rax,%rax,1),%eax
	movq %r11,%rcx
	movl %eax,%r10d

.Lxts_dec_short:
	addq $96,%rdx
	jz .Lxts_dec_done

	cmpq $32,%rdx
	jb .Lxts_dec_one
	je .Lxts_dec_two

	cmpq $64,%rdx
	jb .Lxts_dec_three
	je .Lxts_dec_four

	pshufd $19,%xmm14,%xmm9
	movdqa %xmm15,%xmm14
	paddq %xmm15,%xmm15
	movdqu (%rdi),%xmm2
	pand %xmm8,%xmm9
	movdqu 16(%rdi),%xmm3
	pxor %xmm9,%xmm15

	movdqu 32(%rdi),%xmm4
	pxor %xmm10,%xmm2
	movdqu 48(%rdi),%xmm5
	pxor %xmm11,%xmm3
	movdqu 64(%rdi),%xmm6
	leaq 80(%rdi),%rdi
	pxor %xmm12,%xmm4
	pxor %xmm13,%xmm5
	pxor %xmm14,%xmm6

	call _aesni_decrypt6

	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	movdqu %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movdqu %xmm3,16(%rsi)
	xorps %xmm14,%xmm6
	movdqu %xmm4,32(%rsi)
	pxor %xmm14,%xmm14
	movdqu %xmm5,48(%rsi)
	pcmpgtd %xmm15,%xmm14
	movdqu %xmm6,64(%rsi)
	leaq 80(%rsi),%rsi
	pshufd $19,%xmm14,%xmm11
	andq $15,%r9
	jz .Lxts_dec_ret

	movdqa %xmm15,%xmm10
	paddq %xmm15,%xmm15
	pand %xmm8,%xmm11
	pxor %xmm15,%xmm11
	jmp .Lxts_dec_done2

.align 16
.Lxts_dec_one:
	movups (%rdi),%xmm2
	leaq 16(%rdi),%rdi
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_12
.byte 102,15,56,223,209
	xorps %xmm10,%xmm2
	movdqa %xmm11,%xmm10
	movups %xmm2,(%rsi)
	movdqa %xmm12,%xmm11
	leaq 16(%rsi),%rsi
	jmp .Lxts_dec_done

.align 16
.Lxts_dec_two:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	leaq 32(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3

	call _aesni_decrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm12,%xmm10
	xorps %xmm11,%xmm3
	movdqa %xmm13,%xmm11
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	leaq 32(%rsi),%rsi
	jmp .Lxts_dec_done

.align 16
.Lxts_dec_three:
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 32(%rdi),%xmm4
	leaq 48(%rdi),%rdi
	xorps %xmm10,%xmm2
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4

	call _aesni_decrypt3

	xorps %xmm10,%xmm2
	movdqa %xmm13,%xmm10
	xorps %xmm11,%xmm3
	movdqa %xmm15,%xmm11
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	leaq 48(%rsi),%rsi
	jmp .Lxts_dec_done

.align 16
.Lxts_dec_four:
	pshufd $19,%xmm14,%xmm9
	movdqa %xmm15,%xmm14
	paddq %xmm15,%xmm15
	movups (%rdi),%xmm2
	pand %xmm8,%xmm9
	movups 16(%rdi),%xmm3
	pxor %xmm9,%xmm15

	movups 32(%rdi),%xmm4
	xorps %xmm10,%xmm2
	movups 48(%rdi),%xmm5
	leaq 64(%rdi),%rdi
	xorps %xmm11,%xmm3
	xorps %xmm12,%xmm4
	xorps %xmm13,%xmm5

	call _aesni_decrypt4

	xorps %xmm10,%xmm2
	movdqa %xmm14,%xmm10
	xorps %xmm11,%xmm3
	movdqa %xmm15,%xmm11
	xorps %xmm12,%xmm4
	movups %xmm2,(%rsi)
	xorps %xmm13,%xmm5
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	leaq 64(%rsi),%rsi
	jmp .Lxts_dec_done

.align 16
.Lxts_dec_done:
	andq $15,%r9
	jz .Lxts_dec_ret
.Lxts_dec_done2:
	movq %r9,%rdx
	movq %r11,%rcx
	movl %r10d,%eax

	movups (%rdi),%xmm2
	xorps %xmm11,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_13:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_13
.byte 102,15,56,223,209
	xorps %xmm11,%xmm2
	movups %xmm2,(%rsi)

.Lxts_dec_steal:
	movzbl 16(%rdi),%eax
	movzbl (%rsi),%ecx
	leaq 1(%rdi),%rdi
	movb %al,(%rsi)
	movb %cl,16(%rsi)
	leaq 1(%rsi),%rsi
	subq $1,%rdx
	jnz .Lxts_dec_steal

	subq %r9,%rsi
	movq %r11,%rcx
	movl %r10d,%eax

	movups (%rsi),%xmm2
	xorps %xmm10,%xmm2
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_14:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_14
.byte 102,15,56,223,209
	xorps %xmm10,%xmm2
	movups %xmm2,(%rsi)

.Lxts_dec_ret:
	leaq 104(%rsp),%rsp
.Lxts_dec_epilogue:
.byte 0xf3,0xc3
.size aesni_xts_decrypt,.-aesni_xts_decrypt
.globl aesni_cbc_encrypt
.type aesni_cbc_encrypt,@function
.align 16
aesni_cbc_encrypt:
	testq %rdx,%rdx
	jz .Lcbc_ret

	movl 240(%rcx),%r10d
	movq %rcx,%r11
	testl %r9d,%r9d
	jz .Lcbc_decrypt

	movups (%r8),%xmm2
	movl %r10d,%eax
	cmpq $16,%rdx
	jb .Lcbc_enc_tail
	subq $16,%rdx
	jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
	movups (%rdi),%xmm3
	leaq 16(%rdi),%rdi

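# CBC encrypt: fold round key 0 and the plaintext block into the chaining
# value in %xmm2, then run the remaining rounds of a single AES encryption.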
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm3
	leaq 32(%rcx),%rcx
	xorps %xmm3,%xmm2
.Loop_enc1_15:
.byte 102,15,56,220,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_enc1_15
.byte 102,15,56,221,209
	movl %r10d,%eax
	movq %r11,%rcx
	movups %xmm2,0(%rsi)
	leaq 16(%rsi),%rsi
	subq $16,%rdx
	jnc .Lcbc_enc_loop
	addq $16,%rdx
	jnz .Lcbc_enc_tail
	movups %xmm2,(%r8)
	jmp .Lcbc_ret

.Lcbc_enc_tail:
	movq %rdx,%rcx
	xchgq %rdi,%rsi
.long 0x9066A4F3
	movl $16,%ecx
	subq %rdx,%rcx
	xorl %eax,%eax
.long 0x9066AAF3
	leaq -16(%rdi),%rdi
	movl %r10d,%eax
	movq %rdi,%rsi
	movq %r11,%rcx
	xorq %rdx,%rdx
	jmp .Lcbc_enc_loop

.align 16
.Lcbc_decrypt:
	movups (%r8),%xmm9
	movl %r10d,%eax
	cmpq $112,%rdx
	jbe .Lcbc_dec_tail
	shrl $1,%r10d
	subq $112,%rdx
	movl %r10d,%eax
	movaps %xmm9,-24(%rsp)
	jmp .Lcbc_dec_loop8_enter
.align 16
.Lcbc_dec_loop8:
	movaps %xmm0,-24(%rsp)
	movups %xmm9,(%rsi)
	leaq 16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
	movups (%rcx),%xmm0
	movups (%rdi),%xmm2
	movups 16(%rdi),%xmm3
	movups 16(%rcx),%xmm1

	leaq 32(%rcx),%rcx
	movdqu 32(%rdi),%xmm4
	xorps %xmm0,%xmm2
	movdqu 48(%rdi),%xmm5
	xorps %xmm0,%xmm3
	movdqu 64(%rdi),%xmm6
.byte 102,15,56,222,209
	pxor %xmm0,%xmm4
	movdqu 80(%rdi),%xmm7
.byte 102,15,56,222,217
	pxor %xmm0,%xmm5
	movdqu 96(%rdi),%xmm8
.byte 102,15,56,222,225
	pxor %xmm0,%xmm6
	movdqu 112(%rdi),%xmm9
.byte 102,15,56,222,233
	pxor %xmm0,%xmm7
	decl %eax
.byte 102,15,56,222,241
	pxor %xmm0,%xmm8
.byte 102,15,56,222,249
	pxor %xmm0,%xmm9
	movups (%rcx),%xmm0
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
	movups 16(%rcx),%xmm1

	call .Ldec_loop8_enter

	movups (%rdi),%xmm1
	movups 16(%rdi),%xmm0
	xorps -24(%rsp),%xmm2
	xorps %xmm1,%xmm3
	movups 32(%rdi),%xmm1
	xorps %xmm0,%xmm4
	movups 48(%rdi),%xmm0
	xorps %xmm1,%xmm5
	movups 64(%rdi),%xmm1
	xorps %xmm0,%xmm6
	movups 80(%rdi),%xmm0
	xorps %xmm1,%xmm7
	movups 96(%rdi),%xmm1
	xorps %xmm0,%xmm8
	movups 112(%rdi),%xmm0
	xorps %xmm1,%xmm9
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movl %r10d,%eax
	movups %xmm6,64(%rsi)
	movq %r11,%rcx
	movups %xmm7,80(%rsi)
	leaq 128(%rdi),%rdi
	movups %xmm8,96(%rsi)
	leaq 112(%rsi),%rsi
	subq $128,%rdx
	ja .Lcbc_dec_loop8

	movaps %xmm9,%xmm2
	movaps %xmm0,%xmm9
	addq $112,%rdx
	jle .Lcbc_dec_tail_collected
	movups %xmm2,(%rsi)
	leal 1(%r10,%r10,1),%eax
	leaq 16(%rsi),%rsi
.Lcbc_dec_tail:
	movups (%rdi),%xmm2
	movaps %xmm2,%xmm8
	cmpq $16,%rdx
	jbe .Lcbc_dec_one

	movups 16(%rdi),%xmm3
	movaps %xmm3,%xmm7
	cmpq $32,%rdx
	jbe .Lcbc_dec_two

	movups 32(%rdi),%xmm4
	movaps %xmm4,%xmm6
	cmpq $48,%rdx
	jbe .Lcbc_dec_three

	movups 48(%rdi),%xmm5
	cmpq $64,%rdx
	jbe .Lcbc_dec_four

	movups 64(%rdi),%xmm6
	cmpq $80,%rdx
	jbe .Lcbc_dec_five

	movups 80(%rdi),%xmm7
	cmpq $96,%rdx
	jbe .Lcbc_dec_six

	movups 96(%rdi),%xmm8
	movaps %xmm9,-24(%rsp)
	call _aesni_decrypt8
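# Undo CBC chaining: XOR each decrypted block with the previous ciphertext
# block (the saved IV at -24(%rsp) for the first one).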
	movups (%rdi),%xmm1
	movups 16(%rdi),%xmm0
	xorps -24(%rsp),%xmm2
	xorps %xmm1,%xmm3
	movups 32(%rdi),%xmm1
	xorps %xmm0,%xmm4
	movups 48(%rdi),%xmm0
	xorps %xmm1,%xmm5
	movups 64(%rdi),%xmm1
	xorps %xmm0,%xmm6
	movups 80(%rdi),%xmm0
	xorps %xmm1,%xmm7
	movups 96(%rdi),%xmm9
	xorps %xmm0,%xmm8
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	movups %xmm7,80(%rsi)
	leaq 96(%rsi),%rsi
	movaps %xmm8,%xmm2
	subq $112,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_one:
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
.Loop_dec1_16:
.byte 102,15,56,222,209
	decl %eax
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz .Loop_dec1_16
.byte 102,15,56,223,209
	xorps %xmm9,%xmm2
	movaps %xmm8,%xmm9
	subq $16,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_two:
	xorps %xmm4,%xmm4
	call _aesni_decrypt3
	xorps %xmm9,%xmm2
	xorps %xmm8,%xmm3
	movups %xmm2,(%rsi)
	movaps %xmm7,%xmm9
	movaps %xmm3,%xmm2
	leaq 16(%rsi),%rsi
	subq $32,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_three:
	call _aesni_decrypt3
	xorps %xmm9,%xmm2
	xorps %xmm8,%xmm3
	movups %xmm2,(%rsi)
	xorps %xmm7,%xmm4
	movups %xmm3,16(%rsi)
	movaps %xmm6,%xmm9
	movaps %xmm4,%xmm2
	leaq 32(%rsi),%rsi
	subq $48,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_four:
	call _aesni_decrypt4
	xorps %xmm9,%xmm2
	movups 48(%rdi),%xmm9
	xorps %xmm8,%xmm3
	movups %xmm2,(%rsi)
	xorps %xmm7,%xmm4
	movups %xmm3,16(%rsi)
	xorps %xmm6,%xmm5
	movups %xmm4,32(%rsi)
	movaps %xmm5,%xmm2
	leaq 48(%rsi),%rsi
	subq $64,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_five:
	xorps %xmm7,%xmm7
	call _aesni_decrypt6
	movups 16(%rdi),%xmm1
	movups 32(%rdi),%xmm0
	xorps %xmm9,%xmm2
	xorps %xmm8,%xmm3
	xorps %xmm1,%xmm4
	movups 48(%rdi),%xmm1
	xorps %xmm0,%xmm5
	movups 64(%rdi),%xmm9
	xorps %xmm1,%xmm6
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	leaq 64(%rsi),%rsi
	movaps %xmm6,%xmm2
	subq $80,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_six:
	call _aesni_decrypt6
	movups 16(%rdi),%xmm1
	movups 32(%rdi),%xmm0
	xorps %xmm9,%xmm2
	xorps %xmm8,%xmm3
	xorps %xmm1,%xmm4
	movups 48(%rdi),%xmm1
	xorps %xmm0,%xmm5
	movups 64(%rdi),%xmm0
	xorps %xmm1,%xmm6
	movups 80(%rdi),%xmm9
	xorps %xmm0,%xmm7
	movups %xmm2,(%rsi)
	movups %xmm3,16(%rsi)
	movups %xmm4,32(%rsi)
	movups %xmm5,48(%rsi)
	movups %xmm6,64(%rsi)
	leaq 80(%rsi),%rsi
	movaps %xmm7,%xmm2
	subq $96,%rdx
	jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_tail_collected:
	andq $15,%rdx
	movups %xmm9,(%r8)
	jnz .Lcbc_dec_tail_partial
	movups %xmm2,(%rsi)
	jmp .Lcbc_dec_ret
.align 16
.Lcbc_dec_tail_partial:
	movaps %xmm2,-24(%rsp)
	movq $16,%rcx
	movq %rsi,%rdi
	subq %rdx,%rcx
	leaq -24(%rsp),%rsi
.long 0x9066A4F3

.Lcbc_dec_ret:
.Lcbc_ret:
.byte 0xf3,0xc3
.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
.globl aesni_set_decrypt_key
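# aesni_set_decrypt_key: expand the encryption schedule via
# __aesni_set_encrypt_key, then reverse the round-key order and run AESIMC
# (the .byte 102,15,56,219 opcodes) over the inner round keys.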
.type aesni_set_decrypt_key,@function
.align 16
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08
	call __aesni_set_encrypt_key
	shll $4,%esi
	testl %eax,%eax
	jnz .Ldec_key_ret
	leaq 16(%rdx,%rsi,1),%rdi

	movups (%rdx),%xmm0
	movups (%rdi),%xmm1
	movups %xmm0,(%rdi)
	movups %xmm1,(%rdx)
	leaq 16(%rdx),%rdx
	leaq -16(%rdi),%rdi

.Ldec_key_inverse:
	movups (%rdx),%xmm0
	movups (%rdi),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
	leaq 16(%rdx),%rdx
	leaq -16(%rdi),%rdi
	movups %xmm0,16(%rdi)
	movups %xmm1,-16(%rdx)
	cmpq %rdx,%rdi
	ja .Ldec_key_inverse

	movups (%rdx),%xmm0
.byte 102,15,56,219,192
	movups %xmm0,(%rdi)
.Ldec_key_ret:
	addq $8,%rsp
.byte 0xf3,0xc3
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
.globl aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08
	movq $-1,%rax
	testq %rdi,%rdi
	jz .Lenc_key_ret
	testq %rdx,%rdx
	jz .Lenc_key_ret

	movups (%rdi),%xmm0
	xorps %xmm4,%xmm4
	leaq 16(%rdx),%rax
	cmpl $256,%esi
	je .L14rounds
	cmpl $192,%esi
	je .L12rounds
	cmpl $128,%esi
	jne .Lbad_keybits

.L10rounds:
	movl $9,%esi
	movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1
	call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
	call .Lkey_expansion_128
.byte 102,15,58,223,200,4
	call .Lkey_expansion_128
.byte 102,15,58,223,200,8
	call .Lkey_expansion_128
.byte 102,15,58,223,200,16
	call .Lkey_expansion_128
.byte 102,15,58,223,200,32
	call .Lkey_expansion_128
.byte 102,15,58,223,200,64
	call .Lkey_expansion_128
.byte 102,15,58,223,200,128
	call .Lkey_expansion_128
.byte 102,15,58,223,200,27
	call .Lkey_expansion_128
.byte 102,15,58,223,200,54
	call .Lkey_expansion_128
	movups %xmm0,(%rax)
	movl %esi,80(%rax)
	xorl %eax,%eax
	jmp .Lenc_key_ret

.align 16
.L12rounds:
	movq 16(%rdi),%xmm2
	movl $11,%esi
	movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1
	call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
	call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
	call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
	call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
	call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
	call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
	call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
	call .Lkey_expansion_192b
	movups %xmm0,(%rax)
	movl %esi,48(%rax)
	xorq %rax,%rax
	jmp .Lenc_key_ret

.align 16
.L14rounds:
	movups 16(%rdi),%xmm2
	movl $13,%esi
	leaq 16(%rax),%rax
	movups %xmm0,(%rdx)
	movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1
	call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
	call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
	call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
	call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
	call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
	call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
	call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
	call .Lkey_expansion_256b
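# Each .byte 102,15,58,223,... below encodes AESKEYGENASSIST with the round
# constant as the immediate byte.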
.byte 102,15,58,223,202,16
	call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
	call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
	call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
	call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
	call .Lkey_expansion_256a
	movups %xmm0,(%rax)
	movl %esi,16(%rax)
	xorq %rax,%rax
	jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
	movq $-2,%rax
.Lenc_key_ret:
	addq $8,%rsp
.byte 0xf3,0xc3
.LSEH_end_set_encrypt_key:

.align 16
.Lkey_expansion_128:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1
	xorps %xmm1,%xmm0
.byte 0xf3,0xc3

.align 16
.Lkey_expansion_192a:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps %xmm2,%xmm5
.Lkey_expansion_192b_warm:
	shufps $16,%xmm0,%xmm4
	movdqa %xmm2,%xmm3
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	pslldq $4,%xmm3
	xorps %xmm4,%xmm0
	pshufd $85,%xmm1,%xmm1
	pxor %xmm3,%xmm2
	pxor %xmm1,%xmm0
	pshufd $255,%xmm0,%xmm3
	pxor %xmm3,%xmm2
.byte 0xf3,0xc3

.align 16
.Lkey_expansion_192b:
	movaps %xmm0,%xmm3
	shufps $68,%xmm0,%xmm5
	movups %xmm5,(%rax)
	shufps $78,%xmm2,%xmm3
	movups %xmm3,16(%rax)
	leaq 32(%rax),%rax
	jmp .Lkey_expansion_192b_warm

.align 16
.Lkey_expansion_256a:
	movups %xmm2,(%rax)
	leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1
	xorps %xmm1,%xmm0
.byte 0xf3,0xc3

.align 16
.Lkey_expansion_256b:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax

	shufps $16,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $140,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $170,%xmm1,%xmm1
	xorps %xmm1,%xmm2
.byte 0xf3,0xc3
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long 6,6,6,0
.Lincrement64:
.long 1,0,0,0
.Lxts_magic:
.long 0x87,0,1,0

.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64