1 #if defined(__x86_64__) 2 .text 3 .extern OPENSSL_ia32cap_P 4 .hidden OPENSSL_ia32cap_P 5 .globl aesni_encrypt 6 .hidden aesni_encrypt 7 .type aesni_encrypt,@function 8 .align 16 9 aesni_encrypt: 10 movups (%rdi),%xmm2 11 movl 240(%rdx),%eax 12 movups (%rdx),%xmm0 13 movups 16(%rdx),%xmm1 14 leaq 32(%rdx),%rdx 15 xorps %xmm0,%xmm2 16 .Loop_enc1_1: 17 .byte 102,15,56,220,209 18 decl %eax 19 movups (%rdx),%xmm1 20 leaq 16(%rdx),%rdx 21 jnz .Loop_enc1_1 22 .byte 102,15,56,221,209 23 pxor %xmm0,%xmm0 24 pxor %xmm1,%xmm1 25 movups %xmm2,(%rsi) 26 pxor %xmm2,%xmm2 27 .byte 0xf3,0xc3 28 .size aesni_encrypt,.-aesni_encrypt 29 30 .globl aesni_decrypt 31 .hidden aesni_decrypt 32 .type aesni_decrypt,@function 33 .align 16 34 aesni_decrypt: 35 movups (%rdi),%xmm2 36 movl 240(%rdx),%eax 37 movups (%rdx),%xmm0 38 movups 16(%rdx),%xmm1 39 leaq 32(%rdx),%rdx 40 xorps %xmm0,%xmm2 41 .Loop_dec1_2: 42 .byte 102,15,56,222,209 43 decl %eax 44 movups (%rdx),%xmm1 45 leaq 16(%rdx),%rdx 46 jnz .Loop_dec1_2 47 .byte 102,15,56,223,209 48 pxor %xmm0,%xmm0 49 pxor %xmm1,%xmm1 50 movups %xmm2,(%rsi) 51 pxor %xmm2,%xmm2 52 .byte 0xf3,0xc3 53 .size aesni_decrypt, .-aesni_decrypt 54 .type _aesni_encrypt2,@function 55 .align 16 56 _aesni_encrypt2: 57 movups (%rcx),%xmm0 58 shll $4,%eax 59 movups 16(%rcx),%xmm1 60 xorps %xmm0,%xmm2 61 xorps %xmm0,%xmm3 62 movups 32(%rcx),%xmm0 63 leaq 32(%rcx,%rax,1),%rcx 64 negq %rax 65 addq $16,%rax 66 67 .Lenc_loop2: 68 .byte 102,15,56,220,209 69 .byte 102,15,56,220,217 70 movups (%rcx,%rax,1),%xmm1 71 addq $32,%rax 72 .byte 102,15,56,220,208 73 .byte 102,15,56,220,216 74 movups -16(%rcx,%rax,1),%xmm0 75 jnz .Lenc_loop2 76 77 .byte 102,15,56,220,209 78 .byte 102,15,56,220,217 79 .byte 102,15,56,221,208 80 .byte 102,15,56,221,216 81 .byte 0xf3,0xc3 82 .size _aesni_encrypt2,.-_aesni_encrypt2 83 .type _aesni_decrypt2,@function 84 .align 16 85 _aesni_decrypt2: 86 movups (%rcx),%xmm0 87 shll $4,%eax 88 movups 16(%rcx),%xmm1 89 xorps %xmm0,%xmm2 90 xorps %xmm0,%xmm3 
91 movups 32(%rcx),%xmm0 92 leaq 32(%rcx,%rax,1),%rcx 93 negq %rax 94 addq $16,%rax 95 96 .Ldec_loop2: 97 .byte 102,15,56,222,209 98 .byte 102,15,56,222,217 99 movups (%rcx,%rax,1),%xmm1 100 addq $32,%rax 101 .byte 102,15,56,222,208 102 .byte 102,15,56,222,216 103 movups -16(%rcx,%rax,1),%xmm0 104 jnz .Ldec_loop2 105 106 .byte 102,15,56,222,209 107 .byte 102,15,56,222,217 108 .byte 102,15,56,223,208 109 .byte 102,15,56,223,216 110 .byte 0xf3,0xc3 111 .size _aesni_decrypt2,.-_aesni_decrypt2 112 .type _aesni_encrypt3,@function 113 .align 16 114 _aesni_encrypt3: 115 movups (%rcx),%xmm0 116 shll $4,%eax 117 movups 16(%rcx),%xmm1 118 xorps %xmm0,%xmm2 119 xorps %xmm0,%xmm3 120 xorps %xmm0,%xmm4 121 movups 32(%rcx),%xmm0 122 leaq 32(%rcx,%rax,1),%rcx 123 negq %rax 124 addq $16,%rax 125 126 .Lenc_loop3: 127 .byte 102,15,56,220,209 128 .byte 102,15,56,220,217 129 .byte 102,15,56,220,225 130 movups (%rcx,%rax,1),%xmm1 131 addq $32,%rax 132 .byte 102,15,56,220,208 133 .byte 102,15,56,220,216 134 .byte 102,15,56,220,224 135 movups -16(%rcx,%rax,1),%xmm0 136 jnz .Lenc_loop3 137 138 .byte 102,15,56,220,209 139 .byte 102,15,56,220,217 140 .byte 102,15,56,220,225 141 .byte 102,15,56,221,208 142 .byte 102,15,56,221,216 143 .byte 102,15,56,221,224 144 .byte 0xf3,0xc3 145 .size _aesni_encrypt3,.-_aesni_encrypt3 146 .type _aesni_decrypt3,@function 147 .align 16 148 _aesni_decrypt3: 149 movups (%rcx),%xmm0 150 shll $4,%eax 151 movups 16(%rcx),%xmm1 152 xorps %xmm0,%xmm2 153 xorps %xmm0,%xmm3 154 xorps %xmm0,%xmm4 155 movups 32(%rcx),%xmm0 156 leaq 32(%rcx,%rax,1),%rcx 157 negq %rax 158 addq $16,%rax 159 160 .Ldec_loop3: 161 .byte 102,15,56,222,209 162 .byte 102,15,56,222,217 163 .byte 102,15,56,222,225 164 movups (%rcx,%rax,1),%xmm1 165 addq $32,%rax 166 .byte 102,15,56,222,208 167 .byte 102,15,56,222,216 168 .byte 102,15,56,222,224 169 movups -16(%rcx,%rax,1),%xmm0 170 jnz .Ldec_loop3 171 172 .byte 102,15,56,222,209 173 .byte 102,15,56,222,217 174 .byte 102,15,56,222,225 175 .byte 
102,15,56,223,208 176 .byte 102,15,56,223,216 177 .byte 102,15,56,223,224 178 .byte 0xf3,0xc3 179 .size _aesni_decrypt3,.-_aesni_decrypt3 180 .type _aesni_encrypt4,@function 181 .align 16 182 _aesni_encrypt4: 183 movups (%rcx),%xmm0 184 shll $4,%eax 185 movups 16(%rcx),%xmm1 186 xorps %xmm0,%xmm2 187 xorps %xmm0,%xmm3 188 xorps %xmm0,%xmm4 189 xorps %xmm0,%xmm5 190 movups 32(%rcx),%xmm0 191 leaq 32(%rcx,%rax,1),%rcx 192 negq %rax 193 .byte 0x0f,0x1f,0x00 194 addq $16,%rax 195 196 .Lenc_loop4: 197 .byte 102,15,56,220,209 198 .byte 102,15,56,220,217 199 .byte 102,15,56,220,225 200 .byte 102,15,56,220,233 201 movups (%rcx,%rax,1),%xmm1 202 addq $32,%rax 203 .byte 102,15,56,220,208 204 .byte 102,15,56,220,216 205 .byte 102,15,56,220,224 206 .byte 102,15,56,220,232 207 movups -16(%rcx,%rax,1),%xmm0 208 jnz .Lenc_loop4 209 210 .byte 102,15,56,220,209 211 .byte 102,15,56,220,217 212 .byte 102,15,56,220,225 213 .byte 102,15,56,220,233 214 .byte 102,15,56,221,208 215 .byte 102,15,56,221,216 216 .byte 102,15,56,221,224 217 .byte 102,15,56,221,232 218 .byte 0xf3,0xc3 219 .size _aesni_encrypt4,.-_aesni_encrypt4 220 .type _aesni_decrypt4,@function 221 .align 16 222 _aesni_decrypt4: 223 movups (%rcx),%xmm0 224 shll $4,%eax 225 movups 16(%rcx),%xmm1 226 xorps %xmm0,%xmm2 227 xorps %xmm0,%xmm3 228 xorps %xmm0,%xmm4 229 xorps %xmm0,%xmm5 230 movups 32(%rcx),%xmm0 231 leaq 32(%rcx,%rax,1),%rcx 232 negq %rax 233 .byte 0x0f,0x1f,0x00 234 addq $16,%rax 235 236 .Ldec_loop4: 237 .byte 102,15,56,222,209 238 .byte 102,15,56,222,217 239 .byte 102,15,56,222,225 240 .byte 102,15,56,222,233 241 movups (%rcx,%rax,1),%xmm1 242 addq $32,%rax 243 .byte 102,15,56,222,208 244 .byte 102,15,56,222,216 245 .byte 102,15,56,222,224 246 .byte 102,15,56,222,232 247 movups -16(%rcx,%rax,1),%xmm0 248 jnz .Ldec_loop4 249 250 .byte 102,15,56,222,209 251 .byte 102,15,56,222,217 252 .byte 102,15,56,222,225 253 .byte 102,15,56,222,233 254 .byte 102,15,56,223,208 255 .byte 102,15,56,223,216 256 .byte 
102,15,56,223,224 257 .byte 102,15,56,223,232 258 .byte 0xf3,0xc3 259 .size _aesni_decrypt4,.-_aesni_decrypt4 260 .type _aesni_encrypt6,@function 261 .align 16 262 _aesni_encrypt6: 263 movups (%rcx),%xmm0 264 shll $4,%eax 265 movups 16(%rcx),%xmm1 266 xorps %xmm0,%xmm2 267 pxor %xmm0,%xmm3 268 pxor %xmm0,%xmm4 269 .byte 102,15,56,220,209 270 leaq 32(%rcx,%rax,1),%rcx 271 negq %rax 272 .byte 102,15,56,220,217 273 pxor %xmm0,%xmm5 274 pxor %xmm0,%xmm6 275 .byte 102,15,56,220,225 276 pxor %xmm0,%xmm7 277 movups (%rcx,%rax,1),%xmm0 278 addq $16,%rax 279 jmp .Lenc_loop6_enter 280 .align 16 281 .Lenc_loop6: 282 .byte 102,15,56,220,209 283 .byte 102,15,56,220,217 284 .byte 102,15,56,220,225 285 .Lenc_loop6_enter: 286 .byte 102,15,56,220,233 287 .byte 102,15,56,220,241 288 .byte 102,15,56,220,249 289 movups (%rcx,%rax,1),%xmm1 290 addq $32,%rax 291 .byte 102,15,56,220,208 292 .byte 102,15,56,220,216 293 .byte 102,15,56,220,224 294 .byte 102,15,56,220,232 295 .byte 102,15,56,220,240 296 .byte 102,15,56,220,248 297 movups -16(%rcx,%rax,1),%xmm0 298 jnz .Lenc_loop6 299 300 .byte 102,15,56,220,209 301 .byte 102,15,56,220,217 302 .byte 102,15,56,220,225 303 .byte 102,15,56,220,233 304 .byte 102,15,56,220,241 305 .byte 102,15,56,220,249 306 .byte 102,15,56,221,208 307 .byte 102,15,56,221,216 308 .byte 102,15,56,221,224 309 .byte 102,15,56,221,232 310 .byte 102,15,56,221,240 311 .byte 102,15,56,221,248 312 .byte 0xf3,0xc3 313 .size _aesni_encrypt6,.-_aesni_encrypt6 314 .type _aesni_decrypt6,@function 315 .align 16 316 _aesni_decrypt6: 317 movups (%rcx),%xmm0 318 shll $4,%eax 319 movups 16(%rcx),%xmm1 320 xorps %xmm0,%xmm2 321 pxor %xmm0,%xmm3 322 pxor %xmm0,%xmm4 323 .byte 102,15,56,222,209 324 leaq 32(%rcx,%rax,1),%rcx 325 negq %rax 326 .byte 102,15,56,222,217 327 pxor %xmm0,%xmm5 328 pxor %xmm0,%xmm6 329 .byte 102,15,56,222,225 330 pxor %xmm0,%xmm7 331 movups (%rcx,%rax,1),%xmm0 332 addq $16,%rax 333 jmp .Ldec_loop6_enter 334 .align 16 335 .Ldec_loop6: 336 .byte 
102,15,56,222,209 337 .byte 102,15,56,222,217 338 .byte 102,15,56,222,225 339 .Ldec_loop6_enter: 340 .byte 102,15,56,222,233 341 .byte 102,15,56,222,241 342 .byte 102,15,56,222,249 343 movups (%rcx,%rax,1),%xmm1 344 addq $32,%rax 345 .byte 102,15,56,222,208 346 .byte 102,15,56,222,216 347 .byte 102,15,56,222,224 348 .byte 102,15,56,222,232 349 .byte 102,15,56,222,240 350 .byte 102,15,56,222,248 351 movups -16(%rcx,%rax,1),%xmm0 352 jnz .Ldec_loop6 353 354 .byte 102,15,56,222,209 355 .byte 102,15,56,222,217 356 .byte 102,15,56,222,225 357 .byte 102,15,56,222,233 358 .byte 102,15,56,222,241 359 .byte 102,15,56,222,249 360 .byte 102,15,56,223,208 361 .byte 102,15,56,223,216 362 .byte 102,15,56,223,224 363 .byte 102,15,56,223,232 364 .byte 102,15,56,223,240 365 .byte 102,15,56,223,248 366 .byte 0xf3,0xc3 367 .size _aesni_decrypt6,.-_aesni_decrypt6 368 .type _aesni_encrypt8,@function 369 .align 16 370 _aesni_encrypt8: 371 movups (%rcx),%xmm0 372 shll $4,%eax 373 movups 16(%rcx),%xmm1 374 xorps %xmm0,%xmm2 375 xorps %xmm0,%xmm3 376 pxor %xmm0,%xmm4 377 pxor %xmm0,%xmm5 378 pxor %xmm0,%xmm6 379 leaq 32(%rcx,%rax,1),%rcx 380 negq %rax 381 .byte 102,15,56,220,209 382 pxor %xmm0,%xmm7 383 pxor %xmm0,%xmm8 384 .byte 102,15,56,220,217 385 pxor %xmm0,%xmm9 386 movups (%rcx,%rax,1),%xmm0 387 addq $16,%rax 388 jmp .Lenc_loop8_inner 389 .align 16 390 .Lenc_loop8: 391 .byte 102,15,56,220,209 392 .byte 102,15,56,220,217 393 .Lenc_loop8_inner: 394 .byte 102,15,56,220,225 395 .byte 102,15,56,220,233 396 .byte 102,15,56,220,241 397 .byte 102,15,56,220,249 398 .byte 102,68,15,56,220,193 399 .byte 102,68,15,56,220,201 400 .Lenc_loop8_enter: 401 movups (%rcx,%rax,1),%xmm1 402 addq $32,%rax 403 .byte 102,15,56,220,208 404 .byte 102,15,56,220,216 405 .byte 102,15,56,220,224 406 .byte 102,15,56,220,232 407 .byte 102,15,56,220,240 408 .byte 102,15,56,220,248 409 .byte 102,68,15,56,220,192 410 .byte 102,68,15,56,220,200 411 movups -16(%rcx,%rax,1),%xmm0 412 jnz .Lenc_loop8 413 414 .byte 
102,15,56,220,209 415 .byte 102,15,56,220,217 416 .byte 102,15,56,220,225 417 .byte 102,15,56,220,233 418 .byte 102,15,56,220,241 419 .byte 102,15,56,220,249 420 .byte 102,68,15,56,220,193 421 .byte 102,68,15,56,220,201 422 .byte 102,15,56,221,208 423 .byte 102,15,56,221,216 424 .byte 102,15,56,221,224 425 .byte 102,15,56,221,232 426 .byte 102,15,56,221,240 427 .byte 102,15,56,221,248 428 .byte 102,68,15,56,221,192 429 .byte 102,68,15,56,221,200 430 .byte 0xf3,0xc3 431 .size _aesni_encrypt8,.-_aesni_encrypt8 432 .type _aesni_decrypt8,@function 433 .align 16 434 _aesni_decrypt8: 435 movups (%rcx),%xmm0 436 shll $4,%eax 437 movups 16(%rcx),%xmm1 438 xorps %xmm0,%xmm2 439 xorps %xmm0,%xmm3 440 pxor %xmm0,%xmm4 441 pxor %xmm0,%xmm5 442 pxor %xmm0,%xmm6 443 leaq 32(%rcx,%rax,1),%rcx 444 negq %rax 445 .byte 102,15,56,222,209 446 pxor %xmm0,%xmm7 447 pxor %xmm0,%xmm8 448 .byte 102,15,56,222,217 449 pxor %xmm0,%xmm9 450 movups (%rcx,%rax,1),%xmm0 451 addq $16,%rax 452 jmp .Ldec_loop8_inner 453 .align 16 454 .Ldec_loop8: 455 .byte 102,15,56,222,209 456 .byte 102,15,56,222,217 457 .Ldec_loop8_inner: 458 .byte 102,15,56,222,225 459 .byte 102,15,56,222,233 460 .byte 102,15,56,222,241 461 .byte 102,15,56,222,249 462 .byte 102,68,15,56,222,193 463 .byte 102,68,15,56,222,201 464 .Ldec_loop8_enter: 465 movups (%rcx,%rax,1),%xmm1 466 addq $32,%rax 467 .byte 102,15,56,222,208 468 .byte 102,15,56,222,216 469 .byte 102,15,56,222,224 470 .byte 102,15,56,222,232 471 .byte 102,15,56,222,240 472 .byte 102,15,56,222,248 473 .byte 102,68,15,56,222,192 474 .byte 102,68,15,56,222,200 475 movups -16(%rcx,%rax,1),%xmm0 476 jnz .Ldec_loop8 477 478 .byte 102,15,56,222,209 479 .byte 102,15,56,222,217 480 .byte 102,15,56,222,225 481 .byte 102,15,56,222,233 482 .byte 102,15,56,222,241 483 .byte 102,15,56,222,249 484 .byte 102,68,15,56,222,193 485 .byte 102,68,15,56,222,201 486 .byte 102,15,56,223,208 487 .byte 102,15,56,223,216 488 .byte 102,15,56,223,224 489 .byte 102,15,56,223,232 490 .byte 
102,15,56,223,240 491 .byte 102,15,56,223,248 492 .byte 102,68,15,56,223,192 493 .byte 102,68,15,56,223,200 494 .byte 0xf3,0xc3 495 .size _aesni_decrypt8,.-_aesni_decrypt8 496 .globl aesni_ecb_encrypt 497 .hidden aesni_ecb_encrypt 498 .type aesni_ecb_encrypt,@function 499 .align 16 500 aesni_ecb_encrypt: 501 andq $-16,%rdx 502 jz .Lecb_ret 503 504 movl 240(%rcx),%eax 505 movups (%rcx),%xmm0 506 movq %rcx,%r11 507 movl %eax,%r10d 508 testl %r8d,%r8d 509 jz .Lecb_decrypt 510 511 cmpq $128,%rdx 512 jb .Lecb_enc_tail 513 514 movdqu (%rdi),%xmm2 515 movdqu 16(%rdi),%xmm3 516 movdqu 32(%rdi),%xmm4 517 movdqu 48(%rdi),%xmm5 518 movdqu 64(%rdi),%xmm6 519 movdqu 80(%rdi),%xmm7 520 movdqu 96(%rdi),%xmm8 521 movdqu 112(%rdi),%xmm9 522 leaq 128(%rdi),%rdi 523 subq $128,%rdx 524 jmp .Lecb_enc_loop8_enter 525 .align 16 526 .Lecb_enc_loop8: 527 movups %xmm2,(%rsi) 528 movq %r11,%rcx 529 movdqu (%rdi),%xmm2 530 movl %r10d,%eax 531 movups %xmm3,16(%rsi) 532 movdqu 16(%rdi),%xmm3 533 movups %xmm4,32(%rsi) 534 movdqu 32(%rdi),%xmm4 535 movups %xmm5,48(%rsi) 536 movdqu 48(%rdi),%xmm5 537 movups %xmm6,64(%rsi) 538 movdqu 64(%rdi),%xmm6 539 movups %xmm7,80(%rsi) 540 movdqu 80(%rdi),%xmm7 541 movups %xmm8,96(%rsi) 542 movdqu 96(%rdi),%xmm8 543 movups %xmm9,112(%rsi) 544 leaq 128(%rsi),%rsi 545 movdqu 112(%rdi),%xmm9 546 leaq 128(%rdi),%rdi 547 .Lecb_enc_loop8_enter: 548 549 call _aesni_encrypt8 550 551 subq $128,%rdx 552 jnc .Lecb_enc_loop8 553 554 movups %xmm2,(%rsi) 555 movq %r11,%rcx 556 movups %xmm3,16(%rsi) 557 movl %r10d,%eax 558 movups %xmm4,32(%rsi) 559 movups %xmm5,48(%rsi) 560 movups %xmm6,64(%rsi) 561 movups %xmm7,80(%rsi) 562 movups %xmm8,96(%rsi) 563 movups %xmm9,112(%rsi) 564 leaq 128(%rsi),%rsi 565 addq $128,%rdx 566 jz .Lecb_ret 567 568 .Lecb_enc_tail: 569 movups (%rdi),%xmm2 570 cmpq $32,%rdx 571 jb .Lecb_enc_one 572 movups 16(%rdi),%xmm3 573 je .Lecb_enc_two 574 movups 32(%rdi),%xmm4 575 cmpq $64,%rdx 576 jb .Lecb_enc_three 577 movups 48(%rdi),%xmm5 578 je 
.Lecb_enc_four 579 movups 64(%rdi),%xmm6 580 cmpq $96,%rdx 581 jb .Lecb_enc_five 582 movups 80(%rdi),%xmm7 583 je .Lecb_enc_six 584 movdqu 96(%rdi),%xmm8 585 xorps %xmm9,%xmm9 586 call _aesni_encrypt8 587 movups %xmm2,(%rsi) 588 movups %xmm3,16(%rsi) 589 movups %xmm4,32(%rsi) 590 movups %xmm5,48(%rsi) 591 movups %xmm6,64(%rsi) 592 movups %xmm7,80(%rsi) 593 movups %xmm8,96(%rsi) 594 jmp .Lecb_ret 595 .align 16 596 .Lecb_enc_one: 597 movups (%rcx),%xmm0 598 movups 16(%rcx),%xmm1 599 leaq 32(%rcx),%rcx 600 xorps %xmm0,%xmm2 601 .Loop_enc1_3: 602 .byte 102,15,56,220,209 603 decl %eax 604 movups (%rcx),%xmm1 605 leaq 16(%rcx),%rcx 606 jnz .Loop_enc1_3 607 .byte 102,15,56,221,209 608 movups %xmm2,(%rsi) 609 jmp .Lecb_ret 610 .align 16 611 .Lecb_enc_two: 612 call _aesni_encrypt2 613 movups %xmm2,(%rsi) 614 movups %xmm3,16(%rsi) 615 jmp .Lecb_ret 616 .align 16 617 .Lecb_enc_three: 618 call _aesni_encrypt3 619 movups %xmm2,(%rsi) 620 movups %xmm3,16(%rsi) 621 movups %xmm4,32(%rsi) 622 jmp .Lecb_ret 623 .align 16 624 .Lecb_enc_four: 625 call _aesni_encrypt4 626 movups %xmm2,(%rsi) 627 movups %xmm3,16(%rsi) 628 movups %xmm4,32(%rsi) 629 movups %xmm5,48(%rsi) 630 jmp .Lecb_ret 631 .align 16 632 .Lecb_enc_five: 633 xorps %xmm7,%xmm7 634 call _aesni_encrypt6 635 movups %xmm2,(%rsi) 636 movups %xmm3,16(%rsi) 637 movups %xmm4,32(%rsi) 638 movups %xmm5,48(%rsi) 639 movups %xmm6,64(%rsi) 640 jmp .Lecb_ret 641 .align 16 642 .Lecb_enc_six: 643 call _aesni_encrypt6 644 movups %xmm2,(%rsi) 645 movups %xmm3,16(%rsi) 646 movups %xmm4,32(%rsi) 647 movups %xmm5,48(%rsi) 648 movups %xmm6,64(%rsi) 649 movups %xmm7,80(%rsi) 650 jmp .Lecb_ret 651 652 .align 16 653 .Lecb_decrypt: 654 cmpq $128,%rdx 655 jb .Lecb_dec_tail 656 657 movdqu (%rdi),%xmm2 658 movdqu 16(%rdi),%xmm3 659 movdqu 32(%rdi),%xmm4 660 movdqu 48(%rdi),%xmm5 661 movdqu 64(%rdi),%xmm6 662 movdqu 80(%rdi),%xmm7 663 movdqu 96(%rdi),%xmm8 664 movdqu 112(%rdi),%xmm9 665 leaq 128(%rdi),%rdi 666 subq $128,%rdx 667 jmp 
.Lecb_dec_loop8_enter 668 .align 16 669 .Lecb_dec_loop8: 670 movups %xmm2,(%rsi) 671 movq %r11,%rcx 672 movdqu (%rdi),%xmm2 673 movl %r10d,%eax 674 movups %xmm3,16(%rsi) 675 movdqu 16(%rdi),%xmm3 676 movups %xmm4,32(%rsi) 677 movdqu 32(%rdi),%xmm4 678 movups %xmm5,48(%rsi) 679 movdqu 48(%rdi),%xmm5 680 movups %xmm6,64(%rsi) 681 movdqu 64(%rdi),%xmm6 682 movups %xmm7,80(%rsi) 683 movdqu 80(%rdi),%xmm7 684 movups %xmm8,96(%rsi) 685 movdqu 96(%rdi),%xmm8 686 movups %xmm9,112(%rsi) 687 leaq 128(%rsi),%rsi 688 movdqu 112(%rdi),%xmm9 689 leaq 128(%rdi),%rdi 690 .Lecb_dec_loop8_enter: 691 692 call _aesni_decrypt8 693 694 movups (%r11),%xmm0 695 subq $128,%rdx 696 jnc .Lecb_dec_loop8 697 698 movups %xmm2,(%rsi) 699 pxor %xmm2,%xmm2 700 movq %r11,%rcx 701 movups %xmm3,16(%rsi) 702 pxor %xmm3,%xmm3 703 movl %r10d,%eax 704 movups %xmm4,32(%rsi) 705 pxor %xmm4,%xmm4 706 movups %xmm5,48(%rsi) 707 pxor %xmm5,%xmm5 708 movups %xmm6,64(%rsi) 709 pxor %xmm6,%xmm6 710 movups %xmm7,80(%rsi) 711 pxor %xmm7,%xmm7 712 movups %xmm8,96(%rsi) 713 pxor %xmm8,%xmm8 714 movups %xmm9,112(%rsi) 715 pxor %xmm9,%xmm9 716 leaq 128(%rsi),%rsi 717 addq $128,%rdx 718 jz .Lecb_ret 719 720 .Lecb_dec_tail: 721 movups (%rdi),%xmm2 722 cmpq $32,%rdx 723 jb .Lecb_dec_one 724 movups 16(%rdi),%xmm3 725 je .Lecb_dec_two 726 movups 32(%rdi),%xmm4 727 cmpq $64,%rdx 728 jb .Lecb_dec_three 729 movups 48(%rdi),%xmm5 730 je .Lecb_dec_four 731 movups 64(%rdi),%xmm6 732 cmpq $96,%rdx 733 jb .Lecb_dec_five 734 movups 80(%rdi),%xmm7 735 je .Lecb_dec_six 736 movups 96(%rdi),%xmm8 737 movups (%rcx),%xmm0 738 xorps %xmm9,%xmm9 739 call _aesni_decrypt8 740 movups %xmm2,(%rsi) 741 pxor %xmm2,%xmm2 742 movups %xmm3,16(%rsi) 743 pxor %xmm3,%xmm3 744 movups %xmm4,32(%rsi) 745 pxor %xmm4,%xmm4 746 movups %xmm5,48(%rsi) 747 pxor %xmm5,%xmm5 748 movups %xmm6,64(%rsi) 749 pxor %xmm6,%xmm6 750 movups %xmm7,80(%rsi) 751 pxor %xmm7,%xmm7 752 movups %xmm8,96(%rsi) 753 pxor %xmm8,%xmm8 754 pxor %xmm9,%xmm9 755 jmp .Lecb_ret 756 .align 
16 757 .Lecb_dec_one: 758 movups (%rcx),%xmm0 759 movups 16(%rcx),%xmm1 760 leaq 32(%rcx),%rcx 761 xorps %xmm0,%xmm2 762 .Loop_dec1_4: 763 .byte 102,15,56,222,209 764 decl %eax 765 movups (%rcx),%xmm1 766 leaq 16(%rcx),%rcx 767 jnz .Loop_dec1_4 768 .byte 102,15,56,223,209 769 movups %xmm2,(%rsi) 770 pxor %xmm2,%xmm2 771 jmp .Lecb_ret 772 .align 16 773 .Lecb_dec_two: 774 call _aesni_decrypt2 775 movups %xmm2,(%rsi) 776 pxor %xmm2,%xmm2 777 movups %xmm3,16(%rsi) 778 pxor %xmm3,%xmm3 779 jmp .Lecb_ret 780 .align 16 781 .Lecb_dec_three: 782 call _aesni_decrypt3 783 movups %xmm2,(%rsi) 784 pxor %xmm2,%xmm2 785 movups %xmm3,16(%rsi) 786 pxor %xmm3,%xmm3 787 movups %xmm4,32(%rsi) 788 pxor %xmm4,%xmm4 789 jmp .Lecb_ret 790 .align 16 791 .Lecb_dec_four: 792 call _aesni_decrypt4 793 movups %xmm2,(%rsi) 794 pxor %xmm2,%xmm2 795 movups %xmm3,16(%rsi) 796 pxor %xmm3,%xmm3 797 movups %xmm4,32(%rsi) 798 pxor %xmm4,%xmm4 799 movups %xmm5,48(%rsi) 800 pxor %xmm5,%xmm5 801 jmp .Lecb_ret 802 .align 16 803 .Lecb_dec_five: 804 xorps %xmm7,%xmm7 805 call _aesni_decrypt6 806 movups %xmm2,(%rsi) 807 pxor %xmm2,%xmm2 808 movups %xmm3,16(%rsi) 809 pxor %xmm3,%xmm3 810 movups %xmm4,32(%rsi) 811 pxor %xmm4,%xmm4 812 movups %xmm5,48(%rsi) 813 pxor %xmm5,%xmm5 814 movups %xmm6,64(%rsi) 815 pxor %xmm6,%xmm6 816 pxor %xmm7,%xmm7 817 jmp .Lecb_ret 818 .align 16 819 .Lecb_dec_six: 820 call _aesni_decrypt6 821 movups %xmm2,(%rsi) 822 pxor %xmm2,%xmm2 823 movups %xmm3,16(%rsi) 824 pxor %xmm3,%xmm3 825 movups %xmm4,32(%rsi) 826 pxor %xmm4,%xmm4 827 movups %xmm5,48(%rsi) 828 pxor %xmm5,%xmm5 829 movups %xmm6,64(%rsi) 830 pxor %xmm6,%xmm6 831 movups %xmm7,80(%rsi) 832 pxor %xmm7,%xmm7 833 834 .Lecb_ret: 835 xorps %xmm0,%xmm0 836 pxor %xmm1,%xmm1 837 .byte 0xf3,0xc3 838 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt 839 .globl aesni_ccm64_encrypt_blocks 840 .hidden aesni_ccm64_encrypt_blocks 841 .type aesni_ccm64_encrypt_blocks,@function 842 .align 16 843 aesni_ccm64_encrypt_blocks: 844 movl 240(%rcx),%eax 
845 movdqu (%r8),%xmm6 846 movdqa .Lincrement64(%rip),%xmm9 847 movdqa .Lbswap_mask(%rip),%xmm7 848 849 shll $4,%eax 850 movl $16,%r10d 851 leaq 0(%rcx),%r11 852 movdqu (%r9),%xmm3 853 movdqa %xmm6,%xmm2 854 leaq 32(%rcx,%rax,1),%rcx 855 .byte 102,15,56,0,247 856 subq %rax,%r10 857 jmp .Lccm64_enc_outer 858 .align 16 859 .Lccm64_enc_outer: 860 movups (%r11),%xmm0 861 movq %r10,%rax 862 movups (%rdi),%xmm8 863 864 xorps %xmm0,%xmm2 865 movups 16(%r11),%xmm1 866 xorps %xmm8,%xmm0 867 xorps %xmm0,%xmm3 868 movups 32(%r11),%xmm0 869 870 .Lccm64_enc2_loop: 871 .byte 102,15,56,220,209 872 .byte 102,15,56,220,217 873 movups (%rcx,%rax,1),%xmm1 874 addq $32,%rax 875 .byte 102,15,56,220,208 876 .byte 102,15,56,220,216 877 movups -16(%rcx,%rax,1),%xmm0 878 jnz .Lccm64_enc2_loop 879 .byte 102,15,56,220,209 880 .byte 102,15,56,220,217 881 paddq %xmm9,%xmm6 882 decq %rdx 883 .byte 102,15,56,221,208 884 .byte 102,15,56,221,216 885 886 leaq 16(%rdi),%rdi 887 xorps %xmm2,%xmm8 888 movdqa %xmm6,%xmm2 889 movups %xmm8,(%rsi) 890 .byte 102,15,56,0,215 891 leaq 16(%rsi),%rsi 892 jnz .Lccm64_enc_outer 893 894 pxor %xmm0,%xmm0 895 pxor %xmm1,%xmm1 896 pxor %xmm2,%xmm2 897 movups %xmm3,(%r9) 898 pxor %xmm3,%xmm3 899 pxor %xmm8,%xmm8 900 pxor %xmm6,%xmm6 901 .byte 0xf3,0xc3 902 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 903 .globl aesni_ccm64_decrypt_blocks 904 .hidden aesni_ccm64_decrypt_blocks 905 .type aesni_ccm64_decrypt_blocks,@function 906 .align 16 907 aesni_ccm64_decrypt_blocks: 908 movl 240(%rcx),%eax 909 movups (%r8),%xmm6 910 movdqu (%r9),%xmm3 911 movdqa .Lincrement64(%rip),%xmm9 912 movdqa .Lbswap_mask(%rip),%xmm7 913 914 movaps %xmm6,%xmm2 915 movl %eax,%r10d 916 movq %rcx,%r11 917 .byte 102,15,56,0,247 918 movups (%rcx),%xmm0 919 movups 16(%rcx),%xmm1 920 leaq 32(%rcx),%rcx 921 xorps %xmm0,%xmm2 922 .Loop_enc1_5: 923 .byte 102,15,56,220,209 924 decl %eax 925 movups (%rcx),%xmm1 926 leaq 16(%rcx),%rcx 927 jnz .Loop_enc1_5 928 .byte 102,15,56,221,209 929 
shll $4,%r10d 930 movl $16,%eax 931 movups (%rdi),%xmm8 932 paddq %xmm9,%xmm6 933 leaq 16(%rdi),%rdi 934 subq %r10,%rax 935 leaq 32(%r11,%r10,1),%rcx 936 movq %rax,%r10 937 jmp .Lccm64_dec_outer 938 .align 16 939 .Lccm64_dec_outer: 940 xorps %xmm2,%xmm8 941 movdqa %xmm6,%xmm2 942 movups %xmm8,(%rsi) 943 leaq 16(%rsi),%rsi 944 .byte 102,15,56,0,215 945 946 subq $1,%rdx 947 jz .Lccm64_dec_break 948 949 movups (%r11),%xmm0 950 movq %r10,%rax 951 movups 16(%r11),%xmm1 952 xorps %xmm0,%xmm8 953 xorps %xmm0,%xmm2 954 xorps %xmm8,%xmm3 955 movups 32(%r11),%xmm0 956 jmp .Lccm64_dec2_loop 957 .align 16 958 .Lccm64_dec2_loop: 959 .byte 102,15,56,220,209 960 .byte 102,15,56,220,217 961 movups (%rcx,%rax,1),%xmm1 962 addq $32,%rax 963 .byte 102,15,56,220,208 964 .byte 102,15,56,220,216 965 movups -16(%rcx,%rax,1),%xmm0 966 jnz .Lccm64_dec2_loop 967 movups (%rdi),%xmm8 968 paddq %xmm9,%xmm6 969 .byte 102,15,56,220,209 970 .byte 102,15,56,220,217 971 .byte 102,15,56,221,208 972 .byte 102,15,56,221,216 973 leaq 16(%rdi),%rdi 974 jmp .Lccm64_dec_outer 975 976 .align 16 977 .Lccm64_dec_break: 978 979 movl 240(%r11),%eax 980 movups (%r11),%xmm0 981 movups 16(%r11),%xmm1 982 xorps %xmm0,%xmm8 983 leaq 32(%r11),%r11 984 xorps %xmm8,%xmm3 985 .Loop_enc1_6: 986 .byte 102,15,56,220,217 987 decl %eax 988 movups (%r11),%xmm1 989 leaq 16(%r11),%r11 990 jnz .Loop_enc1_6 991 .byte 102,15,56,221,217 992 pxor %xmm0,%xmm0 993 pxor %xmm1,%xmm1 994 pxor %xmm2,%xmm2 995 movups %xmm3,(%r9) 996 pxor %xmm3,%xmm3 997 pxor %xmm8,%xmm8 998 pxor %xmm6,%xmm6 999 .byte 0xf3,0xc3 1000 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 1001 .globl aesni_ctr32_encrypt_blocks 1002 .hidden aesni_ctr32_encrypt_blocks 1003 .type aesni_ctr32_encrypt_blocks,@function 1004 .align 16 1005 aesni_ctr32_encrypt_blocks: 1006 cmpq $1,%rdx 1007 jne .Lctr32_bulk 1008 1009 1010 1011 movups (%r8),%xmm2 1012 movups (%rdi),%xmm3 1013 movl 240(%rcx),%edx 1014 movups (%rcx),%xmm0 1015 movups 16(%rcx),%xmm1 1016 leaq 
32(%rcx),%rcx 1017 xorps %xmm0,%xmm2 1018 .Loop_enc1_7: 1019 .byte 102,15,56,220,209 1020 decl %edx 1021 movups (%rcx),%xmm1 1022 leaq 16(%rcx),%rcx 1023 jnz .Loop_enc1_7 1024 .byte 102,15,56,221,209 1025 pxor %xmm0,%xmm0 1026 pxor %xmm1,%xmm1 1027 xorps %xmm3,%xmm2 1028 pxor %xmm3,%xmm3 1029 movups %xmm2,(%rsi) 1030 xorps %xmm2,%xmm2 1031 jmp .Lctr32_epilogue 1032 1033 .align 16 1034 .Lctr32_bulk: 1035 leaq (%rsp),%rax 1036 pushq %rbp 1037 subq $128,%rsp 1038 andq $-16,%rsp 1039 leaq -8(%rax),%rbp 1040 1041 1042 1043 1044 movdqu (%r8),%xmm2 1045 movdqu (%rcx),%xmm0 1046 movl 12(%r8),%r8d 1047 pxor %xmm0,%xmm2 1048 movl 12(%rcx),%r11d 1049 movdqa %xmm2,0(%rsp) 1050 bswapl %r8d 1051 movdqa %xmm2,%xmm3 1052 movdqa %xmm2,%xmm4 1053 movdqa %xmm2,%xmm5 1054 movdqa %xmm2,64(%rsp) 1055 movdqa %xmm2,80(%rsp) 1056 movdqa %xmm2,96(%rsp) 1057 movq %rdx,%r10 1058 movdqa %xmm2,112(%rsp) 1059 1060 leaq 1(%r8),%rax 1061 leaq 2(%r8),%rdx 1062 bswapl %eax 1063 bswapl %edx 1064 xorl %r11d,%eax 1065 xorl %r11d,%edx 1066 .byte 102,15,58,34,216,3 1067 leaq 3(%r8),%rax 1068 movdqa %xmm3,16(%rsp) 1069 .byte 102,15,58,34,226,3 1070 bswapl %eax 1071 movq %r10,%rdx 1072 leaq 4(%r8),%r10 1073 movdqa %xmm4,32(%rsp) 1074 xorl %r11d,%eax 1075 bswapl %r10d 1076 .byte 102,15,58,34,232,3 1077 xorl %r11d,%r10d 1078 movdqa %xmm5,48(%rsp) 1079 leaq 5(%r8),%r9 1080 movl %r10d,64+12(%rsp) 1081 bswapl %r9d 1082 leaq 6(%r8),%r10 1083 movl 240(%rcx),%eax 1084 xorl %r11d,%r9d 1085 bswapl %r10d 1086 movl %r9d,80+12(%rsp) 1087 xorl %r11d,%r10d 1088 leaq 7(%r8),%r9 1089 movl %r10d,96+12(%rsp) 1090 bswapl %r9d 1091 movl OPENSSL_ia32cap_P+4(%rip),%r10d 1092 xorl %r11d,%r9d 1093 andl $71303168,%r10d 1094 movl %r9d,112+12(%rsp) 1095 1096 movups 16(%rcx),%xmm1 1097 1098 movdqa 64(%rsp),%xmm6 1099 movdqa 80(%rsp),%xmm7 1100 1101 cmpq $8,%rdx 1102 jb .Lctr32_tail 1103 1104 subq $6,%rdx 1105 cmpl $4194304,%r10d 1106 je .Lctr32_6x 1107 1108 leaq 128(%rcx),%rcx 1109 subq $2,%rdx 1110 jmp .Lctr32_loop8 1111 1112 .align 
16 1113 .Lctr32_6x: 1114 shll $4,%eax 1115 movl $48,%r10d 1116 bswapl %r11d 1117 leaq 32(%rcx,%rax,1),%rcx 1118 subq %rax,%r10 1119 jmp .Lctr32_loop6 1120 1121 .align 16 1122 .Lctr32_loop6: 1123 addl $6,%r8d 1124 movups -48(%rcx,%r10,1),%xmm0 1125 .byte 102,15,56,220,209 1126 movl %r8d,%eax 1127 xorl %r11d,%eax 1128 .byte 102,15,56,220,217 1129 .byte 0x0f,0x38,0xf1,0x44,0x24,12 1130 leal 1(%r8),%eax 1131 .byte 102,15,56,220,225 1132 xorl %r11d,%eax 1133 .byte 0x0f,0x38,0xf1,0x44,0x24,28 1134 .byte 102,15,56,220,233 1135 leal 2(%r8),%eax 1136 xorl %r11d,%eax 1137 .byte 102,15,56,220,241 1138 .byte 0x0f,0x38,0xf1,0x44,0x24,44 1139 leal 3(%r8),%eax 1140 .byte 102,15,56,220,249 1141 movups -32(%rcx,%r10,1),%xmm1 1142 xorl %r11d,%eax 1143 1144 .byte 102,15,56,220,208 1145 .byte 0x0f,0x38,0xf1,0x44,0x24,60 1146 leal 4(%r8),%eax 1147 .byte 102,15,56,220,216 1148 xorl %r11d,%eax 1149 .byte 0x0f,0x38,0xf1,0x44,0x24,76 1150 .byte 102,15,56,220,224 1151 leal 5(%r8),%eax 1152 xorl %r11d,%eax 1153 .byte 102,15,56,220,232 1154 .byte 0x0f,0x38,0xf1,0x44,0x24,92 1155 movq %r10,%rax 1156 .byte 102,15,56,220,240 1157 .byte 102,15,56,220,248 1158 movups -16(%rcx,%r10,1),%xmm0 1159 1160 call .Lenc_loop6 1161 1162 movdqu (%rdi),%xmm8 1163 movdqu 16(%rdi),%xmm9 1164 movdqu 32(%rdi),%xmm10 1165 movdqu 48(%rdi),%xmm11 1166 movdqu 64(%rdi),%xmm12 1167 movdqu 80(%rdi),%xmm13 1168 leaq 96(%rdi),%rdi 1169 movups -64(%rcx,%r10,1),%xmm1 1170 pxor %xmm2,%xmm8 1171 movaps 0(%rsp),%xmm2 1172 pxor %xmm3,%xmm9 1173 movaps 16(%rsp),%xmm3 1174 pxor %xmm4,%xmm10 1175 movaps 32(%rsp),%xmm4 1176 pxor %xmm5,%xmm11 1177 movaps 48(%rsp),%xmm5 1178 pxor %xmm6,%xmm12 1179 movaps 64(%rsp),%xmm6 1180 pxor %xmm7,%xmm13 1181 movaps 80(%rsp),%xmm7 1182 movdqu %xmm8,(%rsi) 1183 movdqu %xmm9,16(%rsi) 1184 movdqu %xmm10,32(%rsi) 1185 movdqu %xmm11,48(%rsi) 1186 movdqu %xmm12,64(%rsi) 1187 movdqu %xmm13,80(%rsi) 1188 leaq 96(%rsi),%rsi 1189 1190 subq $6,%rdx 1191 jnc .Lctr32_loop6 1192 1193 addq $6,%rdx 1194 jz 
.Lctr32_done 1195 1196 leal -48(%r10),%eax 1197 leaq -80(%rcx,%r10,1),%rcx 1198 negl %eax 1199 shrl $4,%eax 1200 jmp .Lctr32_tail 1201 1202 .align 32 1203 .Lctr32_loop8: 1204 addl $8,%r8d 1205 movdqa 96(%rsp),%xmm8 1206 .byte 102,15,56,220,209 1207 movl %r8d,%r9d 1208 movdqa 112(%rsp),%xmm9 1209 .byte 102,15,56,220,217 1210 bswapl %r9d 1211 movups 32-128(%rcx),%xmm0 1212 .byte 102,15,56,220,225 1213 xorl %r11d,%r9d 1214 nop 1215 .byte 102,15,56,220,233 1216 movl %r9d,0+12(%rsp) 1217 leaq 1(%r8),%r9 1218 .byte 102,15,56,220,241 1219 .byte 102,15,56,220,249 1220 .byte 102,68,15,56,220,193 1221 .byte 102,68,15,56,220,201 1222 movups 48-128(%rcx),%xmm1 1223 bswapl %r9d 1224 .byte 102,15,56,220,208 1225 .byte 102,15,56,220,216 1226 xorl %r11d,%r9d 1227 .byte 0x66,0x90 1228 .byte 102,15,56,220,224 1229 .byte 102,15,56,220,232 1230 movl %r9d,16+12(%rsp) 1231 leaq 2(%r8),%r9 1232 .byte 102,15,56,220,240 1233 .byte 102,15,56,220,248 1234 .byte 102,68,15,56,220,192 1235 .byte 102,68,15,56,220,200 1236 movups 64-128(%rcx),%xmm0 1237 bswapl %r9d 1238 .byte 102,15,56,220,209 1239 .byte 102,15,56,220,217 1240 xorl %r11d,%r9d 1241 .byte 0x66,0x90 1242 .byte 102,15,56,220,225 1243 .byte 102,15,56,220,233 1244 movl %r9d,32+12(%rsp) 1245 leaq 3(%r8),%r9 1246 .byte 102,15,56,220,241 1247 .byte 102,15,56,220,249 1248 .byte 102,68,15,56,220,193 1249 .byte 102,68,15,56,220,201 1250 movups 80-128(%rcx),%xmm1 1251 bswapl %r9d 1252 .byte 102,15,56,220,208 1253 .byte 102,15,56,220,216 1254 xorl %r11d,%r9d 1255 .byte 0x66,0x90 1256 .byte 102,15,56,220,224 1257 .byte 102,15,56,220,232 1258 movl %r9d,48+12(%rsp) 1259 leaq 4(%r8),%r9 1260 .byte 102,15,56,220,240 1261 .byte 102,15,56,220,248 1262 .byte 102,68,15,56,220,192 1263 .byte 102,68,15,56,220,200 1264 movups 96-128(%rcx),%xmm0 1265 bswapl %r9d 1266 .byte 102,15,56,220,209 1267 .byte 102,15,56,220,217 1268 xorl %r11d,%r9d 1269 .byte 0x66,0x90 1270 .byte 102,15,56,220,225 1271 .byte 102,15,56,220,233 1272 movl %r9d,64+12(%rsp) 1273 leaq 
5(%r8),%r9 1274 .byte 102,15,56,220,241 1275 .byte 102,15,56,220,249 1276 .byte 102,68,15,56,220,193 1277 .byte 102,68,15,56,220,201 1278 movups 112-128(%rcx),%xmm1 1279 bswapl %r9d 1280 .byte 102,15,56,220,208 1281 .byte 102,15,56,220,216 1282 xorl %r11d,%r9d 1283 .byte 0x66,0x90 1284 .byte 102,15,56,220,224 1285 .byte 102,15,56,220,232 1286 movl %r9d,80+12(%rsp) 1287 leaq 6(%r8),%r9 1288 .byte 102,15,56,220,240 1289 .byte 102,15,56,220,248 1290 .byte 102,68,15,56,220,192 1291 .byte 102,68,15,56,220,200 1292 movups 128-128(%rcx),%xmm0 1293 bswapl %r9d 1294 .byte 102,15,56,220,209 1295 .byte 102,15,56,220,217 1296 xorl %r11d,%r9d 1297 .byte 0x66,0x90 1298 .byte 102,15,56,220,225 1299 .byte 102,15,56,220,233 1300 movl %r9d,96+12(%rsp) 1301 leaq 7(%r8),%r9 1302 .byte 102,15,56,220,241 1303 .byte 102,15,56,220,249 1304 .byte 102,68,15,56,220,193 1305 .byte 102,68,15,56,220,201 1306 movups 144-128(%rcx),%xmm1 1307 bswapl %r9d 1308 .byte 102,15,56,220,208 1309 .byte 102,15,56,220,216 1310 .byte 102,15,56,220,224 1311 xorl %r11d,%r9d 1312 movdqu 0(%rdi),%xmm10 1313 .byte 102,15,56,220,232 1314 movl %r9d,112+12(%rsp) 1315 cmpl $11,%eax 1316 .byte 102,15,56,220,240 1317 .byte 102,15,56,220,248 1318 .byte 102,68,15,56,220,192 1319 .byte 102,68,15,56,220,200 1320 movups 160-128(%rcx),%xmm0 1321 1322 jb .Lctr32_enc_done 1323 1324 .byte 102,15,56,220,209 1325 .byte 102,15,56,220,217 1326 .byte 102,15,56,220,225 1327 .byte 102,15,56,220,233 1328 .byte 102,15,56,220,241 1329 .byte 102,15,56,220,249 1330 .byte 102,68,15,56,220,193 1331 .byte 102,68,15,56,220,201 1332 movups 176-128(%rcx),%xmm1 1333 1334 .byte 102,15,56,220,208 1335 .byte 102,15,56,220,216 1336 .byte 102,15,56,220,224 1337 .byte 102,15,56,220,232 1338 .byte 102,15,56,220,240 1339 .byte 102,15,56,220,248 1340 .byte 102,68,15,56,220,192 1341 .byte 102,68,15,56,220,200 1342 movups 192-128(%rcx),%xmm0 1343 je .Lctr32_enc_done 1344 1345 .byte 102,15,56,220,209 1346 .byte 102,15,56,220,217 1347 .byte 102,15,56,220,225 
1348 .byte 102,15,56,220,233 1349 .byte 102,15,56,220,241 1350 .byte 102,15,56,220,249 1351 .byte 102,68,15,56,220,193 1352 .byte 102,68,15,56,220,201 1353 movups 208-128(%rcx),%xmm1 1354 1355 .byte 102,15,56,220,208 1356 .byte 102,15,56,220,216 1357 .byte 102,15,56,220,224 1358 .byte 102,15,56,220,232 1359 .byte 102,15,56,220,240 1360 .byte 102,15,56,220,248 1361 .byte 102,68,15,56,220,192 1362 .byte 102,68,15,56,220,200 1363 movups 224-128(%rcx),%xmm0 1364 jmp .Lctr32_enc_done 1365 1366 .align 16 1367 .Lctr32_enc_done: 1368 movdqu 16(%rdi),%xmm11 1369 pxor %xmm0,%xmm10 1370 movdqu 32(%rdi),%xmm12 1371 pxor %xmm0,%xmm11 1372 movdqu 48(%rdi),%xmm13 1373 pxor %xmm0,%xmm12 1374 movdqu 64(%rdi),%xmm14 1375 pxor %xmm0,%xmm13 1376 movdqu 80(%rdi),%xmm15 1377 pxor %xmm0,%xmm14 1378 pxor %xmm0,%xmm15 1379 .byte 102,15,56,220,209 1380 .byte 102,15,56,220,217 1381 .byte 102,15,56,220,225 1382 .byte 102,15,56,220,233 1383 .byte 102,15,56,220,241 1384 .byte 102,15,56,220,249 1385 .byte 102,68,15,56,220,193 1386 .byte 102,68,15,56,220,201 1387 movdqu 96(%rdi),%xmm1 1388 leaq 128(%rdi),%rdi 1389 1390 .byte 102,65,15,56,221,210 1391 pxor %xmm0,%xmm1 1392 movdqu 112-128(%rdi),%xmm10 1393 .byte 102,65,15,56,221,219 1394 pxor %xmm0,%xmm10 1395 movdqa 0(%rsp),%xmm11 1396 .byte 102,65,15,56,221,228 1397 .byte 102,65,15,56,221,237 1398 movdqa 16(%rsp),%xmm12 1399 movdqa 32(%rsp),%xmm13 1400 .byte 102,65,15,56,221,246 1401 .byte 102,65,15,56,221,255 1402 movdqa 48(%rsp),%xmm14 1403 movdqa 64(%rsp),%xmm15 1404 .byte 102,68,15,56,221,193 1405 movdqa 80(%rsp),%xmm0 1406 movups 16-128(%rcx),%xmm1 1407 .byte 102,69,15,56,221,202 1408 1409 movups %xmm2,(%rsi) 1410 movdqa %xmm11,%xmm2 1411 movups %xmm3,16(%rsi) 1412 movdqa %xmm12,%xmm3 1413 movups %xmm4,32(%rsi) 1414 movdqa %xmm13,%xmm4 1415 movups %xmm5,48(%rsi) 1416 movdqa %xmm14,%xmm5 1417 movups %xmm6,64(%rsi) 1418 movdqa %xmm15,%xmm6 1419 movups %xmm7,80(%rsi) 1420 movdqa %xmm0,%xmm7 1421 movups %xmm8,96(%rsi) 1422 movups %xmm9,112(%rsi) 
1423 leaq 128(%rsi),%rsi 1424 1425 subq $8,%rdx 1426 jnc .Lctr32_loop8 1427 1428 addq $8,%rdx 1429 jz .Lctr32_done 1430 leaq -128(%rcx),%rcx 1431 1432 .Lctr32_tail: 1433 1434 1435 leaq 16(%rcx),%rcx 1436 cmpq $4,%rdx 1437 jb .Lctr32_loop3 1438 je .Lctr32_loop4 1439 1440 1441 shll $4,%eax 1442 movdqa 96(%rsp),%xmm8 1443 pxor %xmm9,%xmm9 1444 1445 movups 16(%rcx),%xmm0 1446 .byte 102,15,56,220,209 1447 .byte 102,15,56,220,217 1448 leaq 32-16(%rcx,%rax,1),%rcx 1449 negq %rax 1450 .byte 102,15,56,220,225 1451 addq $16,%rax 1452 movups (%rdi),%xmm10 1453 .byte 102,15,56,220,233 1454 .byte 102,15,56,220,241 1455 movups 16(%rdi),%xmm11 1456 movups 32(%rdi),%xmm12 1457 .byte 102,15,56,220,249 1458 .byte 102,68,15,56,220,193 1459 1460 call .Lenc_loop8_enter 1461 1462 movdqu 48(%rdi),%xmm13 1463 pxor %xmm10,%xmm2 1464 movdqu 64(%rdi),%xmm10 1465 pxor %xmm11,%xmm3 1466 movdqu %xmm2,(%rsi) 1467 pxor %xmm12,%xmm4 1468 movdqu %xmm3,16(%rsi) 1469 pxor %xmm13,%xmm5 1470 movdqu %xmm4,32(%rsi) 1471 pxor %xmm10,%xmm6 1472 movdqu %xmm5,48(%rsi) 1473 movdqu %xmm6,64(%rsi) 1474 cmpq $6,%rdx 1475 jb .Lctr32_done 1476 1477 movups 80(%rdi),%xmm11 1478 xorps %xmm11,%xmm7 1479 movups %xmm7,80(%rsi) 1480 je .Lctr32_done 1481 1482 movups 96(%rdi),%xmm12 1483 xorps %xmm12,%xmm8 1484 movups %xmm8,96(%rsi) 1485 jmp .Lctr32_done 1486 1487 .align 32 1488 .Lctr32_loop4: 1489 .byte 102,15,56,220,209 1490 leaq 16(%rcx),%rcx 1491 decl %eax 1492 .byte 102,15,56,220,217 1493 .byte 102,15,56,220,225 1494 .byte 102,15,56,220,233 1495 movups (%rcx),%xmm1 1496 jnz .Lctr32_loop4 1497 .byte 102,15,56,221,209 1498 .byte 102,15,56,221,217 1499 movups (%rdi),%xmm10 1500 movups 16(%rdi),%xmm11 1501 .byte 102,15,56,221,225 1502 .byte 102,15,56,221,233 1503 movups 32(%rdi),%xmm12 1504 movups 48(%rdi),%xmm13 1505 1506 xorps %xmm10,%xmm2 1507 movups %xmm2,(%rsi) 1508 xorps %xmm11,%xmm3 1509 movups %xmm3,16(%rsi) 1510 pxor %xmm12,%xmm4 1511 movdqu %xmm4,32(%rsi) 1512 pxor %xmm13,%xmm5 1513 movdqu %xmm5,48(%rsi) 1514 
	jmp	.Lctr32_done

# 3-or-fewer-blocks tail path of aesni_ctr32_encrypt_blocks:
# run the remaining counter blocks through the round loop, then XOR with input.
.align	32
.Lctr32_loop3:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop3
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
.byte	102,15,56,221,217	# aesenclast %xmm1,%xmm3
.byte	102,15,56,221,225	# aesenclast %xmm1,%xmm4

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2		# ciphertext = E(counter) ^ plaintext
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	.Lctr32_done
	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	.Lctr32_done
	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

.Lctr32_done:
	# Scrub all XMM registers and the stack scratch area so no key/counter
	# material is left behind, then restore the caller's stack frame.
	xorps	%xmm0,%xmm0
	xorl	%r11d,%r11d
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lctr32_epilogue:
.byte	0xf3,0xc3		# rep ret (branch-predictor-friendly return)
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks

#-----------------------------------------------------------------------
# void aesni_xts_encrypt(const void *in,  void *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const uint8_t iv[16])
# ABI:  SysV AMD64 — rdi=in, rsi=out, rdx=len (bytes), rcx=key1 (data key
#       schedule), r8=key2 (tweak key schedule), r9=iv (initial tweak).
#       (argument roles presumed from the OpenSSL perlasm original —
#       TODO(review): confirm against aesni-x86_64.pl prototype)
# The IV is first encrypted under key2 to form tweak T; subsequent tweaks
# are T*x in GF(2^128) via the psrad/pand/paddq/.Lxts_magic sequence.
# Bulk path handles 96 bytes (6 blocks) per iteration; remainders fall to
# the _short paths; non-multiple-of-16 lengths use ciphertext stealing.
#-----------------------------------------------------------------------
.globl	aesni_xts_encrypt
.hidden aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp		# 112 bytes of scratch for saved tweaks
	andq	$-16,%rsp		# 16-byte align for movaps spills
	leaq	-8(%rax),%rbp		# rbp = original frame for epilogue restore
	movups	(%r9),%xmm2		# load IV
	movl	240(%r8),%eax		# round count of tweak key (240 = rounds offset)
	movl	240(%rcx),%r10d		# round count of data key
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2		# whiten IV with round-0 key
.Loop_enc1_8:				# single-block loop: tweak = E_key2(IV)
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_8
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		# r11 = data key (rcx gets repointed below)
	movl	%r10d,%eax
	shll	$4,%r10d		# r10 = rounds*16 = key-schedule byte length
	movq	%rdx,%r9		# r9 = original length (for stealing tail)
	andq	$-16,%rdx		# rdx = whole-block byte count

	movups	16(%rcx,%r10,1),%xmm1	# last round key

	# Precompute tweaks T..T*x^4 into xmm10..xmm13, next tweak in xmm15.
	# Each step is a GF(2^128) multiply by x: arithmetic-shift the sign
	# bits down (psrad), mask with .Lxts_magic (the 0x87 reduction
	# constant), double (paddq) and conditionally XOR in the reduction.
	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$95,%xmm2,%xmm9
	pxor	%xmm0,%xmm1		# fold round0^lastround keys (for aesenclast-with-tweak trick)
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)		# save (round0^last) key for the bulk loop

	subq	$96,%rdx
	jc	.Lxts_enc_short		# fewer than 6 blocks

	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx	# rcx = end of key schedule
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10		# r10 = round-count-dependent loop bias
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_enc_grandloop

# Bulk path: 6 blocks per iteration, AES rounds interleaved with the
# computation of the next 6 tweaks (hand-scheduled for throughput).
.align	32
.Lxts_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2		# pre-whiten: PT ^ tweak (^round0 already folded in)
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	# Save tweak^lastkey values to the stack; they are re-applied by the
	# memory-operand aesenclast at the end of the round loop.
	pxor	%xmm9,%xmm10
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$95,%xmm15,%xmm9
	jmp	.Lxts_enc_loop6
.align	32
.Lxts_enc_loop6:			# two AES rounds per iteration, eax counts down
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2..%xmm7
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2..%xmm7
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_enc_loop6

	# Final rounds, interleaved with generating the next 6 tweaks
	# (same GF multiply-by-x recipe as above, one tweak per stanza).
	movdqa	(%r8),%xmm8		# reload .Lxts_magic
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,220,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,220,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,220,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,220,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,220,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%r11),%xmm1

	# aesenclast with memory operand: final round key is the saved
	# tweak^lastkey at 0..80(%rsp), folding the post-whitening in free.
	pxor	%xmm15,%xmm14
.byte	102,15,56,221,84,36,0	# aesenclast 0(%rsp),%xmm2
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,92,36,16	# aesenclast 16(%rsp),%xmm3
.byte	102,15,56,221,100,36,32	# aesenclast 32(%rsp),%xmm4
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,221,108,36,48	# aesenclast 48(%rsp),%xmm5
.byte	102,15,56,221,116,36,64	# aesenclast 64(%rsp),%xmm6
.byte	102,15,56,221,124,36,80	# aesenclast 80(%rsp),%xmm7
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_enc_grandloop

	movl	$16+96,%eax		# recover round count from loop bias
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

.Lxts_enc_short:
	# 1..5 remaining whole blocks; un-fold round0 key from the tweaks
	# as each branch is selected, then dispatch on block count.
	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	addq	$96,%rdx
	jz	.Lxts_enc_done

	pxor	%xmm0,%xmm11
	cmpq	$32,%rdx
	jb	.Lxts_enc_one
	pxor	%xmm0,%xmm12
	je	.Lxts_enc_two

	pxor	%xmm0,%xmm13
	cmpq	$64,%rdx
	jb	.Lxts_enc_three
	pxor	%xmm0,%xmm14
	je	.Lxts_enc_four

	# Five blocks: use the 6-wide primitive with a zeroed 6th block.
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm7

	call	_aesni_encrypt6

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10		# advance tweak for possible stealing tail
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_9:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_9
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10		# advance tweak past the 3 consumed blocks
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_done:
	andq	$15,%r9			# r9 = leftover byte count (0..15)
	jz	.Lxts_enc_ret
	movq	%r9,%rdx

# Ciphertext stealing (XTS-AES): copy leftover plaintext bytes over the
# head of the last full ciphertext block, moving the displaced ciphertext
# bytes into the short final output, then re-encrypt that mixed block.
.Lxts_enc_steal:
	movzbl	(%rdi),%eax
	movzbl	-16(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_enc_steal

	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_10:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_10
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

.Lxts_enc_ret:
	# Scrub registers and stack scratch (key/tweak material), restore rsp.
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lxts_enc_epilogue:
.byte	0xf3,0xc3		# rep ret
.size	aesni_xts_encrypt,.-aesni_xts_encrypt

#-----------------------------------------------------------------------
# void aesni_xts_decrypt(const void *in, void *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const uint8_t iv[16])
# Mirror image of aesni_xts_encrypt: same register roles and tweak
# schedule, but AES decrypt rounds (aesdec/aesdeclast, opcodes 222/223)
# and key1 is a decryption key schedule. Unlike encryption, a trailing
# partial block forces the last *two* tweaks to be handled out of order
# (the short block is ciphertext-stolen against the second-to-last
# tweak), hence the extra setnz/shlq length adjustment below and the
# xmm11 tweak tracking in the tail paths.
#-----------------------------------------------------------------------
.globl	aesni_xts_decrypt
.hidden aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp
	movups	(%r9),%xmm2		# IV
	movl	240(%r8),%eax		# tweak-key rounds
	movl	240(%rcx),%r10d		# data-key rounds
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
.Loop_enc1_11:				# tweak = E_key2(IV) — encryption even for decrypt
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_11
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorl	%eax,%eax		# if len not multiple of 16, hold back one
	testq	$15,%rdx		# extra block for the stealing tail
	setnz	%al
	shlq	$4,%rax
	subq	%rax,%rdx

	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

	# Tweak precomputation: identical GF(2^128) multiply-by-x chain as
	# the encrypt side (tweaks in xmm10..xmm14, running tweak in xmm15).
	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$95,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)

	subq	$96,%rdx
	jc	.Lxts_dec_short

	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_dec_grandloop

# Bulk path: 6 blocks per iteration; aesdec rounds interleaved with
# next-tweak generation (structure mirrors .Lxts_enc_grandloop).
.align	32
.Lxts_dec_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$95,%xmm15,%xmm9
	jmp	.Lxts_dec_loop6
.align	32
.Lxts_dec_loop6:			# two aesdec rounds per iteration
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_dec_loop6

	# Final rounds interleaved with next-6-tweak generation.
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,222,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,222,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,222,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,222,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,222,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	16(%r11),%xmm1

	# aesdeclast with the saved tweak^lastkey values on the stack.
	pxor	%xmm15,%xmm14
.byte	102,15,56,223,84,36,0	# aesdeclast 0(%rsp),%xmm2
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,223,92,36,16	# aesdeclast 16(%rsp),%xmm3
.byte	102,15,56,223,100,36,32	# aesdeclast 32(%rsp),%xmm4
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,223,108,36,48	# aesdeclast 48(%rsp),%xmm5
.byte	102,15,56,223,116,36,64	# aesdeclast 64(%rsp),%xmm6
.byte	102,15,56,223,124,36,80	# aesdeclast 80(%rsp),%xmm7
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_dec_grandloop

	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

.Lxts_dec_short:
	# 1..5 remaining whole blocks; note both xmm10 AND xmm11 are
	# un-whitened up front (xmm11 may be needed for the stealing tail).
	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	pxor	%xmm0,%xmm11
	addq	$96,%rdx
	jz	.Lxts_dec_done

	pxor	%xmm0,%xmm12
	cmpq	$32,%rdx
	jb	.Lxts_dec_one
	pxor	%xmm0,%xmm13
	je	.Lxts_dec_two

	pxor	%xmm0,%xmm14
	cmpq	$64,%rdx
	jb	.Lxts_dec_three
	je	.Lxts_dec_four

	# Five blocks via the 6-wide primitive.
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6

	call	_aesni_decrypt6

	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm14
	movdqu	%xmm5,48(%rsi)
	pcmpgtd	%xmm15,%xmm14		# carry mask for next tweak doubling
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	pshufd	$19,%xmm14,%xmm11
	andq	$15,%r9
	jz	.Lxts_dec_ret

	movdqa	%xmm15,%xmm10		# compute the extra tweak needed for stealing
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm11
	pxor	%xmm15,%xmm11
	jmp	.Lxts_dec_done2

.align	16
.Lxts_dec_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_12:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_12
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10		# shift tweak pipeline down by one
	movups	%xmm2,(%rsi)
	movdqa	%xmm12,%xmm11
	leaq	16(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_decrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm13,%xmm11
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_decrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm14,%xmm11
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_decrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqa	%xmm15,%xmm11
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_dec_done

.align	16
.Lxts_dec_done:
	andq	$15,%r9			# leftover bytes → ciphertext stealing
	jz	.Lxts_dec_ret
.Lxts_dec_done2:
	movq	%r9,%rdx
	movq	%r11,%rcx
	movl	%r10d,%eax

	# Decrypt the held-back full block with the LATER tweak (xmm11)…
	movups	(%rdi),%xmm2
	xorps	%xmm11,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_13:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_13
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm11,%xmm2
	movups	%xmm2,(%rsi)

# …swap in the stolen ciphertext bytes from the short final block…
.Lxts_dec_steal:
	movzbl	16(%rdi),%eax
	movzbl	(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,(%rsi)
	movb	%cl,16(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_dec_steal

	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	# …and decrypt the reassembled block with the EARLIER tweak (xmm10).
	movups	(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_14:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_14
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)

.Lxts_dec_ret:
	# Scrub registers and stack scratch, restore caller's stack frame.
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lxts_dec_epilogue:
.byte	0xf3,0xc3		# rep ret
.size	aesni_xts_decrypt,.-aesni_xts_decrypt
.globl	aesni_cbc_encrypt
.hidden aesni_cbc_encrypt
.type	aesni_cbc_encrypt,@function
2547 .align 16 2548 aesni_cbc_encrypt: 2549 testq %rdx,%rdx 2550 jz .Lcbc_ret 2551 2552 movl 240(%rcx),%r10d 2553 movq %rcx,%r11 2554 testl %r9d,%r9d 2555 jz .Lcbc_decrypt 2556 2557 movups (%r8),%xmm2 2558 movl %r10d,%eax 2559 cmpq $16,%rdx 2560 jb .Lcbc_enc_tail 2561 subq $16,%rdx 2562 jmp .Lcbc_enc_loop 2563 .align 16 2564 .Lcbc_enc_loop: 2565 movups (%rdi),%xmm3 2566 leaq 16(%rdi),%rdi 2567 2568 movups (%rcx),%xmm0 2569 movups 16(%rcx),%xmm1 2570 xorps %xmm0,%xmm3 2571 leaq 32(%rcx),%rcx 2572 xorps %xmm3,%xmm2 2573 .Loop_enc1_15: 2574 .byte 102,15,56,220,209 2575 decl %eax 2576 movups (%rcx),%xmm1 2577 leaq 16(%rcx),%rcx 2578 jnz .Loop_enc1_15 2579 .byte 102,15,56,221,209 2580 movl %r10d,%eax 2581 movq %r11,%rcx 2582 movups %xmm2,0(%rsi) 2583 leaq 16(%rsi),%rsi 2584 subq $16,%rdx 2585 jnc .Lcbc_enc_loop 2586 addq $16,%rdx 2587 jnz .Lcbc_enc_tail 2588 pxor %xmm0,%xmm0 2589 pxor %xmm1,%xmm1 2590 movups %xmm2,(%r8) 2591 pxor %xmm2,%xmm2 2592 pxor %xmm3,%xmm3 2593 jmp .Lcbc_ret 2594 2595 .Lcbc_enc_tail: 2596 movq %rdx,%rcx 2597 xchgq %rdi,%rsi 2598 .long 0x9066A4F3 2599 movl $16,%ecx 2600 subq %rdx,%rcx 2601 xorl %eax,%eax 2602 .long 0x9066AAF3 2603 leaq -16(%rdi),%rdi 2604 movl %r10d,%eax 2605 movq %rdi,%rsi 2606 movq %r11,%rcx 2607 xorq %rdx,%rdx 2608 jmp .Lcbc_enc_loop 2609 2610 .align 16 2611 .Lcbc_decrypt: 2612 cmpq $16,%rdx 2613 jne .Lcbc_decrypt_bulk 2614 2615 2616 2617 movdqu (%rdi),%xmm2 2618 movdqu (%r8),%xmm3 2619 movdqa %xmm2,%xmm4 2620 movups (%rcx),%xmm0 2621 movups 16(%rcx),%xmm1 2622 leaq 32(%rcx),%rcx 2623 xorps %xmm0,%xmm2 2624 .Loop_dec1_16: 2625 .byte 102,15,56,222,209 2626 decl %r10d 2627 movups (%rcx),%xmm1 2628 leaq 16(%rcx),%rcx 2629 jnz .Loop_dec1_16 2630 .byte 102,15,56,223,209 2631 pxor %xmm0,%xmm0 2632 pxor %xmm1,%xmm1 2633 movdqu %xmm4,(%r8) 2634 xorps %xmm3,%xmm2 2635 pxor %xmm3,%xmm3 2636 movups %xmm2,(%rsi) 2637 pxor %xmm2,%xmm2 2638 jmp .Lcbc_ret 2639 .align 16 2640 .Lcbc_decrypt_bulk: 2641 leaq (%rsp),%rax 2642 pushq %rbp 2643 subq 
$16,%rsp 2644 andq $-16,%rsp 2645 leaq -8(%rax),%rbp 2646 movups (%r8),%xmm10 2647 movl %r10d,%eax 2648 cmpq $80,%rdx 2649 jbe .Lcbc_dec_tail 2650 2651 movups (%rcx),%xmm0 2652 movdqu 0(%rdi),%xmm2 2653 movdqu 16(%rdi),%xmm3 2654 movdqa %xmm2,%xmm11 2655 movdqu 32(%rdi),%xmm4 2656 movdqa %xmm3,%xmm12 2657 movdqu 48(%rdi),%xmm5 2658 movdqa %xmm4,%xmm13 2659 movdqu 64(%rdi),%xmm6 2660 movdqa %xmm5,%xmm14 2661 movdqu 80(%rdi),%xmm7 2662 movdqa %xmm6,%xmm15 2663 movl OPENSSL_ia32cap_P+4(%rip),%r9d 2664 cmpq $112,%rdx 2665 jbe .Lcbc_dec_six_or_seven 2666 2667 andl $71303168,%r9d 2668 subq $80,%rdx 2669 cmpl $4194304,%r9d 2670 je .Lcbc_dec_loop6_enter 2671 subq $32,%rdx 2672 leaq 112(%rcx),%rcx 2673 jmp .Lcbc_dec_loop8_enter 2674 .align 16 2675 .Lcbc_dec_loop8: 2676 movups %xmm9,(%rsi) 2677 leaq 16(%rsi),%rsi 2678 .Lcbc_dec_loop8_enter: 2679 movdqu 96(%rdi),%xmm8 2680 pxor %xmm0,%xmm2 2681 movdqu 112(%rdi),%xmm9 2682 pxor %xmm0,%xmm3 2683 movups 16-112(%rcx),%xmm1 2684 pxor %xmm0,%xmm4 2685 xorq %r11,%r11 2686 cmpq $112,%rdx 2687 pxor %xmm0,%xmm5 2688 pxor %xmm0,%xmm6 2689 pxor %xmm0,%xmm7 2690 pxor %xmm0,%xmm8 2691 2692 .byte 102,15,56,222,209 2693 pxor %xmm0,%xmm9 2694 movups 32-112(%rcx),%xmm0 2695 .byte 102,15,56,222,217 2696 .byte 102,15,56,222,225 2697 .byte 102,15,56,222,233 2698 .byte 102,15,56,222,241 2699 .byte 102,15,56,222,249 2700 .byte 102,68,15,56,222,193 2701 setnc %r11b 2702 shlq $7,%r11 2703 .byte 102,68,15,56,222,201 2704 addq %rdi,%r11 2705 movups 48-112(%rcx),%xmm1 2706 .byte 102,15,56,222,208 2707 .byte 102,15,56,222,216 2708 .byte 102,15,56,222,224 2709 .byte 102,15,56,222,232 2710 .byte 102,15,56,222,240 2711 .byte 102,15,56,222,248 2712 .byte 102,68,15,56,222,192 2713 .byte 102,68,15,56,222,200 2714 movups 64-112(%rcx),%xmm0 2715 nop 2716 .byte 102,15,56,222,209 2717 .byte 102,15,56,222,217 2718 .byte 102,15,56,222,225 2719 .byte 102,15,56,222,233 2720 .byte 102,15,56,222,241 2721 .byte 102,15,56,222,249 2722 .byte 102,68,15,56,222,193 2723 .byte 
102,68,15,56,222,201 2724 movups 80-112(%rcx),%xmm1 2725 nop 2726 .byte 102,15,56,222,208 2727 .byte 102,15,56,222,216 2728 .byte 102,15,56,222,224 2729 .byte 102,15,56,222,232 2730 .byte 102,15,56,222,240 2731 .byte 102,15,56,222,248 2732 .byte 102,68,15,56,222,192 2733 .byte 102,68,15,56,222,200 2734 movups 96-112(%rcx),%xmm0 2735 nop 2736 .byte 102,15,56,222,209 2737 .byte 102,15,56,222,217 2738 .byte 102,15,56,222,225 2739 .byte 102,15,56,222,233 2740 .byte 102,15,56,222,241 2741 .byte 102,15,56,222,249 2742 .byte 102,68,15,56,222,193 2743 .byte 102,68,15,56,222,201 2744 movups 112-112(%rcx),%xmm1 2745 nop 2746 .byte 102,15,56,222,208 2747 .byte 102,15,56,222,216 2748 .byte 102,15,56,222,224 2749 .byte 102,15,56,222,232 2750 .byte 102,15,56,222,240 2751 .byte 102,15,56,222,248 2752 .byte 102,68,15,56,222,192 2753 .byte 102,68,15,56,222,200 2754 movups 128-112(%rcx),%xmm0 2755 nop 2756 .byte 102,15,56,222,209 2757 .byte 102,15,56,222,217 2758 .byte 102,15,56,222,225 2759 .byte 102,15,56,222,233 2760 .byte 102,15,56,222,241 2761 .byte 102,15,56,222,249 2762 .byte 102,68,15,56,222,193 2763 .byte 102,68,15,56,222,201 2764 movups 144-112(%rcx),%xmm1 2765 cmpl $11,%eax 2766 .byte 102,15,56,222,208 2767 .byte 102,15,56,222,216 2768 .byte 102,15,56,222,224 2769 .byte 102,15,56,222,232 2770 .byte 102,15,56,222,240 2771 .byte 102,15,56,222,248 2772 .byte 102,68,15,56,222,192 2773 .byte 102,68,15,56,222,200 2774 movups 160-112(%rcx),%xmm0 2775 jb .Lcbc_dec_done 2776 .byte 102,15,56,222,209 2777 .byte 102,15,56,222,217 2778 .byte 102,15,56,222,225 2779 .byte 102,15,56,222,233 2780 .byte 102,15,56,222,241 2781 .byte 102,15,56,222,249 2782 .byte 102,68,15,56,222,193 2783 .byte 102,68,15,56,222,201 2784 movups 176-112(%rcx),%xmm1 2785 nop 2786 .byte 102,15,56,222,208 2787 .byte 102,15,56,222,216 2788 .byte 102,15,56,222,224 2789 .byte 102,15,56,222,232 2790 .byte 102,15,56,222,240 2791 .byte 102,15,56,222,248 2792 .byte 102,68,15,56,222,192 2793 .byte 102,68,15,56,222,200 
2794 movups 192-112(%rcx),%xmm0 2795 je .Lcbc_dec_done 2796 .byte 102,15,56,222,209 2797 .byte 102,15,56,222,217 2798 .byte 102,15,56,222,225 2799 .byte 102,15,56,222,233 2800 .byte 102,15,56,222,241 2801 .byte 102,15,56,222,249 2802 .byte 102,68,15,56,222,193 2803 .byte 102,68,15,56,222,201 2804 movups 208-112(%rcx),%xmm1 2805 nop 2806 .byte 102,15,56,222,208 2807 .byte 102,15,56,222,216 2808 .byte 102,15,56,222,224 2809 .byte 102,15,56,222,232 2810 .byte 102,15,56,222,240 2811 .byte 102,15,56,222,248 2812 .byte 102,68,15,56,222,192 2813 .byte 102,68,15,56,222,200 2814 movups 224-112(%rcx),%xmm0 2815 jmp .Lcbc_dec_done 2816 .align 16 2817 .Lcbc_dec_done: 2818 .byte 102,15,56,222,209 2819 .byte 102,15,56,222,217 2820 pxor %xmm0,%xmm10 2821 pxor %xmm0,%xmm11 2822 .byte 102,15,56,222,225 2823 .byte 102,15,56,222,233 2824 pxor %xmm0,%xmm12 2825 pxor %xmm0,%xmm13 2826 .byte 102,15,56,222,241 2827 .byte 102,15,56,222,249 2828 pxor %xmm0,%xmm14 2829 pxor %xmm0,%xmm15 2830 .byte 102,68,15,56,222,193 2831 .byte 102,68,15,56,222,201 2832 movdqu 80(%rdi),%xmm1 2833 2834 .byte 102,65,15,56,223,210 2835 movdqu 96(%rdi),%xmm10 2836 pxor %xmm0,%xmm1 2837 .byte 102,65,15,56,223,219 2838 pxor %xmm0,%xmm10 2839 movdqu 112(%rdi),%xmm0 2840 .byte 102,65,15,56,223,228 2841 leaq 128(%rdi),%rdi 2842 movdqu 0(%r11),%xmm11 2843 .byte 102,65,15,56,223,237 2844 .byte 102,65,15,56,223,246 2845 movdqu 16(%r11),%xmm12 2846 movdqu 32(%r11),%xmm13 2847 .byte 102,65,15,56,223,255 2848 .byte 102,68,15,56,223,193 2849 movdqu 48(%r11),%xmm14 2850 movdqu 64(%r11),%xmm15 2851 .byte 102,69,15,56,223,202 2852 movdqa %xmm0,%xmm10 2853 movdqu 80(%r11),%xmm1 2854 movups -112(%rcx),%xmm0 2855 2856 movups %xmm2,(%rsi) 2857 movdqa %xmm11,%xmm2 2858 movups %xmm3,16(%rsi) 2859 movdqa %xmm12,%xmm3 2860 movups %xmm4,32(%rsi) 2861 movdqa %xmm13,%xmm4 2862 movups %xmm5,48(%rsi) 2863 movdqa %xmm14,%xmm5 2864 movups %xmm6,64(%rsi) 2865 movdqa %xmm15,%xmm6 2866 movups %xmm7,80(%rsi) 2867 movdqa %xmm1,%xmm7 2868 movups 
%xmm8,96(%rsi) 2869 leaq 112(%rsi),%rsi 2870 2871 subq $128,%rdx 2872 ja .Lcbc_dec_loop8 2873 2874 movaps %xmm9,%xmm2 2875 leaq -112(%rcx),%rcx 2876 addq $112,%rdx 2877 jle .Lcbc_dec_clear_tail_collected 2878 movups %xmm9,(%rsi) 2879 leaq 16(%rsi),%rsi 2880 cmpq $80,%rdx 2881 jbe .Lcbc_dec_tail 2882 2883 movaps %xmm11,%xmm2 2884 .Lcbc_dec_six_or_seven: 2885 cmpq $96,%rdx 2886 ja .Lcbc_dec_seven 2887 2888 movaps %xmm7,%xmm8 2889 call _aesni_decrypt6 2890 pxor %xmm10,%xmm2 2891 movaps %xmm8,%xmm10 2892 pxor %xmm11,%xmm3 2893 movdqu %xmm2,(%rsi) 2894 pxor %xmm12,%xmm4 2895 movdqu %xmm3,16(%rsi) 2896 pxor %xmm3,%xmm3 2897 pxor %xmm13,%xmm5 2898 movdqu %xmm4,32(%rsi) 2899 pxor %xmm4,%xmm4 2900 pxor %xmm14,%xmm6 2901 movdqu %xmm5,48(%rsi) 2902 pxor %xmm5,%xmm5 2903 pxor %xmm15,%xmm7 2904 movdqu %xmm6,64(%rsi) 2905 pxor %xmm6,%xmm6 2906 leaq 80(%rsi),%rsi 2907 movdqa %xmm7,%xmm2 2908 pxor %xmm7,%xmm7 2909 jmp .Lcbc_dec_tail_collected 2910 2911 .align 16 2912 .Lcbc_dec_seven: 2913 movups 96(%rdi),%xmm8 2914 xorps %xmm9,%xmm9 2915 call _aesni_decrypt8 2916 movups 80(%rdi),%xmm9 2917 pxor %xmm10,%xmm2 2918 movups 96(%rdi),%xmm10 2919 pxor %xmm11,%xmm3 2920 movdqu %xmm2,(%rsi) 2921 pxor %xmm12,%xmm4 2922 movdqu %xmm3,16(%rsi) 2923 pxor %xmm3,%xmm3 2924 pxor %xmm13,%xmm5 2925 movdqu %xmm4,32(%rsi) 2926 pxor %xmm4,%xmm4 2927 pxor %xmm14,%xmm6 2928 movdqu %xmm5,48(%rsi) 2929 pxor %xmm5,%xmm5 2930 pxor %xmm15,%xmm7 2931 movdqu %xmm6,64(%rsi) 2932 pxor %xmm6,%xmm6 2933 pxor %xmm9,%xmm8 2934 movdqu %xmm7,80(%rsi) 2935 pxor %xmm7,%xmm7 2936 leaq 96(%rsi),%rsi 2937 movdqa %xmm8,%xmm2 2938 pxor %xmm8,%xmm8 2939 pxor %xmm9,%xmm9 2940 jmp .Lcbc_dec_tail_collected 2941 2942 .align 16 2943 .Lcbc_dec_loop6: 2944 movups %xmm7,(%rsi) 2945 leaq 16(%rsi),%rsi 2946 movdqu 0(%rdi),%xmm2 2947 movdqu 16(%rdi),%xmm3 2948 movdqa %xmm2,%xmm11 2949 movdqu 32(%rdi),%xmm4 2950 movdqa %xmm3,%xmm12 2951 movdqu 48(%rdi),%xmm5 2952 movdqa %xmm4,%xmm13 2953 movdqu 64(%rdi),%xmm6 2954 movdqa %xmm5,%xmm14 2955 
movdqu 80(%rdi),%xmm7 2956 movdqa %xmm6,%xmm15 2957 .Lcbc_dec_loop6_enter: 2958 leaq 96(%rdi),%rdi 2959 movdqa %xmm7,%xmm8 2960 2961 call _aesni_decrypt6 2962 2963 pxor %xmm10,%xmm2 2964 movdqa %xmm8,%xmm10 2965 pxor %xmm11,%xmm3 2966 movdqu %xmm2,(%rsi) 2967 pxor %xmm12,%xmm4 2968 movdqu %xmm3,16(%rsi) 2969 pxor %xmm13,%xmm5 2970 movdqu %xmm4,32(%rsi) 2971 pxor %xmm14,%xmm6 2972 movq %r11,%rcx 2973 movdqu %xmm5,48(%rsi) 2974 pxor %xmm15,%xmm7 2975 movl %r10d,%eax 2976 movdqu %xmm6,64(%rsi) 2977 leaq 80(%rsi),%rsi 2978 subq $96,%rdx 2979 ja .Lcbc_dec_loop6 2980 2981 movdqa %xmm7,%xmm2 2982 addq $80,%rdx 2983 jle .Lcbc_dec_clear_tail_collected 2984 movups %xmm7,(%rsi) 2985 leaq 16(%rsi),%rsi 2986 2987 .Lcbc_dec_tail: 2988 movups (%rdi),%xmm2 2989 subq $16,%rdx 2990 jbe .Lcbc_dec_one 2991 2992 movups 16(%rdi),%xmm3 2993 movaps %xmm2,%xmm11 2994 subq $16,%rdx 2995 jbe .Lcbc_dec_two 2996 2997 movups 32(%rdi),%xmm4 2998 movaps %xmm3,%xmm12 2999 subq $16,%rdx 3000 jbe .Lcbc_dec_three 3001 3002 movups 48(%rdi),%xmm5 3003 movaps %xmm4,%xmm13 3004 subq $16,%rdx 3005 jbe .Lcbc_dec_four 3006 3007 movups 64(%rdi),%xmm6 3008 movaps %xmm5,%xmm14 3009 movaps %xmm6,%xmm15 3010 xorps %xmm7,%xmm7 3011 call _aesni_decrypt6 3012 pxor %xmm10,%xmm2 3013 movaps %xmm15,%xmm10 3014 pxor %xmm11,%xmm3 3015 movdqu %xmm2,(%rsi) 3016 pxor %xmm12,%xmm4 3017 movdqu %xmm3,16(%rsi) 3018 pxor %xmm3,%xmm3 3019 pxor %xmm13,%xmm5 3020 movdqu %xmm4,32(%rsi) 3021 pxor %xmm4,%xmm4 3022 pxor %xmm14,%xmm6 3023 movdqu %xmm5,48(%rsi) 3024 pxor %xmm5,%xmm5 3025 leaq 64(%rsi),%rsi 3026 movdqa %xmm6,%xmm2 3027 pxor %xmm6,%xmm6 3028 pxor %xmm7,%xmm7 3029 subq $16,%rdx 3030 jmp .Lcbc_dec_tail_collected 3031 3032 .align 16 3033 .Lcbc_dec_one: 3034 movaps %xmm2,%xmm11 3035 movups (%rcx),%xmm0 3036 movups 16(%rcx),%xmm1 3037 leaq 32(%rcx),%rcx 3038 xorps %xmm0,%xmm2 3039 .Loop_dec1_17: 3040 .byte 102,15,56,222,209 3041 decl %eax 3042 movups (%rcx),%xmm1 3043 leaq 16(%rcx),%rcx 3044 jnz .Loop_dec1_17 3045 .byte 
102,15,56,223,209 3046 xorps %xmm10,%xmm2 3047 movaps %xmm11,%xmm10 3048 jmp .Lcbc_dec_tail_collected 3049 .align 16 3050 .Lcbc_dec_two: 3051 movaps %xmm3,%xmm12 3052 call _aesni_decrypt2 3053 pxor %xmm10,%xmm2 3054 movaps %xmm12,%xmm10 3055 pxor %xmm11,%xmm3 3056 movdqu %xmm2,(%rsi) 3057 movdqa %xmm3,%xmm2 3058 pxor %xmm3,%xmm3 3059 leaq 16(%rsi),%rsi 3060 jmp .Lcbc_dec_tail_collected 3061 .align 16 3062 .Lcbc_dec_three: 3063 movaps %xmm4,%xmm13 3064 call _aesni_decrypt3 3065 pxor %xmm10,%xmm2 3066 movaps %xmm13,%xmm10 3067 pxor %xmm11,%xmm3 3068 movdqu %xmm2,(%rsi) 3069 pxor %xmm12,%xmm4 3070 movdqu %xmm3,16(%rsi) 3071 pxor %xmm3,%xmm3 3072 movdqa %xmm4,%xmm2 3073 pxor %xmm4,%xmm4 3074 leaq 32(%rsi),%rsi 3075 jmp .Lcbc_dec_tail_collected 3076 .align 16 3077 .Lcbc_dec_four: 3078 movaps %xmm5,%xmm14 3079 call _aesni_decrypt4 3080 pxor %xmm10,%xmm2 3081 movaps %xmm14,%xmm10 3082 pxor %xmm11,%xmm3 3083 movdqu %xmm2,(%rsi) 3084 pxor %xmm12,%xmm4 3085 movdqu %xmm3,16(%rsi) 3086 pxor %xmm3,%xmm3 3087 pxor %xmm13,%xmm5 3088 movdqu %xmm4,32(%rsi) 3089 pxor %xmm4,%xmm4 3090 movdqa %xmm5,%xmm2 3091 pxor %xmm5,%xmm5 3092 leaq 48(%rsi),%rsi 3093 jmp .Lcbc_dec_tail_collected 3094 3095 .align 16 3096 .Lcbc_dec_clear_tail_collected: 3097 pxor %xmm3,%xmm3 3098 pxor %xmm4,%xmm4 3099 pxor %xmm5,%xmm5 3100 pxor %xmm6,%xmm6 3101 pxor %xmm7,%xmm7 3102 pxor %xmm8,%xmm8 3103 pxor %xmm9,%xmm9 3104 .Lcbc_dec_tail_collected: 3105 movups %xmm10,(%r8) 3106 andq $15,%rdx 3107 jnz .Lcbc_dec_tail_partial 3108 movups %xmm2,(%rsi) 3109 pxor %xmm2,%xmm2 3110 jmp .Lcbc_dec_ret 3111 .align 16 3112 .Lcbc_dec_tail_partial: 3113 movaps %xmm2,(%rsp) 3114 pxor %xmm2,%xmm2 3115 movq $16,%rcx 3116 movq %rsi,%rdi 3117 subq %rdx,%rcx 3118 leaq (%rsp),%rsi 3119 .long 0x9066A4F3 3120 movdqa %xmm2,(%rsp) 3121 3122 .Lcbc_dec_ret: 3123 xorps %xmm0,%xmm0 3124 pxor %xmm1,%xmm1 3125 leaq (%rbp),%rsp 3126 popq %rbp 3127 .Lcbc_ret: 3128 .byte 0xf3,0xc3 3129 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt 3130 .globl 
aesni_set_decrypt_key                   # operand of the ".globl" directive ending the previous chunk
.hidden aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)
 * SysV AMD64: rdi = userKey, esi = bits, rdx = key schedule.
 * Expands the encryption schedule, then converts it in place to the
 * decryption schedule (Equivalent Inverse Cipher): first/last round
 * keys swapped, inner round keys passed through AESIMC.
 * Returns 0 in eax on success, non-zero error from key expansion.
 */
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08               # subq $8,%rsp (fixed encoding)
call __aesni_set_encrypt_key            # build encryption schedule first
shll $4,%esi                            # esi = rounds*16 (byte offset of last round key)
testl %eax,%eax
jnz .Ldec_key_ret                       # propagate expansion error
leaq 16(%rdx,%rsi,1),%rdi               # rdi = one past last round key

movups (%rdx),%xmm0                     # swap first and last round keys
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi

/* Walk inward from both ends, swapping pairs of round keys and
   applying InvMixColumns to each, as the equivalent inverse cipher
   requires for all but the outermost keys. */
.Ldec_key_inverse:
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
.byte 102,15,56,219,201                 # aesimc %xmm1,%xmm1
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja .Ldec_key_inverse

movups (%rdx),%xmm0                     # middle round key: transform in place
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
pxor %xmm1,%xmm1                        # scrub key material from registers
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0
.Ldec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3                         # repz ret
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key

.globl aesni_set_encrypt_key
.hidden aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)
 * rdi = userKey, esi = bits (128/192/256), rdx = output schedule.
 * Returns 0 on success, -1 on NULL argument, -2 on bad key size.
 * For each key size there are two expansion strategies: the classic
 * AESKEYGENASSIST-based one and an "_alt" PSHUFB/AESENCLAST-based
 * one, chosen from CPUID capability bits cached in OPENSSL_ia32cap_P.
 * The round count (rounds-1 style value in esi) is stored in the
 * schedule after the last round key.
 */
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08               # subq $8,%rsp
movq $-1,%rax                           # default return: -1 (bad arguments)
testq %rdi,%rdi
jz .Lenc_key_ret                        # NULL user key
testq %rdx,%rdx
jz .Lenc_key_ret                        # NULL schedule

movl $268437504,%r10d                   # 0x10000800: capability bits of interest
movups (%rdi),%xmm0                     # xmm0 = first 16 key bytes
xorps %xmm4,%xmm4                       # xmm4 = 0, scratch for expansion helpers
andl OPENSSL_ia32cap_P+4(%rip),%r10d    # r10d = cached CPUID feature subset
leaq 16(%rdx),%rax                      # rax = &schedule[1]
cmpl $256,%esi
je .L14rounds
cmpl $192,%esi
je .L12rounds
cmpl $128,%esi
jne .Lbad_keybits

.L10rounds:                             # AES-128
movl $9,%esi                            # stored round-count value
cmpl $268435456,%r10d                   # 0x10000000 alone -> take _alt path
je .L10rounds_alt

movups %xmm0,(%rdx)                     # round key 0 = raw user key
.byte 102,15,58,223,200,1               # aeskeygenassist $0x1,%xmm0,%xmm1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2               # aeskeygenassist $0x2,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,4               # rcon doubles each round: 4,8,16,...
call .Lkey_expansion_128
.byte 102,15,58,223,200,8
call .Lkey_expansion_128
.byte 102,15,58,223,200,16
call .Lkey_expansion_128
.byte 102,15,58,223,200,32
call .Lkey_expansion_128
.byte 102,15,58,223,200,64
call .Lkey_expansion_128
.byte 102,15,58,223,200,128
call .Lkey_expansion_128
.byte 102,15,58,223,200,27              # rcon 0x1b (after 0x80 wraps in GF(2^8))
call .Lkey_expansion_128
.byte 102,15,58,223,200,54              # rcon 0x36
call .Lkey_expansion_128
movups %xmm0,(%rax)                     # round key 10
movl %esi,80(%rax)                      # round count after last key
xorl %eax,%eax                          # return 0
jmp .Lenc_key_ret

.align 16
.L10rounds_alt:                         # AES-128 without AESKEYGENASSIST
movdqa .Lkey_rotate(%rip),%xmm5         # pshufb mask: RotWord + broadcast
movl $8,%r10d                           # 8 iterations in the main loop
movdqa .Lkey_rcon1(%rip),%xmm4          # round constant, shifted left each round
movdqa %xmm0,%xmm2
movdqu %xmm0,(%rdx)                     # round key 0
jmp .Loop_key128

.align 16
.Loop_key128:
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0 (SubBytes + rcon xor)
pslld $1,%xmm4                          # next round constant
leaq 16(%rax),%rax

movdqa %xmm2,%xmm3                      # xmm2 = previous round key:
pslldq $4,%xmm2                         # fold in the 4-byte-shifted XOR prefix
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0                        # new round key
movdqu %xmm0,-16(%rax)
movdqa %xmm0,%xmm2

decl %r10d
jnz .Loop_key128

movdqa .Lkey_rcon1b(%rip),%xmm4         # rcon 0x1b for round 9

.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
pslld $1,%xmm4                          # rcon -> 0x36

movdqa %xmm2,%xmm3                      # same XOR-prefix fold as in the loop
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0                        # round key 9
movdqu %xmm0,(%rax)

movdqa %xmm0,%xmm2
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0                        # round key 10
movdqu %xmm0,16(%rax)

movl %esi,96(%rax)                      # round count
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L12rounds:                             # AES-192
movq 16(%rdi),%xmm2                     # remaining 8 key bytes
movl $11,%esi
cmpl $268435456,%r10d
je .L12rounds_alt

movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1               # aeskeygenassist $0x1,%xmm2,%xmm1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
call .Lkey_expansion_192b
movups %xmm0,(%rax)
movl %esi,48(%rax)                      # round count
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L12rounds_alt:                         # AES-192 without AESKEYGENASSIST
movdqa .Lkey_rotate192(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $8,%r10d
movdqu %xmm0,(%rdx)
jmp .Loop_key192

.align 16
.Loop_key192:
movq %xmm2,0(%rax)                      # store 8-byte half of the schedule row
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2
pslld $1,%xmm4                          # next round constant
leaq 24(%rax),%rax                      # 192-bit schedule advances 24 bytes/iter

movdqa %xmm0,%xmm3                      # XOR-prefix fold of previous 16 bytes
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0

pshufd $255,%xmm0,%xmm3                 # broadcast top word
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3

pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%rax)

decl %r10d
jnz .Loop_key192

movl %esi,32(%rax)                      # round count
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L14rounds:                             # AES-256
movups 16(%rdi),%xmm2                   # second 16 key bytes
movl $13,%esi
leaq 16(%rax),%rax
cmpl $268435456,%r10d
je .L14rounds_alt

movups %xmm0,(%rdx)                     # round keys 0 and 1 = raw user key
movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1               # aeskeygenassist $0x1,%xmm2,%xmm1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1               # aeskeygenassist $0x1,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
call .Lkey_expansion_256a
movups %xmm0,(%rax)
movl %esi,16(%rax)                      # round count
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L14rounds_alt:                         # AES-256 without AESKEYGENASSIST
movdqa .Lkey_rotate(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $7,%r10d
movdqu %xmm0,0(%rdx)
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx)
jmp .Loop_key256

.align 16
.Loop_key256:
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2

movdqa %xmm0,%xmm3                      # XOR-prefix fold of even-half key
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4                          # next round constant

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)

decl %r10d
jz .Ldone_key256

pshufd $255,%xmm0,%xmm2                 # broadcast top word of new key
pxor %xmm3,%xmm3
.byte 102,15,56,221,211                 # aesenclast %xmm3,%xmm2 (SubWord, no rcon)

movdqa %xmm1,%xmm3                      # XOR-prefix fold of odd-half key
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1

pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1

jmp .Loop_key256

.Ldone_key256:
movl %esi,16(%rax)                      # round count
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
movq $-2,%rax                           # unsupported key length
.Lenc_key_ret:
pxor %xmm0,%xmm0                        # scrub key material before returning
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp
.byte 0xf3,0xc3                         # repz ret
.LSEH_end_set_encrypt_key:

/*
 * Key-expansion helpers for the AESKEYGENASSIST paths.
 * On entry: xmm1 = aeskeygenassist output, xmm0/xmm2 = previous round
 * key(s), xmm4 = scratch, rax = next schedule slot. "*_cold" entries
 * skip the store of the just-completed round key.
 */
.align 16
.Lkey_expansion_128:
movups %xmm0,(%rax)                     # emit completed round key
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $16,%xmm0,%xmm4                  # build the shifted-XOR prefix of xmm0
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                 # broadcast keygenassist result word
xorps %xmm1,%xmm0
.byte 0xf3,0xc3                         # repz ret

.align 16
.Lkey_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5                      # stash half-key for the 192b store
.Lkey_expansion_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1                  # broadcast keygenassist word 1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3                 # broadcast top word of new key
pxor %xmm3,%xmm2
.byte 0xf3,0xc3                         # repz ret

.align 16
.Lkey_expansion_192b:
movaps %xmm0,%xmm3                      # repack two 24-byte rows into 2x16 bytes
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm

.align 16
.Lkey_expansion_256a:
movups %xmm2,(%rax)                     # emit odd-half round key
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4                  # same prefix-XOR pattern as AES-128
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3                         # repz ret

.align 16
.Lkey_expansion_256b:
movups %xmm0,(%rax)                     # emit even-half round key
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1                 # broadcast keygenassist word 2 (no rcon)
xorps %xmm1,%xmm2
.byte 0xf3,0xc3                         # repz ret
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key

/* Constant pool shared by the AES-NI routines in this file. */
.align 64
.Lbswap_mask:                           # pshufb mask: byte-reverse a 128-bit lane
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:                          # 32-bit counter increment (by 6 blocks)
.long 6,6,6,0
.Lincrement64:                          # 64-bit counter increment (by 1)
.long 1,0,0,0
.Lxts_magic:                            # GF(2^128) reduction constant for XTS tweaks
.long 0x87,0,1,0
.Lincrement1:                           # big-endian increment-by-one
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:                           # pshufb masks for the _alt key schedules
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long 0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:                            # initial round constant
.long 1,1,1,1
.Lkey_rcon1b:                           # round constant 0x1b
.long 0x1b,0x1b,0x1b,0x1b

/* "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif