# AES-NI block-cipher routines for 32-bit x86 (AT&T syntax, Mach-O
# symbol conventions: leading-underscore globals, L* local labels).
# Machine-generated style: the round-key pointer travels in %edx and the
# round counter in %ecx; data blocks live in %xmm2..%xmm7.
#
# AES-NI / SSSE3 / SSE4.1 instructions are emitted as raw .byte
# sequences for assemblers that predate them:
#   102,15,56,220,x  aesenc       102,15,56,221,x  aesenclast
#   102,15,56,222,x  aesdec       102,15,56,223,x  aesdeclast
#   102,15,56,0,x    pshufb       102,15,58,34,..  pinsrd
#   102,15,58,22,..  pextrd
# (the last byte(s) are the ModRM / immediate selecting the registers).
#
# The multi-block helpers (__aesni_encrypt3/4/6 and decrypt twins) run
# 3/4/6 blocks in parallel to hide aesenc latency; their loops do two
# rounds per iteration, which is why callers do `shrl $1,%ecx` on the
# round count first.  The key-schedule round count is read from offset
# 240 of the key structure throughout.
.file "crypto/aes/asm/aesni-x86.s"
.text
#----------------------------------------------------------------------
# void aesni_encrypt(const void *in, void *out, const AES_KEY *key)
#   4(%esp)=in, 8(%esp)=out, 12(%esp)=key; single-block encrypt.
#----------------------------------------------------------------------
.globl _aesni_encrypt
.align 4
_aesni_encrypt:
L_aesni_encrypt_begin:
    movl 4(%esp),%eax
    movl 12(%esp),%edx
    movups (%eax),%xmm2             # xmm2 = input block
    movl 240(%edx),%ecx             # ecx = number of rounds
    movl 8(%esp),%eax               # eax = output pointer
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2               # whitening: block ^= round key 0
L000enc1_loop_1:
    .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1             # next round key
    leal 16(%edx),%edx
    jnz L000enc1_loop_1
    .byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
    movups %xmm2,(%eax)
    ret
#----------------------------------------------------------------------
# void aesni_decrypt(const void *in, void *out, const AES_KEY *key)
#   Same layout as aesni_encrypt, using the decrypt round sequence.
#----------------------------------------------------------------------
.globl _aesni_decrypt
.align 4
_aesni_decrypt:
L_aesni_decrypt_begin:
    movl 4(%esp),%eax
    movl 12(%esp),%edx
    movups (%eax),%xmm2
    movl 240(%edx),%ecx
    movl 8(%esp),%eax
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L001dec1_loop_2:
    .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L001dec1_loop_2
    .byte 102,15,56,223,209         # aesdeclast %xmm1,%xmm2
    movups %xmm2,(%eax)
    ret
#----------------------------------------------------------------------
# __aesni_encrypt3: encrypt xmm2,xmm3,xmm4 in parallel.
#   In: %edx = key schedule, %ecx = rounds.  Clobbers xmm0/xmm1, %ecx,
#   %edx.  Two rounds per loop iteration (hence shrl $1).
#----------------------------------------------------------------------
.align 4
__aesni_encrypt3:
    movups (%edx),%xmm0
    shrl $1,%ecx                    # loop does 2 rounds/iteration
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    movups (%edx),%xmm0
L002enc3_loop:
    .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
    .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
    decl %ecx
    .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
    movups 16(%edx),%xmm1
    .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
    .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
    leal 32(%edx),%edx
    .byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
    movups (%edx),%xmm0
    jnz L002enc3_loop
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    .byte 102,15,56,220,225
    .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
    .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
    .byte 102,15,56,221,224         # aesenclast %xmm0,%xmm4
    ret
#----------------------------------------------------------------------
# __aesni_decrypt3: decrypt twin of __aesni_encrypt3.
#----------------------------------------------------------------------
.align 4
__aesni_decrypt3:
    movups (%edx),%xmm0
    shrl $1,%ecx
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    movups (%edx),%xmm0
L003dec3_loop:
    .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
    .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
    decl %ecx
    .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
    movups 16(%edx),%xmm1
    .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
    .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
    leal 32(%edx),%edx
    .byte 102,15,56,222,224         # aesdec %xmm0,%xmm4
    movups (%edx),%xmm0
    jnz L003dec3_loop
    .byte 102,15,56,222,209
    .byte 102,15,56,222,217
    .byte 102,15,56,222,225
    .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
    .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
    .byte 102,15,56,223,224         # aesdeclast %xmm0,%xmm4
    ret
#----------------------------------------------------------------------
# __aesni_encrypt4: encrypt xmm2..xmm5 in parallel (same contract as
#   __aesni_encrypt3).
#----------------------------------------------------------------------
.align 4
__aesni_encrypt4:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    shrl $1,%ecx
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    pxor %xmm0,%xmm5
    movups (%edx),%xmm0
L004enc4_loop:
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    decl %ecx
    .byte 102,15,56,220,225
    .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
    movups 16(%edx),%xmm1
    .byte 102,15,56,220,208
    .byte 102,15,56,220,216
    leal 32(%edx),%edx
    .byte 102,15,56,220,224
    .byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
    movups (%edx),%xmm0
    jnz L004enc4_loop
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    .byte 102,15,56,220,225
    .byte 102,15,56,220,233
    .byte 102,15,56,221,208
    .byte 102,15,56,221,216
    .byte 102,15,56,221,224
    .byte 102,15,56,221,232         # aesenclast %xmm0,%xmm5
    ret
#----------------------------------------------------------------------
# __aesni_decrypt4: decrypt twin of __aesni_encrypt4.
#----------------------------------------------------------------------
.align 4
__aesni_decrypt4:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    shrl $1,%ecx
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    pxor %xmm0,%xmm5
    movups (%edx),%xmm0
L005dec4_loop:
    .byte 102,15,56,222,209
    .byte 102,15,56,222,217
    decl %ecx
    .byte 102,15,56,222,225
    .byte 102,15,56,222,233
    movups 16(%edx),%xmm1
    .byte 102,15,56,222,208
    .byte 102,15,56,222,216
    leal 32(%edx),%edx
    .byte 102,15,56,222,224
    .byte 102,15,56,222,232
    movups (%edx),%xmm0
    jnz L005dec4_loop
    .byte 102,15,56,222,209
    .byte 102,15,56,222,217
    .byte 102,15,56,222,225
    .byte 102,15,56,222,233
    .byte 102,15,56,223,208
    .byte 102,15,56,223,216
    .byte 102,15,56,223,224
    .byte 102,15,56,223,232
    ret
#----------------------------------------------------------------------
# __aesni_encrypt6: encrypt xmm2..xmm7 in parallel.  The first round is
#   issued inline before the loop; L_aesni_encrypt6_enter lets callers
#   (CTR/XTS) jump in after doing the whitening xor themselves.
#----------------------------------------------------------------------
.align 4
__aesni_encrypt6:
    movups (%edx),%xmm0
    shrl $1,%ecx
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    .byte 102,15,56,220,209
    pxor %xmm0,%xmm4
    .byte 102,15,56,220,217
    pxor %xmm0,%xmm5
    decl %ecx
    .byte 102,15,56,220,225
    pxor %xmm0,%xmm6
    .byte 102,15,56,220,233
    pxor %xmm0,%xmm7
    .byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
    movups (%edx),%xmm0
    .byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
    jmp L_aesni_encrypt6_enter
.align 4,0x90
L006enc6_loop:
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    decl %ecx
    .byte 102,15,56,220,225
    .byte 102,15,56,220,233
    .byte 102,15,56,220,241
    .byte 102,15,56,220,249
.align 4,0x90
L_aesni_encrypt6_enter:
    movups 16(%edx),%xmm1
    .byte 102,15,56,220,208
    .byte 102,15,56,220,216
    leal 32(%edx),%edx
    .byte 102,15,56,220,224
    .byte 102,15,56,220,232
    .byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
    .byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
    movups (%edx),%xmm0
    jnz L006enc6_loop
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    .byte 102,15,56,220,225
    .byte 102,15,56,220,233
    .byte 102,15,56,220,241
    .byte 102,15,56,220,249
    .byte 102,15,56,221,208
    .byte 102,15,56,221,216
    .byte 102,15,56,221,224
    .byte 102,15,56,221,232
    .byte 102,15,56,221,240         # aesenclast %xmm0,%xmm6
    .byte 102,15,56,221,248         # aesenclast %xmm0,%xmm7
    ret
#----------------------------------------------------------------------
# __aesni_decrypt6: decrypt twin of __aesni_encrypt6, with matching
#   L_aesni_decrypt6_enter entry point.
#----------------------------------------------------------------------
.align 4
__aesni_decrypt6:
    movups (%edx),%xmm0
    shrl $1,%ecx
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
    pxor %xmm0,%xmm3
    .byte 102,15,56,222,209
    pxor %xmm0,%xmm4
    .byte 102,15,56,222,217
    pxor %xmm0,%xmm5
    decl %ecx
    .byte 102,15,56,222,225
    pxor %xmm0,%xmm6
    .byte 102,15,56,222,233
    pxor %xmm0,%xmm7
    .byte 102,15,56,222,241
    movups (%edx),%xmm0
    .byte 102,15,56,222,249
    jmp L_aesni_decrypt6_enter
.align 4,0x90
L007dec6_loop:
    .byte 102,15,56,222,209
    .byte 102,15,56,222,217
    decl %ecx
    .byte 102,15,56,222,225
    .byte 102,15,56,222,233
    .byte 102,15,56,222,241
    .byte 102,15,56,222,249
.align 4,0x90
L_aesni_decrypt6_enter:
    movups 16(%edx),%xmm1
    .byte 102,15,56,222,208
    .byte 102,15,56,222,216
    leal 32(%edx),%edx
    .byte 102,15,56,222,224
    .byte 102,15,56,222,232
    .byte 102,15,56,222,240
    .byte 102,15,56,222,248
    movups (%edx),%xmm0
    jnz L007dec6_loop
    .byte 102,15,56,222,209
    .byte 102,15,56,222,217
    .byte 102,15,56,222,225
    .byte 102,15,56,222,233
    .byte 102,15,56,222,241
    .byte 102,15,56,222,249
    .byte 102,15,56,223,208
    .byte 102,15,56,223,216
    .byte 102,15,56,223,224
    .byte 102,15,56,223,232
    .byte 102,15,56,223,240
    .byte 102,15,56,223,248
    ret
#----------------------------------------------------------------------
# void aesni_ecb_encrypt(const void *in, void *out, size_t len,
#                        const AES_KEY *key, int enc)
#   After the 4 pushes: 20(%esp)=in, 24=out, 28=len, 32=key, 36=enc.
#   len is truncated to a multiple of 16; enc==0 selects decryption.
#   Main path processes 6 blocks per iteration; tail handles 1-5.
#   %ebp/%ebx cache key pointer / rounds across helper calls (the
#   helpers advance %edx and consume %ecx).
#----------------------------------------------------------------------
.globl _aesni_ecb_encrypt
.align 4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    andl $-16,%eax                  # whole blocks only
    jz L008ecb_ret
    movl 240(%edx),%ecx
    testl %ebx,%ebx                 # enc flag: 0 -> decrypt
    jz L009ecb_decrypt
    movl %edx,%ebp                  # save key pointer
    movl %ecx,%ebx                  # save round count
    cmpl $96,%eax
    jb L010ecb_enc_tail
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    movdqu 48(%esi),%xmm5
    movdqu 64(%esi),%xmm6
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
    subl $96,%eax
    jmp L011ecb_enc_loop6_enter
.align 4,0x90
L012ecb_enc_loop6:
    # store previous 6 ciphertext blocks while loading the next 6
    movups %xmm2,(%edi)
    movdqu (%esi),%xmm2
    movups %xmm3,16(%edi)
    movdqu 16(%esi),%xmm3
    movups %xmm4,32(%edi)
    movdqu 32(%esi),%xmm4
    movups %xmm5,48(%edi)
    movdqu 48(%esi),%xmm5
    movups %xmm6,64(%edi)
    movdqu 64(%esi),%xmm6
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
L011ecb_enc_loop6_enter:
    call __aesni_encrypt6
    movl %ebp,%edx                  # restore key/rounds clobbered by helper
    movl %ebx,%ecx
    subl $96,%eax
    jnc L012ecb_enc_loop6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    addl $96,%eax                   # eax = remaining blocks' bytes (0..80)
    jz L008ecb_ret
L010ecb_enc_tail:
    movups (%esi),%xmm2
    cmpl $32,%eax
    jb L013ecb_enc_one
    movups 16(%esi),%xmm3
    je L014ecb_enc_two
    movups 32(%esi),%xmm4
    cmpl $64,%eax
    jb L015ecb_enc_three
    movups 48(%esi),%xmm5
    je L016ecb_enc_four
    movups 64(%esi),%xmm6
    xorps %xmm7,%xmm7               # 5 blocks: pad 6th lane with zero
    call __aesni_encrypt6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L008ecb_ret
.align 4,0x90
L013ecb_enc_one:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L017enc1_loop_3:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L017enc1_loop_3
    .byte 102,15,56,221,209
    movups %xmm2,(%edi)
    jmp L008ecb_ret
.align 4,0x90
L014ecb_enc_two:
    xorps %xmm4,%xmm4               # pad unused 3rd lane
    call __aesni_encrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L008ecb_ret
.align 4,0x90
L015ecb_enc_three:
    call __aesni_encrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L008ecb_ret
.align 4,0x90
L016ecb_enc_four:
    call __aesni_encrypt4
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    jmp L008ecb_ret
.align 4,0x90
L009ecb_decrypt:
    # mirror of the encrypt side using the __aesni_decrypt* helpers
    movl %edx,%ebp
    movl %ecx,%ebx
    cmpl $96,%eax
    jb L018ecb_dec_tail
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    movdqu 48(%esi),%xmm5
    movdqu 64(%esi),%xmm6
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
    subl $96,%eax
    jmp L019ecb_dec_loop6_enter
.align 4,0x90
L020ecb_dec_loop6:
    movups %xmm2,(%edi)
    movdqu (%esi),%xmm2
    movups %xmm3,16(%edi)
    movdqu 16(%esi),%xmm3
    movups %xmm4,32(%edi)
    movdqu 32(%esi),%xmm4
    movups %xmm5,48(%edi)
    movdqu 48(%esi),%xmm5
    movups %xmm6,64(%edi)
    movdqu 64(%esi),%xmm6
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
L019ecb_dec_loop6_enter:
    call __aesni_decrypt6
    movl %ebp,%edx
    movl %ebx,%ecx
    subl $96,%eax
    jnc L020ecb_dec_loop6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    addl $96,%eax
    jz L008ecb_ret
L018ecb_dec_tail:
    movups (%esi),%xmm2
    cmpl $32,%eax
    jb L021ecb_dec_one
    movups 16(%esi),%xmm3
    je L022ecb_dec_two
    movups 32(%esi),%xmm4
    cmpl $64,%eax
    jb L023ecb_dec_three
    movups 48(%esi),%xmm5
    je L024ecb_dec_four
    movups 64(%esi),%xmm6
    xorps %xmm7,%xmm7
    call __aesni_decrypt6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L008ecb_ret
.align 4,0x90
L021ecb_dec_one:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L025dec1_loop_4:
    .byte 102,15,56,222,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L025dec1_loop_4
    .byte 102,15,56,223,209
    movups %xmm2,(%edi)
    jmp L008ecb_ret
.align 4,0x90
L022ecb_dec_two:
    xorps %xmm4,%xmm4
    call __aesni_decrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L008ecb_ret
.align 4,0x90
L023ecb_dec_three:
    call __aesni_decrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L008ecb_ret
.align 4,0x90
L024ecb_dec_four:
    call __aesni_decrypt4
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
L008ecb_ret:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#----------------------------------------------------------------------
# void aesni_ccm64_encrypt_blocks(const void *in, void *out,
#         size_t blocks, const AES_KEY *key, const char *ivec, char *cmac)
#   20(%esp)=in, 24=out, 28=blocks, 32=key, 36=ivec, 40=cmac (after
#   pushes).  Interleaves CTR encryption (xmm2 = counter block) with
#   the CBC-MAC accumulation in xmm3, two AES lanes per loop.
#   Builds a 16-byte aligned scratch frame; saved %esp at 48(%esp).
#   (%esp)..12(%esp) = pshufb byte-swap mask {0x0c0d0e0f,0x08090a0b,
#   0x04050607,0x00010203}; 16(%esp) = {1,0,0,0} used by paddq to
#   step the (byte-swapped) counter by one.
#----------------------------------------------------------------------
.globl _aesni_ccm64_encrypt_blocks
.align 4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl 40(%esp),%ecx
    movl %esp,%ebp
    subl $60,%esp
    andl $-16,%esp                  # align scratch for movdqa
    movl %ebp,48(%esp)              # saved original %esp
    movdqu (%ebx),%xmm7             # xmm7 = ivec/counter block
    movdqu (%ecx),%xmm3             # xmm3 = running CMAC
    movl 240(%edx),%ecx
    movl $202182159,(%esp)          # 0x0c0d0e0f \  big-endian byte-swap
    movl $134810123,4(%esp)         # 0x08090a0b  } mask for pshufb
    movl $67438087,8(%esp)          # 0x04050607  }
    movl $66051,12(%esp)            # 0x00010203 /
    movl $1,%ebx
    xorl %ebp,%ebp
    movl %ebx,16(%esp)              # 16(%esp) = 64-bit constant 1
    movl %ebp,20(%esp)
    movl %ebp,24(%esp)
    movl %ebp,28(%esp)
    shrl $1,%ecx                    # 2 rounds per inner-loop iteration
    leal (%edx),%ebp                # ebp = key pointer
    movdqa (%esp),%xmm5             # xmm5 = byte-swap mask
    movdqa %xmm7,%xmm2
    movl %ecx,%ebx                  # ebx = halved round count
    .byte 102,15,56,0,253           # pshufb %xmm5,%xmm7 (counter -> LE)
L026ccm64_enc_outer:
    movups (%ebp),%xmm0
    movl %ebx,%ecx
    movups (%esi),%xmm6             # next plaintext block
    xorps %xmm0,%xmm2               # whiten counter block
    movups 16(%ebp),%xmm1
    xorps %xmm6,%xmm0
    leal 32(%ebp),%edx
    xorps %xmm0,%xmm3               # fold plaintext into CMAC (whitened)
    movups (%edx),%xmm0
L027ccm64_enc2_loop:
    .byte 102,15,56,220,209         # aesenc: counter lane
    decl %ecx
    .byte 102,15,56,220,217         # aesenc: CMAC lane
    movups 16(%edx),%xmm1
    .byte 102,15,56,220,208
    leal 32(%edx),%edx
    .byte 102,15,56,220,216
    movups (%edx),%xmm0
    jnz L027ccm64_enc2_loop
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    paddq 16(%esp),%xmm7            # counter += 1
    .byte 102,15,56,221,208
    .byte 102,15,56,221,216
    decl %eax
    leal 16(%esi),%esi
    xorps %xmm2,%xmm6               # ciphertext = plaintext ^ E(counter)
    movdqa %xmm7,%xmm2
    movups %xmm6,(%edi)
    leal 16(%edi),%edi
    .byte 102,15,56,0,213           # pshufb %xmm5,%xmm2 (re-swap counter)
    jnz L026ccm64_enc_outer
    movl 48(%esp),%esp              # restore caller stack
    movl 40(%esp),%edi
    movups %xmm3,(%edi)             # write back final CMAC
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#----------------------------------------------------------------------
# void aesni_ccm64_decrypt_blocks(...) — same argument layout as the
#   encrypt variant.  Decrypts via CTR, then feeds the recovered
#   plaintext into the CMAC lane; the first counter block is encrypted
#   up front so the XOR/MAC update can be pipelined one block behind.
#----------------------------------------------------------------------
.globl _aesni_ccm64_decrypt_blocks
.align 4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl 40(%esp),%ecx
    movl %esp,%ebp
    subl $60,%esp
    andl $-16,%esp
    movl %ebp,48(%esp)
    movdqu (%ebx),%xmm7
    movdqu (%ecx),%xmm3
    movl 240(%edx),%ecx
    movl $202182159,(%esp)          # byte-swap mask, as in encrypt
    movl $134810123,4(%esp)
    movl $67438087,8(%esp)
    movl $66051,12(%esp)
    movl $1,%ebx
    xorl %ebp,%ebp
    movl %ebx,16(%esp)              # 64-bit increment constant 1
    movl %ebp,20(%esp)
    movl %ebp,24(%esp)
    movl %ebp,28(%esp)
    movdqa (%esp),%xmm5
    movdqa %xmm7,%xmm2
    movl %edx,%ebp
    movl %ecx,%ebx
    .byte 102,15,56,0,253           # pshufb %xmm5,%xmm7
    # encrypt the first counter block (single-lane loop)
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L028enc1_loop_5:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L028enc1_loop_5
    .byte 102,15,56,221,209
    movups (%esi),%xmm6             # first ciphertext block
    paddq 16(%esp),%xmm7
    leal 16(%esi),%esi
    jmp L029ccm64_dec_outer
.align 4,0x90
L029ccm64_dec_outer:
    xorps %xmm2,%xmm6               # plaintext = ciphertext ^ E(counter)
    movdqa %xmm7,%xmm2
    movl %ebx,%ecx
    movups %xmm6,(%edi)
    leal 16(%edi),%edi
    .byte 102,15,56,0,213           # pshufb %xmm5,%xmm2
    subl $1,%eax
    jz L030ccm64_dec_break
    movups (%ebp),%xmm0
    shrl $1,%ecx
    movups 16(%ebp),%xmm1
    xorps %xmm0,%xmm6
    leal 32(%ebp),%edx
    xorps %xmm0,%xmm2
    xorps %xmm6,%xmm3               # fold plaintext into CMAC
    movups (%edx),%xmm0
L031ccm64_dec2_loop:
    .byte 102,15,56,220,209
    decl %ecx
    .byte 102,15,56,220,217
    movups 16(%edx),%xmm1
    .byte 102,15,56,220,208
    leal 32(%edx),%edx
    .byte 102,15,56,220,216
    movups (%edx),%xmm0
    jnz L031ccm64_dec2_loop
    movups (%esi),%xmm6             # prefetch next ciphertext block
    paddq 16(%esp),%xmm7
    .byte 102,15,56,220,209
    .byte 102,15,56,220,217
    leal 16(%esi),%esi
    .byte 102,15,56,221,208
    .byte 102,15,56,221,216
    jmp L029ccm64_dec_outer
.align 4,0x90
L030ccm64_dec_break:
    # final CMAC update for the last plaintext block (single lane)
    movl %ebp,%edx
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    xorps %xmm0,%xmm6
    leal 32(%edx),%edx
    xorps %xmm6,%xmm3
L032enc1_loop_6:
    .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L032enc1_loop_6
    .byte 102,15,56,221,217         # aesenclast %xmm1,%xmm3
    movl 48(%esp),%esp
    movl 40(%esp),%edi
    movups %xmm3,(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#----------------------------------------------------------------------
# void aesni_ctr32_encrypt_blocks(const void *in, void *out,
#         size_t blocks, const AES_KEY *key, const char *ivec)
#   20(%esp)=in, 24=out, 28=blocks, 32=key, 36=ivec (after pushes).
#   Keeps six 32-bit big-endian counters at a time: the low dwords are
#   built with pinsrd/bswap in xmm1/xmm0 (saved at 48/64(%esp)),
#   byte-swapped via the pshufb mask at (%esp), and merged with the
#   invariant IV words (32(%esp)) using pshufd/por.  16(%esp) holds
#   {6,6,6,0} so one paddd advances three counters by 6 per iteration.
#   Saved %esp at 80(%esp).
#----------------------------------------------------------------------
.globl _aesni_ctr32_encrypt_blocks
.align 4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl %esp,%ebp
    subl $88,%esp
    andl $-16,%esp
    movl %ebp,80(%esp)              # saved original %esp
    cmpl $1,%eax
    je L033ctr32_one_shortcut
    movdqu (%ebx),%xmm7             # xmm7 = ivec (counter block)
    movl $202182159,(%esp)          # pshufb byte-swap mask (see CCM)
    movl $134810123,4(%esp)
    movl $67438087,8(%esp)
    movl $66051,12(%esp)
    movl $6,%ecx
    xorl %ebp,%ebp
    movl %ecx,16(%esp)              # 16(%esp) = {6,6,6,0}: counter step
    movl %ecx,20(%esp)
    movl %ecx,24(%esp)
    movl %ebp,28(%esp)
    .byte 102,15,58,22,251,3        # pextrd $3,%xmm7,%ebx  (counter word)
    .byte 102,15,58,34,253,3        # pinsrd $3,%ebp,%xmm7
    movl 240(%edx),%ecx
    bswap %ebx                      # counter to host order for inc
    pxor %xmm1,%xmm1
    pxor %xmm0,%xmm0
    movdqa (%esp),%xmm2             # byte-swap mask
    .byte 102,15,58,34,203,0        # pinsrd $0,%ebx,%xmm1
    leal 3(%ebx),%ebp
    .byte 102,15,58,34,197,0        # pinsrd $0,%ebp,%xmm0
    incl %ebx
    .byte 102,15,58,34,203,1        # pinsrd $1,%ebx,%xmm1
    incl %ebp
    .byte 102,15,58,34,197,1        # pinsrd $1,%ebp,%xmm0
    incl %ebx
    .byte 102,15,58,34,203,2        # pinsrd $2,%ebx,%xmm1
    incl %ebp
    .byte 102,15,58,34,197,2        # pinsrd $2,%ebp,%xmm0
    movdqa %xmm1,48(%esp)           # counters n,n+1,n+2
    .byte 102,15,56,0,202           # pshufb %xmm2,%xmm1 (to big-endian)
    movdqa %xmm0,64(%esp)           # counters n+3,n+4,n+5
    .byte 102,15,56,0,194           # pshufb %xmm2,%xmm0
    pshufd $192,%xmm1,%xmm2
    pshufd $128,%xmm1,%xmm3
    cmpl $6,%eax
    jb L034ctr32_tail
    movdqa %xmm7,32(%esp)           # invariant IV words
    shrl $1,%ecx
    movl %edx,%ebp
    movl %ecx,%ebx
    subl $6,%eax
    jmp L035ctr32_loop6
.align 4,0x90
L035ctr32_loop6:
    # assemble 6 counter blocks (IV | bswapped counter) in xmm2..xmm7
    pshufd $64,%xmm1,%xmm4
    movdqa 32(%esp),%xmm1
    pshufd $192,%xmm0,%xmm5
    por %xmm1,%xmm2
    pshufd $128,%xmm0,%xmm6
    por %xmm1,%xmm3
    pshufd $64,%xmm0,%xmm7
    por %xmm1,%xmm4
    por %xmm1,%xmm5
    por %xmm1,%xmm6
    por %xmm1,%xmm7
    # whitening + round 1 inline, then join __aesni_encrypt6 mid-stream
    movups (%ebp),%xmm0
    movups 16(%ebp),%xmm1
    leal 32(%ebp),%edx
    decl %ecx
    pxor %xmm0,%xmm2
    pxor %xmm0,%xmm3
    .byte 102,15,56,220,209
    pxor %xmm0,%xmm4
    .byte 102,15,56,220,217
    pxor %xmm0,%xmm5
    .byte 102,15,56,220,225
    pxor %xmm0,%xmm6
    .byte 102,15,56,220,233
    pxor %xmm0,%xmm7
    .byte 102,15,56,220,241
    movups (%edx),%xmm0
    .byte 102,15,56,220,249
    call L_aesni_encrypt6_enter
    # XOR keystream with input while stepping counters by 6
    movups (%esi),%xmm1
    movups 16(%esi),%xmm0
    xorps %xmm1,%xmm2
    movups 32(%esi),%xmm1
    xorps %xmm0,%xmm3
    movups %xmm2,(%edi)
    movdqa 16(%esp),%xmm0           # {6,6,6,0}
    xorps %xmm1,%xmm4
    movdqa 48(%esp),%xmm1
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    paddd %xmm0,%xmm1               # counters n..n+2 += 6
    paddd 64(%esp),%xmm0            # counters n+3..n+5 += 6
    movdqa (%esp),%xmm2
    movups 48(%esi),%xmm3
    movups 64(%esi),%xmm4
    xorps %xmm3,%xmm5
    movups 80(%esi),%xmm3
    leal 96(%esi),%esi
    movdqa %xmm1,48(%esp)
    .byte 102,15,56,0,202           # pshufb %xmm2,%xmm1
    xorps %xmm4,%xmm6
    movups %xmm5,48(%edi)
    xorps %xmm3,%xmm7
    movdqa %xmm0,64(%esp)
    .byte 102,15,56,0,194           # pshufb %xmm2,%xmm0
    movups %xmm6,64(%edi)
    pshufd $192,%xmm1,%xmm2
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movl %ebx,%ecx
    pshufd $128,%xmm1,%xmm3
    subl $6,%eax
    jnc L035ctr32_loop6
    addl $6,%eax
    jz L036ctr32_ret
    movl %ebp,%edx
    leal 1(,%ecx,2),%ecx            # un-halve round count for helpers
    movdqa 32(%esp),%xmm7
L034ctr32_tail:
    # 1..5 remaining blocks: build just enough counter blocks
    por %xmm7,%xmm2
    cmpl $2,%eax
    jb L037ctr32_one
    pshufd $64,%xmm1,%xmm4
    por %xmm7,%xmm3
    je L038ctr32_two
    pshufd $192,%xmm0,%xmm5
    por %xmm7,%xmm4
    cmpl $4,%eax
    jb L039ctr32_three
    pshufd $128,%xmm0,%xmm6
    por %xmm7,%xmm5
    je L040ctr32_four
    por %xmm7,%xmm6
    call __aesni_encrypt6
    movups (%esi),%xmm1
    movups 16(%esi),%xmm0
    xorps %xmm1,%xmm2
    movups 32(%esi),%xmm1
    xorps %xmm0,%xmm3
    movups 48(%esi),%xmm0
    xorps %xmm1,%xmm4
    movups 64(%esi),%xmm1
    xorps %xmm0,%xmm5
    movups %xmm2,(%edi)
    xorps %xmm1,%xmm6
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L036ctr32_ret
.align 4,0x90
L033ctr32_one_shortcut:
    movups (%ebx),%xmm2             # single block: use ivec directly
    movl 240(%edx),%ecx
L037ctr32_one:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L041enc1_loop_7:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L041enc1_loop_7
    .byte 102,15,56,221,209
    movups (%esi),%xmm6
    xorps %xmm2,%xmm6
    movups %xmm6,(%edi)
    jmp L036ctr32_ret
.align 4,0x90
L038ctr32_two:
    call __aesni_encrypt3
    movups (%esi),%xmm5
    movups 16(%esi),%xmm6
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L036ctr32_ret
.align 4,0x90
L039ctr32_three:
    call __aesni_encrypt3
    movups (%esi),%xmm5
    movups 16(%esi),%xmm6
    xorps %xmm5,%xmm2
    movups 32(%esi),%xmm7
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    xorps %xmm7,%xmm4
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L036ctr32_ret
.align 4,0x90
L040ctr32_four:
    call __aesni_encrypt4
    movups (%esi),%xmm6
    movups 16(%esi),%xmm7
    movups 32(%esi),%xmm1
    xorps %xmm6,%xmm2
    movups 48(%esi),%xmm0
    xorps %xmm7,%xmm3
    movups %xmm2,(%edi)
    xorps %xmm1,%xmm4
    movups %xmm3,16(%edi)
    xorps %xmm0,%xmm5
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
L036ctr32_ret:
    movl 80(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#----------------------------------------------------------------------
# void aesni_xts_encrypt(const void *in, void *out, size_t len,
#         const AES_KEY *key1, const AES_KEY *key2, const char *iv)
#   20(%esp)=in, 24=out, 28=len, 32=key1 (data key), 36=key2 (tweak
#   key), 40=iv (after pushes).  The IV is first encrypted with key2 to
#   form the initial tweak (xmm1).  Tweaks are stepped by doubling in
#   GF(2^128): pcmpgtd/pshufd extract the carry, pand with the
#   polynomial constant 0x87 kept at 96(%esp).  Six tweaked blocks per
#   main-loop iteration; tweak values spill to (%esp)..80(%esp).
#   112(%esp) = original len (for ciphertext stealing of a partial
#   final block at L053xts_enc_steal), 116(%esp) = saved %esp.
#----------------------------------------------------------------------
.globl _aesni_xts_encrypt
.align 4
_aesni_xts_encrypt:
L_aesni_xts_encrypt_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 36(%esp),%edx              # key2: tweak key
    movl 40(%esp),%esi              # iv
    movl 240(%edx),%ecx
    movups (%esi),%xmm2
    # encrypt IV with key2 -> initial tweak
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L042enc1_loop_8:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L042enc1_loop_8
    .byte 102,15,56,221,209
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx              # key1: data key
    movl %esp,%ebp
    subl $120,%esp
    movl 240(%edx),%ecx
    andl $-16,%esp
    movl $135,96(%esp)              # 0x87: GF(2^128) tweak polynomial
    movl $0,100(%esp)
    movl $1,104(%esp)
    movl $0,108(%esp)
    movl %eax,112(%esp)             # original length (for stealing)
    movl %ebp,116(%esp)             # saved %esp
    movdqa %xmm2,%xmm1              # xmm1 = current tweak
    pxor %xmm0,%xmm0
    movdqa 96(%esp),%xmm3           # xmm3 = polynomial mask
    pcmpgtd %xmm1,%xmm0             # carry detector for tweak doubling
    andl $-16,%eax
    movl %edx,%ebp
    movl %ecx,%ebx
    subl $96,%eax
    jc L043xts_enc_short
    shrl $1,%ecx
    movl %ecx,%ebx
    jmp L044xts_enc_loop6
.align 4,0x90
L044xts_enc_loop6:
    # generate 6 consecutive tweaks (each = previous doubled in GF),
    # spilling the first five to (%esp)..64(%esp), sixth in xmm7
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,16(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,32(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,48(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm7
    movdqa %xmm1,64(%esp)
    paddq %xmm1,%xmm1
    movups (%ebp),%xmm0             # round key 0
    pand %xmm3,%xmm7
    movups (%esi),%xmm2
    pxor %xmm1,%xmm7
    # load 6 blocks, xor round key 0 and tweaks, then run 6-lane AES
    movdqu 16(%esi),%xmm3
    xorps %xmm0,%xmm2
    movdqu 32(%esi),%xmm4
    pxor %xmm0,%xmm3
    movdqu 48(%esi),%xmm5
    pxor %xmm0,%xmm4
    movdqu 64(%esi),%xmm6
    pxor %xmm0,%xmm5
    movdqu 80(%esi),%xmm1
    pxor %xmm0,%xmm6
    leal 96(%esi),%esi
    pxor (%esp),%xmm2
    movdqa %xmm7,80(%esp)
    pxor %xmm1,%xmm7
    movups 16(%ebp),%xmm1
    leal 32(%ebp),%edx
    pxor 16(%esp),%xmm3
    .byte 102,15,56,220,209
    pxor 32(%esp),%xmm4
    .byte 102,15,56,220,217
    pxor 48(%esp),%xmm5
    decl %ecx
    .byte 102,15,56,220,225
    pxor 64(%esp),%xmm6
    .byte 102,15,56,220,233
    pxor %xmm0,%xmm7
    .byte 102,15,56,220,241
    movups (%edx),%xmm0
    .byte 102,15,56,220,249
    call L_aesni_encrypt6_enter
    # post-whiten with the tweaks and store; prepare next tweak
    movdqa 80(%esp),%xmm1
    pxor %xmm0,%xmm0
    xorps (%esp),%xmm2
    pcmpgtd %xmm1,%xmm0
    xorps 16(%esp),%xmm3
    movups %xmm2,(%edi)
    xorps 32(%esp),%xmm4
    movups %xmm3,16(%edi)
    xorps 48(%esp),%xmm5
    movups %xmm4,32(%edi)
    xorps 64(%esp),%xmm6
    movups %xmm5,48(%edi)
    xorps %xmm1,%xmm7
    movups %xmm6,64(%edi)
    pshufd $19,%xmm0,%xmm2
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqa 96(%esp),%xmm3
    pxor %xmm0,%xmm0
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    movl %ebx,%ecx
    pxor %xmm2,%xmm1
    subl $96,%eax
    jnc L044xts_enc_loop6
    leal 1(,%ecx,2),%ecx
    movl %ebp,%edx
    movl %ecx,%ebx
L043xts_enc_short:
    addl $96,%eax
    jz L045xts_enc_done6x
    movdqa %xmm1,%xmm5              # xmm5 = current tweak
    cmpl $32,%eax
    jb L046xts_enc_one
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    je L047xts_enc_two
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,%xmm6
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    cmpl $64,%eax
    jb L048xts_enc_three
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,%xmm7
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    movdqa %xmm5,(%esp)
    movdqa %xmm6,16(%esp)
    je L049xts_enc_four
    # 5 blocks remain
    movdqa %xmm7,32(%esp)
    pshufd $19,%xmm0,%xmm7
    movdqa %xmm1,48(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm7
    pxor %xmm1,%xmm7
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    pxor (%esp),%xmm2
    movdqu 48(%esi),%xmm5
    pxor 16(%esp),%xmm3
    movdqu 64(%esi),%xmm6
    pxor 32(%esp),%xmm4
    leal 80(%esi),%esi
    pxor 48(%esp),%xmm5
    movdqa %xmm7,64(%esp)
    pxor %xmm7,%xmm6
    call __aesni_encrypt6
    movaps 64(%esp),%xmm1
    xorps (%esp),%xmm2
    xorps 16(%esp),%xmm3
    xorps 32(%esp),%xmm4
    movups %xmm2,(%edi)
    xorps 48(%esp),%xmm5
    movups %xmm3,16(%edi)
    xorps %xmm1,%xmm6
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    leal 80(%edi),%edi
    jmp L050xts_enc_done
.align 4,0x90
L046xts_enc_one:
    movups (%esi),%xmm2
    leal 16(%esi),%esi
    xorps %xmm5,%xmm2               # pre-whiten with tweak
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L051enc1_loop_9:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L051enc1_loop_9
    .byte 102,15,56,221,209
    xorps %xmm5,%xmm2               # post-whiten with tweak
    movups %xmm2,(%edi)
    leal 16(%edi),%edi
    movdqa %xmm5,%xmm1
    jmp L050xts_enc_done
.align 4,0x90
L047xts_enc_two:
    movaps %xmm1,%xmm6
    movups (%esi),%xmm2
    movups 16(%esi),%xmm3
    leal 32(%esi),%esi
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    xorps %xmm4,%xmm4
    call __aesni_encrypt3
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    leal 32(%edi),%edi
    movdqa %xmm6,%xmm1
    jmp L050xts_enc_done
.align 4,0x90
L048xts_enc_three:
    movaps %xmm1,%xmm7
    movups (%esi),%xmm2
    movups 16(%esi),%xmm3
    movups 32(%esi),%xmm4
    leal 48(%esi),%esi
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    xorps %xmm7,%xmm4
    call __aesni_encrypt3
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    xorps %xmm7,%xmm4
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    leal 48(%edi),%edi
    movdqa %xmm7,%xmm1
    jmp L050xts_enc_done
.align 4,0x90
L049xts_enc_four:
    movaps %xmm1,%xmm6
    movups (%esi),%xmm2
    movups 16(%esi),%xmm3
    movups 32(%esi),%xmm4
    xorps (%esp),%xmm2
    movups 48(%esi),%xmm5
    leal 64(%esi),%esi
    xorps 16(%esp),%xmm3
    xorps %xmm7,%xmm4
    xorps %xmm6,%xmm5
    call __aesni_encrypt4
    xorps (%esp),%xmm2
    xorps 16(%esp),%xmm3
    xorps %xmm7,%xmm4
    movups %xmm2,(%edi)
    xorps %xmm6,%xmm5
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    leal 64(%edi),%edi
    movdqa %xmm6,%xmm1
    jmp L050xts_enc_done
.align 4,0x90
L045xts_enc_done6x:
    movl 112(%esp),%eax             # original length
    andl $15,%eax                   # partial-block residue?
    jz L052xts_enc_ret
    movdqa %xmm1,%xmm5
    movl %eax,112(%esp)
    jmp L053xts_enc_steal
.align 4,0x90
L050xts_enc_done:
    movl 112(%esp),%eax
    pxor %xmm0,%xmm0
    andl $15,%eax
    jz L052xts_enc_ret
    # one more tweak for the stolen block
    pcmpgtd %xmm1,%xmm0
    movl %eax,112(%esp)
    pshufd $19,%xmm0,%xmm5
    paddq %xmm1,%xmm1
    pand 96(%esp),%xmm5
    pxor %xmm1,%xmm5
L053xts_enc_steal:
    # ciphertext stealing: swap residue bytes with the tail of the
    # previous ciphertext block, then re-encrypt that block
    movzbl (%esi),%ecx
    movzbl -16(%edi),%edx
    leal 1(%esi),%esi
    movb %cl,-16(%edi)
    movb %dl,(%edi)
    leal 1(%edi),%edi
    subl $1,%eax
    jnz L053xts_enc_steal
    subl 112(%esp),%edi
    movl %ebp,%edx
    movl %ebx,%ecx
    movups -16(%edi),%xmm2
    xorps %xmm5,%xmm2
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L054enc1_loop_10:
    .byte 102,15,56,220,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L054enc1_loop_10
    .byte 102,15,56,221,209
    xorps %xmm5,%xmm2
    movups %xmm2,-16(%edi)
L052xts_enc_ret:
    movl 116(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
#----------------------------------------------------------------------
# void aesni_xts_decrypt(...) — same argument layout as
#   aesni_xts_encrypt.  Note the tweak is still derived by ENcrypting
#   the IV with key2 (aesenc loop below); only the data path uses
#   aesdec.  When len is not block-aligned, one block is held back
#   (subl %ebx,%eax) for ciphertext stealing.
#   NOTE(review): this function continues beyond this chunk; the
#   remainder of its body follows below.
#----------------------------------------------------------------------
.globl _aesni_xts_decrypt
.align 4
_aesni_xts_decrypt:
L_aesni_xts_decrypt_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 36(%esp),%edx              # key2: tweak key
    movl 40(%esp),%esi              # iv
    movl 240(%edx),%ecx
    movups (%esi),%xmm2
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L055enc1_loop_11:
    .byte 102,15,56,220,209         # encrypt IV -> initial tweak
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L055enc1_loop_11
    .byte 102,15,56,221,209
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx              # key1: data key
    movl %esp,%ebp
    subl $120,%esp
    andl $-16,%esp
    xorl %ebx,%ebx
    testl $15,%eax                  # partial final block?
    setnz %bl
    shll $4,%ebx
    subl %ebx,%eax                  # hold back one block for stealing
    movl $135,96(%esp)              # 0x87: GF(2^128) tweak polynomial
    movl $0,100(%esp)
    movl $1,104(%esp)
    movl $0,108(%esp)
    movl %eax,112(%esp)
    movl %ebp,116(%esp)
    movl 240(%edx),%ecx
    movl %edx,%ebp
    movl %ecx,%ebx
    movdqa %xmm2,%xmm1
    pxor %xmm0,%xmm0
    movdqa 96(%esp),%xmm3
    pcmpgtd %xmm1,%xmm0
    andl $-16,%eax
    subl $96,%eax
    jc L056xts_dec_short
    shrl $1,%ecx
    movl %ecx,%ebx
    jmp L057xts_dec_loop6
.align 4,0x90
L057xts_dec_loop6:
    # generate 6 tweaks by GF doubling, as in the encrypt path
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,16(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,32(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,48(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    pshufd $19,%xmm0,%xmm7
    movdqa %xmm1,64(%esp)
    paddq %xmm1,%xmm1
    movups (%ebp),%xmm0
    pand %xmm3,%xmm7
    movups (%esi),%xmm2
    pxor %xmm1,%xmm7
    movdqu 16(%esi),%xmm3
    xorps %xmm0,%xmm2
    movdqu 32(%esi),%xmm4
    pxor %xmm0,%xmm3
    movdqu 48(%esi),%xmm5
    pxor %xmm0,%xmm4
    movdqu 64(%esi),%xmm6
    pxor %xmm0,%xmm5
    movdqu 80(%esi),%xmm1
    pxor %xmm0,%xmm6
    leal 96(%esi),%esi
    pxor (%esp),%xmm2
    movdqa %xmm7,80(%esp)
    pxor %xmm1,%xmm7
    movups 16(%ebp),%xmm1
    leal 32(%ebp),%edx
    pxor 16(%esp),%xmm3
    .byte 102,15,56,222,209
    pxor 32(%esp),%xmm4
    .byte 102,15,56,222,217
    pxor 48(%esp),%xmm5
    decl %ecx
    .byte 102,15,56,222,225
    pxor 64(%esp),%xmm6
    .byte 102,15,56,222,233
    pxor %xmm0,%xmm7
    .byte 102,15,56,222,241
    movups (%edx),%xmm0
    .byte 102,15,56,222,249
    call L_aesni_decrypt6_enter
    movdqa 80(%esp),%xmm1
    pxor %xmm0,%xmm0
    xorps (%esp),%xmm2
    pcmpgtd %xmm1,%xmm0
    xorps 16(%esp),%xmm3
    movups %xmm2,(%edi)
    xorps 32(%esp),%xmm4
    movups %xmm3,16(%edi)
    xorps 48(%esp),%xmm5
    movups %xmm4,32(%edi)
    xorps 64(%esp),%xmm6
    movups %xmm5,48(%edi)
    xorps %xmm1,%xmm7
    movups %xmm6,64(%edi)
    pshufd $19,%xmm0,%xmm2
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqa 96(%esp),%xmm3
    pxor %xmm0,%xmm0
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    movl %ebx,%ecx
    pxor %xmm2,%xmm1
    subl $96,%eax
    jnc L057xts_dec_loop6
    leal 1(,%ecx,2),%ecx
    movl %ebp,%edx
    movl %ecx,%ebx
L056xts_dec_short:
    addl $96,%eax
    jz L058xts_dec_done6x
    movdqa %xmm1,%xmm5
    cmpl $32,%eax
    jb L059xts_dec_one
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    je L060xts_dec_two
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,%xmm6
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    cmpl $64,%eax
    jb L061xts_dec_three
    pshufd $19,%xmm0,%xmm2
    pxor %xmm0,%xmm0
    movdqa %xmm1,%xmm7
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm2
    pcmpgtd %xmm1,%xmm0
    pxor %xmm2,%xmm1
    movdqa %xmm5,(%esp)
    movdqa %xmm6,16(%esp)
    je L062xts_dec_four
    movdqa %xmm7,32(%esp)
    pshufd $19,%xmm0,%xmm7
    movdqa %xmm1,48(%esp)
    paddq %xmm1,%xmm1
    pand %xmm3,%xmm7
    pxor %xmm1,%xmm7
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    pxor (%esp),%xmm2
    movdqu 48(%esi),%xmm5
    pxor 16(%esp),%xmm3
    movdqu 64(%esi),%xmm6
    pxor 32(%esp),%xmm4
    leal 80(%esi),%esi
    pxor 48(%esp),%xmm5
    movdqa %xmm7,64(%esp)
    pxor %xmm7,%xmm6
    call __aesni_decrypt6
    movaps 64(%esp),%xmm1
    xorps (%esp),%xmm2
    xorps 16(%esp),%xmm3
    xorps 32(%esp),%xmm4
    movups %xmm2,(%edi)
    xorps 48(%esp),%xmm5
    movups %xmm3,16(%edi)
    xorps %xmm1,%xmm6
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    leal 80(%edi),%edi
    jmp L063xts_dec_done
.align 4,0x90
L059xts_dec_one:
    movups (%esi),%xmm2
    leal 16(%esi),%esi
    xorps %xmm5,%xmm2
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L064dec1_loop_12:
    .byte 102,15,56,222,209
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L064dec1_loop_12
    .byte 102,15,56,223,209
    xorps %xmm5,%xmm2
    movups %xmm2,(%edi)
    leal 16(%edi),%edi
    movdqa %xmm5,%xmm1
    jmp L063xts_dec_done
.align 4,0x90
L060xts_dec_two:
    movaps %xmm1,%xmm6
    movups (%esi),%xmm2
    movups 16(%esi),%xmm3
    leal 32(%esi),%esi
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    call __aesni_decrypt3
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    leal 32(%edi),%edi
    movdqa %xmm6,%xmm1
    jmp L063xts_dec_done
.align 4,0x90
L061xts_dec_three:
    movaps %xmm1,%xmm7
    movups (%esi),%xmm2
    movups 16(%esi),%xmm3
    movups 32(%esi),%xmm4
    leal 48(%esi),%esi
xorps %xmm5,%xmm2 1531 xorps %xmm6,%xmm3 1532 xorps %xmm7,%xmm4 1533 call __aesni_decrypt3 1534 xorps %xmm5,%xmm2 1535 xorps %xmm6,%xmm3 1536 xorps %xmm7,%xmm4 1537 movups %xmm2,(%edi) 1538 movups %xmm3,16(%edi) 1539 movups %xmm4,32(%edi) 1540 leal 48(%edi),%edi 1541 movdqa %xmm7,%xmm1 1542 jmp L063xts_dec_done 1543 .align 4,0x90 1544 L062xts_dec_four: 1545 movaps %xmm1,%xmm6 1546 movups (%esi),%xmm2 1547 movups 16(%esi),%xmm3 1548 movups 32(%esi),%xmm4 1549 xorps (%esp),%xmm2 1550 movups 48(%esi),%xmm5 1551 leal 64(%esi),%esi 1552 xorps 16(%esp),%xmm3 1553 xorps %xmm7,%xmm4 1554 xorps %xmm6,%xmm5 1555 call __aesni_decrypt4 1556 xorps (%esp),%xmm2 1557 xorps 16(%esp),%xmm3 1558 xorps %xmm7,%xmm4 1559 movups %xmm2,(%edi) 1560 xorps %xmm6,%xmm5 1561 movups %xmm3,16(%edi) 1562 movups %xmm4,32(%edi) 1563 movups %xmm5,48(%edi) 1564 leal 64(%edi),%edi 1565 movdqa %xmm6,%xmm1 1566 jmp L063xts_dec_done 1567 .align 4,0x90 1568 L058xts_dec_done6x: 1569 movl 112(%esp),%eax 1570 andl $15,%eax 1571 jz L065xts_dec_ret 1572 movl %eax,112(%esp) 1573 jmp L066xts_dec_only_one_more 1574 .align 4,0x90 1575 L063xts_dec_done: 1576 movl 112(%esp),%eax 1577 pxor %xmm0,%xmm0 1578 andl $15,%eax 1579 jz L065xts_dec_ret 1580 pcmpgtd %xmm1,%xmm0 1581 movl %eax,112(%esp) 1582 pshufd $19,%xmm0,%xmm2 1583 pxor %xmm0,%xmm0 1584 movdqa 96(%esp),%xmm3 1585 paddq %xmm1,%xmm1 1586 pand %xmm3,%xmm2 1587 pcmpgtd %xmm1,%xmm0 1588 pxor %xmm2,%xmm1 1589 L066xts_dec_only_one_more: 1590 pshufd $19,%xmm0,%xmm5 1591 movdqa %xmm1,%xmm6 1592 paddq %xmm1,%xmm1 1593 pand %xmm3,%xmm5 1594 pxor %xmm1,%xmm5 1595 movl %ebp,%edx 1596 movl %ebx,%ecx 1597 movups (%esi),%xmm2 1598 xorps %xmm5,%xmm2 1599 movups (%edx),%xmm0 1600 movups 16(%edx),%xmm1 1601 leal 32(%edx),%edx 1602 xorps %xmm0,%xmm2 1603 L067dec1_loop_13: 1604 .byte 102,15,56,222,209 1605 decl %ecx 1606 movups (%edx),%xmm1 1607 leal 16(%edx),%edx 1608 jnz L067dec1_loop_13 1609 .byte 102,15,56,223,209 1610 xorps %xmm5,%xmm2 1611 movups %xmm2,(%edi) 1612 
# ----------------------------------------------------------------------
# Ciphertext-stealing tail of _aesni_xts_decrypt (function entry is
# earlier in the file).  Swaps the final partial block's bytes with the
# head of the last full plaintext block, then decrypts that block with
# the second-to-last tweak (%xmm6).  %eax = number of trailing bytes.
# ----------------------------------------------------------------------
L068xts_dec_steal:
	movzbl	16(%esi),%ecx		# next input byte of the partial block
	movzbl	(%edi),%edx		# byte of the just-written plaintext block
	leal	1(%esi),%esi
	movb	%cl,(%edi)		# steal: input byte into current block
	movb	%dl,16(%edi)		# displaced plaintext byte into tail
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	L068xts_dec_steal
	subl	112(%esp),%edi		# rewind %edi to start of stolen block
	movl	%ebp,%edx		# %edx = key schedule
	movl	%ebx,%ecx		# %ecx = round counter
	movups	(%edi),%xmm2
	xorps	%xmm6,%xmm2		# apply tweak before decryption
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# round-0 AddRoundKey
L069dec1_loop_14:
.byte	102,15,56,222,209		# aesdec	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L069dec1_loop_14
.byte	102,15,56,223,209		# aesdeclast	%xmm1,%xmm2
	xorps	%xmm6,%xmm2		# apply tweak after decryption
	movups	%xmm2,(%edi)
L065xts_dec_ret:
	movl	116(%esp),%esp		# restore caller's stack pointer
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# ----------------------------------------------------------------------
# void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t len, const AES_KEY *key,
#                        unsigned char *ivec, int enc);
# cdecl, i386.  After the 4 register pushes the stack arguments sit at:
#   20(%esp)=in  24(%esp)=out  28(%esp)=len  32(%esp)=key
#   36(%esp)=ivec  40(%esp)=enc (0 => decrypt)
# The stack is switched to a 16-byte-aligned scratch area; the caller's
# %esp is stashed at 16(%esp) of the new frame.  The final IV is written
# back through ivec on exit.
# ----------------------------------------------------------------------
.globl	_aesni_cbc_encrypt
.align	4
_aesni_cbc_encrypt:
L_aesni_cbc_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# %esi = in
	movl	%esp,%ebx
	movl	24(%esp),%edi		# %edi = out
	subl	$24,%ebx		# carve out scratch frame ...
	movl	28(%esp),%eax		# %eax = len
	andl	$-16,%ebx		# ... and align it to 16 bytes
	movl	32(%esp),%edx		# %edx = key
	movl	36(%esp),%ebp		# %ebp = ivec
	testl	%eax,%eax
	jz	L070cbc_abort		# len == 0: nothing to do
	cmpl	$0,40(%esp)		# enc flag (tested; branch below)
	xchgl	%esp,%ebx		# switch to aligned frame
	movups	(%ebp),%xmm7		# %xmm7 = IV
	movl	240(%edx),%ecx		# round counter from key schedule
	movl	%edx,%ebp		# %ebp = key (ivec no longer needed)
	movl	%ebx,16(%esp)		# save caller's %esp
	movl	%ecx,%ebx		# %ebx = round count (reload per block)
	je	L071cbc_decrypt
	movaps	%xmm7,%xmm2		# %xmm2 = running CBC chain value
	cmpl	$16,%eax
	jb	L072cbc_enc_tail	# less than one block
	subl	$16,%eax
	jmp	L073cbc_enc_loop
.align	4,0x90
L073cbc_enc_loop:
	movups	(%esi),%xmm7		# next plaintext block
	leal	16(%esi),%esi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm7		# fold round-0 key into plaintext ...
	leal	32(%edx),%edx
	xorps	%xmm7,%xmm2		# ... then XOR with chain value
L074enc1_loop_15:
.byte	102,15,56,220,209		# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L074enc1_loop_15
.byte	102,15,56,221,209		# aesenclast	%xmm1,%xmm2
	movl	%ebx,%ecx		# reset round counter
	movl	%ebp,%edx		# reset key pointer
	movups	%xmm2,(%edi)		# write ciphertext (= next chain value)
	leal	16(%edi),%edi
	subl	$16,%eax
	jnc	L073cbc_enc_loop
	addl	$16,%eax
	jnz	L072cbc_enc_tail	# partial final block remains
	movaps	%xmm2,%xmm7		# %xmm7 = final IV
	jmp	L075cbc_ret
L072cbc_enc_tail:
	movl	%eax,%ecx		# %ecx = leftover byte count
.long	2767451785			# mov %esi,%esi; rep movsb — copy tail to out
	movl	$16,%ecx
	subl	%eax,%ecx		# bytes needed to pad to a full block
	xorl	%eax,%eax
.long	2868115081			# mov %esi,%esi; rep stosb — zero-pad
	leal	-16(%edi),%edi		# rewind to padded block
	movl	%ebx,%ecx
	movl	%edi,%esi		# encrypt the padded block in place
	movl	%ebp,%edx
	jmp	L073cbc_enc_loop
.align	4,0x90
L071cbc_decrypt:
	cmpl	$80,%eax
	jbe	L076cbc_dec_tail	# at most 5 blocks: straight to tail
	movaps	%xmm7,(%esp)		# spill IV to scratch
	subl	$80,%eax
	jmp	L077cbc_dec_loop6_enter
.align	4,0x90
L078cbc_dec_loop6:
	movaps	%xmm0,(%esp)		# carry last ciphertext block as next IV
	movups	%xmm7,(%edi)		# deferred store of 6th plaintext block
	leal	16(%edi),%edi
L077cbc_dec_loop6_enter:
	movdqu	(%esi),%xmm2		# load 6 ciphertext blocks
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	call	__aesni_decrypt6	# 6-way interleaved decryption
	movups	(%esi),%xmm1		# re-load ciphertext for CBC chaining
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2		# block0 ^= IV
	xorps	%xmm1,%xmm3		# block[i] ^= ciphertext[i-1]
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm6
	movups	80(%esi),%xmm0		# %xmm0 = last ciphertext (next IV)
	xorps	%xmm1,%xmm7
	movups	%xmm2,(%edi)		# store 5 blocks; 6th deferred to loop top
	movups	%xmm3,16(%edi)
	leal	96(%esi),%esi
	movups	%xmm4,32(%edi)
	movl	%ebx,%ecx		# reset round counter
	movups	%xmm5,48(%edi)
	movl	%ebp,%edx		# reset key pointer
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	subl	$96,%eax
	ja	L078cbc_dec_loop6
	movaps	%xmm7,%xmm2		# pending 6th plaintext block
	movaps	%xmm0,%xmm7		# %xmm7 = IV for the tail
	addl	$80,%eax
	jle	L079cbc_dec_tail_collected
	movups	%xmm2,(%edi)		# flush pending block before tail
	leal	16(%edi),%edi
L076cbc_dec_tail:
	movups	(%esi),%xmm2
	movaps	%xmm2,%xmm6		# keep ciphertext copies for chaining
	cmpl	$16,%eax
	jbe	L080cbc_dec_one
	movups	16(%esi),%xmm3
	movaps	%xmm3,%xmm5
	cmpl	$32,%eax
	jbe	L081cbc_dec_two
	movups	32(%esi),%xmm4
	cmpl	$48,%eax
	jbe	L082cbc_dec_three
	movups	48(%esi),%xmm5
	cmpl	$64,%eax
	jbe	L083cbc_dec_four
	movups	64(%esi),%xmm6		# 5-block tail: decrypt as 6 with junk 6th
	movaps	%xmm7,(%esp)
	movups	(%esi),%xmm2
	xorps	%xmm7,%xmm7
	call	__aesni_decrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2		# CBC chaining as in the main loop
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm7		# %xmm7 = last ciphertext = final IV
	xorps	%xmm0,%xmm6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movaps	%xmm6,%xmm2		# 5th block: stored by tail_collected
	subl	$80,%eax
	jmp	L079cbc_dec_tail_collected
.align	4,0x90
L080cbc_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# round-0 AddRoundKey
L084dec1_loop_16:
.byte	102,15,56,222,209		# aesdec	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L084dec1_loop_16
.byte	102,15,56,223,209		# aesdeclast	%xmm1,%xmm2
	xorps	%xmm7,%xmm2		# plaintext = D(ct) ^ IV
	movaps	%xmm6,%xmm7		# final IV = this ciphertext
	subl	$16,%eax
	jmp	L079cbc_dec_tail_collected
.align	4,0x90
L081cbc_dec_two:
	xorps	%xmm4,%xmm4		# dummy third block
	call	__aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movaps	%xmm3,%xmm2		# last block stored by tail_collected
	leal	16(%edi),%edi
	movaps	%xmm5,%xmm7		# final IV = 2nd ciphertext
	subl	$32,%eax
	jmp	L079cbc_dec_tail_collected
.align	4,0x90
L082cbc_dec_three:
	call	__aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm5,%xmm4
	movups	%xmm2,(%edi)
	movaps	%xmm4,%xmm2		# last block stored by tail_collected
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movups	32(%esi),%xmm7		# final IV = 3rd ciphertext
	subl	$48,%eax
	jmp	L079cbc_dec_tail_collected
.align	4,0x90
L083cbc_dec_four:
	call	__aesni_decrypt4
	movups	16(%esi),%xmm1
	movups	32(%esi),%xmm0
	xorps	%xmm7,%xmm2
	movups	48(%esi),%xmm7		# final IV = 4th ciphertext
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movaps	%xmm5,%xmm2		# last block stored by tail_collected
	subl	$64,%eax
L079cbc_dec_tail_collected:
	andl	$15,%eax
	jnz	L085cbc_dec_tail_partial
	movups	%xmm2,(%edi)		# whole final block
	jmp	L075cbc_ret
.align	4,0x90
L085cbc_dec_tail_partial:
	movaps	%xmm2,(%esp)		# stage full plaintext block on stack
	movl	$16,%ecx
	movl	%esp,%esi
	subl	%eax,%ecx		# %ecx = 16 - residue = bytes to emit
.long	2767451785			# mov %esi,%esi; rep movsb — copy partial block
L075cbc_ret:
	movl	16(%esp),%esp		# restore caller's stack pointer
	movl	36(%esp),%ebp		# reload ivec argument
	movups	%xmm7,(%ebp)		# write back final IV
L070cbc_abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# ----------------------------------------------------------------------
# Internal key-expansion core (register-argument convention, not cdecl):
#   %eax = user key, %ecx = key bits (128/192/256), %edx = schedule+16
# Returns %eax = 0 on success, -1 bad pointer, -2 bad key length.
# Round keys are produced via AESKEYGENASSIST with the round constants
# 1,2,4,8,16,32,64,128(,27,54).  The loop counter written at schedule
# offset 240 is 9/11/13, i.e. the number of aesenc rounds before the
# final aesenclast (see the single-block cipher loops above).
# ----------------------------------------------------------------------
.align	4
__aesni_set_encrypt_key:
	testl	%eax,%eax
	jz	L086bad_pointer		# NULL user key
	testl	%edx,%edx
	jz	L086bad_pointer		# NULL schedule
	movups	(%eax),%xmm0		# first 128 bits of user key
	xorps	%xmm4,%xmm4		# scratch, assumed zero by key_* helpers
	leal	16(%edx),%edx
	cmpl	$256,%ecx
	je	L08714rounds
	cmpl	$192,%ecx
	je	L08812rounds
	cmpl	$128,%ecx
	jne	L089bad_keybits
.align	4,0x90
L09010rounds:				# AES-128 schedule
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)		# round-0 key = user key
.byte	102,15,58,223,200,1		# aeskeygenassist	$1,%xmm0,%xmm1
	call	L091key_128_cold
.byte	102,15,58,223,200,2		# aeskeygenassist	$2,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,4		# aeskeygenassist	$4,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,8		# aeskeygenassist	$8,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,16		# aeskeygenassist	$16,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,32		# aeskeygenassist	$32,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,64		# aeskeygenassist	$64,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,128		# aeskeygenassist	$128,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,27		# aeskeygenassist	$27,%xmm0,%xmm1
	call	L092key_128
.byte	102,15,58,223,200,54		# aeskeygenassist	$54,%xmm0,%xmm1
	call	L092key_128
	movups	%xmm0,(%edx)		# last round key
	movl	%ecx,80(%edx)		# rounds-1 at schedule offset 240
	xorl	%eax,%eax		# success
	ret
.align	4,0x90
L092key_128:
	movups	%xmm0,(%edx)		# store previous round key
	leal	16(%edx),%edx
L091key_128_cold:
	shufps	$16,%xmm0,%xmm4		# fold in key<<32 ...
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4	# ... and key<<64
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast keygenassist result word
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L08812rounds:				# AES-192 schedule
	movq	16(%eax),%xmm2		# remaining 64 bits of user key
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1		# aeskeygenassist	$1,%xmm2,%xmm1
	call	L093key_192a_cold
.byte	102,15,58,223,202,2		# aeskeygenassist	$2,%xmm2,%xmm1
	call	L094key_192b
.byte	102,15,58,223,202,4		# aeskeygenassist	$4,%xmm2,%xmm1
	call	L095key_192a
.byte	102,15,58,223,202,8		# aeskeygenassist	$8,%xmm2,%xmm1
	call	L094key_192b
.byte	102,15,58,223,202,16		# aeskeygenassist	$16,%xmm2,%xmm1
	call	L095key_192a
.byte	102,15,58,223,202,32		# aeskeygenassist	$32,%xmm2,%xmm1
	call	L094key_192b
.byte	102,15,58,223,202,64		# aeskeygenassist	$64,%xmm2,%xmm1
	call	L095key_192a
.byte	102,15,58,223,202,128		# aeskeygenassist	$128,%xmm2,%xmm1
	call	L094key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)		# rounds-1 at schedule offset 240
	xorl	%eax,%eax		# success
	ret
.align	4,0x90
L095key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	4,0x90
L093key_192a_cold:
	movaps	%xmm2,%xmm5		# stash high 64 bits for key_192b
L096key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		# broadcast keygenassist word 1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3	# propagate into the odd 64 bits
	pxor	%xmm3,%xmm2
	ret
.align	4,0x90
L094key_192b:
	movaps	%xmm0,%xmm3		# repack 192-bit chunks into 128-bit slots
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	L096key_192b_warm
.align	4,0x90
L08714rounds:				# AES-256 schedule
	movups	16(%eax),%xmm2		# second 128 bits of user key
	movl	$13,%ecx
	leal	16(%edx),%edx
	movups	%xmm0,-32(%edx)		# round-0 key
	movups	%xmm2,-16(%edx)		# round-1 key
.byte	102,15,58,223,202,1		# aeskeygenassist	$1,%xmm2,%xmm1
	call	L097key_256a_cold
.byte	102,15,58,223,200,1		# aeskeygenassist	$1,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,2		# aeskeygenassist	$2,%xmm2,%xmm1
	call	L099key_256a
.byte	102,15,58,223,200,2		# aeskeygenassist	$2,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,4		# aeskeygenassist	$4,%xmm2,%xmm1
	call	L099key_256a
.byte	102,15,58,223,200,4		# aeskeygenassist	$4,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,8		# aeskeygenassist	$8,%xmm2,%xmm1
	call	L099key_256a
.byte	102,15,58,223,200,8		# aeskeygenassist	$8,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,16		# aeskeygenassist	$16,%xmm2,%xmm1
	call	L099key_256a
.byte	102,15,58,223,200,16		# aeskeygenassist	$16,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,32		# aeskeygenassist	$32,%xmm2,%xmm1
	call	L099key_256a
.byte	102,15,58,223,200,32		# aeskeygenassist	$32,%xmm0,%xmm1
	call	L098key_256b
.byte	102,15,58,223,202,64		# aeskeygenassist	$64,%xmm2,%xmm1
	call	L099key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)		# rounds-1 at schedule offset 240
	xorl	%eax,%eax		# success
	ret
.align	4,0x90
L099key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
L097key_256a_cold:
	shufps	$16,%xmm0,%xmm4		# even round: same mixing as AES-128
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L098key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4		# odd round: mix into the second half
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	# broadcast word 2 (no RotWord round)
	xorps	%xmm1,%xmm2
	ret
.align	2,0x90
L086bad_pointer:
	movl	$-1,%eax		# NULL key or schedule pointer
	ret
.align	2,0x90
L089bad_keybits:
	movl	$-2,%eax		# key length not 128/192/256
	ret
# ----------------------------------------------------------------------
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key);   (cdecl wrapper)
# ----------------------------------------------------------------------
.globl	_aesni_set_encrypt_key
.align	4
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
	movl	4(%esp),%eax		# userKey
	movl	8(%esp),%ecx		# bits
	movl	12(%esp),%edx		# key schedule
	call	__aesni_set_encrypt_key
	ret
# ----------------------------------------------------------------------
# int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key);
# Builds the encryption schedule, then converts it in place: round keys
# are reversed end-for-end and the interior ones passed through AESIMC
# (InvMixColumns), as required by the Equivalent Inverse Cipher.
# ----------------------------------------------------------------------
.globl	_aesni_set_decrypt_key
.align	4
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
	movl	4(%esp),%eax		# userKey
	movl	8(%esp),%ecx		# bits
	movl	12(%esp),%edx		# key schedule
	call	__aesni_set_encrypt_key
	movl	12(%esp),%edx
	shll	$4,%ecx			# %ecx(=rounds-1) * 16
	testl	%eax,%eax
	jnz	L100dec_key_ret		# propagate expansion failure
	leal	16(%edx,%ecx,1),%eax	# %eax -> last round key
	movups	(%edx),%xmm0		# swap first/last round keys (no IMC)
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
L101dec_key_inverse:
	movups	(%edx),%xmm0		# walk inward from both ends
	movups	(%eax),%xmm1
.byte	102,15,56,219,192		# aesimc	%xmm0,%xmm0
.byte	102,15,56,219,201		# aesimc	%xmm1,%xmm1
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)		# store swapped + transformed keys
	movups	%xmm1,-16(%edx)
	cmpl	%edx,%eax
	ja	L101dec_key_inverse
	movups	(%edx),%xmm0		# middle key: transform in place
.byte	102,15,56,219,192		# aesimc	%xmm0,%xmm0
	movups	%xmm0,(%edx)
	xorl	%eax,%eax		# success
L100dec_key_ret:
	ret
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte	115,108,46,111,114,103,62,0