1 #if defined(__i386__) 2 .file "src/crypto/aes/asm/aesni-x86.S" 3 .text 4 .globl _aesni_encrypt 5 .private_extern _aesni_encrypt 6 .align 4 7 _aesni_encrypt: 8 L_aesni_encrypt_begin: 9 movl 4(%esp),%eax 10 movl 12(%esp),%edx 11 movups (%eax),%xmm2 12 movl 240(%edx),%ecx 13 movl 8(%esp),%eax 14 movups (%edx),%xmm0 15 movups 16(%edx),%xmm1 16 leal 32(%edx),%edx 17 xorps %xmm0,%xmm2 18 L000enc1_loop_1: 19 .byte 102,15,56,220,209 20 decl %ecx 21 movups (%edx),%xmm1 22 leal 16(%edx),%edx 23 jnz L000enc1_loop_1 24 .byte 102,15,56,221,209 25 pxor %xmm0,%xmm0 26 pxor %xmm1,%xmm1 27 movups %xmm2,(%eax) 28 pxor %xmm2,%xmm2 29 ret 30 .globl _aesni_decrypt 31 .private_extern _aesni_decrypt 32 .align 4 33 _aesni_decrypt: 34 L_aesni_decrypt_begin: 35 movl 4(%esp),%eax 36 movl 12(%esp),%edx 37 movups (%eax),%xmm2 38 movl 240(%edx),%ecx 39 movl 8(%esp),%eax 40 movups (%edx),%xmm0 41 movups 16(%edx),%xmm1 42 leal 32(%edx),%edx 43 xorps %xmm0,%xmm2 44 L001dec1_loop_2: 45 .byte 102,15,56,222,209 46 decl %ecx 47 movups (%edx),%xmm1 48 leal 16(%edx),%edx 49 jnz L001dec1_loop_2 50 .byte 102,15,56,223,209 51 pxor %xmm0,%xmm0 52 pxor %xmm1,%xmm1 53 movups %xmm2,(%eax) 54 pxor %xmm2,%xmm2 55 ret 56 .private_extern __aesni_encrypt2 57 .align 4 58 __aesni_encrypt2: 59 movups (%edx),%xmm0 60 shll $4,%ecx 61 movups 16(%edx),%xmm1 62 xorps %xmm0,%xmm2 63 pxor %xmm0,%xmm3 64 movups 32(%edx),%xmm0 65 leal 32(%edx,%ecx,1),%edx 66 negl %ecx 67 addl $16,%ecx 68 L002enc2_loop: 69 .byte 102,15,56,220,209 70 .byte 102,15,56,220,217 71 movups (%edx,%ecx,1),%xmm1 72 addl $32,%ecx 73 .byte 102,15,56,220,208 74 .byte 102,15,56,220,216 75 movups -16(%edx,%ecx,1),%xmm0 76 jnz L002enc2_loop 77 .byte 102,15,56,220,209 78 .byte 102,15,56,220,217 79 .byte 102,15,56,221,208 80 .byte 102,15,56,221,216 81 ret 82 .private_extern __aesni_decrypt2 83 .align 4 84 __aesni_decrypt2: 85 movups (%edx),%xmm0 86 shll $4,%ecx 87 movups 16(%edx),%xmm1 88 xorps %xmm0,%xmm2 89 pxor %xmm0,%xmm3 90 movups 32(%edx),%xmm0 91 leal 
32(%edx,%ecx,1),%edx 92 negl %ecx 93 addl $16,%ecx 94 L003dec2_loop: 95 .byte 102,15,56,222,209 96 .byte 102,15,56,222,217 97 movups (%edx,%ecx,1),%xmm1 98 addl $32,%ecx 99 .byte 102,15,56,222,208 100 .byte 102,15,56,222,216 101 movups -16(%edx,%ecx,1),%xmm0 102 jnz L003dec2_loop 103 .byte 102,15,56,222,209 104 .byte 102,15,56,222,217 105 .byte 102,15,56,223,208 106 .byte 102,15,56,223,216 107 ret 108 .private_extern __aesni_encrypt3 109 .align 4 110 __aesni_encrypt3: 111 movups (%edx),%xmm0 112 shll $4,%ecx 113 movups 16(%edx),%xmm1 114 xorps %xmm0,%xmm2 115 pxor %xmm0,%xmm3 116 pxor %xmm0,%xmm4 117 movups 32(%edx),%xmm0 118 leal 32(%edx,%ecx,1),%edx 119 negl %ecx 120 addl $16,%ecx 121 L004enc3_loop: 122 .byte 102,15,56,220,209 123 .byte 102,15,56,220,217 124 .byte 102,15,56,220,225 125 movups (%edx,%ecx,1),%xmm1 126 addl $32,%ecx 127 .byte 102,15,56,220,208 128 .byte 102,15,56,220,216 129 .byte 102,15,56,220,224 130 movups -16(%edx,%ecx,1),%xmm0 131 jnz L004enc3_loop 132 .byte 102,15,56,220,209 133 .byte 102,15,56,220,217 134 .byte 102,15,56,220,225 135 .byte 102,15,56,221,208 136 .byte 102,15,56,221,216 137 .byte 102,15,56,221,224 138 ret 139 .private_extern __aesni_decrypt3 140 .align 4 141 __aesni_decrypt3: 142 movups (%edx),%xmm0 143 shll $4,%ecx 144 movups 16(%edx),%xmm1 145 xorps %xmm0,%xmm2 146 pxor %xmm0,%xmm3 147 pxor %xmm0,%xmm4 148 movups 32(%edx),%xmm0 149 leal 32(%edx,%ecx,1),%edx 150 negl %ecx 151 addl $16,%ecx 152 L005dec3_loop: 153 .byte 102,15,56,222,209 154 .byte 102,15,56,222,217 155 .byte 102,15,56,222,225 156 movups (%edx,%ecx,1),%xmm1 157 addl $32,%ecx 158 .byte 102,15,56,222,208 159 .byte 102,15,56,222,216 160 .byte 102,15,56,222,224 161 movups -16(%edx,%ecx,1),%xmm0 162 jnz L005dec3_loop 163 .byte 102,15,56,222,209 164 .byte 102,15,56,222,217 165 .byte 102,15,56,222,225 166 .byte 102,15,56,223,208 167 .byte 102,15,56,223,216 168 .byte 102,15,56,223,224 169 ret 170 .private_extern __aesni_encrypt4 171 .align 4 172 __aesni_encrypt4: 173 
movups (%edx),%xmm0 174 movups 16(%edx),%xmm1 175 shll $4,%ecx 176 xorps %xmm0,%xmm2 177 pxor %xmm0,%xmm3 178 pxor %xmm0,%xmm4 179 pxor %xmm0,%xmm5 180 movups 32(%edx),%xmm0 181 leal 32(%edx,%ecx,1),%edx 182 negl %ecx 183 .byte 15,31,64,0 184 addl $16,%ecx 185 L006enc4_loop: 186 .byte 102,15,56,220,209 187 .byte 102,15,56,220,217 188 .byte 102,15,56,220,225 189 .byte 102,15,56,220,233 190 movups (%edx,%ecx,1),%xmm1 191 addl $32,%ecx 192 .byte 102,15,56,220,208 193 .byte 102,15,56,220,216 194 .byte 102,15,56,220,224 195 .byte 102,15,56,220,232 196 movups -16(%edx,%ecx,1),%xmm0 197 jnz L006enc4_loop 198 .byte 102,15,56,220,209 199 .byte 102,15,56,220,217 200 .byte 102,15,56,220,225 201 .byte 102,15,56,220,233 202 .byte 102,15,56,221,208 203 .byte 102,15,56,221,216 204 .byte 102,15,56,221,224 205 .byte 102,15,56,221,232 206 ret 207 .private_extern __aesni_decrypt4 208 .align 4 209 __aesni_decrypt4: 210 movups (%edx),%xmm0 211 movups 16(%edx),%xmm1 212 shll $4,%ecx 213 xorps %xmm0,%xmm2 214 pxor %xmm0,%xmm3 215 pxor %xmm0,%xmm4 216 pxor %xmm0,%xmm5 217 movups 32(%edx),%xmm0 218 leal 32(%edx,%ecx,1),%edx 219 negl %ecx 220 .byte 15,31,64,0 221 addl $16,%ecx 222 L007dec4_loop: 223 .byte 102,15,56,222,209 224 .byte 102,15,56,222,217 225 .byte 102,15,56,222,225 226 .byte 102,15,56,222,233 227 movups (%edx,%ecx,1),%xmm1 228 addl $32,%ecx 229 .byte 102,15,56,222,208 230 .byte 102,15,56,222,216 231 .byte 102,15,56,222,224 232 .byte 102,15,56,222,232 233 movups -16(%edx,%ecx,1),%xmm0 234 jnz L007dec4_loop 235 .byte 102,15,56,222,209 236 .byte 102,15,56,222,217 237 .byte 102,15,56,222,225 238 .byte 102,15,56,222,233 239 .byte 102,15,56,223,208 240 .byte 102,15,56,223,216 241 .byte 102,15,56,223,224 242 .byte 102,15,56,223,232 243 ret 244 .private_extern __aesni_encrypt6 245 .align 4 246 __aesni_encrypt6: 247 movups (%edx),%xmm0 248 shll $4,%ecx 249 movups 16(%edx),%xmm1 250 xorps %xmm0,%xmm2 251 pxor %xmm0,%xmm3 252 pxor %xmm0,%xmm4 253 .byte 102,15,56,220,209 254 pxor 
%xmm0,%xmm5 255 pxor %xmm0,%xmm6 256 .byte 102,15,56,220,217 257 leal 32(%edx,%ecx,1),%edx 258 negl %ecx 259 .byte 102,15,56,220,225 260 pxor %xmm0,%xmm7 261 movups (%edx,%ecx,1),%xmm0 262 addl $16,%ecx 263 jmp L008_aesni_encrypt6_inner 264 .align 4,0x90 265 L009enc6_loop: 266 .byte 102,15,56,220,209 267 .byte 102,15,56,220,217 268 .byte 102,15,56,220,225 269 L008_aesni_encrypt6_inner: 270 .byte 102,15,56,220,233 271 .byte 102,15,56,220,241 272 .byte 102,15,56,220,249 273 L_aesni_encrypt6_enter: 274 movups (%edx,%ecx,1),%xmm1 275 addl $32,%ecx 276 .byte 102,15,56,220,208 277 .byte 102,15,56,220,216 278 .byte 102,15,56,220,224 279 .byte 102,15,56,220,232 280 .byte 102,15,56,220,240 281 .byte 102,15,56,220,248 282 movups -16(%edx,%ecx,1),%xmm0 283 jnz L009enc6_loop 284 .byte 102,15,56,220,209 285 .byte 102,15,56,220,217 286 .byte 102,15,56,220,225 287 .byte 102,15,56,220,233 288 .byte 102,15,56,220,241 289 .byte 102,15,56,220,249 290 .byte 102,15,56,221,208 291 .byte 102,15,56,221,216 292 .byte 102,15,56,221,224 293 .byte 102,15,56,221,232 294 .byte 102,15,56,221,240 295 .byte 102,15,56,221,248 296 ret 297 .private_extern __aesni_decrypt6 298 .align 4 299 __aesni_decrypt6: 300 movups (%edx),%xmm0 301 shll $4,%ecx 302 movups 16(%edx),%xmm1 303 xorps %xmm0,%xmm2 304 pxor %xmm0,%xmm3 305 pxor %xmm0,%xmm4 306 .byte 102,15,56,222,209 307 pxor %xmm0,%xmm5 308 pxor %xmm0,%xmm6 309 .byte 102,15,56,222,217 310 leal 32(%edx,%ecx,1),%edx 311 negl %ecx 312 .byte 102,15,56,222,225 313 pxor %xmm0,%xmm7 314 movups (%edx,%ecx,1),%xmm0 315 addl $16,%ecx 316 jmp L010_aesni_decrypt6_inner 317 .align 4,0x90 318 L011dec6_loop: 319 .byte 102,15,56,222,209 320 .byte 102,15,56,222,217 321 .byte 102,15,56,222,225 322 L010_aesni_decrypt6_inner: 323 .byte 102,15,56,222,233 324 .byte 102,15,56,222,241 325 .byte 102,15,56,222,249 326 L_aesni_decrypt6_enter: 327 movups (%edx,%ecx,1),%xmm1 328 addl $32,%ecx 329 .byte 102,15,56,222,208 330 .byte 102,15,56,222,216 331 .byte 102,15,56,222,224 332 
.byte 102,15,56,222,232 333 .byte 102,15,56,222,240 334 .byte 102,15,56,222,248 335 movups -16(%edx,%ecx,1),%xmm0 336 jnz L011dec6_loop 337 .byte 102,15,56,222,209 338 .byte 102,15,56,222,217 339 .byte 102,15,56,222,225 340 .byte 102,15,56,222,233 341 .byte 102,15,56,222,241 342 .byte 102,15,56,222,249 343 .byte 102,15,56,223,208 344 .byte 102,15,56,223,216 345 .byte 102,15,56,223,224 346 .byte 102,15,56,223,232 347 .byte 102,15,56,223,240 348 .byte 102,15,56,223,248 349 ret 350 .globl _aesni_ecb_encrypt 351 .private_extern _aesni_ecb_encrypt 352 .align 4 353 _aesni_ecb_encrypt: 354 L_aesni_ecb_encrypt_begin: 355 pushl %ebp 356 pushl %ebx 357 pushl %esi 358 pushl %edi 359 movl 20(%esp),%esi 360 movl 24(%esp),%edi 361 movl 28(%esp),%eax 362 movl 32(%esp),%edx 363 movl 36(%esp),%ebx 364 andl $-16,%eax 365 jz L012ecb_ret 366 movl 240(%edx),%ecx 367 testl %ebx,%ebx 368 jz L013ecb_decrypt 369 movl %edx,%ebp 370 movl %ecx,%ebx 371 cmpl $96,%eax 372 jb L014ecb_enc_tail 373 movdqu (%esi),%xmm2 374 movdqu 16(%esi),%xmm3 375 movdqu 32(%esi),%xmm4 376 movdqu 48(%esi),%xmm5 377 movdqu 64(%esi),%xmm6 378 movdqu 80(%esi),%xmm7 379 leal 96(%esi),%esi 380 subl $96,%eax 381 jmp L015ecb_enc_loop6_enter 382 .align 4,0x90 383 L016ecb_enc_loop6: 384 movups %xmm2,(%edi) 385 movdqu (%esi),%xmm2 386 movups %xmm3,16(%edi) 387 movdqu 16(%esi),%xmm3 388 movups %xmm4,32(%edi) 389 movdqu 32(%esi),%xmm4 390 movups %xmm5,48(%edi) 391 movdqu 48(%esi),%xmm5 392 movups %xmm6,64(%edi) 393 movdqu 64(%esi),%xmm6 394 movups %xmm7,80(%edi) 395 leal 96(%edi),%edi 396 movdqu 80(%esi),%xmm7 397 leal 96(%esi),%esi 398 L015ecb_enc_loop6_enter: 399 call __aesni_encrypt6 400 movl %ebp,%edx 401 movl %ebx,%ecx 402 subl $96,%eax 403 jnc L016ecb_enc_loop6 404 movups %xmm2,(%edi) 405 movups %xmm3,16(%edi) 406 movups %xmm4,32(%edi) 407 movups %xmm5,48(%edi) 408 movups %xmm6,64(%edi) 409 movups %xmm7,80(%edi) 410 leal 96(%edi),%edi 411 addl $96,%eax 412 jz L012ecb_ret 413 L014ecb_enc_tail: 414 movups (%esi),%xmm2 415 
cmpl $32,%eax 416 jb L017ecb_enc_one 417 movups 16(%esi),%xmm3 418 je L018ecb_enc_two 419 movups 32(%esi),%xmm4 420 cmpl $64,%eax 421 jb L019ecb_enc_three 422 movups 48(%esi),%xmm5 423 je L020ecb_enc_four 424 movups 64(%esi),%xmm6 425 xorps %xmm7,%xmm7 426 call __aesni_encrypt6 427 movups %xmm2,(%edi) 428 movups %xmm3,16(%edi) 429 movups %xmm4,32(%edi) 430 movups %xmm5,48(%edi) 431 movups %xmm6,64(%edi) 432 jmp L012ecb_ret 433 .align 4,0x90 434 L017ecb_enc_one: 435 movups (%edx),%xmm0 436 movups 16(%edx),%xmm1 437 leal 32(%edx),%edx 438 xorps %xmm0,%xmm2 439 L021enc1_loop_3: 440 .byte 102,15,56,220,209 441 decl %ecx 442 movups (%edx),%xmm1 443 leal 16(%edx),%edx 444 jnz L021enc1_loop_3 445 .byte 102,15,56,221,209 446 movups %xmm2,(%edi) 447 jmp L012ecb_ret 448 .align 4,0x90 449 L018ecb_enc_two: 450 call __aesni_encrypt2 451 movups %xmm2,(%edi) 452 movups %xmm3,16(%edi) 453 jmp L012ecb_ret 454 .align 4,0x90 455 L019ecb_enc_three: 456 call __aesni_encrypt3 457 movups %xmm2,(%edi) 458 movups %xmm3,16(%edi) 459 movups %xmm4,32(%edi) 460 jmp L012ecb_ret 461 .align 4,0x90 462 L020ecb_enc_four: 463 call __aesni_encrypt4 464 movups %xmm2,(%edi) 465 movups %xmm3,16(%edi) 466 movups %xmm4,32(%edi) 467 movups %xmm5,48(%edi) 468 jmp L012ecb_ret 469 .align 4,0x90 470 L013ecb_decrypt: 471 movl %edx,%ebp 472 movl %ecx,%ebx 473 cmpl $96,%eax 474 jb L022ecb_dec_tail 475 movdqu (%esi),%xmm2 476 movdqu 16(%esi),%xmm3 477 movdqu 32(%esi),%xmm4 478 movdqu 48(%esi),%xmm5 479 movdqu 64(%esi),%xmm6 480 movdqu 80(%esi),%xmm7 481 leal 96(%esi),%esi 482 subl $96,%eax 483 jmp L023ecb_dec_loop6_enter 484 .align 4,0x90 485 L024ecb_dec_loop6: 486 movups %xmm2,(%edi) 487 movdqu (%esi),%xmm2 488 movups %xmm3,16(%edi) 489 movdqu 16(%esi),%xmm3 490 movups %xmm4,32(%edi) 491 movdqu 32(%esi),%xmm4 492 movups %xmm5,48(%edi) 493 movdqu 48(%esi),%xmm5 494 movups %xmm6,64(%edi) 495 movdqu 64(%esi),%xmm6 496 movups %xmm7,80(%edi) 497 leal 96(%edi),%edi 498 movdqu 80(%esi),%xmm7 499 leal 96(%esi),%esi 500 
L023ecb_dec_loop6_enter: 501 call __aesni_decrypt6 502 movl %ebp,%edx 503 movl %ebx,%ecx 504 subl $96,%eax 505 jnc L024ecb_dec_loop6 506 movups %xmm2,(%edi) 507 movups %xmm3,16(%edi) 508 movups %xmm4,32(%edi) 509 movups %xmm5,48(%edi) 510 movups %xmm6,64(%edi) 511 movups %xmm7,80(%edi) 512 leal 96(%edi),%edi 513 addl $96,%eax 514 jz L012ecb_ret 515 L022ecb_dec_tail: 516 movups (%esi),%xmm2 517 cmpl $32,%eax 518 jb L025ecb_dec_one 519 movups 16(%esi),%xmm3 520 je L026ecb_dec_two 521 movups 32(%esi),%xmm4 522 cmpl $64,%eax 523 jb L027ecb_dec_three 524 movups 48(%esi),%xmm5 525 je L028ecb_dec_four 526 movups 64(%esi),%xmm6 527 xorps %xmm7,%xmm7 528 call __aesni_decrypt6 529 movups %xmm2,(%edi) 530 movups %xmm3,16(%edi) 531 movups %xmm4,32(%edi) 532 movups %xmm5,48(%edi) 533 movups %xmm6,64(%edi) 534 jmp L012ecb_ret 535 .align 4,0x90 536 L025ecb_dec_one: 537 movups (%edx),%xmm0 538 movups 16(%edx),%xmm1 539 leal 32(%edx),%edx 540 xorps %xmm0,%xmm2 541 L029dec1_loop_4: 542 .byte 102,15,56,222,209 543 decl %ecx 544 movups (%edx),%xmm1 545 leal 16(%edx),%edx 546 jnz L029dec1_loop_4 547 .byte 102,15,56,223,209 548 movups %xmm2,(%edi) 549 jmp L012ecb_ret 550 .align 4,0x90 551 L026ecb_dec_two: 552 call __aesni_decrypt2 553 movups %xmm2,(%edi) 554 movups %xmm3,16(%edi) 555 jmp L012ecb_ret 556 .align 4,0x90 557 L027ecb_dec_three: 558 call __aesni_decrypt3 559 movups %xmm2,(%edi) 560 movups %xmm3,16(%edi) 561 movups %xmm4,32(%edi) 562 jmp L012ecb_ret 563 .align 4,0x90 564 L028ecb_dec_four: 565 call __aesni_decrypt4 566 movups %xmm2,(%edi) 567 movups %xmm3,16(%edi) 568 movups %xmm4,32(%edi) 569 movups %xmm5,48(%edi) 570 L012ecb_ret: 571 pxor %xmm0,%xmm0 572 pxor %xmm1,%xmm1 573 pxor %xmm2,%xmm2 574 pxor %xmm3,%xmm3 575 pxor %xmm4,%xmm4 576 pxor %xmm5,%xmm5 577 pxor %xmm6,%xmm6 578 pxor %xmm7,%xmm7 579 popl %edi 580 popl %esi 581 popl %ebx 582 popl %ebp 583 ret 584 .globl _aesni_ccm64_encrypt_blocks 585 .private_extern _aesni_ccm64_encrypt_blocks 586 .align 4 587 
_aesni_ccm64_encrypt_blocks: 588 L_aesni_ccm64_encrypt_blocks_begin: 589 pushl %ebp 590 pushl %ebx 591 pushl %esi 592 pushl %edi 593 movl 20(%esp),%esi 594 movl 24(%esp),%edi 595 movl 28(%esp),%eax 596 movl 32(%esp),%edx 597 movl 36(%esp),%ebx 598 movl 40(%esp),%ecx 599 movl %esp,%ebp 600 subl $60,%esp 601 andl $-16,%esp 602 movl %ebp,48(%esp) 603 movdqu (%ebx),%xmm7 604 movdqu (%ecx),%xmm3 605 movl 240(%edx),%ecx 606 movl $202182159,(%esp) 607 movl $134810123,4(%esp) 608 movl $67438087,8(%esp) 609 movl $66051,12(%esp) 610 movl $1,%ebx 611 xorl %ebp,%ebp 612 movl %ebx,16(%esp) 613 movl %ebp,20(%esp) 614 movl %ebp,24(%esp) 615 movl %ebp,28(%esp) 616 shll $4,%ecx 617 movl $16,%ebx 618 leal (%edx),%ebp 619 movdqa (%esp),%xmm5 620 movdqa %xmm7,%xmm2 621 leal 32(%edx,%ecx,1),%edx 622 subl %ecx,%ebx 623 .byte 102,15,56,0,253 624 L030ccm64_enc_outer: 625 movups (%ebp),%xmm0 626 movl %ebx,%ecx 627 movups (%esi),%xmm6 628 xorps %xmm0,%xmm2 629 movups 16(%ebp),%xmm1 630 xorps %xmm6,%xmm0 631 xorps %xmm0,%xmm3 632 movups 32(%ebp),%xmm0 633 L031ccm64_enc2_loop: 634 .byte 102,15,56,220,209 635 .byte 102,15,56,220,217 636 movups (%edx,%ecx,1),%xmm1 637 addl $32,%ecx 638 .byte 102,15,56,220,208 639 .byte 102,15,56,220,216 640 movups -16(%edx,%ecx,1),%xmm0 641 jnz L031ccm64_enc2_loop 642 .byte 102,15,56,220,209 643 .byte 102,15,56,220,217 644 paddq 16(%esp),%xmm7 645 decl %eax 646 .byte 102,15,56,221,208 647 .byte 102,15,56,221,216 648 leal 16(%esi),%esi 649 xorps %xmm2,%xmm6 650 movdqa %xmm7,%xmm2 651 movups %xmm6,(%edi) 652 .byte 102,15,56,0,213 653 leal 16(%edi),%edi 654 jnz L030ccm64_enc_outer 655 movl 48(%esp),%esp 656 movl 40(%esp),%edi 657 movups %xmm3,(%edi) 658 pxor %xmm0,%xmm0 659 pxor %xmm1,%xmm1 660 pxor %xmm2,%xmm2 661 pxor %xmm3,%xmm3 662 pxor %xmm4,%xmm4 663 pxor %xmm5,%xmm5 664 pxor %xmm6,%xmm6 665 pxor %xmm7,%xmm7 666 popl %edi 667 popl %esi 668 popl %ebx 669 popl %ebp 670 ret 671 .globl _aesni_ccm64_decrypt_blocks 672 .private_extern _aesni_ccm64_decrypt_blocks 
673 .align 4 674 _aesni_ccm64_decrypt_blocks: 675 L_aesni_ccm64_decrypt_blocks_begin: 676 pushl %ebp 677 pushl %ebx 678 pushl %esi 679 pushl %edi 680 movl 20(%esp),%esi 681 movl 24(%esp),%edi 682 movl 28(%esp),%eax 683 movl 32(%esp),%edx 684 movl 36(%esp),%ebx 685 movl 40(%esp),%ecx 686 movl %esp,%ebp 687 subl $60,%esp 688 andl $-16,%esp 689 movl %ebp,48(%esp) 690 movdqu (%ebx),%xmm7 691 movdqu (%ecx),%xmm3 692 movl 240(%edx),%ecx 693 movl $202182159,(%esp) 694 movl $134810123,4(%esp) 695 movl $67438087,8(%esp) 696 movl $66051,12(%esp) 697 movl $1,%ebx 698 xorl %ebp,%ebp 699 movl %ebx,16(%esp) 700 movl %ebp,20(%esp) 701 movl %ebp,24(%esp) 702 movl %ebp,28(%esp) 703 movdqa (%esp),%xmm5 704 movdqa %xmm7,%xmm2 705 movl %edx,%ebp 706 movl %ecx,%ebx 707 .byte 102,15,56,0,253 708 movups (%edx),%xmm0 709 movups 16(%edx),%xmm1 710 leal 32(%edx),%edx 711 xorps %xmm0,%xmm2 712 L032enc1_loop_5: 713 .byte 102,15,56,220,209 714 decl %ecx 715 movups (%edx),%xmm1 716 leal 16(%edx),%edx 717 jnz L032enc1_loop_5 718 .byte 102,15,56,221,209 719 shll $4,%ebx 720 movl $16,%ecx 721 movups (%esi),%xmm6 722 paddq 16(%esp),%xmm7 723 leal 16(%esi),%esi 724 subl %ebx,%ecx 725 leal 32(%ebp,%ebx,1),%edx 726 movl %ecx,%ebx 727 jmp L033ccm64_dec_outer 728 .align 4,0x90 729 L033ccm64_dec_outer: 730 xorps %xmm2,%xmm6 731 movdqa %xmm7,%xmm2 732 movups %xmm6,(%edi) 733 leal 16(%edi),%edi 734 .byte 102,15,56,0,213 735 subl $1,%eax 736 jz L034ccm64_dec_break 737 movups (%ebp),%xmm0 738 movl %ebx,%ecx 739 movups 16(%ebp),%xmm1 740 xorps %xmm0,%xmm6 741 xorps %xmm0,%xmm2 742 xorps %xmm6,%xmm3 743 movups 32(%ebp),%xmm0 744 L035ccm64_dec2_loop: 745 .byte 102,15,56,220,209 746 .byte 102,15,56,220,217 747 movups (%edx,%ecx,1),%xmm1 748 addl $32,%ecx 749 .byte 102,15,56,220,208 750 .byte 102,15,56,220,216 751 movups -16(%edx,%ecx,1),%xmm0 752 jnz L035ccm64_dec2_loop 753 movups (%esi),%xmm6 754 paddq 16(%esp),%xmm7 755 .byte 102,15,56,220,209 756 .byte 102,15,56,220,217 757 .byte 102,15,56,221,208 758 .byte 
102,15,56,221,216 759 leal 16(%esi),%esi 760 jmp L033ccm64_dec_outer 761 .align 4,0x90 762 L034ccm64_dec_break: 763 movl 240(%ebp),%ecx 764 movl %ebp,%edx 765 movups (%edx),%xmm0 766 movups 16(%edx),%xmm1 767 xorps %xmm0,%xmm6 768 leal 32(%edx),%edx 769 xorps %xmm6,%xmm3 770 L036enc1_loop_6: 771 .byte 102,15,56,220,217 772 decl %ecx 773 movups (%edx),%xmm1 774 leal 16(%edx),%edx 775 jnz L036enc1_loop_6 776 .byte 102,15,56,221,217 777 movl 48(%esp),%esp 778 movl 40(%esp),%edi 779 movups %xmm3,(%edi) 780 pxor %xmm0,%xmm0 781 pxor %xmm1,%xmm1 782 pxor %xmm2,%xmm2 783 pxor %xmm3,%xmm3 784 pxor %xmm4,%xmm4 785 pxor %xmm5,%xmm5 786 pxor %xmm6,%xmm6 787 pxor %xmm7,%xmm7 788 popl %edi 789 popl %esi 790 popl %ebx 791 popl %ebp 792 ret 793 .globl _aesni_ctr32_encrypt_blocks 794 .private_extern _aesni_ctr32_encrypt_blocks 795 .align 4 796 _aesni_ctr32_encrypt_blocks: 797 L_aesni_ctr32_encrypt_blocks_begin: 798 pushl %ebp 799 pushl %ebx 800 pushl %esi 801 pushl %edi 802 movl 20(%esp),%esi 803 movl 24(%esp),%edi 804 movl 28(%esp),%eax 805 movl 32(%esp),%edx 806 movl 36(%esp),%ebx 807 movl %esp,%ebp 808 subl $88,%esp 809 andl $-16,%esp 810 movl %ebp,80(%esp) 811 cmpl $1,%eax 812 je L037ctr32_one_shortcut 813 movdqu (%ebx),%xmm7 814 movl $202182159,(%esp) 815 movl $134810123,4(%esp) 816 movl $67438087,8(%esp) 817 movl $66051,12(%esp) 818 movl $6,%ecx 819 xorl %ebp,%ebp 820 movl %ecx,16(%esp) 821 movl %ecx,20(%esp) 822 movl %ecx,24(%esp) 823 movl %ebp,28(%esp) 824 .byte 102,15,58,22,251,3 825 .byte 102,15,58,34,253,3 826 movl 240(%edx),%ecx 827 bswap %ebx 828 pxor %xmm0,%xmm0 829 pxor %xmm1,%xmm1 830 movdqa (%esp),%xmm2 831 .byte 102,15,58,34,195,0 832 leal 3(%ebx),%ebp 833 .byte 102,15,58,34,205,0 834 incl %ebx 835 .byte 102,15,58,34,195,1 836 incl %ebp 837 .byte 102,15,58,34,205,1 838 incl %ebx 839 .byte 102,15,58,34,195,2 840 incl %ebp 841 .byte 102,15,58,34,205,2 842 movdqa %xmm0,48(%esp) 843 .byte 102,15,56,0,194 844 movdqu (%edx),%xmm6 845 movdqa %xmm1,64(%esp) 846 .byte 
102,15,56,0,202 847 pshufd $192,%xmm0,%xmm2 848 pshufd $128,%xmm0,%xmm3 849 cmpl $6,%eax 850 jb L038ctr32_tail 851 pxor %xmm6,%xmm7 852 shll $4,%ecx 853 movl $16,%ebx 854 movdqa %xmm7,32(%esp) 855 movl %edx,%ebp 856 subl %ecx,%ebx 857 leal 32(%edx,%ecx,1),%edx 858 subl $6,%eax 859 jmp L039ctr32_loop6 860 .align 4,0x90 861 L039ctr32_loop6: 862 pshufd $64,%xmm0,%xmm4 863 movdqa 32(%esp),%xmm0 864 pshufd $192,%xmm1,%xmm5 865 pxor %xmm0,%xmm2 866 pshufd $128,%xmm1,%xmm6 867 pxor %xmm0,%xmm3 868 pshufd $64,%xmm1,%xmm7 869 movups 16(%ebp),%xmm1 870 pxor %xmm0,%xmm4 871 pxor %xmm0,%xmm5 872 .byte 102,15,56,220,209 873 pxor %xmm0,%xmm6 874 pxor %xmm0,%xmm7 875 .byte 102,15,56,220,217 876 movups 32(%ebp),%xmm0 877 movl %ebx,%ecx 878 .byte 102,15,56,220,225 879 .byte 102,15,56,220,233 880 .byte 102,15,56,220,241 881 .byte 102,15,56,220,249 882 call L_aesni_encrypt6_enter 883 movups (%esi),%xmm1 884 movups 16(%esi),%xmm0 885 xorps %xmm1,%xmm2 886 movups 32(%esi),%xmm1 887 xorps %xmm0,%xmm3 888 movups %xmm2,(%edi) 889 movdqa 16(%esp),%xmm0 890 xorps %xmm1,%xmm4 891 movdqa 64(%esp),%xmm1 892 movups %xmm3,16(%edi) 893 movups %xmm4,32(%edi) 894 paddd %xmm0,%xmm1 895 paddd 48(%esp),%xmm0 896 movdqa (%esp),%xmm2 897 movups 48(%esi),%xmm3 898 movups 64(%esi),%xmm4 899 xorps %xmm3,%xmm5 900 movups 80(%esi),%xmm3 901 leal 96(%esi),%esi 902 movdqa %xmm0,48(%esp) 903 .byte 102,15,56,0,194 904 xorps %xmm4,%xmm6 905 movups %xmm5,48(%edi) 906 xorps %xmm3,%xmm7 907 movdqa %xmm1,64(%esp) 908 .byte 102,15,56,0,202 909 movups %xmm6,64(%edi) 910 pshufd $192,%xmm0,%xmm2 911 movups %xmm7,80(%edi) 912 leal 96(%edi),%edi 913 pshufd $128,%xmm0,%xmm3 914 subl $6,%eax 915 jnc L039ctr32_loop6 916 addl $6,%eax 917 jz L040ctr32_ret 918 movdqu (%ebp),%xmm7 919 movl %ebp,%edx 920 pxor 32(%esp),%xmm7 921 movl 240(%ebp),%ecx 922 L038ctr32_tail: 923 por %xmm7,%xmm2 924 cmpl $2,%eax 925 jb L041ctr32_one 926 pshufd $64,%xmm0,%xmm4 927 por %xmm7,%xmm3 928 je L042ctr32_two 929 pshufd $192,%xmm1,%xmm5 930 por 
%xmm7,%xmm4 931 cmpl $4,%eax 932 jb L043ctr32_three 933 pshufd $128,%xmm1,%xmm6 934 por %xmm7,%xmm5 935 je L044ctr32_four 936 por %xmm7,%xmm6 937 call __aesni_encrypt6 938 movups (%esi),%xmm1 939 movups 16(%esi),%xmm0 940 xorps %xmm1,%xmm2 941 movups 32(%esi),%xmm1 942 xorps %xmm0,%xmm3 943 movups 48(%esi),%xmm0 944 xorps %xmm1,%xmm4 945 movups 64(%esi),%xmm1 946 xorps %xmm0,%xmm5 947 movups %xmm2,(%edi) 948 xorps %xmm1,%xmm6 949 movups %xmm3,16(%edi) 950 movups %xmm4,32(%edi) 951 movups %xmm5,48(%edi) 952 movups %xmm6,64(%edi) 953 jmp L040ctr32_ret 954 .align 4,0x90 955 L037ctr32_one_shortcut: 956 movups (%ebx),%xmm2 957 movl 240(%edx),%ecx 958 L041ctr32_one: 959 movups (%edx),%xmm0 960 movups 16(%edx),%xmm1 961 leal 32(%edx),%edx 962 xorps %xmm0,%xmm2 963 L045enc1_loop_7: 964 .byte 102,15,56,220,209 965 decl %ecx 966 movups (%edx),%xmm1 967 leal 16(%edx),%edx 968 jnz L045enc1_loop_7 969 .byte 102,15,56,221,209 970 movups (%esi),%xmm6 971 xorps %xmm2,%xmm6 972 movups %xmm6,(%edi) 973 jmp L040ctr32_ret 974 .align 4,0x90 975 L042ctr32_two: 976 call __aesni_encrypt2 977 movups (%esi),%xmm5 978 movups 16(%esi),%xmm6 979 xorps %xmm5,%xmm2 980 xorps %xmm6,%xmm3 981 movups %xmm2,(%edi) 982 movups %xmm3,16(%edi) 983 jmp L040ctr32_ret 984 .align 4,0x90 985 L043ctr32_three: 986 call __aesni_encrypt3 987 movups (%esi),%xmm5 988 movups 16(%esi),%xmm6 989 xorps %xmm5,%xmm2 990 movups 32(%esi),%xmm7 991 xorps %xmm6,%xmm3 992 movups %xmm2,(%edi) 993 xorps %xmm7,%xmm4 994 movups %xmm3,16(%edi) 995 movups %xmm4,32(%edi) 996 jmp L040ctr32_ret 997 .align 4,0x90 998 L044ctr32_four: 999 call __aesni_encrypt4 1000 movups (%esi),%xmm6 1001 movups 16(%esi),%xmm7 1002 movups 32(%esi),%xmm1 1003 xorps %xmm6,%xmm2 1004 movups 48(%esi),%xmm0 1005 xorps %xmm7,%xmm3 1006 movups %xmm2,(%edi) 1007 xorps %xmm1,%xmm4 1008 movups %xmm3,16(%edi) 1009 xorps %xmm0,%xmm5 1010 movups %xmm4,32(%edi) 1011 movups %xmm5,48(%edi) 1012 L040ctr32_ret: 1013 pxor %xmm0,%xmm0 1014 pxor %xmm1,%xmm1 1015 pxor 
%xmm2,%xmm2 1016 pxor %xmm3,%xmm3 1017 pxor %xmm4,%xmm4 1018 movdqa %xmm0,32(%esp) 1019 pxor %xmm5,%xmm5 1020 movdqa %xmm0,48(%esp) 1021 pxor %xmm6,%xmm6 1022 movdqa %xmm0,64(%esp) 1023 pxor %xmm7,%xmm7 1024 movl 80(%esp),%esp 1025 popl %edi 1026 popl %esi 1027 popl %ebx 1028 popl %ebp 1029 ret 1030 .globl _aesni_xts_encrypt 1031 .private_extern _aesni_xts_encrypt 1032 .align 4 1033 _aesni_xts_encrypt: 1034 L_aesni_xts_encrypt_begin: 1035 pushl %ebp 1036 pushl %ebx 1037 pushl %esi 1038 pushl %edi 1039 movl 36(%esp),%edx 1040 movl 40(%esp),%esi 1041 movl 240(%edx),%ecx 1042 movups (%esi),%xmm2 1043 movups (%edx),%xmm0 1044 movups 16(%edx),%xmm1 1045 leal 32(%edx),%edx 1046 xorps %xmm0,%xmm2 1047 L046enc1_loop_8: 1048 .byte 102,15,56,220,209 1049 decl %ecx 1050 movups (%edx),%xmm1 1051 leal 16(%edx),%edx 1052 jnz L046enc1_loop_8 1053 .byte 102,15,56,221,209 1054 movl 20(%esp),%esi 1055 movl 24(%esp),%edi 1056 movl 28(%esp),%eax 1057 movl 32(%esp),%edx 1058 movl %esp,%ebp 1059 subl $120,%esp 1060 movl 240(%edx),%ecx 1061 andl $-16,%esp 1062 movl $135,96(%esp) 1063 movl $0,100(%esp) 1064 movl $1,104(%esp) 1065 movl $0,108(%esp) 1066 movl %eax,112(%esp) 1067 movl %ebp,116(%esp) 1068 movdqa %xmm2,%xmm1 1069 pxor %xmm0,%xmm0 1070 movdqa 96(%esp),%xmm3 1071 pcmpgtd %xmm1,%xmm0 1072 andl $-16,%eax 1073 movl %edx,%ebp 1074 movl %ecx,%ebx 1075 subl $96,%eax 1076 jc L047xts_enc_short 1077 shll $4,%ecx 1078 movl $16,%ebx 1079 subl %ecx,%ebx 1080 leal 32(%edx,%ecx,1),%edx 1081 jmp L048xts_enc_loop6 1082 .align 4,0x90 1083 L048xts_enc_loop6: 1084 pshufd $19,%xmm0,%xmm2 1085 pxor %xmm0,%xmm0 1086 movdqa %xmm1,(%esp) 1087 paddq %xmm1,%xmm1 1088 pand %xmm3,%xmm2 1089 pcmpgtd %xmm1,%xmm0 1090 pxor %xmm2,%xmm1 1091 pshufd $19,%xmm0,%xmm2 1092 pxor %xmm0,%xmm0 1093 movdqa %xmm1,16(%esp) 1094 paddq %xmm1,%xmm1 1095 pand %xmm3,%xmm2 1096 pcmpgtd %xmm1,%xmm0 1097 pxor %xmm2,%xmm1 1098 pshufd $19,%xmm0,%xmm2 1099 pxor %xmm0,%xmm0 1100 movdqa %xmm1,32(%esp) 1101 paddq %xmm1,%xmm1 1102 pand 
%xmm3,%xmm2 1103 pcmpgtd %xmm1,%xmm0 1104 pxor %xmm2,%xmm1 1105 pshufd $19,%xmm0,%xmm2 1106 pxor %xmm0,%xmm0 1107 movdqa %xmm1,48(%esp) 1108 paddq %xmm1,%xmm1 1109 pand %xmm3,%xmm2 1110 pcmpgtd %xmm1,%xmm0 1111 pxor %xmm2,%xmm1 1112 pshufd $19,%xmm0,%xmm7 1113 movdqa %xmm1,64(%esp) 1114 paddq %xmm1,%xmm1 1115 movups (%ebp),%xmm0 1116 pand %xmm3,%xmm7 1117 movups (%esi),%xmm2 1118 pxor %xmm1,%xmm7 1119 movl %ebx,%ecx 1120 movdqu 16(%esi),%xmm3 1121 xorps %xmm0,%xmm2 1122 movdqu 32(%esi),%xmm4 1123 pxor %xmm0,%xmm3 1124 movdqu 48(%esi),%xmm5 1125 pxor %xmm0,%xmm4 1126 movdqu 64(%esi),%xmm6 1127 pxor %xmm0,%xmm5 1128 movdqu 80(%esi),%xmm1 1129 pxor %xmm0,%xmm6 1130 leal 96(%esi),%esi 1131 pxor (%esp),%xmm2 1132 movdqa %xmm7,80(%esp) 1133 pxor %xmm1,%xmm7 1134 movups 16(%ebp),%xmm1 1135 pxor 16(%esp),%xmm3 1136 pxor 32(%esp),%xmm4 1137 .byte 102,15,56,220,209 1138 pxor 48(%esp),%xmm5 1139 pxor 64(%esp),%xmm6 1140 .byte 102,15,56,220,217 1141 pxor %xmm0,%xmm7 1142 movups 32(%ebp),%xmm0 1143 .byte 102,15,56,220,225 1144 .byte 102,15,56,220,233 1145 .byte 102,15,56,220,241 1146 .byte 102,15,56,220,249 1147 call L_aesni_encrypt6_enter 1148 movdqa 80(%esp),%xmm1 1149 pxor %xmm0,%xmm0 1150 xorps (%esp),%xmm2 1151 pcmpgtd %xmm1,%xmm0 1152 xorps 16(%esp),%xmm3 1153 movups %xmm2,(%edi) 1154 xorps 32(%esp),%xmm4 1155 movups %xmm3,16(%edi) 1156 xorps 48(%esp),%xmm5 1157 movups %xmm4,32(%edi) 1158 xorps 64(%esp),%xmm6 1159 movups %xmm5,48(%edi) 1160 xorps %xmm1,%xmm7 1161 movups %xmm6,64(%edi) 1162 pshufd $19,%xmm0,%xmm2 1163 movups %xmm7,80(%edi) 1164 leal 96(%edi),%edi 1165 movdqa 96(%esp),%xmm3 1166 pxor %xmm0,%xmm0 1167 paddq %xmm1,%xmm1 1168 pand %xmm3,%xmm2 1169 pcmpgtd %xmm1,%xmm0 1170 pxor %xmm2,%xmm1 1171 subl $96,%eax 1172 jnc L048xts_enc_loop6 1173 movl 240(%ebp),%ecx 1174 movl %ebp,%edx 1175 movl %ecx,%ebx 1176 L047xts_enc_short: 1177 addl $96,%eax 1178 jz L049xts_enc_done6x 1179 movdqa %xmm1,%xmm5 1180 cmpl $32,%eax 1181 jb L050xts_enc_one 1182 pshufd $19,%xmm0,%xmm2 
1183 pxor %xmm0,%xmm0 1184 paddq %xmm1,%xmm1 1185 pand %xmm3,%xmm2 1186 pcmpgtd %xmm1,%xmm0 1187 pxor %xmm2,%xmm1 1188 je L051xts_enc_two 1189 pshufd $19,%xmm0,%xmm2 1190 pxor %xmm0,%xmm0 1191 movdqa %xmm1,%xmm6 1192 paddq %xmm1,%xmm1 1193 pand %xmm3,%xmm2 1194 pcmpgtd %xmm1,%xmm0 1195 pxor %xmm2,%xmm1 1196 cmpl $64,%eax 1197 jb L052xts_enc_three 1198 pshufd $19,%xmm0,%xmm2 1199 pxor %xmm0,%xmm0 1200 movdqa %xmm1,%xmm7 1201 paddq %xmm1,%xmm1 1202 pand %xmm3,%xmm2 1203 pcmpgtd %xmm1,%xmm0 1204 pxor %xmm2,%xmm1 1205 movdqa %xmm5,(%esp) 1206 movdqa %xmm6,16(%esp) 1207 je L053xts_enc_four 1208 movdqa %xmm7,32(%esp) 1209 pshufd $19,%xmm0,%xmm7 1210 movdqa %xmm1,48(%esp) 1211 paddq %xmm1,%xmm1 1212 pand %xmm3,%xmm7 1213 pxor %xmm1,%xmm7 1214 movdqu (%esi),%xmm2 1215 movdqu 16(%esi),%xmm3 1216 movdqu 32(%esi),%xmm4 1217 pxor (%esp),%xmm2 1218 movdqu 48(%esi),%xmm5 1219 pxor 16(%esp),%xmm3 1220 movdqu 64(%esi),%xmm6 1221 pxor 32(%esp),%xmm4 1222 leal 80(%esi),%esi 1223 pxor 48(%esp),%xmm5 1224 movdqa %xmm7,64(%esp) 1225 pxor %xmm7,%xmm6 1226 call __aesni_encrypt6 1227 movaps 64(%esp),%xmm1 1228 xorps (%esp),%xmm2 1229 xorps 16(%esp),%xmm3 1230 xorps 32(%esp),%xmm4 1231 movups %xmm2,(%edi) 1232 xorps 48(%esp),%xmm5 1233 movups %xmm3,16(%edi) 1234 xorps %xmm1,%xmm6 1235 movups %xmm4,32(%edi) 1236 movups %xmm5,48(%edi) 1237 movups %xmm6,64(%edi) 1238 leal 80(%edi),%edi 1239 jmp L054xts_enc_done 1240 .align 4,0x90 1241 L050xts_enc_one: 1242 movups (%esi),%xmm2 1243 leal 16(%esi),%esi 1244 xorps %xmm5,%xmm2 1245 movups (%edx),%xmm0 1246 movups 16(%edx),%xmm1 1247 leal 32(%edx),%edx 1248 xorps %xmm0,%xmm2 1249 L055enc1_loop_9: 1250 .byte 102,15,56,220,209 1251 decl %ecx 1252 movups (%edx),%xmm1 1253 leal 16(%edx),%edx 1254 jnz L055enc1_loop_9 1255 .byte 102,15,56,221,209 1256 xorps %xmm5,%xmm2 1257 movups %xmm2,(%edi) 1258 leal 16(%edi),%edi 1259 movdqa %xmm5,%xmm1 1260 jmp L054xts_enc_done 1261 .align 4,0x90 1262 L051xts_enc_two: 1263 movaps %xmm1,%xmm6 1264 movups (%esi),%xmm2 
1265 movups 16(%esi),%xmm3 1266 leal 32(%esi),%esi 1267 xorps %xmm5,%xmm2 1268 xorps %xmm6,%xmm3 1269 call __aesni_encrypt2 1270 xorps %xmm5,%xmm2 1271 xorps %xmm6,%xmm3 1272 movups %xmm2,(%edi) 1273 movups %xmm3,16(%edi) 1274 leal 32(%edi),%edi 1275 movdqa %xmm6,%xmm1 1276 jmp L054xts_enc_done 1277 .align 4,0x90 1278 L052xts_enc_three: 1279 movaps %xmm1,%xmm7 1280 movups (%esi),%xmm2 1281 movups 16(%esi),%xmm3 1282 movups 32(%esi),%xmm4 1283 leal 48(%esi),%esi 1284 xorps %xmm5,%xmm2 1285 xorps %xmm6,%xmm3 1286 xorps %xmm7,%xmm4 1287 call __aesni_encrypt3 1288 xorps %xmm5,%xmm2 1289 xorps %xmm6,%xmm3 1290 xorps %xmm7,%xmm4 1291 movups %xmm2,(%edi) 1292 movups %xmm3,16(%edi) 1293 movups %xmm4,32(%edi) 1294 leal 48(%edi),%edi 1295 movdqa %xmm7,%xmm1 1296 jmp L054xts_enc_done 1297 .align 4,0x90 1298 L053xts_enc_four: 1299 movaps %xmm1,%xmm6 1300 movups (%esi),%xmm2 1301 movups 16(%esi),%xmm3 1302 movups 32(%esi),%xmm4 1303 xorps (%esp),%xmm2 1304 movups 48(%esi),%xmm5 1305 leal 64(%esi),%esi 1306 xorps 16(%esp),%xmm3 1307 xorps %xmm7,%xmm4 1308 xorps %xmm6,%xmm5 1309 call __aesni_encrypt4 1310 xorps (%esp),%xmm2 1311 xorps 16(%esp),%xmm3 1312 xorps %xmm7,%xmm4 1313 movups %xmm2,(%edi) 1314 xorps %xmm6,%xmm5 1315 movups %xmm3,16(%edi) 1316 movups %xmm4,32(%edi) 1317 movups %xmm5,48(%edi) 1318 leal 64(%edi),%edi 1319 movdqa %xmm6,%xmm1 1320 jmp L054xts_enc_done 1321 .align 4,0x90 1322 L049xts_enc_done6x: 1323 movl 112(%esp),%eax 1324 andl $15,%eax 1325 jz L056xts_enc_ret 1326 movdqa %xmm1,%xmm5 1327 movl %eax,112(%esp) 1328 jmp L057xts_enc_steal 1329 .align 4,0x90 1330 L054xts_enc_done: 1331 movl 112(%esp),%eax 1332 pxor %xmm0,%xmm0 1333 andl $15,%eax 1334 jz L056xts_enc_ret 1335 pcmpgtd %xmm1,%xmm0 1336 movl %eax,112(%esp) 1337 pshufd $19,%xmm0,%xmm5 1338 paddq %xmm1,%xmm1 1339 pand 96(%esp),%xmm5 1340 pxor %xmm1,%xmm5 1341 L057xts_enc_steal: 1342 movzbl (%esi),%ecx 1343 movzbl -16(%edi),%edx 1344 leal 1(%esi),%esi 1345 movb %cl,-16(%edi) 1346 movb %dl,(%edi) 1347 leal 
1(%edi),%edi 1348 subl $1,%eax 1349 jnz L057xts_enc_steal 1350 subl 112(%esp),%edi 1351 movl %ebp,%edx 1352 movl %ebx,%ecx 1353 movups -16(%edi),%xmm2 1354 xorps %xmm5,%xmm2 1355 movups (%edx),%xmm0 1356 movups 16(%edx),%xmm1 1357 leal 32(%edx),%edx 1358 xorps %xmm0,%xmm2 1359 L058enc1_loop_10: 1360 .byte 102,15,56,220,209 1361 decl %ecx 1362 movups (%edx),%xmm1 1363 leal 16(%edx),%edx 1364 jnz L058enc1_loop_10 1365 .byte 102,15,56,221,209 1366 xorps %xmm5,%xmm2 1367 movups %xmm2,-16(%edi) 1368 L056xts_enc_ret: 1369 pxor %xmm0,%xmm0 1370 pxor %xmm1,%xmm1 1371 pxor %xmm2,%xmm2 1372 movdqa %xmm0,(%esp) 1373 pxor %xmm3,%xmm3 1374 movdqa %xmm0,16(%esp) 1375 pxor %xmm4,%xmm4 1376 movdqa %xmm0,32(%esp) 1377 pxor %xmm5,%xmm5 1378 movdqa %xmm0,48(%esp) 1379 pxor %xmm6,%xmm6 1380 movdqa %xmm0,64(%esp) 1381 pxor %xmm7,%xmm7 1382 movdqa %xmm0,80(%esp) 1383 movl 116(%esp),%esp 1384 popl %edi 1385 popl %esi 1386 popl %ebx 1387 popl %ebp 1388 ret 1389 .globl _aesni_xts_decrypt 1390 .private_extern _aesni_xts_decrypt 1391 .align 4 1392 _aesni_xts_decrypt: 1393 L_aesni_xts_decrypt_begin: 1394 pushl %ebp 1395 pushl %ebx 1396 pushl %esi 1397 pushl %edi 1398 movl 36(%esp),%edx 1399 movl 40(%esp),%esi 1400 movl 240(%edx),%ecx 1401 movups (%esi),%xmm2 1402 movups (%edx),%xmm0 1403 movups 16(%edx),%xmm1 1404 leal 32(%edx),%edx 1405 xorps %xmm0,%xmm2 1406 L059enc1_loop_11: 1407 .byte 102,15,56,220,209 1408 decl %ecx 1409 movups (%edx),%xmm1 1410 leal 16(%edx),%edx 1411 jnz L059enc1_loop_11 1412 .byte 102,15,56,221,209 1413 movl 20(%esp),%esi 1414 movl 24(%esp),%edi 1415 movl 28(%esp),%eax 1416 movl 32(%esp),%edx 1417 movl %esp,%ebp 1418 subl $120,%esp 1419 andl $-16,%esp 1420 xorl %ebx,%ebx 1421 testl $15,%eax 1422 setnz %bl 1423 shll $4,%ebx 1424 subl %ebx,%eax 1425 movl $135,96(%esp) 1426 movl $0,100(%esp) 1427 movl $1,104(%esp) 1428 movl $0,108(%esp) 1429 movl %eax,112(%esp) 1430 movl %ebp,116(%esp) 1431 movl 240(%edx),%ecx 1432 movl %edx,%ebp 1433 movl %ecx,%ebx 1434 movdqa 
%xmm2,%xmm1 1435 pxor %xmm0,%xmm0 1436 movdqa 96(%esp),%xmm3 1437 pcmpgtd %xmm1,%xmm0 1438 andl $-16,%eax 1439 subl $96,%eax 1440 jc L060xts_dec_short 1441 shll $4,%ecx 1442 movl $16,%ebx 1443 subl %ecx,%ebx 1444 leal 32(%edx,%ecx,1),%edx 1445 jmp L061xts_dec_loop6 1446 .align 4,0x90 1447 L061xts_dec_loop6: 1448 pshufd $19,%xmm0,%xmm2 1449 pxor %xmm0,%xmm0 1450 movdqa %xmm1,(%esp) 1451 paddq %xmm1,%xmm1 1452 pand %xmm3,%xmm2 1453 pcmpgtd %xmm1,%xmm0 1454 pxor %xmm2,%xmm1 1455 pshufd $19,%xmm0,%xmm2 1456 pxor %xmm0,%xmm0 1457 movdqa %xmm1,16(%esp) 1458 paddq %xmm1,%xmm1 1459 pand %xmm3,%xmm2 1460 pcmpgtd %xmm1,%xmm0 1461 pxor %xmm2,%xmm1 1462 pshufd $19,%xmm0,%xmm2 1463 pxor %xmm0,%xmm0 1464 movdqa %xmm1,32(%esp) 1465 paddq %xmm1,%xmm1 1466 pand %xmm3,%xmm2 1467 pcmpgtd %xmm1,%xmm0 1468 pxor %xmm2,%xmm1 1469 pshufd $19,%xmm0,%xmm2 1470 pxor %xmm0,%xmm0 1471 movdqa %xmm1,48(%esp) 1472 paddq %xmm1,%xmm1 1473 pand %xmm3,%xmm2 1474 pcmpgtd %xmm1,%xmm0 1475 pxor %xmm2,%xmm1 1476 pshufd $19,%xmm0,%xmm7 1477 movdqa %xmm1,64(%esp) 1478 paddq %xmm1,%xmm1 1479 movups (%ebp),%xmm0 1480 pand %xmm3,%xmm7 1481 movups (%esi),%xmm2 1482 pxor %xmm1,%xmm7 1483 movl %ebx,%ecx 1484 movdqu 16(%esi),%xmm3 1485 xorps %xmm0,%xmm2 1486 movdqu 32(%esi),%xmm4 1487 pxor %xmm0,%xmm3 1488 movdqu 48(%esi),%xmm5 1489 pxor %xmm0,%xmm4 1490 movdqu 64(%esi),%xmm6 1491 pxor %xmm0,%xmm5 1492 movdqu 80(%esi),%xmm1 1493 pxor %xmm0,%xmm6 1494 leal 96(%esi),%esi 1495 pxor (%esp),%xmm2 1496 movdqa %xmm7,80(%esp) 1497 pxor %xmm1,%xmm7 1498 movups 16(%ebp),%xmm1 1499 pxor 16(%esp),%xmm3 1500 pxor 32(%esp),%xmm4 1501 .byte 102,15,56,222,209 1502 pxor 48(%esp),%xmm5 1503 pxor 64(%esp),%xmm6 1504 .byte 102,15,56,222,217 1505 pxor %xmm0,%xmm7 1506 movups 32(%ebp),%xmm0 1507 .byte 102,15,56,222,225 1508 .byte 102,15,56,222,233 1509 .byte 102,15,56,222,241 1510 .byte 102,15,56,222,249 1511 call L_aesni_decrypt6_enter 1512 movdqa 80(%esp),%xmm1 1513 pxor %xmm0,%xmm0 1514 xorps (%esp),%xmm2 1515 pcmpgtd %xmm1,%xmm0 
1516 xorps 16(%esp),%xmm3 1517 movups %xmm2,(%edi) 1518 xorps 32(%esp),%xmm4 1519 movups %xmm3,16(%edi) 1520 xorps 48(%esp),%xmm5 1521 movups %xmm4,32(%edi) 1522 xorps 64(%esp),%xmm6 1523 movups %xmm5,48(%edi) 1524 xorps %xmm1,%xmm7 1525 movups %xmm6,64(%edi) 1526 pshufd $19,%xmm0,%xmm2 1527 movups %xmm7,80(%edi) 1528 leal 96(%edi),%edi 1529 movdqa 96(%esp),%xmm3 1530 pxor %xmm0,%xmm0 1531 paddq %xmm1,%xmm1 1532 pand %xmm3,%xmm2 1533 pcmpgtd %xmm1,%xmm0 1534 pxor %xmm2,%xmm1 1535 subl $96,%eax 1536 jnc L061xts_dec_loop6 1537 movl 240(%ebp),%ecx 1538 movl %ebp,%edx 1539 movl %ecx,%ebx 1540 L060xts_dec_short: 1541 addl $96,%eax 1542 jz L062xts_dec_done6x 1543 movdqa %xmm1,%xmm5 1544 cmpl $32,%eax 1545 jb L063xts_dec_one 1546 pshufd $19,%xmm0,%xmm2 1547 pxor %xmm0,%xmm0 1548 paddq %xmm1,%xmm1 1549 pand %xmm3,%xmm2 1550 pcmpgtd %xmm1,%xmm0 1551 pxor %xmm2,%xmm1 1552 je L064xts_dec_two 1553 pshufd $19,%xmm0,%xmm2 1554 pxor %xmm0,%xmm0 1555 movdqa %xmm1,%xmm6 1556 paddq %xmm1,%xmm1 1557 pand %xmm3,%xmm2 1558 pcmpgtd %xmm1,%xmm0 1559 pxor %xmm2,%xmm1 1560 cmpl $64,%eax 1561 jb L065xts_dec_three 1562 pshufd $19,%xmm0,%xmm2 1563 pxor %xmm0,%xmm0 1564 movdqa %xmm1,%xmm7 1565 paddq %xmm1,%xmm1 1566 pand %xmm3,%xmm2 1567 pcmpgtd %xmm1,%xmm0 1568 pxor %xmm2,%xmm1 1569 movdqa %xmm5,(%esp) 1570 movdqa %xmm6,16(%esp) 1571 je L066xts_dec_four 1572 movdqa %xmm7,32(%esp) 1573 pshufd $19,%xmm0,%xmm7 1574 movdqa %xmm1,48(%esp) 1575 paddq %xmm1,%xmm1 1576 pand %xmm3,%xmm7 1577 pxor %xmm1,%xmm7 1578 movdqu (%esi),%xmm2 1579 movdqu 16(%esi),%xmm3 1580 movdqu 32(%esi),%xmm4 1581 pxor (%esp),%xmm2 1582 movdqu 48(%esi),%xmm5 1583 pxor 16(%esp),%xmm3 1584 movdqu 64(%esi),%xmm6 1585 pxor 32(%esp),%xmm4 1586 leal 80(%esi),%esi 1587 pxor 48(%esp),%xmm5 1588 movdqa %xmm7,64(%esp) 1589 pxor %xmm7,%xmm6 1590 call __aesni_decrypt6 1591 movaps 64(%esp),%xmm1 1592 xorps (%esp),%xmm2 1593 xorps 16(%esp),%xmm3 1594 xorps 32(%esp),%xmm4 1595 movups %xmm2,(%edi) 1596 xorps 48(%esp),%xmm5 1597 movups 
%xmm3,16(%edi) 1598 xorps %xmm1,%xmm6 1599 movups %xmm4,32(%edi) 1600 movups %xmm5,48(%edi) 1601 movups %xmm6,64(%edi) 1602 leal 80(%edi),%edi 1603 jmp L067xts_dec_done 1604 .align 4,0x90 1605 L063xts_dec_one: 1606 movups (%esi),%xmm2 1607 leal 16(%esi),%esi 1608 xorps %xmm5,%xmm2 1609 movups (%edx),%xmm0 1610 movups 16(%edx),%xmm1 1611 leal 32(%edx),%edx 1612 xorps %xmm0,%xmm2 1613 L068dec1_loop_12: 1614 .byte 102,15,56,222,209 1615 decl %ecx 1616 movups (%edx),%xmm1 1617 leal 16(%edx),%edx 1618 jnz L068dec1_loop_12 1619 .byte 102,15,56,223,209 1620 xorps %xmm5,%xmm2 1621 movups %xmm2,(%edi) 1622 leal 16(%edi),%edi 1623 movdqa %xmm5,%xmm1 1624 jmp L067xts_dec_done 1625 .align 4,0x90 1626 L064xts_dec_two: 1627 movaps %xmm1,%xmm6 1628 movups (%esi),%xmm2 1629 movups 16(%esi),%xmm3 1630 leal 32(%esi),%esi 1631 xorps %xmm5,%xmm2 1632 xorps %xmm6,%xmm3 1633 call __aesni_decrypt2 1634 xorps %xmm5,%xmm2 1635 xorps %xmm6,%xmm3 1636 movups %xmm2,(%edi) 1637 movups %xmm3,16(%edi) 1638 leal 32(%edi),%edi 1639 movdqa %xmm6,%xmm1 1640 jmp L067xts_dec_done 1641 .align 4,0x90 1642 L065xts_dec_three: 1643 movaps %xmm1,%xmm7 1644 movups (%esi),%xmm2 1645 movups 16(%esi),%xmm3 1646 movups 32(%esi),%xmm4 1647 leal 48(%esi),%esi 1648 xorps %xmm5,%xmm2 1649 xorps %xmm6,%xmm3 1650 xorps %xmm7,%xmm4 1651 call __aesni_decrypt3 1652 xorps %xmm5,%xmm2 1653 xorps %xmm6,%xmm3 1654 xorps %xmm7,%xmm4 1655 movups %xmm2,(%edi) 1656 movups %xmm3,16(%edi) 1657 movups %xmm4,32(%edi) 1658 leal 48(%edi),%edi 1659 movdqa %xmm7,%xmm1 1660 jmp L067xts_dec_done 1661 .align 4,0x90 1662 L066xts_dec_four: 1663 movaps %xmm1,%xmm6 1664 movups (%esi),%xmm2 1665 movups 16(%esi),%xmm3 1666 movups 32(%esi),%xmm4 1667 xorps (%esp),%xmm2 1668 movups 48(%esi),%xmm5 1669 leal 64(%esi),%esi 1670 xorps 16(%esp),%xmm3 1671 xorps %xmm7,%xmm4 1672 xorps %xmm6,%xmm5 1673 call __aesni_decrypt4 1674 xorps (%esp),%xmm2 1675 xorps 16(%esp),%xmm3 1676 xorps %xmm7,%xmm4 1677 movups %xmm2,(%edi) 1678 xorps %xmm6,%xmm5 1679 movups 
%xmm3,16(%edi) 1680 movups %xmm4,32(%edi) 1681 movups %xmm5,48(%edi) 1682 leal 64(%edi),%edi 1683 movdqa %xmm6,%xmm1 1684 jmp L067xts_dec_done 1685 .align 4,0x90 1686 L062xts_dec_done6x: 1687 movl 112(%esp),%eax 1688 andl $15,%eax 1689 jz L069xts_dec_ret 1690 movl %eax,112(%esp) 1691 jmp L070xts_dec_only_one_more 1692 .align 4,0x90 1693 L067xts_dec_done: 1694 movl 112(%esp),%eax 1695 pxor %xmm0,%xmm0 1696 andl $15,%eax 1697 jz L069xts_dec_ret 1698 pcmpgtd %xmm1,%xmm0 1699 movl %eax,112(%esp) 1700 pshufd $19,%xmm0,%xmm2 1701 pxor %xmm0,%xmm0 1702 movdqa 96(%esp),%xmm3 1703 paddq %xmm1,%xmm1 1704 pand %xmm3,%xmm2 1705 pcmpgtd %xmm1,%xmm0 1706 pxor %xmm2,%xmm1 1707 L070xts_dec_only_one_more: 1708 pshufd $19,%xmm0,%xmm5 1709 movdqa %xmm1,%xmm6 1710 paddq %xmm1,%xmm1 1711 pand %xmm3,%xmm5 1712 pxor %xmm1,%xmm5 1713 movl %ebp,%edx 1714 movl %ebx,%ecx 1715 movups (%esi),%xmm2 1716 xorps %xmm5,%xmm2 1717 movups (%edx),%xmm0 1718 movups 16(%edx),%xmm1 1719 leal 32(%edx),%edx 1720 xorps %xmm0,%xmm2 1721 L071dec1_loop_13: 1722 .byte 102,15,56,222,209 1723 decl %ecx 1724 movups (%edx),%xmm1 1725 leal 16(%edx),%edx 1726 jnz L071dec1_loop_13 1727 .byte 102,15,56,223,209 1728 xorps %xmm5,%xmm2 1729 movups %xmm2,(%edi) 1730 L072xts_dec_steal: 1731 movzbl 16(%esi),%ecx 1732 movzbl (%edi),%edx 1733 leal 1(%esi),%esi 1734 movb %cl,(%edi) 1735 movb %dl,16(%edi) 1736 leal 1(%edi),%edi 1737 subl $1,%eax 1738 jnz L072xts_dec_steal 1739 subl 112(%esp),%edi 1740 movl %ebp,%edx 1741 movl %ebx,%ecx 1742 movups (%edi),%xmm2 1743 xorps %xmm6,%xmm2 1744 movups (%edx),%xmm0 1745 movups 16(%edx),%xmm1 1746 leal 32(%edx),%edx 1747 xorps %xmm0,%xmm2 1748 L073dec1_loop_14: 1749 .byte 102,15,56,222,209 1750 decl %ecx 1751 movups (%edx),%xmm1 1752 leal 16(%edx),%edx 1753 jnz L073dec1_loop_14 1754 .byte 102,15,56,223,209 1755 xorps %xmm6,%xmm2 1756 movups %xmm2,(%edi) 1757 L069xts_dec_ret: 1758 pxor %xmm0,%xmm0 1759 pxor %xmm1,%xmm1 1760 pxor %xmm2,%xmm2 1761 movdqa %xmm0,(%esp) 1762 pxor %xmm3,%xmm3 
1763 movdqa %xmm0,16(%esp) 1764 pxor %xmm4,%xmm4 1765 movdqa %xmm0,32(%esp) 1766 pxor %xmm5,%xmm5 1767 movdqa %xmm0,48(%esp) 1768 pxor %xmm6,%xmm6 1769 movdqa %xmm0,64(%esp) 1770 pxor %xmm7,%xmm7 1771 movdqa %xmm0,80(%esp) 1772 movl 116(%esp),%esp 1773 popl %edi 1774 popl %esi 1775 popl %ebx 1776 popl %ebp 1777 ret 1778 .globl _aesni_cbc_encrypt 1779 .private_extern _aesni_cbc_encrypt 1780 .align 4 1781 _aesni_cbc_encrypt: 1782 L_aesni_cbc_encrypt_begin: 1783 pushl %ebp 1784 pushl %ebx 1785 pushl %esi 1786 pushl %edi 1787 movl 20(%esp),%esi 1788 movl %esp,%ebx 1789 movl 24(%esp),%edi 1790 subl $24,%ebx 1791 movl 28(%esp),%eax 1792 andl $-16,%ebx 1793 movl 32(%esp),%edx 1794 movl 36(%esp),%ebp 1795 testl %eax,%eax 1796 jz L074cbc_abort 1797 cmpl $0,40(%esp) 1798 xchgl %esp,%ebx 1799 movups (%ebp),%xmm7 1800 movl 240(%edx),%ecx 1801 movl %edx,%ebp 1802 movl %ebx,16(%esp) 1803 movl %ecx,%ebx 1804 je L075cbc_decrypt 1805 movaps %xmm7,%xmm2 1806 cmpl $16,%eax 1807 jb L076cbc_enc_tail 1808 subl $16,%eax 1809 jmp L077cbc_enc_loop 1810 .align 4,0x90 1811 L077cbc_enc_loop: 1812 movups (%esi),%xmm7 1813 leal 16(%esi),%esi 1814 movups (%edx),%xmm0 1815 movups 16(%edx),%xmm1 1816 xorps %xmm0,%xmm7 1817 leal 32(%edx),%edx 1818 xorps %xmm7,%xmm2 1819 L078enc1_loop_15: 1820 .byte 102,15,56,220,209 1821 decl %ecx 1822 movups (%edx),%xmm1 1823 leal 16(%edx),%edx 1824 jnz L078enc1_loop_15 1825 .byte 102,15,56,221,209 1826 movl %ebx,%ecx 1827 movl %ebp,%edx 1828 movups %xmm2,(%edi) 1829 leal 16(%edi),%edi 1830 subl $16,%eax 1831 jnc L077cbc_enc_loop 1832 addl $16,%eax 1833 jnz L076cbc_enc_tail 1834 movaps %xmm2,%xmm7 1835 pxor %xmm2,%xmm2 1836 jmp L079cbc_ret 1837 L076cbc_enc_tail: 1838 movl %eax,%ecx 1839 .long 2767451785 1840 movl $16,%ecx 1841 subl %eax,%ecx 1842 xorl %eax,%eax 1843 .long 2868115081 1844 leal -16(%edi),%edi 1845 movl %ebx,%ecx 1846 movl %edi,%esi 1847 movl %ebp,%edx 1848 jmp L077cbc_enc_loop 1849 .align 4,0x90 1850 L075cbc_decrypt: 1851 cmpl $80,%eax 1852 jbe 
L080cbc_dec_tail 1853 movaps %xmm7,(%esp) 1854 subl $80,%eax 1855 jmp L081cbc_dec_loop6_enter 1856 .align 4,0x90 1857 L082cbc_dec_loop6: 1858 movaps %xmm0,(%esp) 1859 movups %xmm7,(%edi) 1860 leal 16(%edi),%edi 1861 L081cbc_dec_loop6_enter: 1862 movdqu (%esi),%xmm2 1863 movdqu 16(%esi),%xmm3 1864 movdqu 32(%esi),%xmm4 1865 movdqu 48(%esi),%xmm5 1866 movdqu 64(%esi),%xmm6 1867 movdqu 80(%esi),%xmm7 1868 call __aesni_decrypt6 1869 movups (%esi),%xmm1 1870 movups 16(%esi),%xmm0 1871 xorps (%esp),%xmm2 1872 xorps %xmm1,%xmm3 1873 movups 32(%esi),%xmm1 1874 xorps %xmm0,%xmm4 1875 movups 48(%esi),%xmm0 1876 xorps %xmm1,%xmm5 1877 movups 64(%esi),%xmm1 1878 xorps %xmm0,%xmm6 1879 movups 80(%esi),%xmm0 1880 xorps %xmm1,%xmm7 1881 movups %xmm2,(%edi) 1882 movups %xmm3,16(%edi) 1883 leal 96(%esi),%esi 1884 movups %xmm4,32(%edi) 1885 movl %ebx,%ecx 1886 movups %xmm5,48(%edi) 1887 movl %ebp,%edx 1888 movups %xmm6,64(%edi) 1889 leal 80(%edi),%edi 1890 subl $96,%eax 1891 ja L082cbc_dec_loop6 1892 movaps %xmm7,%xmm2 1893 movaps %xmm0,%xmm7 1894 addl $80,%eax 1895 jle L083cbc_dec_clear_tail_collected 1896 movups %xmm2,(%edi) 1897 leal 16(%edi),%edi 1898 L080cbc_dec_tail: 1899 movups (%esi),%xmm2 1900 movaps %xmm2,%xmm6 1901 cmpl $16,%eax 1902 jbe L084cbc_dec_one 1903 movups 16(%esi),%xmm3 1904 movaps %xmm3,%xmm5 1905 cmpl $32,%eax 1906 jbe L085cbc_dec_two 1907 movups 32(%esi),%xmm4 1908 cmpl $48,%eax 1909 jbe L086cbc_dec_three 1910 movups 48(%esi),%xmm5 1911 cmpl $64,%eax 1912 jbe L087cbc_dec_four 1913 movups 64(%esi),%xmm6 1914 movaps %xmm7,(%esp) 1915 movups (%esi),%xmm2 1916 xorps %xmm7,%xmm7 1917 call __aesni_decrypt6 1918 movups (%esi),%xmm1 1919 movups 16(%esi),%xmm0 1920 xorps (%esp),%xmm2 1921 xorps %xmm1,%xmm3 1922 movups 32(%esi),%xmm1 1923 xorps %xmm0,%xmm4 1924 movups 48(%esi),%xmm0 1925 xorps %xmm1,%xmm5 1926 movups 64(%esi),%xmm7 1927 xorps %xmm0,%xmm6 1928 movups %xmm2,(%edi) 1929 movups %xmm3,16(%edi) 1930 pxor %xmm3,%xmm3 1931 movups %xmm4,32(%edi) 1932 pxor 
%xmm4,%xmm4 1933 movups %xmm5,48(%edi) 1934 pxor %xmm5,%xmm5 1935 leal 64(%edi),%edi 1936 movaps %xmm6,%xmm2 1937 pxor %xmm6,%xmm6 1938 subl $80,%eax 1939 jmp L088cbc_dec_tail_collected 1940 .align 4,0x90 1941 L084cbc_dec_one: 1942 movups (%edx),%xmm0 1943 movups 16(%edx),%xmm1 1944 leal 32(%edx),%edx 1945 xorps %xmm0,%xmm2 1946 L089dec1_loop_16: 1947 .byte 102,15,56,222,209 1948 decl %ecx 1949 movups (%edx),%xmm1 1950 leal 16(%edx),%edx 1951 jnz L089dec1_loop_16 1952 .byte 102,15,56,223,209 1953 xorps %xmm7,%xmm2 1954 movaps %xmm6,%xmm7 1955 subl $16,%eax 1956 jmp L088cbc_dec_tail_collected 1957 .align 4,0x90 1958 L085cbc_dec_two: 1959 call __aesni_decrypt2 1960 xorps %xmm7,%xmm2 1961 xorps %xmm6,%xmm3 1962 movups %xmm2,(%edi) 1963 movaps %xmm3,%xmm2 1964 pxor %xmm3,%xmm3 1965 leal 16(%edi),%edi 1966 movaps %xmm5,%xmm7 1967 subl $32,%eax 1968 jmp L088cbc_dec_tail_collected 1969 .align 4,0x90 1970 L086cbc_dec_three: 1971 call __aesni_decrypt3 1972 xorps %xmm7,%xmm2 1973 xorps %xmm6,%xmm3 1974 xorps %xmm5,%xmm4 1975 movups %xmm2,(%edi) 1976 movaps %xmm4,%xmm2 1977 pxor %xmm4,%xmm4 1978 movups %xmm3,16(%edi) 1979 pxor %xmm3,%xmm3 1980 leal 32(%edi),%edi 1981 movups 32(%esi),%xmm7 1982 subl $48,%eax 1983 jmp L088cbc_dec_tail_collected 1984 .align 4,0x90 1985 L087cbc_dec_four: 1986 call __aesni_decrypt4 1987 movups 16(%esi),%xmm1 1988 movups 32(%esi),%xmm0 1989 xorps %xmm7,%xmm2 1990 movups 48(%esi),%xmm7 1991 xorps %xmm6,%xmm3 1992 movups %xmm2,(%edi) 1993 xorps %xmm1,%xmm4 1994 movups %xmm3,16(%edi) 1995 pxor %xmm3,%xmm3 1996 xorps %xmm0,%xmm5 1997 movups %xmm4,32(%edi) 1998 pxor %xmm4,%xmm4 1999 leal 48(%edi),%edi 2000 movaps %xmm5,%xmm2 2001 pxor %xmm5,%xmm5 2002 subl $64,%eax 2003 jmp L088cbc_dec_tail_collected 2004 .align 4,0x90 2005 L083cbc_dec_clear_tail_collected: 2006 pxor %xmm3,%xmm3 2007 pxor %xmm4,%xmm4 2008 pxor %xmm5,%xmm5 2009 pxor %xmm6,%xmm6 2010 L088cbc_dec_tail_collected: 2011 andl $15,%eax 2012 jnz L090cbc_dec_tail_partial 2013 movups %xmm2,(%edi) 
/*
 * NOTE(review): machine-generated code (perlasm output; see the .file
 * directive "src/crypto/aes/asm/aesni-x86.S" at the top of this file).
 * Comments below are annotations only -- every instruction is identical
 * to the generated original.  Do not hand-edit the instruction stream;
 * regenerate from the perl source instead.
 */
/* ---- tail of _aesni_cbc_encrypt: full final block was stored above ---- */
pxor %xmm0,%xmm0
jmp L079cbc_ret
.align 4,0x90
L090cbc_dec_tail_partial:
/*
 * Partial final block (len % 16 != 0): spill the decrypted block to the
 * stack scratch slot, then copy only the needed bytes to the output.
 */
movaps %xmm2,(%esp)
pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx                /* %ecx = number of valid tail bytes */
/*
 * 0xA4F3F689 emitted as data: decodes to "mov %esi,%esi; rep movsb"
 * (copies %ecx bytes (%esi)->(%edi)).  NOTE(review): decoded from the
 * little-endian bytes 89 F6 F3 A4 -- confirm against the perlasm source.
 */
.long 2767451785
movdqa %xmm2,(%esp)           /* re-clear the scratch slot (zeroed %xmm2 path) */
L079cbc_ret:
/* Restore caller %esp (saved at 16(%esp)), write back final IV, scrub state. */
movl 16(%esp),%esp
movl 36(%esp),%ebp            /* %ebp = ivp argument */
pxor %xmm2,%xmm2
pxor %xmm1,%xmm1
movups %xmm7,(%ebp)           /* store updated IV */
pxor %xmm7,%xmm7
L074cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * Internal key-schedule builder (register-argument helper, not C ABI):
 *   %eax = user key pointer, %ecx = key length in bits (128/192/256),
 *   %edx = output AES_KEY.
 * Returns in %eax: 0 on success, -1 for a NULL pointer, -2 for a bad
 * key length; on success %ecx holds the round count that was stored at
 * key+240 (9/11/13, the value consumed by the enc/dec loops above).
 * Reads OPENSSL_ia32cap_P[1] through the Mach-O non-lazy pointer and
 * masks 0x10000800; if the result equals 0x10000000 the "_alt"
 * pshufb/aesenclast schedule is used instead of AESKEYGENASSIST
 * (NOTE(review): presumably the constant-time/no-keygenassist path --
 * confirm bit meanings against OPENSSL_ia32cap documentation).
 */
.private_extern __aesni_set_encrypt_key
.align 4
__aesni_set_encrypt_key:
pushl %ebp
pushl %ebx
testl %eax,%eax               /* NULL user key? */
jz L091bad_pointer
testl %edx,%edx               /* NULL output key? */
jz L091bad_pointer
call L092pic                  /* PIC base: %ebx <- &Lkey_const */
L092pic:
popl %ebx
leal Lkey_const-L092pic(%ebx),%ebx
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
movups (%eax),%xmm0           /* first 16 key bytes */
xorps %xmm4,%xmm4             /* scratch used by the shufps mixing */
movl 4(%ebp),%ebp             /* OPENSSL_ia32cap_P[1] */
leal 16(%edx),%edx
andl $268437504,%ebp          /* mask 0x10000800 */
cmpl $256,%ecx
je L09314rounds
cmpl $192,%ecx
je L09412rounds
cmpl $128,%ecx
jne L095bad_keybits
.align 4,0x90
L09610rounds:
/* AES-128 schedule.  .byte 102,15,58,223,... = aeskeygenassist (imm8 = rcon). */
cmpl $268435456,%ebp          /* 0x10000000 -> use _alt path */
je L09710rounds_alt
movl $9,%ecx
movups %xmm0,-16(%edx)        /* round 0 key = raw user key */
.byte 102,15,58,223,200,1
call L098key_128_cold
.byte 102,15,58,223,200,2
call L099key_128
.byte 102,15,58,223,200,4
call L099key_128
.byte 102,15,58,223,200,8
call L099key_128
.byte 102,15,58,223,200,16
call L099key_128
.byte 102,15,58,223,200,32
call L099key_128
.byte 102,15,58,223,200,64
call L099key_128
.byte 102,15,58,223,200,128
call L099key_128
.byte 102,15,58,223,200,27
call L099key_128
.byte 102,15,58,223,200,54
call L099key_128
movups %xmm0,(%edx)           /* last round key */
movl %ecx,80(%edx)            /* round count at key+240 */
jmp L100good_key
.align 4,0x90
L099key_128:
/* Store the previous round key, then fall through to the expansion step. */
movups %xmm0,(%edx)
leal 16(%edx),%edx
L098key_128_cold:
/* Fold aeskeygenassist result (%xmm1) into the running key word (%xmm0). */
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 4,0x90
L09710rounds_alt:
/*
 * AES-128 _alt schedule: pshufb (102,15,56,0) + aesenclast (102,15,56,221)
 * with masks/rcon from Lkey_const instead of aeskeygenassist.
 */
movdqa (%ebx),%xmm5           /* rotword pshufb mask */
movl $8,%ecx
movdqa 32(%ebx),%xmm4         /* rcon = 1, doubled each iteration */
movdqa %xmm0,%xmm2
movdqu %xmm0,-16(%edx)
L101loop_key128:
.byte 102,15,56,0,197
.byte 102,15,56,221,196
pslld $1,%xmm4
leal 16(%edx),%edx
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,-16(%edx)
movdqa %xmm0,%xmm2
decl %ecx
jnz L101loop_key128
movdqa 48(%ebx),%xmm4         /* rcon = 27 for the last two rounds */
.byte 102,15,56,0,197
.byte 102,15,56,221,196
pslld $1,%xmm4
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%edx)
movl $9,%ecx
movl %ecx,96(%edx)            /* round count at key+240 */
jmp L100good_key
.align 4,0x90
L09412rounds:
/* AES-192 schedule: second half-key in %xmm2. */
movq 16(%eax),%xmm2
cmpl $268435456,%ebp
je L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
call L103key_192a_cold
.byte 102,15,58,223,202,2
call L104key_192b
.byte 102,15,58,223,202,4
call L105key_192a
.byte 102,15,58,223,202,8
call L104key_192b
.byte 102,15,58,223,202,16
call L105key_192a
.byte 102,15,58,223,202,32
call L104key_192b
.byte 102,15,58,223,202,64
call L105key_192a
.byte 102,15,58,223,202,128
call L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)            /* round count at key+240 */
jmp L100good_key
.align 4,0x90
L105key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 4,0x90
L103key_192a_cold:
movaps %xmm2,%xmm5
L106key_192b_warm:
/* Shared 192-bit expansion step (mixes %xmm0/%xmm2 via %xmm1 assist word). */
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
ret
.align 4,0x90
L104key_192b:
/* Store 1.5 round keys (192-bit schedule is not 16-byte periodic). */
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
jmp L106key_192b_warm
.align 4,0x90
L10212rounds_alt:
/* AES-192 _alt schedule (pshufb + aesenclast, 24-byte stride). */
movdqa 16(%ebx),%xmm5
movdqa 32(%ebx),%xmm4
movl $8,%ecx
movdqu %xmm0,-16(%edx)
L107loop_key192:
movq %xmm2,(%edx)
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213
.byte 102,15,56,221,212
pslld $1,%xmm4
leal 24(%edx),%edx
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%edx)
decl %ecx
jnz L107loop_key192
movl $11,%ecx
movl %ecx,32(%edx)            /* round count at key+240 */
jmp L100good_key
.align 4,0x90
L09314rounds:
/* AES-256 schedule: two 128-bit key halves in %xmm0/%xmm2. */
movups 16(%eax),%xmm2
leal 16(%edx),%edx
cmpl $268435456,%ebp
je L10814rounds_alt
movl $13,%ecx
movups %xmm0,-32(%edx)
movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
call L109key_256a_cold
.byte 102,15,58,223,200,1
call L110key_256b
.byte 102,15,58,223,202,2
call L111key_256a
.byte 102,15,58,223,200,2
call L110key_256b
.byte 102,15,58,223,202,4
call L111key_256a
.byte 102,15,58,223,200,4
call L110key_256b
.byte 102,15,58,223,202,8
call L111key_256a
.byte 102,15,58,223,200,8
call L110key_256b
.byte 102,15,58,223,202,16
call L111key_256a
.byte 102,15,58,223,200,16
call L110key_256b
.byte 102,15,58,223,202,32
call L111key_256a
.byte 102,15,58,223,200,32
call L110key_256b
.byte 102,15,58,223,202,64
call L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)            /* round count at key+240 */
xorl %eax,%eax
jmp L100good_key
.align 4,0x90
L111key_256a:
/* Even step: expands %xmm0 (same mixing as the 128-bit step). */
movups %xmm2,(%edx)
leal 16(%edx),%edx
L109key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 4,0x90
L110key_256b:
/* Odd step: expands %xmm2 using the non-rotated assist word ($170 = 0xaa). */
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
.align 4,0x90
L10814rounds_alt:
/* AES-256 _alt schedule: two round keys per iteration. */
movdqa (%ebx),%xmm5
movdqa 32(%ebx),%xmm4
movl $7,%ecx
movdqu %xmm0,-32(%edx)
movdqa %xmm2,%xmm1
movdqu %xmm2,-16(%edx)
L112loop_key256:
.byte 102,15,56,0,213
.byte 102,15,56,221,212
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)
decl %ecx
jz L113done_key256
pshufd $255,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211
movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%edx)
leal 32(%edx),%edx
movdqa %xmm2,%xmm1
jmp L112loop_key256
L113done_key256:
movl $13,%ecx
movl %ecx,16(%edx)            /* round count at key+240 */
L100good_key:
/* Success: scrub key material from xmm registers, return 0. */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorl %eax,%eax
popl %ebx
popl %ebp
ret
.align 2,0x90
L091bad_pointer:
movl $-1,%eax                 /* NULL argument */
popl %ebx
popl %ebp
ret
.align 2,0x90
L095bad_keybits:
pxor %xmm0,%xmm0
movl $-2,%eax                 /* unsupported key length */
popl %ebx
popl %ebp
ret
/*
 * C-ABI wrapper: aesni_set_encrypt_key(userKey, bits, key).
 * Loads the three stack arguments into %eax/%ecx/%edx and delegates to
 * the internal builder above; return value passes through in %eax.
 */
.globl _aesni_set_encrypt_key
.private_extern _aesni_set_encrypt_key
.align 4
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
ret
/*
 * C-ABI wrapper: aesni_set_decrypt_key(userKey, bits, key).
 * Builds the encrypt schedule, then (on success) inverts it in place:
 * swaps round keys end-for-end and applies InvMixColumns
 * (.byte 102,15,56,219 = aesimc) to all but the outermost pair.
 */
.globl _aesni_set_decrypt_key
.private_extern _aesni_set_decrypt_key
.align 4
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
movl 12(%esp),%edx
shll $4,%ecx                  /* %ecx = rounds*16 (builder left rounds in %ecx) */
testl %eax,%eax
jnz L114dec_key_ret           /* propagate builder error */
leal 16(%edx,%ecx,1),%eax     /* %eax = last round key, %edx = first */
movups (%edx),%xmm0
movups (%eax),%xmm1
movups %xmm0,(%eax)           /* swap outermost keys, no aesimc on these */
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
L115dec_key_inverse:
/* Walk inward from both ends: swap + aesimc each pair. */
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leal 16(%edx),%edx
leal -16(%eax),%eax
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
ja L115dec_key_inverse
movups (%edx),%xmm0           /* middle key: aesimc in place */
.byte 102,15,56,219,192
movups %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
xorl %eax,%eax
L114dec_key_ret:
ret
.align 6,0x90
Lkey_const:
/* pshufb masks (0x0c0f0e0d.., 0x04070605..) and rcon seeds (1, 27) for the _alt schedules. */
.long 202313229,202313229,202313229,202313229
.long 67569157,67569157,67569157,67569157
.long 1,1,1,1
.long 27,27,27,27
/* ASCII credit: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
/* Mach-O non-lazy pointer to the CPU capability vector. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif