1 #if defined(__i386__) 2 .text 3 .globl _aesni_encrypt 4 .private_extern _aesni_encrypt 5 .align 4 6 _aesni_encrypt: 7 L_aesni_encrypt_begin: 8 movl 4(%esp),%eax 9 movl 12(%esp),%edx 10 movups (%eax),%xmm2 11 movl 240(%edx),%ecx 12 movl 8(%esp),%eax 13 movups (%edx),%xmm0 14 movups 16(%edx),%xmm1 15 leal 32(%edx),%edx 16 xorps %xmm0,%xmm2 17 L000enc1_loop_1: 18 .byte 102,15,56,220,209 19 decl %ecx 20 movups (%edx),%xmm1 21 leal 16(%edx),%edx 22 jnz L000enc1_loop_1 23 .byte 102,15,56,221,209 24 pxor %xmm0,%xmm0 25 pxor %xmm1,%xmm1 26 movups %xmm2,(%eax) 27 pxor %xmm2,%xmm2 28 ret 29 .globl _aesni_decrypt 30 .private_extern _aesni_decrypt 31 .align 4 32 _aesni_decrypt: 33 L_aesni_decrypt_begin: 34 movl 4(%esp),%eax 35 movl 12(%esp),%edx 36 movups (%eax),%xmm2 37 movl 240(%edx),%ecx 38 movl 8(%esp),%eax 39 movups (%edx),%xmm0 40 movups 16(%edx),%xmm1 41 leal 32(%edx),%edx 42 xorps %xmm0,%xmm2 43 L001dec1_loop_2: 44 .byte 102,15,56,222,209 45 decl %ecx 46 movups (%edx),%xmm1 47 leal 16(%edx),%edx 48 jnz L001dec1_loop_2 49 .byte 102,15,56,223,209 50 pxor %xmm0,%xmm0 51 pxor %xmm1,%xmm1 52 movups %xmm2,(%eax) 53 pxor %xmm2,%xmm2 54 ret 55 .private_extern __aesni_encrypt2 56 .align 4 57 __aesni_encrypt2: 58 movups (%edx),%xmm0 59 shll $4,%ecx 60 movups 16(%edx),%xmm1 61 xorps %xmm0,%xmm2 62 pxor %xmm0,%xmm3 63 movups 32(%edx),%xmm0 64 leal 32(%edx,%ecx,1),%edx 65 negl %ecx 66 addl $16,%ecx 67 L002enc2_loop: 68 .byte 102,15,56,220,209 69 .byte 102,15,56,220,217 70 movups (%edx,%ecx,1),%xmm1 71 addl $32,%ecx 72 .byte 102,15,56,220,208 73 .byte 102,15,56,220,216 74 movups -16(%edx,%ecx,1),%xmm0 75 jnz L002enc2_loop 76 .byte 102,15,56,220,209 77 .byte 102,15,56,220,217 78 .byte 102,15,56,221,208 79 .byte 102,15,56,221,216 80 ret 81 .private_extern __aesni_decrypt2 82 .align 4 83 __aesni_decrypt2: 84 movups (%edx),%xmm0 85 shll $4,%ecx 86 movups 16(%edx),%xmm1 87 xorps %xmm0,%xmm2 88 pxor %xmm0,%xmm3 89 movups 32(%edx),%xmm0 90 leal 32(%edx,%ecx,1),%edx 91 negl %ecx 92 addl $16,%ecx 93 L003dec2_loop: 94 .byte 102,15,56,222,209 95 .byte 102,15,56,222,217 96 movups (%edx,%ecx,1),%xmm1 97 addl $32,%ecx 98 .byte 102,15,56,222,208 99 .byte 102,15,56,222,216 100 movups -16(%edx,%ecx,1),%xmm0 101 jnz L003dec2_loop 102 .byte 102,15,56,222,209 103 .byte 102,15,56,222,217 104 .byte 102,15,56,223,208 105 .byte 102,15,56,223,216 106 ret 107 .private_extern __aesni_encrypt3 108 .align 4 109 __aesni_encrypt3: 110 movups (%edx),%xmm0 111 shll $4,%ecx 112 movups 16(%edx),%xmm1 113 xorps %xmm0,%xmm2 114 pxor %xmm0,%xmm3 115 pxor %xmm0,%xmm4 116 movups 32(%edx),%xmm0 117 leal 32(%edx,%ecx,1),%edx 118 negl %ecx 119 addl $16,%ecx 120 L004enc3_loop: 121 .byte 102,15,56,220,209 122 .byte 102,15,56,220,217 123 .byte 102,15,56,220,225 124 movups (%edx,%ecx,1),%xmm1 125 addl $32,%ecx 126 .byte 102,15,56,220,208 127 .byte 102,15,56,220,216 128 .byte 102,15,56,220,224 129 movups -16(%edx,%ecx,1),%xmm0 130 jnz L004enc3_loop 131 .byte 102,15,56,220,209 132 .byte 102,15,56,220,217 133 .byte 102,15,56,220,225 134 .byte 102,15,56,221,208 135 .byte 102,15,56,221,216 136 .byte 102,15,56,221,224 137 ret 138 .private_extern __aesni_decrypt3 139 .align 4 140 __aesni_decrypt3: 141 movups (%edx),%xmm0 142 shll $4,%ecx 143 movups 16(%edx),%xmm1 144 xorps %xmm0,%xmm2 145 pxor %xmm0,%xmm3 146 pxor %xmm0,%xmm4 147 movups 32(%edx),%xmm0 148 leal 32(%edx,%ecx,1),%edx 149 negl %ecx 150 addl $16,%ecx 151 L005dec3_loop: 152 .byte 102,15,56,222,209 153 .byte 102,15,56,222,217 154 .byte 102,15,56,222,225 155 movups (%edx,%ecx,1),%xmm1 156 addl $32,%ecx 157 .byte 102,15,56,222,208 158 .byte 102,15,56,222,216 159 .byte 102,15,56,222,224 160 movups -16(%edx,%ecx,1),%xmm0 161 jnz L005dec3_loop 162 .byte 102,15,56,222,209 163 .byte 102,15,56,222,217 164 .byte 102,15,56,222,225 165 .byte 102,15,56,223,208 166 .byte 102,15,56,223,216 167 .byte 102,15,56,223,224 168 ret 169 .private_extern __aesni_encrypt4 170 .align 4 171 __aesni_encrypt4: 172 movups (%edx),%xmm0 173 movups 16(%edx),%xmm1 174 shll $4,%ecx 175 xorps %xmm0,%xmm2 176 pxor %xmm0,%xmm3 177 pxor %xmm0,%xmm4 178 pxor %xmm0,%xmm5 179 movups 32(%edx),%xmm0 180 leal 32(%edx,%ecx,1),%edx 181 negl %ecx 182 .byte 15,31,64,0 183 addl $16,%ecx 184 L006enc4_loop: 185 .byte 102,15,56,220,209 186 .byte 102,15,56,220,217 187 .byte 102,15,56,220,225 188 .byte 102,15,56,220,233 189 movups (%edx,%ecx,1),%xmm1 190 addl $32,%ecx 191 .byte 102,15,56,220,208 192 .byte 102,15,56,220,216 193 .byte 102,15,56,220,224 194 .byte 102,15,56,220,232 195 movups -16(%edx,%ecx,1),%xmm0 196 jnz L006enc4_loop 197 .byte 102,15,56,220,209 198 .byte 102,15,56,220,217 199 .byte 102,15,56,220,225 200 .byte 102,15,56,220,233 201 .byte 102,15,56,221,208 202 .byte 102,15,56,221,216 203 .byte 102,15,56,221,224 204 .byte 102,15,56,221,232 205 ret 206 .private_extern __aesni_decrypt4 207 .align 4 208 __aesni_decrypt4: 209 movups (%edx),%xmm0 210 movups 16(%edx),%xmm1 211 shll $4,%ecx 212 xorps %xmm0,%xmm2 213 pxor %xmm0,%xmm3 214 pxor %xmm0,%xmm4 215 pxor %xmm0,%xmm5 216 movups 32(%edx),%xmm0 217 leal 32(%edx,%ecx,1),%edx 218 negl %ecx 219 .byte 15,31,64,0 220 addl $16,%ecx 221 L007dec4_loop: 222 .byte 102,15,56,222,209 223 .byte 102,15,56,222,217 224 .byte 102,15,56,222,225 225 .byte 102,15,56,222,233 226 movups (%edx,%ecx,1),%xmm1 227 addl $32,%ecx 228 .byte 102,15,56,222,208 229 .byte 102,15,56,222,216 230 .byte 102,15,56,222,224 231 .byte 102,15,56,222,232 232 movups -16(%edx,%ecx,1),%xmm0 233 jnz L007dec4_loop 234 .byte 102,15,56,222,209 235 .byte 102,15,56,222,217 236 .byte 102,15,56,222,225 237 .byte 102,15,56,222,233 238 .byte 102,15,56,223,208 239 .byte 102,15,56,223,216 240 .byte 102,15,56,223,224 241 .byte 102,15,56,223,232 242 ret 243 .private_extern __aesni_encrypt6 244 .align 4 245 __aesni_encrypt6: 246 movups (%edx),%xmm0 247 shll $4,%ecx 248 movups 16(%edx),%xmm1 249 xorps %xmm0,%xmm2 250 pxor %xmm0,%xmm3 251 pxor %xmm0,%xmm4 252 .byte 102,15,56,220,209 253 pxor %xmm0,%xmm5 254 pxor %xmm0,%xmm6 255 .byte 102,15,56,220,217 256 leal 32(%edx,%ecx,1),%edx 257 negl %ecx 258 .byte 102,15,56,220,225 259 pxor %xmm0,%xmm7 260 movups (%edx,%ecx,1),%xmm0 261 addl $16,%ecx 262 jmp L008_aesni_encrypt6_inner 263 .align 4,0x90 264 L009enc6_loop: 265 .byte 102,15,56,220,209 266 .byte 102,15,56,220,217 267 .byte 102,15,56,220,225 268 L008_aesni_encrypt6_inner: 269 .byte 102,15,56,220,233 270 .byte 102,15,56,220,241 271 .byte 102,15,56,220,249 272 L_aesni_encrypt6_enter: 273 movups (%edx,%ecx,1),%xmm1 274 addl $32,%ecx 275 .byte 102,15,56,220,208 276 .byte 102,15,56,220,216 277 .byte 102,15,56,220,224 278 .byte 102,15,56,220,232 279 .byte 102,15,56,220,240 280 .byte 102,15,56,220,248 281 movups -16(%edx,%ecx,1),%xmm0 282 jnz L009enc6_loop 283 .byte 102,15,56,220,209 284 .byte 102,15,56,220,217 285 .byte 102,15,56,220,225 286 .byte 102,15,56,220,233 287 .byte 102,15,56,220,241 288 .byte 102,15,56,220,249 289 .byte 102,15,56,221,208 290 .byte 102,15,56,221,216 291 .byte 102,15,56,221,224 292 .byte 102,15,56,221,232 293 .byte 102,15,56,221,240 294 .byte 102,15,56,221,248 295 ret 296 .private_extern __aesni_decrypt6 297 .align 4 298 __aesni_decrypt6: 299 movups (%edx),%xmm0 300 shll $4,%ecx 301 movups 16(%edx),%xmm1 302 xorps %xmm0,%xmm2 303 pxor %xmm0,%xmm3 304 pxor %xmm0,%xmm4 305 .byte 102,15,56,222,209 306 pxor %xmm0,%xmm5 307 pxor %xmm0,%xmm6 308 .byte 102,15,56,222,217 309 leal 32(%edx,%ecx,1),%edx 310 negl %ecx 311 .byte 102,15,56,222,225 312 pxor %xmm0,%xmm7 313 movups (%edx,%ecx,1),%xmm0 314 addl $16,%ecx 315 jmp L010_aesni_decrypt6_inner 316 .align 4,0x90 317 L011dec6_loop: 318 .byte 102,15,56,222,209 319 .byte 102,15,56,222,217 320 .byte 102,15,56,222,225 321 L010_aesni_decrypt6_inner: 322 .byte 102,15,56,222,233 323 .byte 102,15,56,222,241 324 .byte 102,15,56,222,249 325 L_aesni_decrypt6_enter: 326 movups (%edx,%ecx,1),%xmm1 327 addl $32,%ecx 328 .byte 102,15,56,222,208 329 .byte 102,15,56,222,216 330 .byte 102,15,56,222,224 331 .byte 102,15,56,222,232 332 .byte 102,15,56,222,240 333 .byte 102,15,56,222,248 334 movups -16(%edx,%ecx,1),%xmm0 335 jnz L011dec6_loop 336 .byte 102,15,56,222,209 337 .byte 102,15,56,222,217 338 .byte 102,15,56,222,225 339 .byte 102,15,56,222,233 340 .byte 102,15,56,222,241 341 .byte 102,15,56,222,249 342 .byte 102,15,56,223,208 343 .byte 102,15,56,223,216 344 .byte 102,15,56,223,224 345 .byte 102,15,56,223,232 346 .byte 102,15,56,223,240 347 .byte 102,15,56,223,248 348 ret 349 .globl _aesni_ecb_encrypt 350 .private_extern _aesni_ecb_encrypt 351 .align 4 352 _aesni_ecb_encrypt: 353 L_aesni_ecb_encrypt_begin: 354 pushl %ebp 355 pushl %ebx 356 pushl %esi 357 pushl %edi 358 movl 20(%esp),%esi 359 movl 24(%esp),%edi 360 movl 28(%esp),%eax 361 movl 32(%esp),%edx 362 movl 36(%esp),%ebx 363 andl $-16,%eax 364 jz L012ecb_ret 365 movl 240(%edx),%ecx 366 testl %ebx,%ebx 367 jz L013ecb_decrypt 368 movl %edx,%ebp 369 movl %ecx,%ebx 370 cmpl $96,%eax 371 jb L014ecb_enc_tail 372 movdqu (%esi),%xmm2 373 movdqu 16(%esi),%xmm3 374 movdqu 32(%esi),%xmm4 375 movdqu 48(%esi),%xmm5 376 movdqu 64(%esi),%xmm6 377 movdqu 80(%esi),%xmm7 378 leal 96(%esi),%esi 379 subl $96,%eax 380 jmp L015ecb_enc_loop6_enter 381 .align 4,0x90 382 L016ecb_enc_loop6: 383 movups %xmm2,(%edi) 384 movdqu (%esi),%xmm2 385 movups %xmm3,16(%edi) 386 movdqu 16(%esi),%xmm3 387 movups %xmm4,32(%edi) 388 movdqu 32(%esi),%xmm4 389 movups %xmm5,48(%edi) 390 movdqu 48(%esi),%xmm5 391 movups %xmm6,64(%edi) 392 movdqu 64(%esi),%xmm6 393 movups %xmm7,80(%edi) 394 leal 96(%edi),%edi 395 movdqu 80(%esi),%xmm7 396 leal 96(%esi),%esi 397 L015ecb_enc_loop6_enter: 398 call __aesni_encrypt6 399 movl %ebp,%edx 400 movl %ebx,%ecx 401 subl $96,%eax 402 jnc L016ecb_enc_loop6 403 movups %xmm2,(%edi) 404 movups %xmm3,16(%edi) 405 movups %xmm4,32(%edi) 406 movups %xmm5,48(%edi) 407 movups %xmm6,64(%edi) 408 movups %xmm7,80(%edi) 409 leal 96(%edi),%edi 410 addl $96,%eax 411 jz L012ecb_ret 412 L014ecb_enc_tail: 413 movups (%esi),%xmm2 414 cmpl $32,%eax 415 jb L017ecb_enc_one 416 movups 16(%esi),%xmm3 417 je L018ecb_enc_two 418 movups 32(%esi),%xmm4 419 cmpl $64,%eax 420 jb L019ecb_enc_three 421 movups 48(%esi),%xmm5 422 je L020ecb_enc_four 423 movups 64(%esi),%xmm6 424 xorps %xmm7,%xmm7 425 call __aesni_encrypt6 426 movups %xmm2,(%edi) 427 movups %xmm3,16(%edi) 428 movups %xmm4,32(%edi) 429 movups %xmm5,48(%edi) 430 movups %xmm6,64(%edi) 431 jmp L012ecb_ret 432 .align 4,0x90 433 L017ecb_enc_one: 434 movups (%edx),%xmm0 435 movups 16(%edx),%xmm1 436 leal 32(%edx),%edx 437 xorps %xmm0,%xmm2 438 L021enc1_loop_3: 439 .byte 102,15,56,220,209 440 decl %ecx 441 movups (%edx),%xmm1 442 leal 16(%edx),%edx 443 jnz L021enc1_loop_3 444 .byte 102,15,56,221,209 445 movups %xmm2,(%edi) 446 jmp L012ecb_ret 447 .align 4,0x90 448 L018ecb_enc_two: 449 call __aesni_encrypt2 450 movups %xmm2,(%edi) 451 movups %xmm3,16(%edi) 452 jmp L012ecb_ret 453 .align 4,0x90 454 L019ecb_enc_three: 455 call __aesni_encrypt3 456 movups %xmm2,(%edi) 457 movups %xmm3,16(%edi) 458 movups %xmm4,32(%edi) 459 jmp L012ecb_ret 460 .align 4,0x90 461 L020ecb_enc_four: 462 call __aesni_encrypt4 463 movups %xmm2,(%edi) 464 movups %xmm3,16(%edi) 465 movups %xmm4,32(%edi) 466 movups %xmm5,48(%edi) 467 jmp L012ecb_ret 468 .align 4,0x90 469 L013ecb_decrypt: 470 movl %edx,%ebp 471 movl %ecx,%ebx 472 cmpl $96,%eax 473 jb L022ecb_dec_tail 474 movdqu (%esi),%xmm2 475 movdqu 16(%esi),%xmm3 476 movdqu 32(%esi),%xmm4 477 movdqu 48(%esi),%xmm5 478 movdqu 64(%esi),%xmm6 479 movdqu 80(%esi),%xmm7 480 leal 96(%esi),%esi 481 subl $96,%eax 482 jmp L023ecb_dec_loop6_enter 483 .align 4,0x90 484 L024ecb_dec_loop6: 485 movups %xmm2,(%edi) 486 movdqu (%esi),%xmm2 487 movups %xmm3,16(%edi) 488 movdqu 16(%esi),%xmm3 489 movups %xmm4,32(%edi) 490 movdqu 32(%esi),%xmm4 491 movups %xmm5,48(%edi) 492 movdqu 48(%esi),%xmm5 493 movups %xmm6,64(%edi) 494 movdqu 64(%esi),%xmm6 495 movups %xmm7,80(%edi) 496 leal 96(%edi),%edi 497 movdqu 80(%esi),%xmm7 498 leal 96(%esi),%esi 499 L023ecb_dec_loop6_enter: 500 call __aesni_decrypt6 501 movl %ebp,%edx 502 movl %ebx,%ecx 503 subl $96,%eax 504 jnc L024ecb_dec_loop6 505 movups %xmm2,(%edi) 506 movups %xmm3,16(%edi) 507 movups %xmm4,32(%edi) 508 movups %xmm5,48(%edi) 509 movups %xmm6,64(%edi) 510 movups %xmm7,80(%edi) 511 leal 96(%edi),%edi 512 addl $96,%eax 513 jz L012ecb_ret 514 L022ecb_dec_tail: 515 movups (%esi),%xmm2 516 cmpl $32,%eax 517 jb L025ecb_dec_one 518 movups 16(%esi),%xmm3 519 je L026ecb_dec_two 520 movups 32(%esi),%xmm4 521 cmpl $64,%eax 522 jb L027ecb_dec_three 523 movups 48(%esi),%xmm5 524 je L028ecb_dec_four 525 movups 64(%esi),%xmm6 526 xorps %xmm7,%xmm7 527 call __aesni_decrypt6 528 movups %xmm2,(%edi) 529 movups %xmm3,16(%edi) 530 movups %xmm4,32(%edi) 531 movups %xmm5,48(%edi) 532 movups %xmm6,64(%edi) 533 jmp L012ecb_ret 534 .align 4,0x90 535 L025ecb_dec_one: 536 movups (%edx),%xmm0 537 movups 16(%edx),%xmm1 538 leal 32(%edx),%edx 539 xorps %xmm0,%xmm2 540 L029dec1_loop_4: 541 .byte 102,15,56,222,209 542 decl %ecx 543 movups (%edx),%xmm1 544 leal 16(%edx),%edx 545 jnz L029dec1_loop_4 546 .byte 102,15,56,223,209 547 movups %xmm2,(%edi) 548 jmp L012ecb_ret 549 .align 4,0x90 550 L026ecb_dec_two: 551 call __aesni_decrypt2 552 movups %xmm2,(%edi) 553 movups %xmm3,16(%edi) 554 jmp L012ecb_ret 555 .align 4,0x90 556 L027ecb_dec_three: 557 call __aesni_decrypt3 558 movups %xmm2,(%edi) 559 movups %xmm3,16(%edi) 560 movups %xmm4,32(%edi) 561 jmp L012ecb_ret 562 .align 4,0x90 563 L028ecb_dec_four: 564 call __aesni_decrypt4 565 movups %xmm2,(%edi) 566 movups %xmm3,16(%edi) 567 movups %xmm4,32(%edi) 568 movups %xmm5,48(%edi) 569 L012ecb_ret: 570 pxor %xmm0,%xmm0 571 pxor %xmm1,%xmm1 572 pxor %xmm2,%xmm2 573 pxor %xmm3,%xmm3 574 pxor %xmm4,%xmm4 575 pxor %xmm5,%xmm5 576 pxor %xmm6,%xmm6 577 pxor %xmm7,%xmm7 578 popl %edi 579 popl %esi 580 popl %ebx 581 popl %ebp 582 ret 583 .globl _aesni_ccm64_encrypt_blocks 584 .private_extern _aesni_ccm64_encrypt_blocks 585 .align 4 586 _aesni_ccm64_encrypt_blocks: 587 L_aesni_ccm64_encrypt_blocks_begin: 588 pushl %ebp 589 pushl %ebx 590 pushl %esi 591 pushl %edi 592 movl 20(%esp),%esi 593 movl 24(%esp),%edi 594 movl 28(%esp),%eax 595 movl 32(%esp),%edx 596 movl 36(%esp),%ebx 597 movl 40(%esp),%ecx 598 movl %esp,%ebp 599 subl $60,%esp 600 andl $-16,%esp 601 movl %ebp,48(%esp) 602 movdqu (%ebx),%xmm7 603 movdqu (%ecx),%xmm3 604 movl 240(%edx),%ecx 605 movl $202182159,(%esp) 606 movl $134810123,4(%esp) 607 movl $67438087,8(%esp) 608 movl $66051,12(%esp) 609 movl $1,%ebx 610 xorl %ebp,%ebp 611 movl %ebx,16(%esp) 612 movl %ebp,20(%esp) 613 movl %ebp,24(%esp) 614 movl %ebp,28(%esp) 615 shll $4,%ecx 616 movl $16,%ebx 617 leal (%edx),%ebp 618 movdqa (%esp),%xmm5 619 movdqa %xmm7,%xmm2 620 leal 32(%edx,%ecx,1),%edx 621 subl %ecx,%ebx 622 .byte 102,15,56,0,253 623 L030ccm64_enc_outer: 624 movups (%ebp),%xmm0 625 movl %ebx,%ecx 626 movups (%esi),%xmm6 627 xorps %xmm0,%xmm2 628 movups 16(%ebp),%xmm1 629 xorps %xmm6,%xmm0 630 xorps %xmm0,%xmm3 631 movups 32(%ebp),%xmm0 632 L031ccm64_enc2_loop: 633 .byte 102,15,56,220,209 634 .byte 102,15,56,220,217 635 movups (%edx,%ecx,1),%xmm1 636 addl $32,%ecx 637 .byte 102,15,56,220,208 638 .byte 102,15,56,220,216 639 movups -16(%edx,%ecx,1),%xmm0 640 jnz L031ccm64_enc2_loop 641 .byte 102,15,56,220,209 642 .byte 102,15,56,220,217 643 paddq 16(%esp),%xmm7 644 decl %eax 645 .byte 102,15,56,221,208 646 .byte 102,15,56,221,216 647 leal 16(%esi),%esi 648 xorps %xmm2,%xmm6 649 movdqa %xmm7,%xmm2 650 movups %xmm6,(%edi) 651 .byte 102,15,56,0,213 652 leal 16(%edi),%edi 653 jnz L030ccm64_enc_outer 654 movl 48(%esp),%esp 655 movl 40(%esp),%edi 656 movups %xmm3,(%edi) 657 pxor %xmm0,%xmm0 658 pxor %xmm1,%xmm1 659 pxor %xmm2,%xmm2 660 pxor %xmm3,%xmm3 661 pxor %xmm4,%xmm4 662 pxor %xmm5,%xmm5 663 pxor %xmm6,%xmm6 664 pxor %xmm7,%xmm7 665 popl %edi 666 popl %esi 667 popl %ebx 668 popl %ebp 669 ret 670 .globl _aesni_ccm64_decrypt_blocks 671 .private_extern _aesni_ccm64_decrypt_blocks 672 .align 4 673 _aesni_ccm64_decrypt_blocks: 674 L_aesni_ccm64_decrypt_blocks_begin: 675 pushl %ebp 676 pushl %ebx 677 pushl %esi 678 pushl %edi 679 movl 20(%esp),%esi 680 movl 24(%esp),%edi 681 movl 28(%esp),%eax 682 movl 32(%esp),%edx 683 movl 36(%esp),%ebx 684 movl 40(%esp),%ecx 685 movl %esp,%ebp 686 subl $60,%esp 687 andl $-16,%esp 688 movl %ebp,48(%esp) 689 movdqu (%ebx),%xmm7 690 movdqu (%ecx),%xmm3 691 movl 240(%edx),%ecx 692 movl $202182159,(%esp) 693 movl $134810123,4(%esp) 694 movl $67438087,8(%esp) 695 movl $66051,12(%esp) 696 movl $1,%ebx 697 xorl %ebp,%ebp 698 movl %ebx,16(%esp) 699 movl %ebp,20(%esp) 700 movl %ebp,24(%esp) 701 movl %ebp,28(%esp) 702 movdqa (%esp),%xmm5 703 movdqa %xmm7,%xmm2 704 movl %edx,%ebp 705 movl %ecx,%ebx 706 .byte 102,15,56,0,253 707 movups (%edx),%xmm0 708 movups 16(%edx),%xmm1 709 leal 32(%edx),%edx 710 xorps %xmm0,%xmm2 711 L032enc1_loop_5: 712 .byte 102,15,56,220,209 713 decl %ecx 714 movups (%edx),%xmm1 715 leal 16(%edx),%edx 716 jnz L032enc1_loop_5 717 .byte 102,15,56,221,209 718 shll $4,%ebx 719 movl $16,%ecx 720 movups (%esi),%xmm6 721 paddq 16(%esp),%xmm7 722 leal 16(%esi),%esi 723 subl %ebx,%ecx 724 leal 32(%ebp,%ebx,1),%edx 725 movl %ecx,%ebx 726 jmp L033ccm64_dec_outer 727 .align 4,0x90 728 L033ccm64_dec_outer: 729 xorps %xmm2,%xmm6 730 movdqa %xmm7,%xmm2 731 movups %xmm6,(%edi) 732 leal 16(%edi),%edi 733 .byte 102,15,56,0,213 734 subl $1,%eax 735 jz L034ccm64_dec_break 736 movups (%ebp),%xmm0 737 movl %ebx,%ecx 738 movups 16(%ebp),%xmm1 739 xorps %xmm0,%xmm6 740 xorps %xmm0,%xmm2 741 xorps %xmm6,%xmm3 742 movups 32(%ebp),%xmm0 743 L035ccm64_dec2_loop: 744 .byte 102,15,56,220,209 745 .byte 102,15,56,220,217 746 movups (%edx,%ecx,1),%xmm1 747 addl $32,%ecx 748 .byte 102,15,56,220,208 749 .byte 102,15,56,220,216 750 movups -16(%edx,%ecx,1),%xmm0 751 jnz L035ccm64_dec2_loop 752 movups (%esi),%xmm6 753 paddq 16(%esp),%xmm7 754 .byte 102,15,56,220,209 755 .byte 102,15,56,220,217 756 .byte 102,15,56,221,208 757 .byte 102,15,56,221,216 758 leal 16(%esi),%esi 759 jmp L033ccm64_dec_outer 760 .align 4,0x90 761 L034ccm64_dec_break: 762 movl 240(%ebp),%ecx 763 movl %ebp,%edx 764 movups (%edx),%xmm0 765 movups 16(%edx),%xmm1 766 xorps %xmm0,%xmm6 767 leal 32(%edx),%edx 768 xorps %xmm6,%xmm3 769 L036enc1_loop_6: 770 .byte 102,15,56,220,217 771 decl %ecx 772 movups (%edx),%xmm1 773 leal 16(%edx),%edx 774 jnz L036enc1_loop_6 775 .byte 102,15,56,221,217 776 movl 48(%esp),%esp 777 movl 40(%esp),%edi 778 movups %xmm3,(%edi) 779 pxor %xmm0,%xmm0 780 pxor %xmm1,%xmm1 781 pxor %xmm2,%xmm2 782 pxor %xmm3,%xmm3 783 pxor %xmm4,%xmm4 784 pxor %xmm5,%xmm5 785 pxor %xmm6,%xmm6 786 pxor %xmm7,%xmm7 787 popl %edi 788 popl %esi 789 popl %ebx 790 popl %ebp 791 ret 792 .globl _aesni_ctr32_encrypt_blocks 793 .private_extern _aesni_ctr32_encrypt_blocks 794 .align 4 795 _aesni_ctr32_encrypt_blocks: 796 L_aesni_ctr32_encrypt_blocks_begin: 797 pushl %ebp 798 pushl %ebx 799 pushl %esi 800 pushl %edi 801 movl 20(%esp),%esi 802 movl 24(%esp),%edi 803 movl 28(%esp),%eax 804 movl 32(%esp),%edx 805 movl 36(%esp),%ebx 806 movl %esp,%ebp 807 subl $88,%esp 808 andl $-16,%esp 809 movl %ebp,80(%esp) 810 cmpl $1,%eax 811 je L037ctr32_one_shortcut 812 movdqu (%ebx),%xmm7 813 movl $202182159,(%esp) 814 movl $134810123,4(%esp) 815 movl $67438087,8(%esp) 816 movl $66051,12(%esp) 817 movl $6,%ecx 818 xorl %ebp,%ebp 819 movl %ecx,16(%esp) 820 movl %ecx,20(%esp) 821 movl %ecx,24(%esp) 822 movl %ebp,28(%esp) 823 .byte 102,15,58,22,251,3 824 .byte 102,15,58,34,253,3 825 movl 240(%edx),%ecx 826 bswap %ebx 827 pxor %xmm0,%xmm0 828 pxor %xmm1,%xmm1 829 movdqa (%esp),%xmm2 830 .byte 102,15,58,34,195,0 831 leal 3(%ebx),%ebp 832 .byte 102,15,58,34,205,0 833 incl %ebx 834 .byte 102,15,58,34,195,1 835 incl %ebp 836 .byte 102,15,58,34,205,1 837 incl %ebx 838 .byte 102,15,58,34,195,2 839 incl %ebp 840 .byte 102,15,58,34,205,2 841 movdqa %xmm0,48(%esp) 842 .byte 102,15,56,0,194 843 movdqu (%edx),%xmm6 844 movdqa %xmm1,64(%esp) 845 .byte 102,15,56,0,202 846 pshufd $192,%xmm0,%xmm2 847 pshufd $128,%xmm0,%xmm3 848 cmpl $6,%eax 849 jb L038ctr32_tail 850 pxor %xmm6,%xmm7 851 shll $4,%ecx 852 movl $16,%ebx 853 movdqa %xmm7,32(%esp) 854 movl %edx,%ebp 855 subl %ecx,%ebx 856 leal 32(%edx,%ecx,1),%edx 857 subl $6,%eax 858 jmp L039ctr32_loop6 859 .align 4,0x90 860 L039ctr32_loop6: 861 pshufd $64,%xmm0,%xmm4 862 movdqa 32(%esp),%xmm0 863 pshufd $192,%xmm1,%xmm5 864 pxor %xmm0,%xmm2 865 pshufd $128,%xmm1,%xmm6 866 pxor %xmm0,%xmm3 867 pshufd $64,%xmm1,%xmm7 868 movups 16(%ebp),%xmm1 869 pxor %xmm0,%xmm4 870 pxor %xmm0,%xmm5 871 .byte 102,15,56,220,209 872 pxor %xmm0,%xmm6 873 pxor %xmm0,%xmm7 874 .byte 102,15,56,220,217 875 movups 32(%ebp),%xmm0 876 movl %ebx,%ecx 877 .byte 102,15,56,220,225 878 .byte 102,15,56,220,233 879 .byte 102,15,56,220,241 880 .byte 102,15,56,220,249 881 call L_aesni_encrypt6_enter 882 movups (%esi),%xmm1 883 movups 16(%esi),%xmm0 884 xorps %xmm1,%xmm2 885 movups 32(%esi),%xmm1 886 xorps %xmm0,%xmm3 887 movups %xmm2,(%edi) 888 movdqa 16(%esp),%xmm0 889 xorps %xmm1,%xmm4 890 movdqa 64(%esp),%xmm1 891 movups %xmm3,16(%edi) 892 movups %xmm4,32(%edi) 893 paddd %xmm0,%xmm1 894 paddd 48(%esp),%xmm0 895 movdqa (%esp),%xmm2 896 movups 48(%esi),%xmm3 897 movups 64(%esi),%xmm4 898 xorps %xmm3,%xmm5 899 movups 80(%esi),%xmm3 900 leal 96(%esi),%esi 901 movdqa %xmm0,48(%esp) 902 .byte 102,15,56,0,194 903 xorps %xmm4,%xmm6 904 movups %xmm5,48(%edi) 905 xorps %xmm3,%xmm7 906 movdqa %xmm1,64(%esp) 907 .byte 102,15,56,0,202 908 movups %xmm6,64(%edi) 909 pshufd $192,%xmm0,%xmm2 910 movups %xmm7,80(%edi) 911 leal 96(%edi),%edi 912 pshufd $128,%xmm0,%xmm3 913 subl $6,%eax 914 jnc L039ctr32_loop6 915 addl $6,%eax 916 jz L040ctr32_ret 917 movdqu (%ebp),%xmm7 918 movl %ebp,%edx 919 pxor 32(%esp),%xmm7 920 movl 240(%ebp),%ecx 921 L038ctr32_tail: 922 por %xmm7,%xmm2 923 cmpl $2,%eax 924 jb L041ctr32_one 925 pshufd $64,%xmm0,%xmm4 926 por %xmm7,%xmm3 927 je L042ctr32_two 928 pshufd $192,%xmm1,%xmm5 929 por %xmm7,%xmm4 930 cmpl $4,%eax 931 jb L043ctr32_three 932 pshufd $128,%xmm1,%xmm6 933 por %xmm7,%xmm5 934 je L044ctr32_four 935 por %xmm7,%xmm6 936 call __aesni_encrypt6 937 movups (%esi),%xmm1 938 movups 16(%esi),%xmm0 939 xorps %xmm1,%xmm2 940 movups 32(%esi),%xmm1 941 xorps %xmm0,%xmm3 942 movups 48(%esi),%xmm0 943 xorps %xmm1,%xmm4 944 movups 64(%esi),%xmm1 945 xorps %xmm0,%xmm5 946 movups %xmm2,(%edi) 947 xorps %xmm1,%xmm6 948 movups %xmm3,16(%edi) 949 movups %xmm4,32(%edi) 950 movups %xmm5,48(%edi) 951 movups %xmm6,64(%edi) 952 jmp L040ctr32_ret 953 .align 4,0x90 954 L037ctr32_one_shortcut: 955 movups (%ebx),%xmm2 956 movl 240(%edx),%ecx 957 L041ctr32_one: 958 movups (%edx),%xmm0 959 movups 16(%edx),%xmm1 960 leal 32(%edx),%edx 961 xorps %xmm0,%xmm2 962 L045enc1_loop_7: 963 .byte 102,15,56,220,209 964 decl %ecx 965 movups (%edx),%xmm1 966 leal 16(%edx),%edx 967 jnz L045enc1_loop_7 968 .byte 102,15,56,221,209 969 movups (%esi),%xmm6 970 xorps %xmm2,%xmm6 971 movups %xmm6,(%edi) 972 jmp L040ctr32_ret 973 .align 4,0x90 974 L042ctr32_two: 975 call __aesni_encrypt2 976 movups (%esi),%xmm5 977 movups 16(%esi),%xmm6 978 xorps %xmm5,%xmm2 979 xorps %xmm6,%xmm3 980 movups %xmm2,(%edi) 981 movups %xmm3,16(%edi) 982 jmp L040ctr32_ret 983 .align 4,0x90 984 L043ctr32_three: 985 call __aesni_encrypt3 986 movups (%esi),%xmm5 987 movups 16(%esi),%xmm6 988 xorps %xmm5,%xmm2 989 movups 32(%esi),%xmm7 990 xorps %xmm6,%xmm3 991 movups %xmm2,(%edi) 992 xorps %xmm7,%xmm4 993 movups %xmm3,16(%edi) 994 movups %xmm4,32(%edi) 995 jmp L040ctr32_ret 996 .align 4,0x90 997 L044ctr32_four: 998 call __aesni_encrypt4 999 movups (%esi),%xmm6 1000 movups 16(%esi),%xmm7 1001 movups 32(%esi),%xmm1 1002 xorps %xmm6,%xmm2 1003 movups 48(%esi),%xmm0 1004 xorps %xmm7,%xmm3 1005 movups %xmm2,(%edi) 1006 xorps %xmm1,%xmm4 1007 movups %xmm3,16(%edi) 1008 xorps %xmm0,%xmm5 1009 movups %xmm4,32(%edi) 1010 movups %xmm5,48(%edi) 1011 L040ctr32_ret: 1012 pxor %xmm0,%xmm0 1013 pxor %xmm1,%xmm1 1014 pxor %xmm2,%xmm2 1015 pxor %xmm3,%xmm3 1016 pxor %xmm4,%xmm4 1017 movdqa %xmm0,32(%esp) 1018 pxor %xmm5,%xmm5 1019 movdqa %xmm0,48(%esp) 1020 pxor %xmm6,%xmm6 1021 movdqa %xmm0,64(%esp) 1022 pxor %xmm7,%xmm7 1023 movl 80(%esp),%esp 1024 popl %edi 1025 popl %esi 1026 popl %ebx 1027 popl %ebp 1028 ret 1029 .globl _aesni_xts_encrypt 1030 .private_extern _aesni_xts_encrypt 1031 .align 4 1032 _aesni_xts_encrypt: 1033 L_aesni_xts_encrypt_begin: 1034 pushl %ebp 1035 pushl %ebx 1036 pushl %esi 1037 pushl %edi 1038 movl 36(%esp),%edx 1039 movl 40(%esp),%esi 1040 movl 240(%edx),%ecx 1041 movups (%esi),%xmm2 1042 movups (%edx),%xmm0 1043 movups 16(%edx),%xmm1 1044 leal 32(%edx),%edx 1045 xorps %xmm0,%xmm2 1046 L046enc1_loop_8: 1047 .byte 102,15,56,220,209 1048 decl %ecx 1049 movups (%edx),%xmm1 1050 leal 16(%edx),%edx 1051 jnz L046enc1_loop_8 1052 .byte 102,15,56,221,209 1053 movl 20(%esp),%esi 1054 movl 24(%esp),%edi 1055 movl 28(%esp),%eax 1056 movl 32(%esp),%edx 1057 movl %esp,%ebp 1058 subl $120,%esp 1059 movl 240(%edx),%ecx 1060 andl $-16,%esp 1061 movl $135,96(%esp) 1062 movl $0,100(%esp) 1063 movl $1,104(%esp) 1064 movl $0,108(%esp) 1065 movl %eax,112(%esp) 1066 movl %ebp,116(%esp) 1067 movdqa %xmm2,%xmm1 1068 pxor %xmm0,%xmm0 1069 movdqa 96(%esp),%xmm3 1070 pcmpgtd %xmm1,%xmm0 1071 andl $-16,%eax 1072 movl %edx,%ebp 1073 movl %ecx,%ebx 1074 subl $96,%eax 1075 jc L047xts_enc_short 1076 shll $4,%ecx 1077 movl $16,%ebx 1078 subl %ecx,%ebx 1079 leal 32(%edx,%ecx,1),%edx 1080 jmp L048xts_enc_loop6 1081 .align 4,0x90 1082 L048xts_enc_loop6: 1083 pshufd $19,%xmm0,%xmm2 1084 pxor %xmm0,%xmm0 1085 movdqa %xmm1,(%esp) 1086 paddq %xmm1,%xmm1 1087 pand %xmm3,%xmm2 1088 pcmpgtd %xmm1,%xmm0 1089 pxor %xmm2,%xmm1 1090 pshufd $19,%xmm0,%xmm2 1091 pxor %xmm0,%xmm0 1092 movdqa %xmm1,16(%esp) 1093 paddq %xmm1,%xmm1 1094 pand %xmm3,%xmm2 1095 pcmpgtd %xmm1,%xmm0 1096 pxor %xmm2,%xmm1 1097 pshufd $19,%xmm0,%xmm2 1098 pxor %xmm0,%xmm0 1099 movdqa %xmm1,32(%esp) 1100 paddq %xmm1,%xmm1 1101 pand %xmm3,%xmm2 1102 pcmpgtd %xmm1,%xmm0 1103 pxor %xmm2,%xmm1 1104 pshufd $19,%xmm0,%xmm2 1105 pxor %xmm0,%xmm0 1106 movdqa %xmm1,48(%esp) 1107 paddq %xmm1,%xmm1 1108 pand %xmm3,%xmm2 1109 pcmpgtd %xmm1,%xmm0 1110 pxor %xmm2,%xmm1 1111 pshufd $19,%xmm0,%xmm7 1112 movdqa %xmm1,64(%esp) 1113 paddq %xmm1,%xmm1 1114 movups (%ebp),%xmm0 1115 pand %xmm3,%xmm7 1116 movups (%esi),%xmm2 1117 pxor %xmm1,%xmm7 1118 movl %ebx,%ecx 1119 movdqu 16(%esi),%xmm3 1120 xorps %xmm0,%xmm2 1121 movdqu 32(%esi),%xmm4 1122 pxor %xmm0,%xmm3 1123 movdqu 48(%esi),%xmm5 1124 pxor %xmm0,%xmm4 1125 movdqu 64(%esi),%xmm6 1126 pxor %xmm0,%xmm5 1127 movdqu 80(%esi),%xmm1 1128 pxor %xmm0,%xmm6 1129 leal 96(%esi),%esi 1130 pxor (%esp),%xmm2 1131 movdqa %xmm7,80(%esp) 1132 pxor %xmm1,%xmm7 1133 movups 16(%ebp),%xmm1 1134 pxor 16(%esp),%xmm3 1135 pxor 32(%esp),%xmm4 1136 .byte 102,15,56,220,209 1137 pxor 48(%esp),%xmm5 1138 pxor 64(%esp),%xmm6 1139 .byte 102,15,56,220,217 1140 pxor %xmm0,%xmm7 1141 movups 32(%ebp),%xmm0 1142 .byte 102,15,56,220,225 1143 .byte 102,15,56,220,233 1144 .byte 102,15,56,220,241 1145 .byte 102,15,56,220,249 1146 call L_aesni_encrypt6_enter 1147 movdqa 80(%esp),%xmm1 1148 pxor %xmm0,%xmm0 1149 xorps (%esp),%xmm2 1150 pcmpgtd %xmm1,%xmm0 1151 xorps 16(%esp),%xmm3 1152 movups %xmm2,(%edi) 1153 xorps 32(%esp),%xmm4 1154 movups %xmm3,16(%edi) 1155 xorps 48(%esp),%xmm5 1156 movups %xmm4,32(%edi) 1157 xorps 64(%esp),%xmm6 1158 movups %xmm5,48(%edi) 1159 xorps %xmm1,%xmm7 1160 movups %xmm6,64(%edi) 1161 pshufd $19,%xmm0,%xmm2 1162 movups %xmm7,80(%edi) 1163 leal 96(%edi),%edi 1164 movdqa 96(%esp),%xmm3 1165 pxor %xmm0,%xmm0 1166 paddq %xmm1,%xmm1 1167 pand %xmm3,%xmm2 1168 pcmpgtd %xmm1,%xmm0 1169 pxor %xmm2,%xmm1 1170 subl $96,%eax 1171 jnc L048xts_enc_loop6 1172 movl 240(%ebp),%ecx 1173 movl %ebp,%edx 1174 movl %ecx,%ebx 1175 L047xts_enc_short: 1176 addl $96,%eax 1177 jz L049xts_enc_done6x 1178 movdqa %xmm1,%xmm5 1179 cmpl $32,%eax 1180 jb L050xts_enc_one 1181 pshufd $19,%xmm0,%xmm2 1182 pxor %xmm0,%xmm0 1183 paddq %xmm1,%xmm1 1184 pand %xmm3,%xmm2 1185 pcmpgtd %xmm1,%xmm0 1186 pxor %xmm2,%xmm1 1187 je L051xts_enc_two 1188 pshufd $19,%xmm0,%xmm2 1189 pxor %xmm0,%xmm0 1190 movdqa %xmm1,%xmm6 1191 paddq %xmm1,%xmm1 1192 pand %xmm3,%xmm2 1193 pcmpgtd %xmm1,%xmm0 1194 pxor %xmm2,%xmm1 1195 cmpl $64,%eax 1196 jb L052xts_enc_three 1197 pshufd $19,%xmm0,%xmm2 1198 pxor %xmm0,%xmm0 1199 movdqa %xmm1,%xmm7 1200 paddq %xmm1,%xmm1 1201 pand %xmm3,%xmm2 1202 pcmpgtd %xmm1,%xmm0 1203 pxor %xmm2,%xmm1 1204 movdqa %xmm5,(%esp) 1205 movdqa %xmm6,16(%esp) 1206 je L053xts_enc_four 1207 movdqa %xmm7,32(%esp) 1208 pshufd $19,%xmm0,%xmm7 1209 movdqa %xmm1,48(%esp) 1210 paddq %xmm1,%xmm1 1211 pand %xmm3,%xmm7 1212 pxor %xmm1,%xmm7 1213 movdqu (%esi),%xmm2 1214 movdqu 16(%esi),%xmm3 1215 movdqu 32(%esi),%xmm4 1216 pxor (%esp),%xmm2 1217 movdqu 48(%esi),%xmm5 1218 pxor 16(%esp),%xmm3 1219 movdqu 64(%esi),%xmm6 1220 pxor 32(%esp),%xmm4 1221 leal 80(%esi),%esi 1222 pxor 48(%esp),%xmm5 1223 movdqa %xmm7,64(%esp) 1224 pxor %xmm7,%xmm6 1225 call __aesni_encrypt6 1226 movaps 64(%esp),%xmm1 1227 xorps (%esp),%xmm2 1228 xorps 16(%esp),%xmm3 1229 xorps 32(%esp),%xmm4 1230 movups %xmm2,(%edi) 1231 xorps 48(%esp),%xmm5 1232 movups %xmm3,16(%edi) 1233 xorps %xmm1,%xmm6 1234 movups %xmm4,32(%edi) 1235 movups %xmm5,48(%edi) 1236 movups %xmm6,64(%edi) 1237 leal 80(%edi),%edi 1238 jmp L054xts_enc_done 1239 .align 4,0x90 1240 L050xts_enc_one: 1241 movups (%esi),%xmm2 1242 leal 16(%esi),%esi 1243 xorps %xmm5,%xmm2 1244 movups (%edx),%xmm0 1245 movups 16(%edx),%xmm1 1246 leal 32(%edx),%edx 1247 xorps %xmm0,%xmm2 1248 L055enc1_loop_9: 1249 .byte 102,15,56,220,209 1250 decl %ecx 1251 movups (%edx),%xmm1 1252 leal 16(%edx),%edx 1253 jnz L055enc1_loop_9 1254 .byte 102,15,56,221,209 1255 xorps %xmm5,%xmm2 1256 movups %xmm2,(%edi) 1257 leal 16(%edi),%edi 1258 movdqa %xmm5,%xmm1 1259 jmp L054xts_enc_done 1260 .align 4,0x90 1261 L051xts_enc_two: 1262 movaps %xmm1,%xmm6 1263 movups (%esi),%xmm2 1264 movups 16(%esi),%xmm3 1265 leal 32(%esi),%esi 1266 xorps %xmm5,%xmm2 1267 xorps %xmm6,%xmm3 1268 call __aesni_encrypt2 1269 xorps %xmm5,%xmm2 1270 xorps %xmm6,%xmm3 1271 movups %xmm2,(%edi) 1272 movups %xmm3,16(%edi) 1273 leal 32(%edi),%edi 1274 movdqa %xmm6,%xmm1 1275 jmp L054xts_enc_done 1276 .align 4,0x90 1277 L052xts_enc_three: 1278 movaps %xmm1,%xmm7 1279 movups (%esi),%xmm2 1280 movups 16(%esi),%xmm3 1281 movups 32(%esi),%xmm4 1282 leal 48(%esi),%esi 1283 xorps %xmm5,%xmm2 1284 xorps %xmm6,%xmm3 1285 xorps %xmm7,%xmm4 1286 call __aesni_encrypt3 1287 xorps %xmm5,%xmm2 1288 xorps %xmm6,%xmm3 1289 xorps %xmm7,%xmm4 1290 movups %xmm2,(%edi) 1291 movups %xmm3,16(%edi) 1292 movups %xmm4,32(%edi) 1293 leal 48(%edi),%edi 1294 movdqa %xmm7,%xmm1 1295 jmp L054xts_enc_done 1296 .align 4,0x90 1297 L053xts_enc_four: 1298 movaps %xmm1,%xmm6 1299 movups (%esi),%xmm2 1300 movups 16(%esi),%xmm3 1301 movups 32(%esi),%xmm4 1302 xorps (%esp),%xmm2 1303 movups 48(%esi),%xmm5 1304 leal 64(%esi),%esi 1305 xorps 16(%esp),%xmm3 1306 xorps %xmm7,%xmm4 1307 xorps %xmm6,%xmm5 1308 call __aesni_encrypt4 1309 xorps (%esp),%xmm2 1310 xorps 16(%esp),%xmm3 1311 xorps %xmm7,%xmm4 1312 movups %xmm2,(%edi) 1313 xorps %xmm6,%xmm5 1314 movups %xmm3,16(%edi) 1315 movups %xmm4,32(%edi) 1316 movups %xmm5,48(%edi) 1317 leal 64(%edi),%edi 1318 movdqa %xmm6,%xmm1 1319 jmp L054xts_enc_done 1320 .align 4,0x90 1321 L049xts_enc_done6x: 1322 movl 112(%esp),%eax 1323 andl $15,%eax 1324 jz L056xts_enc_ret 1325 movdqa %xmm1,%xmm5 1326 movl %eax,112(%esp) 1327 jmp L057xts_enc_steal 1328 .align 4,0x90 1329 L054xts_enc_done: 1330 movl 112(%esp),%eax 1331 pxor %xmm0,%xmm0 1332 andl $15,%eax 1333 jz L056xts_enc_ret 1334 pcmpgtd %xmm1,%xmm0 1335 movl %eax,112(%esp) 1336 pshufd $19,%xmm0,%xmm5 1337 paddq %xmm1,%xmm1 1338 pand 96(%esp),%xmm5 1339 pxor %xmm1,%xmm5 1340 L057xts_enc_steal: 1341 movzbl (%esi),%ecx 1342 movzbl -16(%edi),%edx 1343 leal 1(%esi),%esi 1344 movb %cl,-16(%edi) 1345 movb %dl,(%edi) 1346 leal 1(%edi),%edi 1347 subl $1,%eax 1348 jnz L057xts_enc_steal 1349 subl 112(%esp),%edi 1350 movl %ebp,%edx 1351 movl %ebx,%ecx 1352 movups -16(%edi),%xmm2 1353 xorps %xmm5,%xmm2 1354 movups (%edx),%xmm0 1355 movups 16(%edx),%xmm1 1356 leal 32(%edx),%edx 1357 xorps %xmm0,%xmm2 1358 L058enc1_loop_10: 1359 .byte 102,15,56,220,209 1360 decl %ecx 1361 movups (%edx),%xmm1 1362 leal 16(%edx),%edx 1363 jnz L058enc1_loop_10 1364 .byte 102,15,56,221,209 1365 xorps %xmm5,%xmm2 1366 movups %xmm2,-16(%edi) 1367 L056xts_enc_ret: 1368 pxor %xmm0,%xmm0 1369 pxor %xmm1,%xmm1 1370 pxor %xmm2,%xmm2 1371 movdqa %xmm0,(%esp) 1372 pxor %xmm3,%xmm3 1373 movdqa %xmm0,16(%esp) 1374 pxor %xmm4,%xmm4 1375 movdqa %xmm0,32(%esp) 1376 pxor %xmm5,%xmm5 1377 movdqa %xmm0,48(%esp) 1378 pxor %xmm6,%xmm6 1379 movdqa %xmm0,64(%esp) 1380 pxor %xmm7,%xmm7 1381 movdqa %xmm0,80(%esp) 1382 movl 116(%esp),%esp 1383 popl %edi 1384 popl %esi 1385 popl %ebx 1386 popl %ebp 1387 ret 1388 .globl _aesni_xts_decrypt 1389 .private_extern _aesni_xts_decrypt 1390 .align 4 1391 _aesni_xts_decrypt: 1392 L_aesni_xts_decrypt_begin: 1393 pushl %ebp 1394 pushl %ebx 1395 pushl %esi 1396 pushl %edi 1397 movl 36(%esp),%edx 1398 movl 40(%esp),%esi 1399 movl 240(%edx),%ecx 1400 movups (%esi),%xmm2 1401 movups (%edx),%xmm0 1402 movups 16(%edx),%xmm1 1403 leal 32(%edx),%edx 1404 xorps %xmm0,%xmm2 1405 L059enc1_loop_11: 1406 .byte 102,15,56,220,209 1407 decl %ecx 1408 movups (%edx),%xmm1 1409 leal 16(%edx),%edx 1410 jnz L059enc1_loop_11 1411 .byte 102,15,56,221,209 1412 movl 20(%esp),%esi 1413 movl 24(%esp),%edi 1414 movl 28(%esp),%eax 1415 movl 32(%esp),%edx 1416 movl %esp,%ebp 1417 subl $120,%esp 1418 andl $-16,%esp 1419 xorl %ebx,%ebx 1420 testl $15,%eax 1421 setnz %bl 1422 shll $4,%ebx 1423 subl %ebx,%eax 1424 movl $135,96(%esp) 1425 movl $0,100(%esp) 1426 movl $1,104(%esp) 1427 movl $0,108(%esp) 1428 movl %eax,112(%esp) 1429 movl %ebp,116(%esp) 1430 movl 240(%edx),%ecx 1431 movl %edx,%ebp 1432 movl %ecx,%ebx 1433 movdqa %xmm2,%xmm1 1434 pxor %xmm0,%xmm0 1435 movdqa 96(%esp),%xmm3 1436 pcmpgtd %xmm1,%xmm0 1437 andl $-16,%eax 1438 subl $96,%eax 1439 jc L060xts_dec_short 1440 shll $4,%ecx 1441 movl $16,%ebx 1442 subl %ecx,%ebx 1443 leal 32(%edx,%ecx,1),%edx 1444 jmp L061xts_dec_loop6 1445 .align 4,0x90 1446 L061xts_dec_loop6: 1447 pshufd $19,%xmm0,%xmm2 1448 pxor %xmm0,%xmm0 1449 movdqa %xmm1,(%esp) 1450 paddq %xmm1,%xmm1 1451 pand %xmm3,%xmm2 1452 pcmpgtd %xmm1,%xmm0 1453 pxor %xmm2,%xmm1 1454 pshufd $19,%xmm0,%xmm2 1455 pxor %xmm0,%xmm0 1456 movdqa %xmm1,16(%esp) 1457 paddq %xmm1,%xmm1 1458 pand %xmm3,%xmm2 1459 pcmpgtd %xmm1,%xmm0 1460 pxor %xmm2,%xmm1 1461 pshufd $19,%xmm0,%xmm2 1462 pxor %xmm0,%xmm0 1463 movdqa %xmm1,32(%esp) 1464 paddq %xmm1,%xmm1 1465 pand %xmm3,%xmm2 1466 pcmpgtd %xmm1,%xmm0 1467 pxor %xmm2,%xmm1 1468 pshufd $19,%xmm0,%xmm2 1469 pxor %xmm0,%xmm0 1470 movdqa %xmm1,48(%esp) 1471 paddq %xmm1,%xmm1 1472 pand %xmm3,%xmm2 1473 pcmpgtd %xmm1,%xmm0 1474 pxor %xmm2,%xmm1 1475 pshufd $19,%xmm0,%xmm7 1476 movdqa %xmm1,64(%esp) 1477 paddq %xmm1,%xmm1 1478 movups (%ebp),%xmm0 1479 pand %xmm3,%xmm7 1480 movups (%esi),%xmm2 1481 pxor %xmm1,%xmm7 1482 movl %ebx,%ecx 1483 movdqu 16(%esi),%xmm3 1484 xorps %xmm0,%xmm2 1485 movdqu 32(%esi),%xmm4 1486 pxor %xmm0,%xmm3 1487 movdqu 48(%esi),%xmm5 1488 pxor %xmm0,%xmm4 1489 movdqu 64(%esi),%xmm6 1490 pxor %xmm0,%xmm5 1491 movdqu 80(%esi),%xmm1 1492 pxor %xmm0,%xmm6 1493 leal 96(%esi),%esi 1494 pxor (%esp),%xmm2 1495 movdqa %xmm7,80(%esp) 1496 pxor %xmm1,%xmm7 1497 movups 16(%ebp),%xmm1 1498 pxor 16(%esp),%xmm3 1499 pxor 32(%esp),%xmm4 1500 .byte 102,15,56,222,209 1501 pxor 48(%esp),%xmm5 1502 pxor 64(%esp),%xmm6 1503 .byte 102,15,56,222,217 1504 pxor %xmm0,%xmm7 1505 movups 32(%ebp),%xmm0 1506 .byte 102,15,56,222,225 1507 .byte 102,15,56,222,233 1508 .byte 102,15,56,222,241 1509 .byte 102,15,56,222,249 1510 call L_aesni_decrypt6_enter 1511 movdqa 80(%esp),%xmm1 1512 pxor %xmm0,%xmm0 1513 xorps (%esp),%xmm2 1514 pcmpgtd %xmm1,%xmm0 1515 xorps 16(%esp),%xmm3 1516 movups %xmm2,(%edi) 1517 xorps 32(%esp),%xmm4 1518 movups %xmm3,16(%edi) 1519 xorps 48(%esp),%xmm5 1520 movups %xmm4,32(%edi) 1521 xorps 64(%esp),%xmm6 1522 movups %xmm5,48(%edi) 1523 xorps %xmm1,%xmm7 1524 movups %xmm6,64(%edi) 1525 pshufd $19,%xmm0,%xmm2 1526 movups %xmm7,80(%edi) 1527 leal 96(%edi),%edi 1528 movdqa 96(%esp),%xmm3 1529 pxor %xmm0,%xmm0 1530 paddq %xmm1,%xmm1 1531 pand %xmm3,%xmm2 1532 pcmpgtd %xmm1,%xmm0 1533 pxor %xmm2,%xmm1 1534 subl $96,%eax 1535 jnc L061xts_dec_loop6 1536 movl 240(%ebp),%ecx 1537 movl %ebp,%edx 1538 movl %ecx,%ebx 1539 L060xts_dec_short: 1540 addl $96,%eax 1541 jz L062xts_dec_done6x 1542 movdqa %xmm1,%xmm5 1543 cmpl $32,%eax 1544 jb L063xts_dec_one 1545 pshufd $19,%xmm0,%xmm2 1546 pxor %xmm0,%xmm0 1547 paddq %xmm1,%xmm1 1548 pand %xmm3,%xmm2 1549 pcmpgtd %xmm1,%xmm0 1550 pxor %xmm2,%xmm1 1551 je L064xts_dec_two 1552 pshufd $19,%xmm0,%xmm2 1553 pxor %xmm0,%xmm0 1554 movdqa %xmm1,%xmm6 1555 paddq %xmm1,%xmm1 1556 pand %xmm3,%xmm2 1557 pcmpgtd %xmm1,%xmm0 1558 pxor %xmm2,%xmm1 1559 cmpl $64,%eax 1560 jb L065xts_dec_three 1561 pshufd $19,%xmm0,%xmm2 1562 pxor %xmm0,%xmm0 1563 movdqa %xmm1,%xmm7 1564 paddq %xmm1,%xmm1 1565 pand %xmm3,%xmm2 1566 pcmpgtd %xmm1,%xmm0 1567 pxor %xmm2,%xmm1 1568 movdqa %xmm5,(%esp) 1569 movdqa %xmm6,16(%esp) 1570 je L066xts_dec_four 1571 movdqa %xmm7,32(%esp) 1572 pshufd $19,%xmm0,%xmm7 1573 movdqa %xmm1,48(%esp) 1574 paddq %xmm1,%xmm1 1575 pand %xmm3,%xmm7 1576 pxor %xmm1,%xmm7 1577 movdqu (%esi),%xmm2 1578 movdqu 16(%esi),%xmm3 1579 movdqu 32(%esi),%xmm4 1580 pxor (%esp),%xmm2 1581 movdqu 48(%esi),%xmm5 1582 pxor 16(%esp),%xmm3 1583 movdqu 64(%esi),%xmm6 1584 pxor 32(%esp),%xmm4 1585 leal 80(%esi),%esi 1586 pxor 48(%esp),%xmm5 1587 movdqa %xmm7,64(%esp) 1588 pxor %xmm7,%xmm6 1589 call __aesni_decrypt6 1590 movaps 64(%esp),%xmm1 1591 xorps (%esp),%xmm2 1592 xorps 16(%esp),%xmm3 1593 xorps 32(%esp),%xmm4 1594 movups %xmm2,(%edi) 1595 xorps 48(%esp),%xmm5 1596 movups %xmm3,16(%edi) 1597 xorps %xmm1,%xmm6 1598 movups %xmm4,32(%edi) 1599 movups %xmm5,48(%edi) 1600 movups %xmm6,64(%edi) 1601 leal 80(%edi),%edi 1602 jmp L067xts_dec_done 1603 .align 4,0x90 1604 L063xts_dec_one: 1605 movups (%esi),%xmm2 1606 leal 16(%esi),%esi 1607 xorps %xmm5,%xmm2 1608 movups (%edx),%xmm0 1609 movups 16(%edx),%xmm1 1610 leal 32(%edx),%edx 1611 xorps %xmm0,%xmm2 1612 L068dec1_loop_12: 1613 .byte 102,15,56,222,209 1614 decl %ecx 1615 movups (%edx),%xmm1 1616 leal 16(%edx),%edx 1617 jnz L068dec1_loop_12 1618 .byte 102,15,56,223,209 1619 xorps %xmm5,%xmm2 1620 movups %xmm2,(%edi) 1621 leal 16(%edi),%edi 1622 movdqa %xmm5,%xmm1 1623 jmp L067xts_dec_done 1624 .align 4,0x90 1625 L064xts_dec_two: 1626 movaps %xmm1,%xmm6 1627 movups (%esi),%xmm2 1628 movups 16(%esi),%xmm3 1629 leal 32(%esi),%esi 1630 xorps %xmm5,%xmm2 1631 xorps %xmm6,%xmm3 1632 call __aesni_decrypt2 1633 xorps %xmm5,%xmm2 1634 xorps %xmm6,%xmm3 1635 movups %xmm2,(%edi) 1636 movups %xmm3,16(%edi) 1637 leal 32(%edi),%edi 1638 movdqa %xmm6,%xmm1 1639 jmp L067xts_dec_done 1640 .align 4,0x90 1641 L065xts_dec_three: 1642 movaps %xmm1,%xmm7 1643 movups (%esi),%xmm2 1644 movups 16(%esi),%xmm3 1645 movups 32(%esi),%xmm4 1646 leal 48(%esi),%esi 1647 xorps %xmm5,%xmm2 1648 xorps %xmm6,%xmm3 1649 xorps %xmm7,%xmm4 1650 call __aesni_decrypt3 1651 xorps %xmm5,%xmm2 1652 xorps %xmm6,%xmm3 1653 xorps %xmm7,%xmm4 1654 movups %xmm2,(%edi) 1655 movups %xmm3,16(%edi) 1656 movups %xmm4,32(%edi) 1657 leal 48(%edi),%edi 1658 movdqa %xmm7,%xmm1 1659 jmp L067xts_dec_done 1660 .align 4,0x90 1661 L066xts_dec_four: 1662 movaps %xmm1,%xmm6 1663 movups (%esi),%xmm2 1664 movups 16(%esi),%xmm3 1665 movups 32(%esi),%xmm4 1666 xorps (%esp),%xmm2 1667 movups 48(%esi),%xmm5 1668 leal 64(%esi),%esi 1669 xorps 16(%esp),%xmm3 1670 xorps %xmm7,%xmm4 1671 xorps %xmm6,%xmm5 1672 call __aesni_decrypt4 1673 xorps (%esp),%xmm2 1674 xorps 16(%esp),%xmm3 1675 xorps %xmm7,%xmm4 1676 movups %xmm2,(%edi) 1677 xorps %xmm6,%xmm5 1678 movups %xmm3,16(%edi) 1679 movups %xmm4,32(%edi) 1680 movups %xmm5,48(%edi) 1681 leal 64(%edi),%edi 1682 movdqa %xmm6,%xmm1 1683 jmp L067xts_dec_done 1684 .align 4,0x90 1685 L062xts_dec_done6x: 1686 movl 112(%esp),%eax 1687 andl $15,%eax 1688 jz L069xts_dec_ret 1689 movl %eax,112(%esp) 1690 jmp L070xts_dec_only_one_more 1691 .align 4,0x90 1692 L067xts_dec_done: 1693 movl 112(%esp),%eax 1694 pxor %xmm0,%xmm0 1695 andl $15,%eax 1696 jz L069xts_dec_ret 1697 pcmpgtd %xmm1,%xmm0 1698 movl %eax,112(%esp) 1699 pshufd $19,%xmm0,%xmm2 1700 pxor %xmm0,%xmm0 1701 movdqa 96(%esp),%xmm3 1702 paddq %xmm1,%xmm1 1703 pand %xmm3,%xmm2 1704 pcmpgtd %xmm1,%xmm0 1705 pxor %xmm2,%xmm1 1706 L070xts_dec_only_one_more: 1707 pshufd $19,%xmm0,%xmm5 1708 movdqa %xmm1,%xmm6 1709 paddq %xmm1,%xmm1 1710 pand %xmm3,%xmm5 1711 pxor %xmm1,%xmm5 1712 movl %ebp,%edx 1713 movl %ebx,%ecx 1714 movups (%esi),%xmm2 1715 xorps %xmm5,%xmm2 1716 movups (%edx),%xmm0 1717 movups 16(%edx),%xmm1 1718 leal 32(%edx),%edx 1719 xorps %xmm0,%xmm2 1720 L071dec1_loop_13: 1721 .byte 102,15,56,222,209 1722 decl %ecx 1723 movups (%edx),%xmm1 1724 leal 16(%edx),%edx 1725 jnz L071dec1_loop_13 1726 .byte 102,15,56,223,209 1727 xorps %xmm5,%xmm2 1728 movups %xmm2,(%edi) 1729 L072xts_dec_steal: 1730 movzbl 16(%esi),%ecx 1731 movzbl (%edi),%edx 1732 leal 1(%esi),%esi 1733 movb %cl,(%edi) 1734 movb %dl,16(%edi) 1735 leal 1(%edi),%edi 1736 subl $1,%eax 1737 jnz L072xts_dec_steal 1738 subl 112(%esp),%edi 1739 movl %ebp,%edx 1740 movl %ebx,%ecx 1741 movups (%edi),%xmm2 1742 xorps %xmm6,%xmm2 1743 movups (%edx),%xmm0 1744 movups 16(%edx),%xmm1 1745 leal 32(%edx),%edx 1746 xorps %xmm0,%xmm2 1747 L073dec1_loop_14: 1748 .byte 102,15,56,222,209 1749 decl %ecx 1750 movups (%edx),%xmm1 1751 leal 16(%edx),%edx 1752 jnz L073dec1_loop_14 1753 .byte 102,15,56,223,209 1754 xorps %xmm6,%xmm2 1755 movups %xmm2,(%edi) 1756 L069xts_dec_ret: 1757 pxor %xmm0,%xmm0 1758 pxor %xmm1,%xmm1 1759 pxor %xmm2,%xmm2 1760 movdqa %xmm0,(%esp) 1761 pxor %xmm3,%xmm3 1762 movdqa %xmm0,16(%esp) 1763 pxor %xmm4,%xmm4 1764 movdqa %xmm0,32(%esp) 1765 pxor %xmm5,%xmm5 1766 movdqa %xmm0,48(%esp) 1767 pxor %xmm6,%xmm6 1768 movdqa %xmm0,64(%esp) 1769 pxor %xmm7,%xmm7 1770 movdqa %xmm0,80(%esp) 1771 movl 116(%esp),%esp 1772 popl %edi 1773 popl %esi 1774 popl %ebx 1775 popl %ebp 1776 ret 1777 .globl _aesni_cbc_encrypt 1778 .private_extern _aesni_cbc_encrypt 1779 .align 4 1780 _aesni_cbc_encrypt: 1781 L_aesni_cbc_encrypt_begin: 1782 pushl %ebp 1783 pushl %ebx 1784 pushl %esi 1785 pushl %edi 1786 movl 20(%esp),%esi 1787 movl %esp,%ebx 1788 movl 24(%esp),%edi 1789 subl $24,%ebx 1790 movl 28(%esp),%eax 1791 andl $-16,%ebx 1792 movl 32(%esp),%edx 1793 movl 36(%esp),%ebp 1794 testl %eax,%eax 1795 jz L074cbc_abort 1796 cmpl $0,40(%esp) 1797 xchgl %esp,%ebx 1798 movups (%ebp),%xmm7 1799 movl 240(%edx),%ecx 1800 movl %edx,%ebp 1801 movl %ebx,16(%esp) 1802 movl %ecx,%ebx 1803 je L075cbc_decrypt 1804 movaps %xmm7,%xmm2 1805 cmpl $16,%eax 1806 jb L076cbc_enc_tail 1807 subl $16,%eax 1808 jmp L077cbc_enc_loop 1809 .align 4,0x90 1810 L077cbc_enc_loop: 1811 movups (%esi),%xmm7 1812 leal 16(%esi),%esi 1813 movups (%edx),%xmm0 1814 movups 16(%edx),%xmm1 1815 xorps %xmm0,%xmm7 1816 leal 32(%edx),%edx 1817 xorps %xmm7,%xmm2 1818 L078enc1_loop_15: 1819 .byte 102,15,56,220,209 1820 decl %ecx 1821 movups (%edx),%xmm1 1822 leal 16(%edx),%edx 1823 jnz L078enc1_loop_15 1824 .byte 102,15,56,221,209 1825 movl %ebx,%ecx 1826 movl %ebp,%edx 1827 movups %xmm2,(%edi) 1828 leal 16(%edi),%edi 1829 subl $16,%eax 1830 jnc L077cbc_enc_loop 1831 addl $16,%eax 1832 jnz L076cbc_enc_tail 1833 movaps %xmm2,%xmm7 1834 pxor %xmm2,%xmm2 1835 jmp L079cbc_ret 1836 L076cbc_enc_tail: 1837 movl %eax,%ecx 1838 .long 2767451785 1839 movl $16,%ecx 1840 subl %eax,%ecx 1841 xorl %eax,%eax 1842 .long 2868115081 1843 leal -16(%edi),%edi 1844 movl %ebx,%ecx 1845 movl %edi,%esi 1846 movl %ebp,%edx 1847 jmp L077cbc_enc_loop 1848 .align 4,0x90 1849 L075cbc_decrypt: 1850 cmpl $80,%eax 1851 jbe L080cbc_dec_tail 1852 movaps %xmm7,(%esp) 1853 subl $80,%eax 1854 jmp L081cbc_dec_loop6_enter 1855 .align 4,0x90 1856 L082cbc_dec_loop6: 1857 movaps %xmm0,(%esp) 1858 movups %xmm7,(%edi) 1859 leal 16(%edi),%edi 1860 L081cbc_dec_loop6_enter: 1861 movdqu (%esi),%xmm2 1862 movdqu 16(%esi),%xmm3 1863 movdqu 32(%esi),%xmm4 1864 movdqu 48(%esi),%xmm5 1865 movdqu 64(%esi),%xmm6 1866 movdqu 80(%esi),%xmm7 1867 call __aesni_decrypt6 1868 movups (%esi),%xmm1 1869 movups 16(%esi),%xmm0 1870 xorps (%esp),%xmm2 1871 xorps %xmm1,%xmm3 1872 movups 32(%esi),%xmm1 1873 xorps %xmm0,%xmm4 1874 movups 48(%esi),%xmm0 1875 xorps %xmm1,%xmm5 1876 movups 64(%esi),%xmm1 1877 xorps %xmm0,%xmm6 1878 movups 80(%esi),%xmm0 1879 xorps %xmm1,%xmm7 1880 movups %xmm2,(%edi) 1881 movups %xmm3,16(%edi) 1882 leal 96(%esi),%esi 1883 movups %xmm4,32(%edi) 1884 movl %ebx,%ecx 1885 movups %xmm5,48(%edi) 1886 movl %ebp,%edx 1887 movups %xmm6,64(%edi) 1888 leal 80(%edi),%edi 1889 subl $96,%eax 1890 ja L082cbc_dec_loop6 1891 movaps %xmm7,%xmm2 1892 movaps %xmm0,%xmm7 1893 addl $80,%eax 1894 jle L083cbc_dec_clear_tail_collected 1895 movups %xmm2,(%edi) 1896 leal 16(%edi),%edi 1897 L080cbc_dec_tail: 1898 movups (%esi),%xmm2 1899 movaps %xmm2,%xmm6 1900 cmpl $16,%eax 1901 jbe L084cbc_dec_one 1902 movups 16(%esi),%xmm3 1903 movaps %xmm3,%xmm5 1904 cmpl $32,%eax 1905 jbe L085cbc_dec_two 1906 movups 32(%esi),%xmm4 1907 cmpl $48,%eax 1908 jbe L086cbc_dec_three 1909 movups 48(%esi),%xmm5 1910 cmpl $64,%eax 1911 jbe L087cbc_dec_four 1912 movups 64(%esi),%xmm6 1913 movaps %xmm7,(%esp) 1914 movups (%esi),%xmm2 1915 xorps %xmm7,%xmm7 1916 call __aesni_decrypt6 1917 movups (%esi),%xmm1 1918 movups 16(%esi),%xmm0 1919 xorps (%esp),%xmm2 1920 xorps %xmm1,%xmm3 1921 movups 32(%esi),%xmm1 1922 xorps %xmm0,%xmm4 1923 movups 48(%esi),%xmm0 1924 xorps %xmm1,%xmm5 1925 movups 64(%esi),%xmm7 1926 xorps %xmm0,%xmm6 1927 movups %xmm2,(%edi) 1928 movups %xmm3,16(%edi) 1929 pxor %xmm3,%xmm3 1930 movups %xmm4,32(%edi) 1931 pxor %xmm4,%xmm4 1932 movups %xmm5,48(%edi) 1933 pxor %xmm5,%xmm5 1934 leal 64(%edi),%edi 1935 movaps %xmm6,%xmm2 1936 pxor %xmm6,%xmm6 1937 subl $80,%eax 1938 jmp L088cbc_dec_tail_collected 1939 .align 4,0x90 1940 L084cbc_dec_one: 1941 movups (%edx),%xmm0 1942 movups 16(%edx),%xmm1 1943 leal 32(%edx),%edx 1944 xorps %xmm0,%xmm2 1945 L089dec1_loop_16: 1946 .byte 102,15,56,222,209 1947 decl %ecx 1948 movups (%edx),%xmm1 1949 leal 16(%edx),%edx 1950 jnz L089dec1_loop_16 1951 .byte 102,15,56,223,209 1952 xorps %xmm7,%xmm2 1953 movaps %xmm6,%xmm7 1954 subl $16,%eax 1955 jmp L088cbc_dec_tail_collected 1956 .align 4,0x90 1957 L085cbc_dec_two: 1958 call __aesni_decrypt2 1959 xorps %xmm7,%xmm2 1960 xorps %xmm6,%xmm3 1961 movups %xmm2,(%edi) 1962 movaps %xmm3,%xmm2 1963 pxor %xmm3,%xmm3 1964 leal 16(%edi),%edi 1965 movaps %xmm5,%xmm7 1966 subl $32,%eax 1967 jmp L088cbc_dec_tail_collected 1968 .align 4,0x90 1969 L086cbc_dec_three: 1970 call __aesni_decrypt3 1971 xorps %xmm7,%xmm2 1972 xorps %xmm6,%xmm3 1973 xorps %xmm5,%xmm4 1974 movups %xmm2,(%edi) 1975 movaps %xmm4,%xmm2 1976 pxor %xmm4,%xmm4 1977 movups %xmm3,16(%edi) 1978 pxor %xmm3,%xmm3 1979 leal 32(%edi),%edi 1980 movups 32(%esi),%xmm7 1981 subl $48,%eax 1982 jmp L088cbc_dec_tail_collected 1983 .align 4,0x90 1984 L087cbc_dec_four: 1985 call __aesni_decrypt4 1986 movups 16(%esi),%xmm1 1987 movups 32(%esi),%xmm0 1988 xorps %xmm7,%xmm2 1989 movups 48(%esi),%xmm7 1990 xorps %xmm6,%xmm3 1991 movups %xmm2,(%edi) 1992 xorps %xmm1,%xmm4 1993 movups %xmm3,16(%edi) 1994 pxor %xmm3,%xmm3 1995 xorps %xmm0,%xmm5 1996 movups %xmm4,32(%edi) 1997 pxor %xmm4,%xmm4 1998 leal 48(%edi),%edi 1999 movaps %xmm5,%xmm2 2000 pxor %xmm5,%xmm5 2001 subl $64,%eax 2002 jmp L088cbc_dec_tail_collected 2003 .align 4,0x90 2004 L083cbc_dec_clear_tail_collected: 2005 pxor %xmm3,%xmm3 2006 pxor %xmm4,%xmm4 2007 pxor %xmm5,%xmm5 2008 pxor %xmm6,%xmm6 2009 L088cbc_dec_tail_collected: 2010 andl $15,%eax 2011 jnz L090cbc_dec_tail_partial 2012 movups %xmm2,(%edi) 2013 pxor %xmm0,%xmm0 2014 jmp L079cbc_ret 2015 .align 4,0x90 2016 L090cbc_dec_tail_partial: 2017 movaps %xmm2,(%esp) 2018 pxor %xmm0,%xmm0 2019 movl $16,%ecx 2020 movl %esp,%esi 2021 subl %eax,%ecx 2022 .long 2767451785 2023 movdqa %xmm2,(%esp) 2024 L079cbc_ret: 2025 movl 16(%esp),%esp 2026 movl 36(%esp),%ebp 2027 pxor %xmm2,%xmm2 2028 pxor %xmm1,%xmm1 2029 movups %xmm7,(%ebp) 2030 pxor %xmm7,%xmm7 2031 L074cbc_abort: 2032 popl %edi 2033 popl %esi 2034 popl %ebx 2035 popl %ebp 2036 ret 2037 .private_extern __aesni_set_encrypt_key 2038 .align 4 2039 __aesni_set_encrypt_key: 2040 pushl %ebp 2041 pushl %ebx 2042 testl %eax,%eax 2043 jz L091bad_pointer 2044 testl %edx,%edx 2045 jz L091bad_pointer 2046 call L092pic 2047 L092pic: 2048 popl %ebx 2049 leal Lkey_const-L092pic(%ebx),%ebx 2050 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp 2051 movups (%eax),%xmm0 2052 xorps %xmm4,%xmm4 2053 movl 4(%ebp),%ebp 2054 leal 16(%edx),%edx 2055 andl $268437504,%ebp 2056 cmpl $256,%ecx 2057 je L09314rounds 2058 cmpl $192,%ecx 2059 je L09412rounds 2060 cmpl $128,%ecx 2061 jne L095bad_keybits 2062 .align 4,0x90 2063 L09610rounds: 2064 cmpl $268435456,%ebp 2065 je L09710rounds_alt 2066 movl $9,%ecx 2067 movups %xmm0,-16(%edx) 2068 .byte 102,15,58,223,200,1 2069 call L098key_128_cold 2070 .byte 102,15,58,223,200,2 2071 call L099key_128 2072 .byte 102,15,58,223,200,4 2073 call L099key_128 2074 .byte 102,15,58,223,200,8 2075 call L099key_128 2076 .byte 102,15,58,223,200,16 2077 call L099key_128 2078 .byte 102,15,58,223,200,32 2079 call L099key_128 2080 .byte 102,15,58,223,200,64 2081 call L099key_128 2082 .byte 102,15,58,223,200,128 2083 call L099key_128 2084 .byte 102,15,58,223,200,27 2085 call L099key_128 2086 .byte 102,15,58,223,200,54 2087 call L099key_128 2088 movups %xmm0,(%edx) 2089 movl %ecx,80(%edx) 2090 jmp L100good_key 2091 .align 4,0x90 2092 L099key_128: 2093 movups %xmm0,(%edx) 2094 leal 16(%edx),%edx 2095 L098key_128_cold: 2096 shufps $16,%xmm0,%xmm4 2097 xorps %xmm4,%xmm0 2098 shufps $140,%xmm0,%xmm4 2099 xorps %xmm4,%xmm0 2100 shufps $255,%xmm1,%xmm1 2101 xorps %xmm1,%xmm0 2102 ret 2103 .align 4,0x90 2104 L09710rounds_alt: 2105 movdqa (%ebx),%xmm5 2106 movl $8,%ecx 2107 movdqa 32(%ebx),%xmm4 2108 movdqa %xmm0,%xmm2 2109 movdqu %xmm0,-16(%edx) 2110 L101loop_key128: 2111 .byte 102,15,56,0,197 2112 .byte 102,15,56,221,196 2113 pslld $1,%xmm4 2114 leal 16(%edx),%edx 2115 movdqa %xmm2,%xmm3 2116 pslldq $4,%xmm2 2117 pxor %xmm2,%xmm3 2118 pslldq $4,%xmm2 2119 pxor %xmm2,%xmm3 2120 pslldq $4,%xmm2 2121 pxor %xmm3,%xmm2 2122 pxor %xmm2,%xmm0 2123 movdqu %xmm0,-16(%edx) 2124 movdqa %xmm0,%xmm2 2125 decl %ecx 2126 jnz L101loop_key128 2127 movdqa 48(%ebx),%xmm4 2128 .byte 102,15,56,0,197 2129 .byte 102,15,56,221,196 2130 pslld $1,%xmm4 2131 movdqa %xmm2,%xmm3 2132 pslldq $4,%xmm2 2133 pxor %xmm2,%xmm3 2134 pslldq $4,%xmm2 2135 pxor %xmm2,%xmm3 2136 pslldq $4,%xmm2 2137 pxor %xmm3,%xmm2 2138 pxor %xmm2,%xmm0 2139 movdqu %xmm0,(%edx) 2140 movdqa %xmm0,%xmm2 2141 .byte 102,15,56,0,197 2142 .byte 102,15,56,221,196 2143 movdqa %xmm2,%xmm3 2144 pslldq $4,%xmm2 2145 pxor %xmm2,%xmm3 2146 pslldq $4,%xmm2 2147 pxor %xmm2,%xmm3 2148 pslldq $4,%xmm2 2149 pxor %xmm3,%xmm2 2150 pxor %xmm2,%xmm0 2151 movdqu %xmm0,16(%edx) 2152 movl $9,%ecx 2153 movl %ecx,96(%edx) 2154 jmp L100good_key 2155 .align 4,0x90 2156 L09412rounds: 2157 movq 16(%eax),%xmm2 2158 cmpl $268435456,%ebp 2159 je L10212rounds_alt 2160 movl $11,%ecx 2161 movups %xmm0,-16(%edx) 2162 .byte 102,15,58,223,202,1 2163 call L103key_192a_cold 2164 .byte 102,15,58,223,202,2 2165 call L104key_192b 2166 .byte 102,15,58,223,202,4 2167 call L105key_192a 2168 .byte 102,15,58,223,202,8 2169 call L104key_192b 2170 .byte 102,15,58,223,202,16 2171 call L105key_192a 2172 .byte 102,15,58,223,202,32 2173 call L104key_192b 2174 .byte 102,15,58,223,202,64 2175 call L105key_192a 2176 .byte 102,15,58,223,202,128 2177 call L104key_192b 2178 movups %xmm0,(%edx) 2179 movl %ecx,48(%edx) 2180 jmp L100good_key 2181 .align 4,0x90 2182 L105key_192a: 2183 movups %xmm0,(%edx) 2184 leal 16(%edx),%edx 2185 .align 4,0x90 2186 L103key_192a_cold: 2187 movaps %xmm2,%xmm5 2188 L106key_192b_warm: 2189 shufps $16,%xmm0,%xmm4 2190 movdqa %xmm2,%xmm3 2191 xorps %xmm4,%xmm0 2192 shufps $140,%xmm0,%xmm4 2193 pslldq $4,%xmm3 2194 xorps %xmm4,%xmm0 2195 pshufd $85,%xmm1,%xmm1 2196 pxor %xmm3,%xmm2 2197 pxor %xmm1,%xmm0 2198 pshufd $255,%xmm0,%xmm3 2199 pxor %xmm3,%xmm2 2200 ret 2201 .align 4,0x90 2202 L104key_192b: 2203 movaps %xmm0,%xmm3 2204 shufps $68,%xmm0,%xmm5 2205 movups %xmm5,(%edx) 2206 shufps $78,%xmm2,%xmm3 2207 movups %xmm3,16(%edx) 2208 leal 32(%edx),%edx 2209 jmp L106key_192b_warm 2210 .align 4,0x90 2211 L10212rounds_alt: 2212 movdqa 16(%ebx),%xmm5 2213 movdqa 32(%ebx),%xmm4 2214 movl $8,%ecx 2215 movdqu %xmm0,-16(%edx) 2216 L107loop_key192: 2217 movq %xmm2,(%edx) 2218 movdqa %xmm2,%xmm1 2219 .byte 102,15,56,0,213 2220 .byte 102,15,56,221,212 2221 pslld $1,%xmm4 2222 leal 24(%edx),%edx 2223 movdqa %xmm0,%xmm3 2224 pslldq $4,%xmm0 2225 pxor %xmm0,%xmm3 2226 pslldq $4,%xmm0 2227 pxor %xmm0,%xmm3 2228 pslldq $4,%xmm0 2229 pxor %xmm3,%xmm0 2230 pshufd $255,%xmm0,%xmm3 2231 pxor %xmm1,%xmm3 2232 pslldq $4,%xmm1 2233 pxor %xmm1,%xmm3 2234 pxor %xmm2,%xmm0 2235 pxor %xmm3,%xmm2 2236 movdqu %xmm0,-16(%edx) 2237 decl %ecx 2238 jnz L107loop_key192 2239 movl $11,%ecx 2240 movl %ecx,32(%edx) 2241 jmp L100good_key 2242 .align 4,0x90 2243 L09314rounds: 2244 movups 16(%eax),%xmm2 2245 leal 16(%edx),%edx 2246 cmpl $268435456,%ebp 2247 je L10814rounds_alt 2248 movl $13,%ecx 2249 movups %xmm0,-32(%edx) 2250 movups %xmm2,-16(%edx) 2251 .byte 102,15,58,223,202,1 2252 call L109key_256a_cold 2253 .byte 102,15,58,223,200,1 2254 call L110key_256b 2255 .byte 102,15,58,223,202,2 2256 call L111key_256a 2257 .byte 102,15,58,223,200,2 2258 call L110key_256b 2259 .byte 102,15,58,223,202,4 2260 call L111key_256a 2261 .byte 102,15,58,223,200,4 2262 call L110key_256b 2263 .byte 102,15,58,223,202,8 2264 call L111key_256a 2265 .byte 102,15,58,223,200,8 2266 call L110key_256b 2267 .byte 102,15,58,223,202,16 2268 call L111key_256a 2269 .byte 102,15,58,223,200,16 2270 call L110key_256b 2271 .byte 102,15,58,223,202,32 2272 call L111key_256a 2273 .byte 102,15,58,223,200,32 2274 call L110key_256b 2275 .byte 102,15,58,223,202,64 2276 call L111key_256a 2277 movups %xmm0,(%edx) 2278 movl %ecx,16(%edx) 2279 xorl %eax,%eax 2280 jmp L100good_key 2281 .align 4,0x90 2282 L111key_256a: 2283 movups %xmm2,(%edx) 2284 leal 16(%edx),%edx 2285 L109key_256a_cold: 2286 shufps $16,%xmm0,%xmm4 2287 xorps %xmm4,%xmm0 2288 shufps $140,%xmm0,%xmm4 2289 xorps %xmm4,%xmm0 2290 shufps $255,%xmm1,%xmm1 2291 xorps %xmm1,%xmm0 2292 ret 2293 .align 4,0x90 2294 L110key_256b: 2295 movups %xmm0,(%edx) 2296 leal 16(%edx),%edx 2297 shufps $16,%xmm2,%xmm4 2298 xorps %xmm4,%xmm2 2299 shufps $140,%xmm2,%xmm4 2300 xorps %xmm4,%xmm2 2301 shufps $170,%xmm1,%xmm1 2302 xorps %xmm1,%xmm2 2303 ret 2304 .align 4,0x90 2305 L10814rounds_alt: 2306 movdqa (%ebx),%xmm5 2307 movdqa 32(%ebx),%xmm4 2308 movl $7,%ecx 2309 movdqu %xmm0,-32(%edx) 2310 movdqa %xmm2,%xmm1 2311 movdqu %xmm2,-16(%edx) 2312 L112loop_key256: 2313 .byte 102,15,56,0,213 2314 .byte 102,15,56,221,212 2315 movdqa %xmm0,%xmm3 2316 pslldq $4,%xmm0 2317 pxor %xmm0,%xmm3 2318 pslldq $4,%xmm0 2319 pxor %xmm0,%xmm3 2320 pslldq $4,%xmm0 2321 pxor %xmm3,%xmm0 2322 pslld $1,%xmm4 2323 pxor %xmm2,%xmm0 2324 movdqu %xmm0,(%edx) 2325 decl %ecx 2326 jz L113done_key256 2327 pshufd $255,%xmm0,%xmm2 2328 pxor %xmm3,%xmm3 2329 .byte 102,15,56,221,211 2330 movdqa %xmm1,%xmm3 2331 pslldq $4,%xmm1 2332 pxor %xmm1,%xmm3 2333 pslldq $4,%xmm1 2334 pxor %xmm1,%xmm3 2335 pslldq $4,%xmm1 2336 pxor %xmm3,%xmm1 2337 pxor %xmm1,%xmm2 2338 movdqu %xmm2,16(%edx) 2339 leal 32(%edx),%edx 2340 movdqa %xmm2,%xmm1 2341 jmp L112loop_key256 2342 L113done_key256: 2343 movl $13,%ecx 2344 movl %ecx,16(%edx) 2345 L100good_key: 2346 pxor %xmm0,%xmm0 2347 pxor %xmm1,%xmm1 2348 pxor %xmm2,%xmm2 2349 pxor %xmm3,%xmm3 2350 pxor %xmm4,%xmm4 2351 pxor %xmm5,%xmm5 2352 xorl %eax,%eax 2353 popl %ebx 2354 popl %ebp 2355 ret 2356 .align 2,0x90 2357 L091bad_pointer: 2358 movl $-1,%eax 2359 popl %ebx 2360 popl %ebp 2361 ret 2362 .align 2,0x90 2363 L095bad_keybits: 2364 pxor %xmm0,%xmm0 2365 movl $-2,%eax 2366 popl %ebx 2367 popl %ebp 2368 ret 2369 .globl _aesni_set_encrypt_key 2370 .private_extern _aesni_set_encrypt_key 2371 .align 4 2372 _aesni_set_encrypt_key: 2373 L_aesni_set_encrypt_key_begin: 2374 movl 4(%esp),%eax 2375 movl 8(%esp),%ecx 2376 movl 12(%esp),%edx 2377 call __aesni_set_encrypt_key 2378 ret 2379 .globl _aesni_set_decrypt_key 2380 .private_extern _aesni_set_decrypt_key 2381 .align 4 2382 _aesni_set_decrypt_key: 2383 L_aesni_set_decrypt_key_begin: 2384 movl 4(%esp),%eax 2385 movl 8(%esp),%ecx 2386 movl 12(%esp),%edx 2387 call __aesni_set_encrypt_key 2388 movl 12(%esp),%edx 2389 shll $4,%ecx 2390 testl %eax,%eax 2391 jnz L114dec_key_ret 2392 leal 16(%edx,%ecx,1),%eax 2393 movups (%edx),%xmm0 2394 movups (%eax),%xmm1 2395 movups %xmm0,(%eax) 2396 movups %xmm1,(%edx) 2397 leal 16(%edx),%edx 2398 leal -16(%eax),%eax 2399 L115dec_key_inverse: 2400 movups (%edx),%xmm0 2401 movups (%eax),%xmm1 2402 .byte 102,15,56,219,192 2403 .byte 102,15,56,219,201 2404 leal 16(%edx),%edx 2405 leal -16(%eax),%eax 2406 movups %xmm0,16(%eax) 2407 movups %xmm1,-16(%edx) 2408 cmpl %edx,%eax 2409 ja L115dec_key_inverse 2410 movups (%edx),%xmm0 2411 .byte 102,15,56,219,192 2412 movups %xmm0,(%edx) 2413 pxor %xmm0,%xmm0 2414 pxor %xmm1,%xmm1 2415 xorl %eax,%eax 2416 L114dec_key_ret: 2417 ret 2418 .align 6,0x90 2419 Lkey_const: 2420 .long 202313229,202313229,202313229,202313229 2421 .long 67569157,67569157,67569157,67569157 2422 .long 1,1,1,1 2423 .long 27,27,27,27 2424 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 2425 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 2426 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 2427 .byte 115,108,46,111,114,103,62,0 2428 .section __IMPORT,__pointers,non_lazy_symbol_pointers 2429 L_OPENSSL_ia32cap_P$non_lazy_ptr: 2430 .indirect_symbol _OPENSSL_ia32cap_P 2431 .long 0 2432 #endif 2433