/*
 * RSAZ 512-bit Montgomery arithmetic, x86-64, AT&T syntax, ELF.
 * Calling convention: System V AMD64 (arguments in rdi, rsi, rdx, rcx,
 * r8, r9; rbx, rbp, r12-r15 are saved and restored by every public
 * entry point).
 *
 * Common stack frame of the five rsaz_512_* arithmetic entry points,
 * after the six pushes and "subq $128+24,%rsp" (rsp is then 16-byte
 * aligned):
 *      0(%rsp) .. 127(%rsp)   16-qword double-width product
 *    128(%rsp)                64-bit constant read by __rsaz_512_reduce
 *                             (presumably the Montgomery n0 value -
 *                             confirm against the C caller)
 *    136(%rsp)                32-bit loop counter (rsaz_512_sqr only)
 * The private helpers are reached with "call", so inside them every
 * frame offset is 8 larger (return address on top).
 */
#if defined(__x86_64__)
.text

.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

/*
 * rsaz_512_sqr
 * In:   rdi = result (8 qwords), rsi = operand a (8 qwords),
 *       rdx = modulus (8 qwords), rcx = constant stored at 128(%rsp),
 *       r8d = number of square-and-reduce rounds.
 * Each round squares the operand into the stack frame, reduces it with
 * __rsaz_512_reduce / __rsaz_512_subtract and stores it at (rdi); the
 * following round reads its operand from rdi.
 * The counter is tested only after a round (decl/jnz), so r8d == 0
 * does not mean "zero rounds".
 *
 * NOTE(review): the cross products are doubled with addq/adcq and with
 * leaq (carry_bit + 2*word), the shifted-out top bit being recovered
 * separately with shrq $63.  leaq produces no carry, and several
 * "adcq $0" at the end of a column leave their carry-out unused.  This
 * looks like the pre-fix form of the sequence addressed by OpenSSL
 * CVE-2019-1551 (rsaz_512_sqr overflow) - confirm against upstream.
 */
.globl	rsaz_512_sqr
.hidden rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lsqr_body:
	movq	%rdx,%rbp			/* rbp = modulus for the helpers */
	movq	(%rsi),%rdx			/* rdx = a[0] */
	movq	8(%rsi),%rax			/* rax = a[1] */
	movq	%rcx,128(%rsp)
	jmp	.Lsqr_loop

.align	32
.Lsqr_loop:
	movl	%r8d,128+8(%rsp)		/* park the round counter */

	/* row 0: a[0] * a[1..7] -> r8..r15 */
	movq	%rdx,%rbx
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	/* double r8:r9, add a[0]^2, emit product words 0 and 1 */
	addq	%r8,%r8
	movq	%r9,%rcx			/* keep old r9 for its top bit */
	adcq	%r9,%r9

	mulq	%rax
	movq	%rax,(%rsp)
	addq	%rdx,%r8
	adcq	$0,%r9

	movq	%r8,8(%rsp)
	shrq	$63,%rcx			/* rcx = bit shifted out of r9 */


	/* row 1: a[1] * a[2..7] accumulated into r10..r15, r8 */
	movq	8(%rsi),%r8
	movq	16(%rsi),%rax
	mulq	%r8
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r15
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%r8

	/* double r10:r11, add a[1]^2, emit product words 2 and 3 */
	addq	%rdx,%rdx			/* CF = top bit of r10 */
	leaq	(%rcx,%r10,2),%r10
	movq	%r11,%rbx
	adcq	%r11,%r11

	mulq	%rax
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)
	shrq	$63,%rbx


	/* row 2: a[2] * a[3..7] accumulated into r12..r15, r8, r9 */
	movq	16(%rsi),%r9
	movq	24(%rsi),%rax
	mulq	%r9
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	movq	%r12,%r10
	leaq	(%rbx,%r12,2),%r12
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	shrq	$63,%r10
	addq	%rax,%r8
	movq	%r9,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	movq	%r13,%rcx
	leaq	(%r10,%r13,2),%r13

	/* add a[2]^2, emit product words 4 and 5 */
	mulq	%rax
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)
	shrq	$63,%rcx


	/* row 3: a[3] * a[4..7] accumulated into r14, r15, r8, r9, r10 */
	movq	24(%rsi),%r10
	movq	32(%rsi),%rax
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	movq	%r14,%r12
	leaq	(%rcx,%r14,2),%r14
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	shrq	$63,%r12
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	movq	%r15,%rbx
	leaq	(%r12,%r15,2),%r15

	/* add a[3]^2, emit product words 6 and 7 */
	mulq	%rax
	addq	%rax,%r13
	adcq	%rdx,%r14
	adcq	$0,%r15

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)
	shrq	$63,%rbx


	/* row 4: a[4] * a[5..7] accumulated into r8, r9, r10, r11 */
	movq	32(%rsi),%r11
	movq	40(%rsi),%rax
	mulq	%r11
	addq	%rax,%r8
	movq	48(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	movq	%r8,%r12
	leaq	(%rbx,%r8,2),%r8
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	shrq	$63,%r12
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	movq	%r9,%rcx
	leaq	(%r12,%r9,2),%r9

	/* add a[4]^2, emit product words 8 and 9 */
	mulq	%rax
	addq	%rax,%r15
	adcq	%rdx,%r8
	adcq	$0,%r9

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)
	shrq	$63,%rcx


	/* row 5: a[5] * a[6..7] accumulated into r10, r11, r12 */
	movq	40(%rsi),%r12
	movq	48(%rsi),%rax
	mulq	%r12
	addq	%rax,%r10
	movq	56(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	movq	%r10,%r15
	leaq	(%rcx,%r10,2),%r10
	adcq	$0,%rdx
	shrq	$63,%r15
	addq	%rbx,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	movq	%r11,%rbx
	leaq	(%r15,%r11,2),%r11

	/* add a[5]^2, emit product words 10 and 11 */
	mulq	%rax
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* row 6: a[6] * a[7] accumulated into r12, r13 */
	movq	48(%rsi),%r13
	movq	56(%rsi),%rax
	mulq	%r13
	addq	%rax,%r12
	movq	%r13,%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	xorq	%r14,%r14
	shlq	$1,%rbx				/* CF = top bit of old r11 */
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14

	/* add a[6]^2, emit product words 12 and 13 */
	mulq	%rax
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	/* add a[7]^2, emit product words 14 and 15 */
	movq	56(%rsi),%rax
	mulq	%rax
	addq	%rax,%r13
	adcq	$0,%rdx

	addq	%rdx,%r14

	movq	%r13,112(%rsp)
	movq	%r14,120(%rsp)

	/* low half of the product -> r8..r15, then reduce */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce

	/* add the high half; rcx = -1 if that addition carried out, else 0 */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* set up the next round: operand = result just written at rdi */
	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Lsqr_loop

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_sqr,.-rsaz_512_sqr

/*
 * rsaz_512_mul
 * In:   rdi = result (8 qwords), rsi = operand a, rdx = operand b,
 *       rcx = modulus, r8 = constant stored at 128(%rsp).
 * rdi and rcx are parked in xmm0/xmm1 across __rsaz_512_mul, which
 * uses rdi as its output cursor and rcx as its loop counter.
 */
.globl	rsaz_512_mul
.hidden rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lmul_body:
	movq	%rdi,%xmm0			/* save result pointer */
	movq	%rcx,%xmm1			/* save modulus pointer */
	movq	%r8,128(%rsp)
	movq	(%rdx),%rbx			/* rbx = b[0] */
	movq	%rdx,%rbp			/* rbp = b */
	call	__rsaz_512_mul

	movq	%xmm0,%rdi			/* rdi = result pointer */
	movq	%xmm1,%rbp			/* rbp = modulus pointer */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx			/* rcx = -1 on carry-out, else 0 */

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_mul,.-rsaz_512_mul

/*
 * rsaz_512_mul_gather4
 * In:   rdi = result (8 qwords), rsi = operand a, rdx = table,
 *       rcx = modulus, r8 = constant stored at 128(%rsp),
 *       r9d = table index (zero-extended here).
 * The second operand is read from the table one limb at a time: limb i
 * has its low 32 bits at table + 128*i + 4*index and its high 32 bits
 * 64 bytes further on (the layout rsaz_512_scatter4 writes).
 *
 * NOTE(review): the load of the next limb is software-pipelined one
 * pass ahead, so the seventh pass of the loop reads the two dwords at
 * table + 128*8 + 4*index (+64), one entry past the eighth limb; the
 * value is never used.  Confirm the table allocation tolerates this.
 */
.globl	rsaz_512_mul_gather4
.hidden rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d			/* clear upper half of the index */
	subq	$128+24,%rsp
.Lmul_gather4_body:
	movl	64(%rdx,%r9,4),%eax		/* high half of limb 0 */
	movq	%rdi,%xmm0			/* save result pointer */
	movl	(%rdx,%r9,4),%ebx		/* low half of limb 0 */
	movq	%rcx,%xmm1			/* save modulus pointer */
	movq	%r8,128(%rsp)

	shlq	$32,%rax
	orq	%rax,%rbx			/* rbx = limb 0 */
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	leaq	128(%rdx,%r9,4),%rbp		/* rbp -> limb 1 in the table */
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4			/* next limb, low half */
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5			/* next limb, high half */
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4			/* xmm4 = next limb */
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	leaq	128(%rbp),%rbp
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%xmm4,%rbx			/* rbx = next limb */
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi			/* rdi = output cursor in frame */
	movl	$7,%ecx
	jmp	.Lmul_gather4_loop

.align	32
.Lmul_gather4_loop:
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%xmm4,%rbx			/* rbx = limb for the next pass */
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	128(%rbp),%rbp
	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Lmul_gather4_loop

	/* flush the top eight product words */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	%xmm0,%rdi			/* rdi = result pointer */
	movq	%xmm1,%rbp			/* rbp = modulus pointer */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx			/* rcx = -1 on carry-out, else 0 */

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4

/*
 * rsaz_512_mul_scatter4
 * In:   rdi = result (8 qwords), also read as the second operand,
 *       rsi = operand a, rdx = modulus,
 *       rcx = constant stored at 128(%rsp), r8 = table,
 *       r9d = table index (zero-extended here).
 * The reduced product is written to (rdi) by __rsaz_512_subtract and
 * additionally scattered into the table as 32-bit halves: limb i goes
 * to table + 128*i + 4*index (low) and 64 bytes further on (high).
 */
.globl	rsaz_512_mul_scatter4
.hidden rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d			/* clear upper half of the index */
	subq	$128+24,%rsp
.Lmul_scatter4_body:
	leaq	(%r8,%r9,4),%r8			/* r8 = table + 4*index */
	movq	%rdi,%xmm0			/* save result pointer */
	movq	%rdx,%xmm1			/* save modulus pointer */
	movq	%r8,%xmm2			/* save table slot pointer */
	movq	%rcx,128(%rsp)

	movq	%rdi,%rbp			/* second operand = (rdi) */
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

	movq	%xmm0,%rdi			/* rdi = result pointer */
	movq	%xmm1,%rbp			/* rbp = modulus pointer */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	movq	%xmm2,%rsi			/* rsi = table slot; flags untouched */
	sbbq	%rcx,%rcx			/* rcx = -1 on carry-out, else 0 */

	call	__rsaz_512_subtract

	/* low halves first, shifting each register down as it is stored */
	movl	%r8d,0(%rsi)
	shrq	$32,%r8
	movl	%r9d,128(%rsi)
	shrq	$32,%r9
	movl	%r10d,256(%rsi)
	shrq	$32,%r10
	movl	%r11d,384(%rsi)
	shrq	$32,%r11
	movl	%r12d,512(%rsi)
	shrq	$32,%r12
	movl	%r13d,640(%rsi)
	shrq	$32,%r13
	movl	%r14d,768(%rsi)
	shrq	$32,%r14
	movl	%r15d,896(%rsi)
	shrq	$32,%r15
	/* then the high halves, 64 bytes after each low half */
	movl	%r8d,64(%rsi)
	movl	%r9d,192(%rsi)
	movl	%r10d,320(%rsi)
	movl	%r11d,448(%rsi)
	movl	%r12d,576(%rsi)
	movl	%r13d,704(%rsi)
	movl	%r14d,832(%rsi)
	movl	%r15d,960(%rsi)

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4

/*
 * rsaz_512_mul_by_one
 * In:   rdi = result (8 qwords), rsi = operand a, rdx = modulus,
 *       rcx = constant stored at 128(%rsp).
 * Loads a into r8..r15, runs __rsaz_512_reduce on it and stores the
 * eight resulting words at (rdi).  No final conditional subtraction is
 * performed here.  The first 112 bytes of the frame are zeroed with
 * aligned stores (rsp is 16-byte aligned at that point).
 */
.globl	rsaz_512_mul_by_one
.hidden rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lmul_by_one_body:
	movq	%rdx,%rbp			/* rbp = modulus pointer */
	movq	%rcx,128(%rsp)

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	call	__rsaz_512_reduce
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one

/*
 * __rsaz_512_reduce (private helper, custom register contract)
 * In:   r8..r15 = eight 64-bit words, rbp = modulus (8 qwords),
 *       128+8(%rsp) = the constant the caller stored at its 128(%rsp).
 * Out:  r8..r15 = words after eight reduction passes.
 * Clobbers rax, rbx, rcx, rdx, rsi and the flags.
 * Each pass multiplies the modulus by rbx (= low word * constant),
 * accumulates it and moves the window down by one word; the
 * multiplier for the following pass is prepared in rsi mid-pass.
 */
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx		/* rbx = r8 * constant */
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.Lreduce_loop

.align	32
.Lreduce_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8				/* CF = (r8 != 0); the low sum itself is discarded */
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi		/* reload the constant; mov keeps CF */


	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi			/* rsi = multiplier for the next pass */
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduce_loop

	.byte	0xf3,0xc3			/* rep ret */
.size	__rsaz_512_reduce,.-__rsaz_512_reduce

/*
 * __rsaz_512_subtract (private helper, custom register contract)
 * In:   r8..r15 = value, rdi = result (8 qwords), rbp = modulus,
 *       rcx = mask (callers pass 0 or -1 from "sbbq %rcx,%rcx").
 * Out:  (rdi) and r8..r15 = value + (mask & -modulus).
 * The value is first stored at (rdi) unmodified.  The negated modulus
 * is built as neg of word 0 and not of words 1..7, which equals the
 * two's-complement negation only when word 0 is non-zero
 * (presumably guaranteed by an odd modulus - confirm).  The selection
 * is done with and-masks rather than a branch.
 */
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3			/* rep ret */
.size	__rsaz_512_subtract,.-__rsaz_512_subtract

/*
 * __rsaz_512_mul (private helper, custom register contract)
 * In:   rsi = operand a (8 qwords), rbp = operand b (8 qwords),
 *       rbx = b[0].
 * Out:  16-qword product a*b written to the caller's frame, starting
 *       at the caller's 0(%rsp) (8(%rsp) here, above the return
 *       address).
 * Clobbers rax, rbx, rcx, rdx, rdi, rbp, r8..r15 and the flags.
 */
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
	leaq	8(%rsp),%rdi			/* rdi = output cursor */

	/* first row: a[0..7] * b[0] */
	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp
	leaq	8(%rdi),%rdi

	movl	$7,%ecx
	jmp	.Lmul_loop

.align	32
.Lmul_loop:
	/* one row per pass: a[0..7] * b[i], lowest word retired to (rdi) */
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Lmul_loop

	/* flush the top eight product words */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3			/* rep ret */
.size	__rsaz_512_mul,.-__rsaz_512_mul

/*
 * rsaz_512_scatter4
 * In:   rdi = table, rsi = source (8 qwords), rdx = table index.
 * Writes each source qword as two dwords: low half at
 * table + 128*i + 4*index, high half 64 bytes further on.
 * Leaf function; uses only rax, r9, rdi, rsi.
 */
.globl	rsaz_512_scatter4
.hidden rsaz_512_scatter4
.type	rsaz_512_scatter4,@function
.align	16
rsaz_512_scatter4:
	leaq	(%rdi,%rdx,4),%rdi		/* rdi = table + 4*index */
	movl	$8,%r9d
	jmp	.Lscatter4_loop
.align	16
.Lscatter4_loop:
	movq	(%rsi),%rax
	leaq	8(%rsi),%rsi
	movl	%eax,(%rdi)
	shrq	$32,%rax
	movl	%eax,64(%rdi)
	leaq	128(%rdi),%rdi
	decl	%r9d
	jnz	.Lscatter4_loop
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_scatter4,.-rsaz_512_scatter4

/*
 * rsaz_512_gather4
 * In:   rdi = destination (8 qwords), rsi = table, rdx = table index.
 * Inverse of rsaz_512_scatter4: rebuilds each qword from the dword at
 * table + 128*i + 4*index (low) and the dword 64 bytes further on
 * (high).  Leaf function; uses only rax, r8, r9, rdi, rsi.
 */
.globl	rsaz_512_gather4
.hidden rsaz_512_gather4
.type	rsaz_512_gather4,@function
.align	16
rsaz_512_gather4:
	leaq	(%rsi,%rdx,4),%rsi		/* rsi = table + 4*index */
	movl	$8,%r9d
	jmp	.Lgather4_loop
.align	16
.Lgather4_loop:
	movl	(%rsi),%eax			/* 32-bit load zero-extends rax */
	movl	64(%rsi),%r8d
	leaq	128(%rsi),%rsi
	shlq	$32,%r8
	orq	%r8,%rax
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	decl	%r9d
	jnz	.Lgather4_loop
	.byte	0xf3,0xc3			/* rep ret */
.size	rsaz_512_gather4,.-rsaz_512_gather4
#endif