#if defined(__x86_64__)
/*
 * 512-bit (eight 64-bit limb) multiply / square / reduce helpers, x86-64.
 *
 * Syntax:  AT&T (GAS), run through the C preprocessor.
 * Object:  Mach-O style naming (leading-underscore globals, L$ local labels).
 * ABI:     System V AMD64 register usage: arguments arrive in rdi, rsi, rdx,
 *          rcx, r8, r9; rbx, rbp and r12-r15 are saved and restored by every
 *          routine that touches them.
 *
 * All exported arithmetic routines build the same stack frame:
 *     0..127(%rsp)  16-limb double-width product
 *     128(%rsp)     64-bit constant that __rsaz_512_reduce multiplies into the
 *                   low limb (presumably the Montgomery n0 value -- confirm
 *                   against the C caller)
 *     136(%rsp)     32-bit remaining-iteration counter (_rsaz_512_sqr only)
 * With an ABI-conformant caller (rsp % 16 == 8 on entry), six pushes plus the
 * 152-byte frame leave %rsp 16-byte aligned, which the movdqa stores in
 * _rsaz_512_mul_by_one rely on.
 *
 * The internal helpers (__rsaz_512_*) are reached with `call`, so inside them
 * every frame offset is 8 larger than in the exported routine:
 * 8(%rsp) is the caller's 0(%rsp) and 128+8(%rsp) is the caller's 128(%rsp).
 *
 * Hand-encoded instructions used below:
 *     .byte 0xf3,0xc3           rep ret
 *     .byte 102,72,15,110,xx    movq <gpr>,<xmm>   (66 REX.W 0F 6E /r)
 *     .byte 102,72,15,126,xx    movq <xmm>,<gpr>   (66 REX.W 0F 7E /r)
 */
.text

/*
 * _rsaz_512_sqr
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_sqr(void *ret, const void *a, const void *n,
 *                       BN_ULONG k, int cnt);
 * In:   rdi = result (8 limbs), rsi = input a (8 limbs),
 *       rdx = modulus n (8 limbs), rcx = constant stored at 128(%rsp),
 *       r8d = repeat count.
 * Each pass squares the current value into 0..127(%rsp), runs
 * __rsaz_512_reduce and __rsaz_512_subtract, and stores 8 limbs to (%rdi).
 * After the first pass rsi is redirected to rdi, so the result feeds the
 * next squaring.  The count is decremented and then tested, so it must be
 * non-zero on entry.
 *
 * NOTE(review): the "double the cross products, then add the square" carry
 * handling here looks like the upstream rsaz_512_sqr that predates the fix
 * for CVE-2019-1551 (a carry out of the `adcq $0` following the doubling is
 * not captured).  Confirm against the current upstream implementation
 * before relying on this routine.
 */
.globl	_rsaz_512_sqr
.private_extern	_rsaz_512_sqr

.p2align	5
_rsaz_512_sqr:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
L$sqr_body:
	movq	%rdx,%rbp			/* rbp = modulus pointer */
	movq	(%rsi),%rdx			/* rdx = a[0] */
	movq	8(%rsi),%rax			/* rax = a[1] */
	movq	%rcx,128(%rsp)			/* constant for the reduction */
	jmp	L$oop_sqr

.p2align	5
L$oop_sqr:
	movl	%r8d,128+8(%rsp)		/* spill remaining count */

	/* Row 0: a[0] * a[1..7] accumulated into r8..r15. */
	movq	%rdx,%rbx
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	/* Double the two lowest cross-product limbs; rcx keeps the bit shifted out of r9. */
	addq	%r8,%r8
	movq	%r9,%rcx
	adcq	%r9,%r9

	mulq	%rax				/* a[0]^2 */
	movq	%rax,(%rsp)
	addq	%rdx,%r8
	adcq	$0,%r9

	movq	%r8,8(%rsp)
	shrq	$63,%rcx


	/* Row 1: a[1] * a[2..7]. */
	movq	8(%rsi),%r8
	movq	16(%rsi),%rax
	mulq	%r8
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r15
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%r8

	/* addq on the copy only produces CF (top bit of r10); lea doubles r10 without touching flags. */
	addq	%rdx,%rdx
	leaq	(%rcx,%r10,2),%r10
	movq	%r11,%rbx
	adcq	%r11,%r11

	mulq	%rax				/* a[1]^2 */
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)
	shrq	$63,%rbx


	/* Row 2: a[2] * a[3..7]. */
	movq	16(%rsi),%r9
	movq	24(%rsi),%rax
	mulq	%r9
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	movq	%r12,%r10
	leaq	(%rbx,%r12,2),%r12
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	shrq	$63,%r10
	addq	%rax,%r8
	movq	%r9,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	movq	%r13,%rcx
	leaq	(%r10,%r13,2),%r13

	mulq	%rax				/* a[2]^2 */
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)
	shrq	$63,%rcx


	/* Row 3: a[3] * a[4..7]. */
	movq	24(%rsi),%r10
	movq	32(%rsi),%rax
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	movq	%r14,%r12
	leaq	(%rcx,%r14,2),%r14
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	shrq	$63,%r12
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	movq	%r15,%rbx
	leaq	(%r12,%r15,2),%r15

	mulq	%rax				/* a[3]^2 */
	addq	%rax,%r13
	adcq	%rdx,%r14
	adcq	$0,%r15

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)
	shrq	$63,%rbx


	/* Row 4: a[4] * a[5..7]. */
	movq	32(%rsi),%r11
	movq	40(%rsi),%rax
	mulq	%r11
	addq	%rax,%r8
	movq	48(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	movq	%r8,%r12
	leaq	(%rbx,%r8,2),%r8
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	shrq	$63,%r12
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	movq	%r9,%rcx
	leaq	(%r12,%r9,2),%r9

	mulq	%rax				/* a[4]^2 */
	addq	%rax,%r15
	adcq	%rdx,%r8
	adcq	$0,%r9

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)
	shrq	$63,%rcx


	/* Row 5: a[5] * a[6..7]. */
	movq	40(%rsi),%r12
	movq	48(%rsi),%rax
	mulq	%r12
	addq	%rax,%r10
	movq	56(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	movq	%r10,%r15
	leaq	(%rcx,%r10,2),%r10
	adcq	$0,%rdx
	shrq	$63,%r15
	addq	%rbx,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	movq	%r11,%rbx
	leaq	(%r15,%r11,2),%r11

	mulq	%rax				/* a[5]^2 */
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* Row 6: a[6] * a[7]. */
	movq	48(%rsi),%r13
	movq	56(%rsi),%rax
	mulq	%r13
	addq	%rax,%r12
	movq	%r13,%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	/* shlq moves the saved top bit of r11 (held in rbx) into CF for the doubling chain. */
	xorq	%r14,%r14
	shlq	$1,%rbx
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14

	mulq	%rax				/* a[6]^2 */
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	/* a[7]^2 closes the 16-limb product. */
	movq	56(%rsi),%rax
	mulq	%rax
	addq	%rax,%r13
	adcq	$0,%rdx

	addq	%rdx,%r14

	movq	%r13,112(%rsp)
	movq	%r14,120(%rsp)

	/* Reduce the low half, add the high half, conditionally correct. */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce

	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx			/* rcx = all-ones if the sum carried, else 0 */

	call	__rsaz_512_subtract

	/* Set up the next pass: limbs 0/1 of the result, input = output buffer. */
	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	L$oop_sqr

	leaq	128+24+48(%rsp),%rax		/* rax = rsp value at function entry */
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
L$sqr_epilogue:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_mul
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_mul(void *ret, const void *a, const void *b,
 *                       const void *n, BN_ULONG k);
 * In:   rdi = result, rsi = a, rdx = b, rcx = modulus n,
 *       r8 = constant stored at 128(%rsp).
 * Multiplies a by b into 0..127(%rsp) via __rsaz_512_mul, then reduces and
 * conditionally corrects; 8 limbs are stored to (%rdi).
 * rdi and rcx are parked in xmm0/xmm1 because __rsaz_512_mul overwrites both.
 */
.globl	_rsaz_512_mul
.private_extern	_rsaz_512_mul

.p2align	5
_rsaz_512_mul:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
L$mul_body:
	.byte	102,72,15,110,199		/* movq %rdi,%xmm0 */
	.byte	102,72,15,110,201		/* movq %rcx,%xmm1 */
	movq	%r8,128(%rsp)
	movq	(%rdx),%rbx			/* rbx = b[0] */
	movq	%rdx,%rbp			/* rbp = b, walked by __rsaz_512_mul */
	call	__rsaz_512_mul

	.byte	102,72,15,126,199		/* movq %xmm0,%rdi */
	.byte	102,72,15,126,205		/* movq %xmm1,%rbp  (rbp = modulus) */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx			/* rcx = all-ones if the sum carried, else 0 */

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax		/* rax = rsp value at function entry */
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
L$mul_epilogue:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_mul_gather4
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
 *                               const void *n, BN_ULONG k,
 *                               unsigned int power);
 * In:   rdi = result, rsi = a, rdx = table, rcx = modulus n,
 *       r8 = constant stored at 128(%rsp), r9d = table index.
 * The second operand is read from the table one limb at a time: limb i is
 * the 32-bit word at table + 4*index + 128*i (low half) combined with the
 * word 64 bytes above it (high half).  The load of the next limb is
 * interleaved with the multiply of the current one.
 *
 * NOTE(review): the index directly forms the load address, so the memory
 * access pattern depends on it; if the index is secret this is not
 * constant-time -- confirm against the caller's threat model.
 * NOTE(review): the final loop pass still loads a ninth row (offsets
 * 1024 and 1088 past table + 4*index); the value is never used, but the
 * bytes must be readable.
 */
.globl	_rsaz_512_mul_gather4
.private_extern	_rsaz_512_mul_gather4

.p2align	5
_rsaz_512_mul_gather4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d			/* zero-extend the 32-bit index */
	subq	$128+24,%rsp
L$mul_gather4_body:
	movl	64(%rdx,%r9,4),%eax		/* high half of limb 0 */
	.byte	102,72,15,110,199		/* movq %rdi,%xmm0 */
	movl	(%rdx,%r9,4),%ebx		/* low half of limb 0 */
	.byte	102,72,15,110,201		/* movq %rcx,%xmm1 */
	movq	%r8,128(%rsp)

	shlq	$32,%rax
	orq	%rax,%rbx			/* rbx = b[0] */
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	leaq	128(%rdx,%r9,4),%rbp		/* rbp = table row of limb 1 */
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4			/* next limb, low half */
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5			/* next limb, high half */
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4			/* xmm4 low qword = next limb */
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	leaq	128(%rbp),%rbp
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	.byte	102,72,15,126,227		/* movq %xmm4,%rbx  (rbx = next limb) */
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi			/* rdi = next product limb to store */
	movl	$7,%ecx				/* seven limbs of b remain */
	jmp	L$oop_mul_gather

.p2align	5
L$oop_mul_gather:
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	.byte	102,72,15,126,227		/* movq %xmm4,%rbx */
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	128(%rbp),%rbp
	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	L$oop_mul_gather

	/* Flush the eight high limbs of the product. */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	102,72,15,126,199		/* movq %xmm0,%rdi */
	.byte	102,72,15,126,205		/* movq %xmm1,%rbp  (rbp = modulus) */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx			/* rcx = all-ones if the sum carried, else 0 */

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax		/* rax = rsp value at function entry */
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
L$mul_gather4_epilogue:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_mul_scatter4
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_mul_scatter4(void *a, const void *b, const void *n,
 *                                BN_ULONG k, void *tbl, unsigned int power);
 * In:   rdi = a (also receives the result), rsi = b, rdx = modulus n,
 *       rcx = constant stored at 128(%rsp), r8 = table, r9d = table index.
 * Computes the reduced product, stores it to (%rdi) (done inside
 * __rsaz_512_subtract) and additionally writes it to the table in the same
 * split-word layout that _rsaz_512_mul_gather4 reads.
 */
.globl	_rsaz_512_mul_scatter4
.private_extern	_rsaz_512_mul_scatter4

.p2align	5
_rsaz_512_mul_scatter4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d			/* zero-extend the 32-bit index */
	subq	$128+24,%rsp
L$mul_scatter4_body:
	leaq	(%r8,%r9,4),%r8			/* r8 = table + 4*index */
	.byte	102,72,15,110,199		/* movq %rdi,%xmm0 */
	.byte	102,72,15,110,202		/* movq %rdx,%xmm1 */
	.byte	102,73,15,110,208		/* movq %r8,%xmm2 */
	movq	%rcx,128(%rsp)

	movq	%rdi,%rbp			/* multiplier limbs come from a */
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

	.byte	102,72,15,126,199		/* movq %xmm0,%rdi */
	.byte	102,72,15,126,205		/* movq %xmm1,%rbp  (rbp = modulus) */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	.byte	102,72,15,126,214		/* movq %xmm2,%rsi  (flags untouched) */
	sbbq	%rcx,%rcx			/* rcx = all-ones if the sum carried, else 0 */

	call	__rsaz_512_subtract

	/* Low 32 bits of limb i go to 128*i(%rsi), high 32 bits to 128*i+64(%rsi). */
	movl	%r8d,0(%rsi)
	shrq	$32,%r8
	movl	%r9d,128(%rsi)
	shrq	$32,%r9
	movl	%r10d,256(%rsi)
	shrq	$32,%r10
	movl	%r11d,384(%rsi)
	shrq	$32,%r11
	movl	%r12d,512(%rsi)
	shrq	$32,%r12
	movl	%r13d,640(%rsi)
	shrq	$32,%r13
	movl	%r14d,768(%rsi)
	shrq	$32,%r14
	movl	%r15d,896(%rsi)
	shrq	$32,%r15
	movl	%r8d,64(%rsi)
	movl	%r9d,192(%rsi)
	movl	%r10d,320(%rsi)
	movl	%r11d,448(%rsi)
	movl	%r12d,576(%rsi)
	movl	%r13d,704(%rsi)
	movl	%r14d,832(%rsi)
	movl	%r15d,960(%rsi)

	leaq	128+24+48(%rsp),%rax		/* rax = rsp value at function entry */
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
L$mul_scatter4_epilogue:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_mul_by_one
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_mul_by_one(void *ret, const void *a, const void *n,
 *                              BN_ULONG k);
 * In:   rdi = result, rsi = a, rdx = modulus n,
 *       rcx = constant stored at 128(%rsp).
 * Loads the eight limbs of a, zeroes 0..111(%rsp), runs __rsaz_512_reduce
 * on the loaded limbs and stores the eight resulting limbs to (%rdi).
 * No high half is added and no conditional subtraction is performed.
 */
.globl	_rsaz_512_mul_by_one
.private_extern	_rsaz_512_mul_by_one

.p2align	5
_rsaz_512_mul_by_one:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
L$mul_by_one_body:
	movq	%rdx,%rbp			/* rbp = modulus pointer */
	movq	%rcx,128(%rsp)

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	/* Aligned stores: rsp is 16-byte aligned here (see file header). */
	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	call	__rsaz_512_reduce
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax		/* rax = rsp value at function entry */
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
L$mul_by_one_epilogue:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * __rsaz_512_reduce (internal, non-standard register contract)
 * In:   r8..r15 = eight limbs to reduce, rbp = modulus pointer,
 *       128+8(%rsp) = the caller's 128(%rsp) constant.
 * Out:  r8..r15 = eight limbs after eight passes of
 *       "m = low limb * constant; value = (value + m * modulus) >> 64".
 *       (Presumably word-wise Montgomery reduction -- confirm.)
 * Clobbers: rax, rbx, rcx, rdx, rsi, flags.  rdi is preserved.
 */
.p2align	5
__rsaz_512_reduce:
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx		/* rbx = multiplier for the first pass */
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	L$reduction_loop

.p2align	5
L$reduction_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8				/* only the carry is needed: CF = (r8 != 0) */
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi		/* start computing the next multiplier */


	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi			/* rsi = new low limb * constant */
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx			/* multiplier for the next pass */
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	L$reduction_loop

	.byte	0xf3,0xc3			/* rep ret */

/*
 * __rsaz_512_subtract (internal, non-standard register contract)
 * In:   r8..r15 = value, rdi = destination, rbp = modulus pointer,
 *       rcx = mask (0 or all-ones, as produced by `sbbq %rcx,%rcx`).
 * Stores the value to (%rdi), then adds (mask & -modulus) to it and stores
 * the sum again; with mask = 0 the value is unchanged, with mask = all-ones
 * the modulus is subtracted.  Both paths execute the same instructions.
 * The negation is formed as neg(limb 0) / not(limbs 1..7), which equals the
 * 512-bit two's complement only when limb 0 of the modulus is non-zero.
 * Out:  r8..r15 = stored result.  Clobbers flags.
 */
.p2align	5
__rsaz_512_subtract:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3			/* rep ret */

/*
 * __rsaz_512_mul (internal, non-standard register contract)
 * In:   rsi = multiplicand (8 limbs), rbp = multiplier (8 limbs),
 *       rbx = multiplier limb 0 (preloaded by the caller).
 * Out:  the 16-limb product at 8(%rsp)..135(%rsp), i.e. the caller's
 *       0..127(%rsp).
 * Clobbers: rax, rbx, rcx, rdx, rdi, rbp, r8-r15, flags.
 */
.p2align	5
__rsaz_512_mul:
	leaq	8(%rsp),%rdi			/* rdi = caller's 0(%rsp) */

	/* First row: multiplicand * multiplier limb 0. */
	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp
	leaq	8(%rdi),%rdi

	movl	$7,%ecx				/* seven multiplier limbs remain */
	jmp	L$oop_mul

.p2align	5
L$oop_mul:
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)			/* lowest live limb is final: store it */
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp			/* lea keeps CF for the adcq below */
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	L$oop_mul

	/* Flush the eight high limbs of the product. */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_scatter4
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
 * In:   rdi = table, rsi = eight source limbs, edx = table index.
 * Writes limb i as two 32-bit words: the low half at
 * table + 4*index + 128*i and the high half 64 bytes above it.
 * Leaf routine; clobbers rax, rdx, rsi, rdi, r9, flags.
 */
.globl	_rsaz_512_scatter4
.private_extern	_rsaz_512_scatter4

.p2align	4
_rsaz_512_scatter4:
	movl	%edx,%edx			/* zero-extend the index, as the *_mul_gather4/_mul_scatter4 entries do for theirs */
	leaq	(%rdi,%rdx,4),%rdi
	movl	$8,%r9d
	jmp	L$oop_scatter
.p2align	4
L$oop_scatter:
	movq	(%rsi),%rax
	leaq	8(%rsi),%rsi
	movl	%eax,(%rdi)
	shrq	$32,%rax
	movl	%eax,64(%rdi)
	leaq	128(%rdi),%rdi
	decl	%r9d
	jnz	L$oop_scatter
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _rsaz_512_gather4
 * Presumed C prototype (confirm against the caller):
 *     void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
 * In:   rdi = eight destination limbs, rsi = table, edx = table index.
 * Inverse of _rsaz_512_scatter4: limb i is rebuilt from the 32-bit word at
 * table + 4*index + 128*i (low half) and the word 64 bytes above it.
 * Leaf routine; clobbers rax, rdx, rsi, rdi, r8, r9, flags.
 * NOTE(review): the load address depends on the index; see the note on
 * _rsaz_512_mul_gather4 if the index is secret.
 */
.globl	_rsaz_512_gather4
.private_extern	_rsaz_512_gather4

.p2align	4
_rsaz_512_gather4:
	movl	%edx,%edx			/* zero-extend the index, as the *_mul_gather4/_mul_scatter4 entries do for theirs */
	leaq	(%rsi,%rdx,4),%rsi
	movl	$8,%r9d
	jmp	L$oop_gather
.p2align	4
L$oop_gather:
	movl	(%rsi),%eax			/* 32-bit load zero-extends into rax */
	movl	64(%rsi),%r8d
	leaq	128(%rsi),%rsi
	shlq	$32,%r8
	orq	%r8,%rax
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	decl	%r9d
	jnz	L$oop_gather
	.byte	0xf3,0xc3			/* rep ret */

#endif