1 OPTION DOTNAME 2 .text$ SEGMENT ALIGN(256) 'CODE' 3 4 EXTERN OPENSSL_ia32cap_P:NEAR 5 6 PUBLIC bn_mul_mont_gather5 7 8 ALIGN 64 9 bn_mul_mont_gather5 PROC PUBLIC 10 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue 11 mov QWORD PTR[16+rsp],rsi 12 mov rax,rsp 13 $L$SEH_begin_bn_mul_mont_gather5:: 14 mov rdi,rcx 15 mov rsi,rdx 16 mov rdx,r8 17 mov rcx,r9 18 mov r8,QWORD PTR[40+rsp] 19 mov r9,QWORD PTR[48+rsp] 20 21 22 test r9d,7 23 jnz $L$mul_enter 24 jmp $L$mul4x_enter 25 26 ALIGN 16 27 $L$mul_enter:: 28 mov r9d,r9d 29 mov rax,rsp 30 mov r10d,DWORD PTR[56+rsp] 31 push rbx 32 push rbp 33 push r12 34 push r13 35 push r14 36 push r15 37 lea rsp,QWORD PTR[((-40))+rsp] 38 movaps XMMWORD PTR[rsp],xmm6 39 movaps XMMWORD PTR[16+rsp],xmm7 40 lea r11,QWORD PTR[2+r9] 41 neg r11 42 lea rsp,QWORD PTR[r11*8+rsp] 43 and rsp,-1024 44 45 mov QWORD PTR[8+r9*8+rsp],rax 46 $L$mul_body:: 47 mov r12,rdx 48 mov r11,r10 49 shr r10,3 50 and r11,7 51 not r10 52 lea rax,QWORD PTR[$L$magic_masks] 53 and r10,3 54 lea r12,QWORD PTR[96+r11*8+r12] 55 movq xmm4,QWORD PTR[r10*8+rax] 56 movq xmm5,QWORD PTR[8+r10*8+rax] 57 movq xmm6,QWORD PTR[16+r10*8+rax] 58 movq xmm7,QWORD PTR[24+r10*8+rax] 59 60 movq xmm0,QWORD PTR[(((-96)))+r12] 61 movq xmm1,QWORD PTR[((-32))+r12] 62 pand xmm0,xmm4 63 movq xmm2,QWORD PTR[32+r12] 64 pand xmm1,xmm5 65 movq xmm3,QWORD PTR[96+r12] 66 pand xmm2,xmm6 67 por xmm0,xmm1 68 pand xmm3,xmm7 69 por xmm0,xmm2 70 lea r12,QWORD PTR[256+r12] 71 por xmm0,xmm3 72 73 DB 102,72,15,126,195 74 75 mov r8,QWORD PTR[r8] 76 mov rax,QWORD PTR[rsi] 77 78 xor r14,r14 79 xor r15,r15 80 81 movq xmm0,QWORD PTR[(((-96)))+r12] 82 movq xmm1,QWORD PTR[((-32))+r12] 83 pand xmm0,xmm4 84 movq xmm2,QWORD PTR[32+r12] 85 pand xmm1,xmm5 86 87 mov rbp,r8 88 mul rbx 89 mov r10,rax 90 mov rax,QWORD PTR[rcx] 91 92 movq xmm3,QWORD PTR[96+r12] 93 pand xmm2,xmm6 94 por xmm0,xmm1 95 pand xmm3,xmm7 96 97 imul rbp,r10 98 mov r11,rdx 99 100 por xmm0,xmm2 101 lea r12,QWORD PTR[256+r12] 102 por xmm0,xmm3 103 104 mul rbp 105 
add r10,rax 106 mov rax,QWORD PTR[8+rsi] 107 adc rdx,0 108 mov r13,rdx 109 110 lea r15,QWORD PTR[1+r15] 111 jmp $L$1st_enter 112 113 ALIGN 16 114 $L$1st:: 115 add r13,rax 116 mov rax,QWORD PTR[r15*8+rsi] 117 adc rdx,0 118 add r13,r11 119 mov r11,r10 120 adc rdx,0 121 mov QWORD PTR[((-16))+r15*8+rsp],r13 122 mov r13,rdx 123 124 $L$1st_enter:: 125 mul rbx 126 add r11,rax 127 mov rax,QWORD PTR[r15*8+rcx] 128 adc rdx,0 129 lea r15,QWORD PTR[1+r15] 130 mov r10,rdx 131 132 mul rbp 133 cmp r15,r9 134 jne $L$1st 135 136 DB 102,72,15,126,195 137 138 add r13,rax 139 mov rax,QWORD PTR[rsi] 140 adc rdx,0 141 add r13,r11 142 adc rdx,0 143 mov QWORD PTR[((-16))+r15*8+rsp],r13 144 mov r13,rdx 145 mov r11,r10 146 147 xor rdx,rdx 148 add r13,r11 149 adc rdx,0 150 mov QWORD PTR[((-8))+r9*8+rsp],r13 151 mov QWORD PTR[r9*8+rsp],rdx 152 153 lea r14,QWORD PTR[1+r14] 154 jmp $L$outer 155 ALIGN 16 156 $L$outer:: 157 xor r15,r15 158 mov rbp,r8 159 mov r10,QWORD PTR[rsp] 160 161 movq xmm0,QWORD PTR[(((-96)))+r12] 162 movq xmm1,QWORD PTR[((-32))+r12] 163 pand xmm0,xmm4 164 movq xmm2,QWORD PTR[32+r12] 165 pand xmm1,xmm5 166 167 mul rbx 168 add r10,rax 169 mov rax,QWORD PTR[rcx] 170 adc rdx,0 171 172 movq xmm3,QWORD PTR[96+r12] 173 pand xmm2,xmm6 174 por xmm0,xmm1 175 pand xmm3,xmm7 176 177 imul rbp,r10 178 mov r11,rdx 179 180 por xmm0,xmm2 181 lea r12,QWORD PTR[256+r12] 182 por xmm0,xmm3 183 184 mul rbp 185 add r10,rax 186 mov rax,QWORD PTR[8+rsi] 187 adc rdx,0 188 mov r10,QWORD PTR[8+rsp] 189 mov r13,rdx 190 191 lea r15,QWORD PTR[1+r15] 192 jmp $L$inner_enter 193 194 ALIGN 16 195 $L$inner:: 196 add r13,rax 197 mov rax,QWORD PTR[r15*8+rsi] 198 adc rdx,0 199 add r13,r10 200 mov r10,QWORD PTR[r15*8+rsp] 201 adc rdx,0 202 mov QWORD PTR[((-16))+r15*8+rsp],r13 203 mov r13,rdx 204 205 $L$inner_enter:: 206 mul rbx 207 add r11,rax 208 mov rax,QWORD PTR[r15*8+rcx] 209 adc rdx,0 210 add r10,r11 211 mov r11,rdx 212 adc r11,0 213 lea r15,QWORD PTR[1+r15] 214 215 mul rbp 216 cmp r15,r9 217 jne $L$inner 
218 219 DB 102,72,15,126,195 220 221 add r13,rax 222 mov rax,QWORD PTR[rsi] 223 adc rdx,0 224 add r13,r10 225 mov r10,QWORD PTR[r15*8+rsp] 226 adc rdx,0 227 mov QWORD PTR[((-16))+r15*8+rsp],r13 228 mov r13,rdx 229 230 xor rdx,rdx 231 add r13,r11 232 adc rdx,0 233 add r13,r10 234 adc rdx,0 235 mov QWORD PTR[((-8))+r9*8+rsp],r13 236 mov QWORD PTR[r9*8+rsp],rdx 237 238 lea r14,QWORD PTR[1+r14] 239 cmp r14,r9 240 jb $L$outer 241 242 xor r14,r14 243 mov rax,QWORD PTR[rsp] 244 lea rsi,QWORD PTR[rsp] 245 mov r15,r9 246 jmp $L$sub 247 ALIGN 16 248 $L$sub:: sbb rax,QWORD PTR[r14*8+rcx] 249 mov QWORD PTR[r14*8+rdi],rax 250 mov rax,QWORD PTR[8+r14*8+rsi] 251 lea r14,QWORD PTR[1+r14] 252 dec r15 253 jnz $L$sub 254 255 sbb rax,0 256 xor r14,r14 257 mov r15,r9 258 ALIGN 16 259 $L$copy:: 260 mov rsi,QWORD PTR[r14*8+rsp] 261 mov rcx,QWORD PTR[r14*8+rdi] 262 xor rsi,rcx 263 and rsi,rax 264 xor rsi,rcx 265 mov QWORD PTR[r14*8+rsp],r14 266 mov QWORD PTR[r14*8+rdi],rsi 267 lea r14,QWORD PTR[1+r14] 268 sub r15,1 269 jnz $L$copy 270 271 mov rsi,QWORD PTR[8+r9*8+rsp] 272 mov rax,1 273 movaps xmm6,XMMWORD PTR[((-88))+rsi] 274 movaps xmm7,XMMWORD PTR[((-72))+rsi] 275 mov r15,QWORD PTR[((-48))+rsi] 276 mov r14,QWORD PTR[((-40))+rsi] 277 mov r13,QWORD PTR[((-32))+rsi] 278 mov r12,QWORD PTR[((-24))+rsi] 279 mov rbp,QWORD PTR[((-16))+rsi] 280 mov rbx,QWORD PTR[((-8))+rsi] 281 lea rsp,QWORD PTR[rsi] 282 $L$mul_epilogue:: 283 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue 284 mov rsi,QWORD PTR[16+rsp] 285 DB 0F3h,0C3h ;repret 286 $L$SEH_end_bn_mul_mont_gather5:: 287 bn_mul_mont_gather5 ENDP 288 289 ALIGN 32 290 bn_mul4x_mont_gather5 PROC PRIVATE 291 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue 292 mov QWORD PTR[16+rsp],rsi 293 mov rax,rsp 294 $L$SEH_begin_bn_mul4x_mont_gather5:: 295 mov rdi,rcx 296 mov rsi,rdx 297 mov rdx,r8 298 mov rcx,r9 299 mov r8,QWORD PTR[40+rsp] 300 mov r9,QWORD PTR[48+rsp] 301 302 303 $L$mul4x_enter:: 304 DB 067h 305 mov rax,rsp 306 push rbx 307 push rbp 308 push r12 309 push r13 
310 push r14 311 push r15 312 lea rsp,QWORD PTR[((-40))+rsp] 313 movaps XMMWORD PTR[rsp],xmm6 314 movaps XMMWORD PTR[16+rsp],xmm7 315 DB 067h 316 mov r10d,r9d 317 shl r9d,3 318 shl r10d,3+2 319 neg r9 320 321 322 323 324 325 326 327 328 lea r11,QWORD PTR[((-64))+r9*2+rsp] 329 sub r11,rsi 330 and r11,4095 331 cmp r10,r11 332 jb $L$mul4xsp_alt 333 sub rsp,r11 334 lea rsp,QWORD PTR[((-64))+r9*2+rsp] 335 jmp $L$mul4xsp_done 336 337 ALIGN 32 338 $L$mul4xsp_alt:: 339 lea r10,QWORD PTR[((4096-64))+r9*2] 340 lea rsp,QWORD PTR[((-64))+r9*2+rsp] 341 sub r11,r10 342 mov r10,0 343 cmovc r11,r10 344 sub rsp,r11 345 $L$mul4xsp_done:: 346 and rsp,-64 347 neg r9 348 349 mov QWORD PTR[40+rsp],rax 350 $L$mul4x_body:: 351 352 call mul4x_internal 353 354 mov rsi,QWORD PTR[40+rsp] 355 mov rax,1 356 movaps xmm6,XMMWORD PTR[((-88))+rsi] 357 movaps xmm7,XMMWORD PTR[((-72))+rsi] 358 mov r15,QWORD PTR[((-48))+rsi] 359 mov r14,QWORD PTR[((-40))+rsi] 360 mov r13,QWORD PTR[((-32))+rsi] 361 mov r12,QWORD PTR[((-24))+rsi] 362 mov rbp,QWORD PTR[((-16))+rsi] 363 mov rbx,QWORD PTR[((-8))+rsi] 364 lea rsp,QWORD PTR[rsi] 365 $L$mul4x_epilogue:: 366 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue 367 mov rsi,QWORD PTR[16+rsp] 368 DB 0F3h,0C3h ;repret 369 $L$SEH_end_bn_mul4x_mont_gather5:: 370 bn_mul4x_mont_gather5 ENDP 371 372 373 ALIGN 32 374 mul4x_internal PROC PRIVATE 375 shl r9,5 376 mov r10d,DWORD PTR[56+rax] 377 lea r13,QWORD PTR[256+r9*1+rdx] 378 shr r9,5 379 mov r11,r10 380 shr r10,3 381 and r11,7 382 not r10 383 lea rax,QWORD PTR[$L$magic_masks] 384 and r10,3 385 lea r12,QWORD PTR[96+r11*8+rdx] 386 movq xmm4,QWORD PTR[r10*8+rax] 387 movq xmm5,QWORD PTR[8+r10*8+rax] 388 add r11,7 389 movq xmm6,QWORD PTR[16+r10*8+rax] 390 movq xmm7,QWORD PTR[24+r10*8+rax] 391 and r11,7 392 393 movq xmm0,QWORD PTR[(((-96)))+r12] 394 lea r14,QWORD PTR[256+r12] 395 movq xmm1,QWORD PTR[((-32))+r12] 396 pand xmm0,xmm4 397 movq xmm2,QWORD PTR[32+r12] 398 pand xmm1,xmm5 399 movq xmm3,QWORD PTR[96+r12] 400 pand xmm2,xmm6 401 
DB 067h 402 por xmm0,xmm1 403 movq xmm1,QWORD PTR[((-96))+r14] 404 DB 067h 405 pand xmm3,xmm7 406 DB 067h 407 por xmm0,xmm2 408 movq xmm2,QWORD PTR[((-32))+r14] 409 DB 067h 410 pand xmm1,xmm4 411 DB 067h 412 por xmm0,xmm3 413 movq xmm3,QWORD PTR[32+r14] 414 415 DB 102,72,15,126,195 416 movq xmm0,QWORD PTR[96+r14] 417 mov QWORD PTR[((16+8))+rsp],r13 418 mov QWORD PTR[((56+8))+rsp],rdi 419 420 mov r8,QWORD PTR[r8] 421 mov rax,QWORD PTR[rsi] 422 lea rsi,QWORD PTR[r9*1+rsi] 423 neg r9 424 425 mov rbp,r8 426 mul rbx 427 mov r10,rax 428 mov rax,QWORD PTR[rcx] 429 430 pand xmm2,xmm5 431 pand xmm3,xmm6 432 por xmm1,xmm2 433 434 imul rbp,r10 435 436 437 438 439 440 441 442 lea r14,QWORD PTR[((64+8))+r11*8+rsp] 443 mov r11,rdx 444 445 pand xmm0,xmm7 446 por xmm1,xmm3 447 lea r12,QWORD PTR[512+r12] 448 por xmm0,xmm1 449 450 mul rbp 451 add r10,rax 452 mov rax,QWORD PTR[8+r9*1+rsi] 453 adc rdx,0 454 mov rdi,rdx 455 456 mul rbx 457 add r11,rax 458 mov rax,QWORD PTR[16+rcx] 459 adc rdx,0 460 mov r10,rdx 461 462 mul rbp 463 add rdi,rax 464 mov rax,QWORD PTR[16+r9*1+rsi] 465 adc rdx,0 466 add rdi,r11 467 lea r15,QWORD PTR[32+r9] 468 lea rcx,QWORD PTR[64+rcx] 469 adc rdx,0 470 mov QWORD PTR[r14],rdi 471 mov r13,rdx 472 jmp $L$1st4x 473 474 ALIGN 32 475 $L$1st4x:: 476 mul rbx 477 add r10,rax 478 mov rax,QWORD PTR[((-32))+rcx] 479 lea r14,QWORD PTR[32+r14] 480 adc rdx,0 481 mov r11,rdx 482 483 mul rbp 484 add r13,rax 485 mov rax,QWORD PTR[((-8))+r15*1+rsi] 486 adc rdx,0 487 add r13,r10 488 adc rdx,0 489 mov QWORD PTR[((-24))+r14],r13 490 mov rdi,rdx 491 492 mul rbx 493 add r11,rax 494 mov rax,QWORD PTR[((-16))+rcx] 495 adc rdx,0 496 mov r10,rdx 497 498 mul rbp 499 add rdi,rax 500 mov rax,QWORD PTR[r15*1+rsi] 501 adc rdx,0 502 add rdi,r11 503 adc rdx,0 504 mov QWORD PTR[((-16))+r14],rdi 505 mov r13,rdx 506 507 mul rbx 508 add r10,rax 509 mov rax,QWORD PTR[rcx] 510 adc rdx,0 511 mov r11,rdx 512 513 mul rbp 514 add r13,rax 515 mov rax,QWORD PTR[8+r15*1+rsi] 516 adc rdx,0 517 add r13,r10 
518 adc rdx,0 519 mov QWORD PTR[((-8))+r14],r13 520 mov rdi,rdx 521 522 mul rbx 523 add r11,rax 524 mov rax,QWORD PTR[16+rcx] 525 adc rdx,0 526 mov r10,rdx 527 528 mul rbp 529 add rdi,rax 530 mov rax,QWORD PTR[16+r15*1+rsi] 531 adc rdx,0 532 add rdi,r11 533 lea rcx,QWORD PTR[64+rcx] 534 adc rdx,0 535 mov QWORD PTR[r14],rdi 536 mov r13,rdx 537 538 add r15,32 539 jnz $L$1st4x 540 541 mul rbx 542 add r10,rax 543 mov rax,QWORD PTR[((-32))+rcx] 544 lea r14,QWORD PTR[32+r14] 545 adc rdx,0 546 mov r11,rdx 547 548 mul rbp 549 add r13,rax 550 mov rax,QWORD PTR[((-8))+rsi] 551 adc rdx,0 552 add r13,r10 553 adc rdx,0 554 mov QWORD PTR[((-24))+r14],r13 555 mov rdi,rdx 556 557 mul rbx 558 add r11,rax 559 mov rax,QWORD PTR[((-16))+rcx] 560 adc rdx,0 561 mov r10,rdx 562 563 mul rbp 564 add rdi,rax 565 mov rax,QWORD PTR[r9*1+rsi] 566 adc rdx,0 567 add rdi,r11 568 adc rdx,0 569 mov QWORD PTR[((-16))+r14],rdi 570 mov r13,rdx 571 572 DB 102,72,15,126,195 573 lea rcx,QWORD PTR[r9*2+rcx] 574 575 xor rdi,rdi 576 add r13,r10 577 adc rdi,0 578 mov QWORD PTR[((-8))+r14],r13 579 580 jmp $L$outer4x 581 582 ALIGN 32 583 $L$outer4x:: 584 mov r10,QWORD PTR[r9*1+r14] 585 mov rbp,r8 586 mul rbx 587 add r10,rax 588 mov rax,QWORD PTR[rcx] 589 adc rdx,0 590 591 movq xmm0,QWORD PTR[(((-96)))+r12] 592 movq xmm1,QWORD PTR[((-32))+r12] 593 pand xmm0,xmm4 594 movq xmm2,QWORD PTR[32+r12] 595 pand xmm1,xmm5 596 movq xmm3,QWORD PTR[96+r12] 597 598 imul rbp,r10 599 DB 067h 600 mov r11,rdx 601 mov QWORD PTR[r14],rdi 602 603 pand xmm2,xmm6 604 por xmm0,xmm1 605 pand xmm3,xmm7 606 por xmm0,xmm2 607 lea r14,QWORD PTR[r9*1+r14] 608 lea r12,QWORD PTR[256+r12] 609 por xmm0,xmm3 610 611 mul rbp 612 add r10,rax 613 mov rax,QWORD PTR[8+r9*1+rsi] 614 adc rdx,0 615 mov rdi,rdx 616 617 mul rbx 618 add r11,rax 619 mov rax,QWORD PTR[16+rcx] 620 adc rdx,0 621 add r11,QWORD PTR[8+r14] 622 adc rdx,0 623 mov r10,rdx 624 625 mul rbp 626 add rdi,rax 627 mov rax,QWORD PTR[16+r9*1+rsi] 628 adc rdx,0 629 add rdi,r11 630 lea 
r15,QWORD PTR[32+r9] 631 lea rcx,QWORD PTR[64+rcx] 632 adc rdx,0 633 mov r13,rdx 634 jmp $L$inner4x 635 636 ALIGN 32 637 $L$inner4x:: 638 mul rbx 639 add r10,rax 640 mov rax,QWORD PTR[((-32))+rcx] 641 adc rdx,0 642 add r10,QWORD PTR[16+r14] 643 lea r14,QWORD PTR[32+r14] 644 adc rdx,0 645 mov r11,rdx 646 647 mul rbp 648 add r13,rax 649 mov rax,QWORD PTR[((-8))+r15*1+rsi] 650 adc rdx,0 651 add r13,r10 652 adc rdx,0 653 mov QWORD PTR[((-32))+r14],rdi 654 mov rdi,rdx 655 656 mul rbx 657 add r11,rax 658 mov rax,QWORD PTR[((-16))+rcx] 659 adc rdx,0 660 add r11,QWORD PTR[((-8))+r14] 661 adc rdx,0 662 mov r10,rdx 663 664 mul rbp 665 add rdi,rax 666 mov rax,QWORD PTR[r15*1+rsi] 667 adc rdx,0 668 add rdi,r11 669 adc rdx,0 670 mov QWORD PTR[((-24))+r14],r13 671 mov r13,rdx 672 673 mul rbx 674 add r10,rax 675 mov rax,QWORD PTR[rcx] 676 adc rdx,0 677 add r10,QWORD PTR[r14] 678 adc rdx,0 679 mov r11,rdx 680 681 mul rbp 682 add r13,rax 683 mov rax,QWORD PTR[8+r15*1+rsi] 684 adc rdx,0 685 add r13,r10 686 adc rdx,0 687 mov QWORD PTR[((-16))+r14],rdi 688 mov rdi,rdx 689 690 mul rbx 691 add r11,rax 692 mov rax,QWORD PTR[16+rcx] 693 adc rdx,0 694 add r11,QWORD PTR[8+r14] 695 adc rdx,0 696 mov r10,rdx 697 698 mul rbp 699 add rdi,rax 700 mov rax,QWORD PTR[16+r15*1+rsi] 701 adc rdx,0 702 add rdi,r11 703 lea rcx,QWORD PTR[64+rcx] 704 adc rdx,0 705 mov QWORD PTR[((-8))+r14],r13 706 mov r13,rdx 707 708 add r15,32 709 jnz $L$inner4x 710 711 mul rbx 712 add r10,rax 713 mov rax,QWORD PTR[((-32))+rcx] 714 adc rdx,0 715 add r10,QWORD PTR[16+r14] 716 lea r14,QWORD PTR[32+r14] 717 adc rdx,0 718 mov r11,rdx 719 720 mul rbp 721 add r13,rax 722 mov rax,QWORD PTR[((-8))+rsi] 723 adc rdx,0 724 add r13,r10 725 adc rdx,0 726 mov QWORD PTR[((-32))+r14],rdi 727 mov rdi,rdx 728 729 mul rbx 730 add r11,rax 731 mov rax,rbp 732 mov rbp,QWORD PTR[((-16))+rcx] 733 adc rdx,0 734 add r11,QWORD PTR[((-8))+r14] 735 adc rdx,0 736 mov r10,rdx 737 738 mul rbp 739 add rdi,rax 740 mov rax,QWORD PTR[r9*1+rsi] 741 adc 
rdx,0 742 add rdi,r11 743 adc rdx,0 744 mov QWORD PTR[((-24))+r14],r13 745 mov r13,rdx 746 747 DB 102,72,15,126,195 748 mov QWORD PTR[((-16))+r14],rdi 749 lea rcx,QWORD PTR[r9*2+rcx] 750 751 xor rdi,rdi 752 add r13,r10 753 adc rdi,0 754 add r13,QWORD PTR[r14] 755 adc rdi,0 756 mov QWORD PTR[((-8))+r14],r13 757 758 cmp r12,QWORD PTR[((16+8))+rsp] 759 jb $L$outer4x 760 sub rbp,r13 761 adc r15,r15 762 or rdi,r15 763 xor rdi,1 764 lea rbx,QWORD PTR[r9*1+r14] 765 lea rbp,QWORD PTR[rdi*8+rcx] 766 mov rcx,r9 767 sar rcx,3+2 768 mov rdi,QWORD PTR[((56+8))+rsp] 769 jmp $L$sqr4x_sub 770 mul4x_internal ENDP 771 PUBLIC bn_power5 772 773 ALIGN 32 774 bn_power5 PROC PUBLIC 775 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue 776 mov QWORD PTR[16+rsp],rsi 777 mov rax,rsp 778 $L$SEH_begin_bn_power5:: 779 mov rdi,rcx 780 mov rsi,rdx 781 mov rdx,r8 782 mov rcx,r9 783 mov r8,QWORD PTR[40+rsp] 784 mov r9,QWORD PTR[48+rsp] 785 786 787 mov rax,rsp 788 push rbx 789 push rbp 790 push r12 791 push r13 792 push r14 793 push r15 794 lea rsp,QWORD PTR[((-40))+rsp] 795 movaps XMMWORD PTR[rsp],xmm6 796 movaps XMMWORD PTR[16+rsp],xmm7 797 mov r10d,r9d 798 shl r9d,3 799 shl r10d,3+2 800 neg r9 801 mov r8,QWORD PTR[r8] 802 803 804 805 806 807 808 809 lea r11,QWORD PTR[((-64))+r9*2+rsp] 810 sub r11,rsi 811 and r11,4095 812 cmp r10,r11 813 jb $L$pwr_sp_alt 814 sub rsp,r11 815 lea rsp,QWORD PTR[((-64))+r9*2+rsp] 816 jmp $L$pwr_sp_done 817 818 ALIGN 32 819 $L$pwr_sp_alt:: 820 lea r10,QWORD PTR[((4096-64))+r9*2] 821 lea rsp,QWORD PTR[((-64))+r9*2+rsp] 822 sub r11,r10 823 mov r10,0 824 cmovc r11,r10 825 sub rsp,r11 826 $L$pwr_sp_done:: 827 and rsp,-64 828 mov r10,r9 829 neg r9 830 831 832 833 834 835 836 837 838 839 840 mov QWORD PTR[32+rsp],r8 841 mov QWORD PTR[40+rsp],rax 842 $L$power5_body:: 843 DB 102,72,15,110,207 844 DB 102,72,15,110,209 845 DB 102,73,15,110,218 846 DB 102,72,15,110,226 847 848 call __bn_sqr8x_internal 849 call __bn_sqr8x_internal 850 call __bn_sqr8x_internal 851 call __bn_sqr8x_internal 
852 call __bn_sqr8x_internal 853 854 DB 102,72,15,126,209 855 DB 102,72,15,126,226 856 mov rdi,rsi 857 mov rax,QWORD PTR[40+rsp] 858 lea r8,QWORD PTR[32+rsp] 859 860 call mul4x_internal 861 862 mov rsi,QWORD PTR[40+rsp] 863 mov rax,1 864 mov r15,QWORD PTR[((-48))+rsi] 865 mov r14,QWORD PTR[((-40))+rsi] 866 mov r13,QWORD PTR[((-32))+rsi] 867 mov r12,QWORD PTR[((-24))+rsi] 868 mov rbp,QWORD PTR[((-16))+rsi] 869 mov rbx,QWORD PTR[((-8))+rsi] 870 lea rsp,QWORD PTR[rsi] 871 $L$power5_epilogue:: 872 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue 873 mov rsi,QWORD PTR[16+rsp] 874 DB 0F3h,0C3h ;repret 875 $L$SEH_end_bn_power5:: 876 bn_power5 ENDP 877 878 PUBLIC bn_sqr8x_internal 879 880 881 ALIGN 32 882 bn_sqr8x_internal PROC PUBLIC 883 __bn_sqr8x_internal:: 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 lea rbp,QWORD PTR[32+r10] 958 lea rsi,QWORD PTR[r9*1+rsi] 959 960 mov rcx,r9 961 962 963 mov r14,QWORD PTR[((-32))+rbp*1+rsi] 964 lea rdi,QWORD PTR[((48+8))+r9*2+rsp] 965 mov rax,QWORD PTR[((-24))+rbp*1+rsi] 966 lea rdi,QWORD PTR[((-32))+rbp*1+rdi] 967 mov rbx,QWORD PTR[((-16))+rbp*1+rsi] 968 mov r15,rax 969 970 mul r14 971 mov r10,rax 972 mov rax,rbx 973 mov r11,rdx 974 mov QWORD PTR[((-24))+rbp*1+rdi],r10 975 976 mul r14 977 add r11,rax 978 mov rax,rbx 979 adc rdx,0 980 mov QWORD PTR[((-16))+rbp*1+rdi],r11 981 mov r10,rdx 982 983 984 mov rbx,QWORD PTR[((-8))+rbp*1+rsi] 985 mul r15 986 mov r12,rax 987 mov rax,rbx 988 mov r13,rdx 989 990 lea rcx,QWORD PTR[rbp] 991 mul r14 992 add r10,rax 993 mov rax,rbx 994 mov r11,rdx 995 adc r11,0 996 add r10,r12 997 adc r11,0 998 mov QWORD PTR[((-8))+rcx*1+rdi],r10 999 jmp $L$sqr4x_1st 1000 1001 ALIGN 32 1002 $L$sqr4x_1st:: 1003 mov rbx,QWORD PTR[rcx*1+rsi] 1004 mul r15 1005 add r13,rax 1006 mov 
rax,rbx 1007 mov r12,rdx 1008 adc r12,0 1009 1010 mul r14 1011 add r11,rax 1012 mov rax,rbx 1013 mov rbx,QWORD PTR[8+rcx*1+rsi] 1014 mov r10,rdx 1015 adc r10,0 1016 add r11,r13 1017 adc r10,0 1018 1019 1020 mul r15 1021 add r12,rax 1022 mov rax,rbx 1023 mov QWORD PTR[rcx*1+rdi],r11 1024 mov r13,rdx 1025 adc r13,0 1026 1027 mul r14 1028 add r10,rax 1029 mov rax,rbx 1030 mov rbx,QWORD PTR[16+rcx*1+rsi] 1031 mov r11,rdx 1032 adc r11,0 1033 add r10,r12 1034 adc r11,0 1035 1036 mul r15 1037 add r13,rax 1038 mov rax,rbx 1039 mov QWORD PTR[8+rcx*1+rdi],r10 1040 mov r12,rdx 1041 adc r12,0 1042 1043 mul r14 1044 add r11,rax 1045 mov rax,rbx 1046 mov rbx,QWORD PTR[24+rcx*1+rsi] 1047 mov r10,rdx 1048 adc r10,0 1049 add r11,r13 1050 adc r10,0 1051 1052 1053 mul r15 1054 add r12,rax 1055 mov rax,rbx 1056 mov QWORD PTR[16+rcx*1+rdi],r11 1057 mov r13,rdx 1058 adc r13,0 1059 lea rcx,QWORD PTR[32+rcx] 1060 1061 mul r14 1062 add r10,rax 1063 mov rax,rbx 1064 mov r11,rdx 1065 adc r11,0 1066 add r10,r12 1067 adc r11,0 1068 mov QWORD PTR[((-8))+rcx*1+rdi],r10 1069 1070 cmp rcx,0 1071 jne $L$sqr4x_1st 1072 1073 mul r15 1074 add r13,rax 1075 lea rbp,QWORD PTR[16+rbp] 1076 adc rdx,0 1077 add r13,r11 1078 adc rdx,0 1079 1080 mov QWORD PTR[rdi],r13 1081 mov r12,rdx 1082 mov QWORD PTR[8+rdi],rdx 1083 jmp $L$sqr4x_outer 1084 1085 ALIGN 32 1086 $L$sqr4x_outer:: 1087 mov r14,QWORD PTR[((-32))+rbp*1+rsi] 1088 lea rdi,QWORD PTR[((48+8))+r9*2+rsp] 1089 mov rax,QWORD PTR[((-24))+rbp*1+rsi] 1090 lea rdi,QWORD PTR[((-32))+rbp*1+rdi] 1091 mov rbx,QWORD PTR[((-16))+rbp*1+rsi] 1092 mov r15,rax 1093 1094 mul r14 1095 mov r10,QWORD PTR[((-24))+rbp*1+rdi] 1096 add r10,rax 1097 mov rax,rbx 1098 adc rdx,0 1099 mov QWORD PTR[((-24))+rbp*1+rdi],r10 1100 mov r11,rdx 1101 1102 mul r14 1103 add r11,rax 1104 mov rax,rbx 1105 adc rdx,0 1106 add r11,QWORD PTR[((-16))+rbp*1+rdi] 1107 mov r10,rdx 1108 adc r10,0 1109 mov QWORD PTR[((-16))+rbp*1+rdi],r11 1110 1111 xor r12,r12 1112 1113 mov rbx,QWORD 
PTR[((-8))+rbp*1+rsi] 1114 mul r15 1115 add r12,rax 1116 mov rax,rbx 1117 adc rdx,0 1118 add r12,QWORD PTR[((-8))+rbp*1+rdi] 1119 mov r13,rdx 1120 adc r13,0 1121 1122 mul r14 1123 add r10,rax 1124 mov rax,rbx 1125 adc rdx,0 1126 add r10,r12 1127 mov r11,rdx 1128 adc r11,0 1129 mov QWORD PTR[((-8))+rbp*1+rdi],r10 1130 1131 lea rcx,QWORD PTR[rbp] 1132 jmp $L$sqr4x_inner 1133 1134 ALIGN 32 1135 $L$sqr4x_inner:: 1136 mov rbx,QWORD PTR[rcx*1+rsi] 1137 mul r15 1138 add r13,rax 1139 mov rax,rbx 1140 mov r12,rdx 1141 adc r12,0 1142 add r13,QWORD PTR[rcx*1+rdi] 1143 adc r12,0 1144 1145 DB 067h 1146 mul r14 1147 add r11,rax 1148 mov rax,rbx 1149 mov rbx,QWORD PTR[8+rcx*1+rsi] 1150 mov r10,rdx 1151 adc r10,0 1152 add r11,r13 1153 adc r10,0 1154 1155 mul r15 1156 add r12,rax 1157 mov QWORD PTR[rcx*1+rdi],r11 1158 mov rax,rbx 1159 mov r13,rdx 1160 adc r13,0 1161 add r12,QWORD PTR[8+rcx*1+rdi] 1162 lea rcx,QWORD PTR[16+rcx] 1163 adc r13,0 1164 1165 mul r14 1166 add r10,rax 1167 mov rax,rbx 1168 adc rdx,0 1169 add r10,r12 1170 mov r11,rdx 1171 adc r11,0 1172 mov QWORD PTR[((-8))+rcx*1+rdi],r10 1173 1174 cmp rcx,0 1175 jne $L$sqr4x_inner 1176 1177 DB 067h 1178 mul r15 1179 add r13,rax 1180 adc rdx,0 1181 add r13,r11 1182 adc rdx,0 1183 1184 mov QWORD PTR[rdi],r13 1185 mov r12,rdx 1186 mov QWORD PTR[8+rdi],rdx 1187 1188 add rbp,16 1189 jnz $L$sqr4x_outer 1190 1191 1192 mov r14,QWORD PTR[((-32))+rsi] 1193 lea rdi,QWORD PTR[((48+8))+r9*2+rsp] 1194 mov rax,QWORD PTR[((-24))+rsi] 1195 lea rdi,QWORD PTR[((-32))+rbp*1+rdi] 1196 mov rbx,QWORD PTR[((-16))+rsi] 1197 mov r15,rax 1198 1199 mul r14 1200 add r10,rax 1201 mov rax,rbx 1202 mov r11,rdx 1203 adc r11,0 1204 1205 mul r14 1206 add r11,rax 1207 mov rax,rbx 1208 mov QWORD PTR[((-24))+rdi],r10 1209 mov r10,rdx 1210 adc r10,0 1211 add r11,r13 1212 mov rbx,QWORD PTR[((-8))+rsi] 1213 adc r10,0 1214 1215 mul r15 1216 add r12,rax 1217 mov rax,rbx 1218 mov QWORD PTR[((-16))+rdi],r11 1219 mov r13,rdx 1220 adc r13,0 1221 1222 mul r14 1223 add 
r10,rax 1224 mov rax,rbx 1225 mov r11,rdx 1226 adc r11,0 1227 add r10,r12 1228 adc r11,0 1229 mov QWORD PTR[((-8))+rdi],r10 1230 1231 mul r15 1232 add r13,rax 1233 mov rax,QWORD PTR[((-16))+rsi] 1234 adc rdx,0 1235 add r13,r11 1236 adc rdx,0 1237 1238 mov QWORD PTR[rdi],r13 1239 mov r12,rdx 1240 mov QWORD PTR[8+rdi],rdx 1241 1242 mul rbx 1243 add rbp,16 1244 xor r14,r14 1245 sub rbp,r9 1246 xor r15,r15 1247 1248 add rax,r12 1249 adc rdx,0 1250 mov QWORD PTR[8+rdi],rax 1251 mov QWORD PTR[16+rdi],rdx 1252 mov QWORD PTR[24+rdi],r15 1253 1254 mov rax,QWORD PTR[((-16))+rbp*1+rsi] 1255 lea rdi,QWORD PTR[((48+8))+rsp] 1256 xor r10,r10 1257 mov r11,QWORD PTR[8+rdi] 1258 1259 lea r12,QWORD PTR[r10*2+r14] 1260 shr r10,63 1261 lea r13,QWORD PTR[r11*2+rcx] 1262 shr r11,63 1263 or r13,r10 1264 mov r10,QWORD PTR[16+rdi] 1265 mov r14,r11 1266 mul rax 1267 neg r15 1268 mov r11,QWORD PTR[24+rdi] 1269 adc r12,rax 1270 mov rax,QWORD PTR[((-8))+rbp*1+rsi] 1271 mov QWORD PTR[rdi],r12 1272 adc r13,rdx 1273 1274 lea rbx,QWORD PTR[r10*2+r14] 1275 mov QWORD PTR[8+rdi],r13 1276 sbb r15,r15 1277 shr r10,63 1278 lea r8,QWORD PTR[r11*2+rcx] 1279 shr r11,63 1280 or r8,r10 1281 mov r10,QWORD PTR[32+rdi] 1282 mov r14,r11 1283 mul rax 1284 neg r15 1285 mov r11,QWORD PTR[40+rdi] 1286 adc rbx,rax 1287 mov rax,QWORD PTR[rbp*1+rsi] 1288 mov QWORD PTR[16+rdi],rbx 1289 adc r8,rdx 1290 lea rbp,QWORD PTR[16+rbp] 1291 mov QWORD PTR[24+rdi],r8 1292 sbb r15,r15 1293 lea rdi,QWORD PTR[64+rdi] 1294 jmp $L$sqr4x_shift_n_add 1295 1296 ALIGN 32 1297 $L$sqr4x_shift_n_add:: 1298 lea r12,QWORD PTR[r10*2+r14] 1299 shr r10,63 1300 lea r13,QWORD PTR[r11*2+rcx] 1301 shr r11,63 1302 or r13,r10 1303 mov r10,QWORD PTR[((-16))+rdi] 1304 mov r14,r11 1305 mul rax 1306 neg r15 1307 mov r11,QWORD PTR[((-8))+rdi] 1308 adc r12,rax 1309 mov rax,QWORD PTR[((-8))+rbp*1+rsi] 1310 mov QWORD PTR[((-32))+rdi],r12 1311 adc r13,rdx 1312 1313 lea rbx,QWORD PTR[r10*2+r14] 1314 mov QWORD PTR[((-24))+rdi],r13 1315 sbb r15,r15 1316 shr r10,63 
1317 lea r8,QWORD PTR[r11*2+rcx] 1318 shr r11,63 1319 or r8,r10 1320 mov r10,QWORD PTR[rdi] 1321 mov r14,r11 1322 mul rax 1323 neg r15 1324 mov r11,QWORD PTR[8+rdi] 1325 adc rbx,rax 1326 mov rax,QWORD PTR[rbp*1+rsi] 1327 mov QWORD PTR[((-16))+rdi],rbx 1328 adc r8,rdx 1329 1330 lea r12,QWORD PTR[r10*2+r14] 1331 mov QWORD PTR[((-8))+rdi],r8 1332 sbb r15,r15 1333 shr r10,63 1334 lea r13,QWORD PTR[r11*2+rcx] 1335 shr r11,63 1336 or r13,r10 1337 mov r10,QWORD PTR[16+rdi] 1338 mov r14,r11 1339 mul rax 1340 neg r15 1341 mov r11,QWORD PTR[24+rdi] 1342 adc r12,rax 1343 mov rax,QWORD PTR[8+rbp*1+rsi] 1344 mov QWORD PTR[rdi],r12 1345 adc r13,rdx 1346 1347 lea rbx,QWORD PTR[r10*2+r14] 1348 mov QWORD PTR[8+rdi],r13 1349 sbb r15,r15 1350 shr r10,63 1351 lea r8,QWORD PTR[r11*2+rcx] 1352 shr r11,63 1353 or r8,r10 1354 mov r10,QWORD PTR[32+rdi] 1355 mov r14,r11 1356 mul rax 1357 neg r15 1358 mov r11,QWORD PTR[40+rdi] 1359 adc rbx,rax 1360 mov rax,QWORD PTR[16+rbp*1+rsi] 1361 mov QWORD PTR[16+rdi],rbx 1362 adc r8,rdx 1363 mov QWORD PTR[24+rdi],r8 1364 sbb r15,r15 1365 lea rdi,QWORD PTR[64+rdi] 1366 add rbp,32 1367 jnz $L$sqr4x_shift_n_add 1368 1369 lea r12,QWORD PTR[r10*2+r14] 1370 DB 067h 1371 shr r10,63 1372 lea r13,QWORD PTR[r11*2+rcx] 1373 shr r11,63 1374 or r13,r10 1375 mov r10,QWORD PTR[((-16))+rdi] 1376 mov r14,r11 1377 mul rax 1378 neg r15 1379 mov r11,QWORD PTR[((-8))+rdi] 1380 adc r12,rax 1381 mov rax,QWORD PTR[((-8))+rsi] 1382 mov QWORD PTR[((-32))+rdi],r12 1383 adc r13,rdx 1384 1385 lea rbx,QWORD PTR[r10*2+r14] 1386 mov QWORD PTR[((-24))+rdi],r13 1387 sbb r15,r15 1388 shr r10,63 1389 lea r8,QWORD PTR[r11*2+rcx] 1390 shr r11,63 1391 or r8,r10 1392 mul rax 1393 neg r15 1394 adc rbx,rax 1395 adc r8,rdx 1396 mov QWORD PTR[((-16))+rdi],rbx 1397 mov QWORD PTR[((-8))+rdi],r8 1398 DB 102,72,15,126,213 1399 sqr8x_reduction:: 1400 xor rax,rax 1401 lea rcx,QWORD PTR[r9*2+rbp] 1402 lea rdx,QWORD PTR[((48+8))+r9*2+rsp] 1403 mov QWORD PTR[((0+8))+rsp],rcx 1404 lea rdi,QWORD 
PTR[((48+8))+r9*1+rsp] 1405 mov QWORD PTR[((8+8))+rsp],rdx 1406 neg r9 1407 jmp $L$8x_reduction_loop 1408 1409 ALIGN 32 1410 $L$8x_reduction_loop:: 1411 lea rdi,QWORD PTR[r9*1+rdi] 1412 DB 066h 1413 mov rbx,QWORD PTR[rdi] 1414 mov r9,QWORD PTR[8+rdi] 1415 mov r10,QWORD PTR[16+rdi] 1416 mov r11,QWORD PTR[24+rdi] 1417 mov r12,QWORD PTR[32+rdi] 1418 mov r13,QWORD PTR[40+rdi] 1419 mov r14,QWORD PTR[48+rdi] 1420 mov r15,QWORD PTR[56+rdi] 1421 mov QWORD PTR[rdx],rax 1422 lea rdi,QWORD PTR[64+rdi] 1423 1424 DB 067h 1425 mov r8,rbx 1426 imul rbx,QWORD PTR[((32+8))+rsp] 1427 mov rax,QWORD PTR[rbp] 1428 mov ecx,8 1429 jmp $L$8x_reduce 1430 1431 ALIGN 32 1432 $L$8x_reduce:: 1433 mul rbx 1434 mov rax,QWORD PTR[16+rbp] 1435 neg r8 1436 mov r8,rdx 1437 adc r8,0 1438 1439 mul rbx 1440 add r9,rax 1441 mov rax,QWORD PTR[32+rbp] 1442 adc rdx,0 1443 add r8,r9 1444 mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx 1445 mov r9,rdx 1446 adc r9,0 1447 1448 mul rbx 1449 add r10,rax 1450 mov rax,QWORD PTR[48+rbp] 1451 adc rdx,0 1452 add r9,r10 1453 mov rsi,QWORD PTR[((32+8))+rsp] 1454 mov r10,rdx 1455 adc r10,0 1456 1457 mul rbx 1458 add r11,rax 1459 mov rax,QWORD PTR[64+rbp] 1460 adc rdx,0 1461 imul rsi,r8 1462 add r10,r11 1463 mov r11,rdx 1464 adc r11,0 1465 1466 mul rbx 1467 add r12,rax 1468 mov rax,QWORD PTR[80+rbp] 1469 adc rdx,0 1470 add r11,r12 1471 mov r12,rdx 1472 adc r12,0 1473 1474 mul rbx 1475 add r13,rax 1476 mov rax,QWORD PTR[96+rbp] 1477 adc rdx,0 1478 add r12,r13 1479 mov r13,rdx 1480 adc r13,0 1481 1482 mul rbx 1483 add r14,rax 1484 mov rax,QWORD PTR[112+rbp] 1485 adc rdx,0 1486 add r13,r14 1487 mov r14,rdx 1488 adc r14,0 1489 1490 mul rbx 1491 mov rbx,rsi 1492 add r15,rax 1493 mov rax,QWORD PTR[rbp] 1494 adc rdx,0 1495 add r14,r15 1496 mov r15,rdx 1497 adc r15,0 1498 1499 dec ecx 1500 jnz $L$8x_reduce 1501 1502 lea rbp,QWORD PTR[128+rbp] 1503 xor rax,rax 1504 mov rdx,QWORD PTR[((8+8))+rsp] 1505 cmp rbp,QWORD PTR[((0+8))+rsp] 1506 jae $L$8x_no_tail 1507 1508 DB 066h 1509 add 
r8,QWORD PTR[rdi] 1510 adc r9,QWORD PTR[8+rdi] 1511 adc r10,QWORD PTR[16+rdi] 1512 adc r11,QWORD PTR[24+rdi] 1513 adc r12,QWORD PTR[32+rdi] 1514 adc r13,QWORD PTR[40+rdi] 1515 adc r14,QWORD PTR[48+rdi] 1516 adc r15,QWORD PTR[56+rdi] 1517 sbb rsi,rsi 1518 1519 mov rbx,QWORD PTR[((48+56+8))+rsp] 1520 mov ecx,8 1521 mov rax,QWORD PTR[rbp] 1522 jmp $L$8x_tail 1523 1524 ALIGN 32 1525 $L$8x_tail:: 1526 mul rbx 1527 add r8,rax 1528 mov rax,QWORD PTR[16+rbp] 1529 mov QWORD PTR[rdi],r8 1530 mov r8,rdx 1531 adc r8,0 1532 1533 mul rbx 1534 add r9,rax 1535 mov rax,QWORD PTR[32+rbp] 1536 adc rdx,0 1537 add r8,r9 1538 lea rdi,QWORD PTR[8+rdi] 1539 mov r9,rdx 1540 adc r9,0 1541 1542 mul rbx 1543 add r10,rax 1544 mov rax,QWORD PTR[48+rbp] 1545 adc rdx,0 1546 add r9,r10 1547 mov r10,rdx 1548 adc r10,0 1549 1550 mul rbx 1551 add r11,rax 1552 mov rax,QWORD PTR[64+rbp] 1553 adc rdx,0 1554 add r10,r11 1555 mov r11,rdx 1556 adc r11,0 1557 1558 mul rbx 1559 add r12,rax 1560 mov rax,QWORD PTR[80+rbp] 1561 adc rdx,0 1562 add r11,r12 1563 mov r12,rdx 1564 adc r12,0 1565 1566 mul rbx 1567 add r13,rax 1568 mov rax,QWORD PTR[96+rbp] 1569 adc rdx,0 1570 add r12,r13 1571 mov r13,rdx 1572 adc r13,0 1573 1574 mul rbx 1575 add r14,rax 1576 mov rax,QWORD PTR[112+rbp] 1577 adc rdx,0 1578 add r13,r14 1579 mov r14,rdx 1580 adc r14,0 1581 1582 mul rbx 1583 mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] 1584 add r15,rax 1585 adc rdx,0 1586 add r14,r15 1587 mov rax,QWORD PTR[rbp] 1588 mov r15,rdx 1589 adc r15,0 1590 1591 dec ecx 1592 jnz $L$8x_tail 1593 1594 lea rbp,QWORD PTR[128+rbp] 1595 mov rdx,QWORD PTR[((8+8))+rsp] 1596 cmp rbp,QWORD PTR[((0+8))+rsp] 1597 jae $L$8x_tail_done 1598 1599 mov rbx,QWORD PTR[((48+56+8))+rsp] 1600 neg rsi 1601 mov rax,QWORD PTR[rbp] 1602 adc r8,QWORD PTR[rdi] 1603 adc r9,QWORD PTR[8+rdi] 1604 adc r10,QWORD PTR[16+rdi] 1605 adc r11,QWORD PTR[24+rdi] 1606 adc r12,QWORD PTR[32+rdi] 1607 adc r13,QWORD PTR[40+rdi] 1608 adc r14,QWORD PTR[48+rdi] 1609 adc r15,QWORD PTR[56+rdi] 1610 sbb 
rsi,rsi 1611 1612 mov ecx,8 1613 jmp $L$8x_tail 1614 1615 ALIGN 32 1616 $L$8x_tail_done:: 1617 add r8,QWORD PTR[rdx] 1618 xor rax,rax 1619 1620 neg rsi 1621 $L$8x_no_tail:: 1622 adc r8,QWORD PTR[rdi] 1623 adc r9,QWORD PTR[8+rdi] 1624 adc r10,QWORD PTR[16+rdi] 1625 adc r11,QWORD PTR[24+rdi] 1626 adc r12,QWORD PTR[32+rdi] 1627 adc r13,QWORD PTR[40+rdi] 1628 adc r14,QWORD PTR[48+rdi] 1629 adc r15,QWORD PTR[56+rdi] 1630 adc rax,0 1631 mov rcx,QWORD PTR[((-16))+rbp] 1632 xor rsi,rsi 1633 1634 DB 102,72,15,126,213 1635 1636 mov QWORD PTR[rdi],r8 1637 mov QWORD PTR[8+rdi],r9 1638 DB 102,73,15,126,217 1639 mov QWORD PTR[16+rdi],r10 1640 mov QWORD PTR[24+rdi],r11 1641 mov QWORD PTR[32+rdi],r12 1642 mov QWORD PTR[40+rdi],r13 1643 mov QWORD PTR[48+rdi],r14 1644 mov QWORD PTR[56+rdi],r15 1645 lea rdi,QWORD PTR[64+rdi] 1646 1647 cmp rdi,rdx 1648 jb $L$8x_reduction_loop 1649 1650 sub rcx,r15 1651 lea rbx,QWORD PTR[r9*1+rdi] 1652 adc rsi,rsi 1653 mov rcx,r9 1654 or rax,rsi 1655 DB 102,72,15,126,207 1656 xor rax,1 1657 DB 102,72,15,126,206 1658 lea rbp,QWORD PTR[rax*8+rbp] 1659 sar rcx,3+2 1660 jmp $L$sqr4x_sub 1661 1662 ALIGN 32 1663 $L$sqr4x_sub:: 1664 DB 066h 1665 mov r12,QWORD PTR[rbx] 1666 mov r13,QWORD PTR[8+rbx] 1667 sbb r12,QWORD PTR[rbp] 1668 mov r14,QWORD PTR[16+rbx] 1669 sbb r13,QWORD PTR[16+rbp] 1670 mov r15,QWORD PTR[24+rbx] 1671 lea rbx,QWORD PTR[32+rbx] 1672 sbb r14,QWORD PTR[32+rbp] 1673 mov QWORD PTR[rdi],r12 1674 sbb r15,QWORD PTR[48+rbp] 1675 lea rbp,QWORD PTR[64+rbp] 1676 mov QWORD PTR[8+rdi],r13 1677 mov QWORD PTR[16+rdi],r14 1678 mov QWORD PTR[24+rdi],r15 1679 lea rdi,QWORD PTR[32+rdi] 1680 1681 inc rcx 1682 jnz $L$sqr4x_sub 1683 mov r10,r9 1684 neg r9 1685 DB 0F3h,0C3h ;repret 1686 bn_sqr8x_internal ENDP 1687 PUBLIC bn_from_montgomery 1688 1689 ALIGN 32 1690 bn_from_montgomery PROC PUBLIC 1691 test DWORD PTR[48+rsp],7 1692 jz bn_from_mont8x 1693 xor eax,eax 1694 DB 0F3h,0C3h ;repret 1695 bn_from_montgomery ENDP 1696 1697 1698 ALIGN 32 1699 bn_from_mont8x 
PROC PRIVATE                            ; completes the "bn_from_mont8x" header whose name ends the previous source line
;-----------------------------------------------------------------------
; bn_from_mont8x (PRIVATE) - reached by the tail-jump in bn_from_montgomery.
; ABI: Microsoft x64 on entry; arguments are moved into rdi, rsi, rdx, rcx,
;      r8, r9 (args 1..6) before the body runs.
; Visible use of the arguments:
;   rsi (arg2)  source; arg6*8 bytes are read from it in 64-byte steps
;   r8  (arg5)  pointer; the qword it points to is parked at [32+rsp]
;   rdi (arg1), rcx (arg4) and -(arg6*8) are passed to sqr8x_reduction in
;               xmm1, xmm2 and xmm3 (rcx is also copied to rbp)
;   rdx (arg3)  not referenced by the code of this procedure
; Returns rax = 1.
; Frame after setup: [32+rsp] = qword loaded from [r8], [40+rsp] = rsp as it
; was at entry, [48+rsp].. = work area of 2*arg6*8 bytes.
;-----------------------------------------------------------------------
        mov     QWORD PTR[8+rsp],rdi    ;WIN64 prologue
        mov     QWORD PTR[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_bn_from_mont8x::
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD PTR[40+rsp]    ; fifth argument (stack slot)
        mov     r9,QWORD PTR[48+rsp]    ; sixth argument (stack slot)


DB      067h                            ; address-size prefix byte emitted in front of the next mov
        mov     rax,rsp                 ; rax = entry rsp; the saved registers sit just below it
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        lea     rsp,QWORD PTR[((-40))+rsp]
        movaps  XMMWORD PTR[rsp],xmm6   ; lands at entry rsp - 88
        movaps  XMMWORD PTR[16+rsp],xmm7        ; lands at entry rsp - 72
DB      067h                            ; address-size prefix byte emitted in front of the next mov
        mov     r10d,r9d
        shl     r9d,3                   ; r9  = arg6 * 8  (byte length, 32-bit arithmetic)
        shl     r10d,3+2                ; r10 = arg6 * 32
        neg     r9                      ; r9  = -(arg6 * 8)
        mov     r8,QWORD PTR[r8]        ; replace the pointer by the qword it points to







        ; r11 = (candidate frame address - rsi) mod 4096.  Depending on how it
        ; compares with r10 the frame is pushed down by a different amount.
        ; NOTE(review): presumably keeps the frame from aliasing the input at
        ; rsi modulo 4 KiB - confirm against the generator script.
        lea     r11,QWORD PTR[((-64))+r9*2+rsp]
        sub     r11,rsi
        and     r11,4095
        cmp     r10,r11
        jb      $L$from_sp_alt
        sub     rsp,r11
        lea     rsp,QWORD PTR[((-64))+r9*2+rsp]
        jmp     $L$from_sp_done

ALIGN   32
$L$from_sp_alt::
        lea     r10,QWORD PTR[((4096-64))+r9*2]
        lea     rsp,QWORD PTR[((-64))+r9*2+rsp]
        sub     r11,r10
        mov     r10,0                   ; mov (not xor) so the CF from the sub survives for cmovc
        cmovc   r11,r10                 ; clamp to 0 when the subtraction borrowed
        sub     rsp,r11
$L$from_sp_done::
        and     rsp,-64                 ; 64-byte align the frame
        mov     r10,r9                  ; r10 = -(arg6 * 8)
        neg     r9                      ; r9  = +(arg6 * 8)










        mov     QWORD PTR[32+rsp],r8
        mov     QWORD PTR[40+rsp],rax   ; entry rsp, reloaded in the epilogue
$L$from_body::
        mov     r11,r9                  ; r11 = bytes left to copy
        lea     rax,QWORD PTR[48+rsp]   ; rax = start of the work area
        pxor    xmm0,xmm0
        jmp     $L$mul_by_1

ALIGN   32
$L$mul_by_1::
        ; Per iteration: copy 64 bytes from [rsi] to [rax] and store 64 zero
        ; bytes at [rax+r9], i.e. in the second half of the work area.
        movdqu  xmm1,XMMWORD PTR[rsi]
        movdqu  xmm2,XMMWORD PTR[16+rsi]
        movdqu  xmm3,XMMWORD PTR[32+rsi]
        movdqa  XMMWORD PTR[r9*1+rax],xmm0
        movdqu  xmm4,XMMWORD PTR[48+rsi]
        movdqa  XMMWORD PTR[16+r9*1+rax],xmm0
DB      048h,08dh,0b6h,040h,000h,000h,000h      ; bytes encode: lea rsi,[rsi+64] with a 32-bit displacement
        movdqa  XMMWORD PTR[rax],xmm1
        movdqa  XMMWORD PTR[32+r9*1+rax],xmm0
        movdqa  XMMWORD PTR[16+rax],xmm2
        movdqa  XMMWORD PTR[48+r9*1+rax],xmm0
        movdqa  XMMWORD PTR[32+rax],xmm3
        movdqa  XMMWORD PTR[48+rax],xmm4
        lea     rax,QWORD PTR[64+rax]
        sub     r11,64
        jnz     $L$mul_by_1

DB      102,72,15,110,207               ; bytes encode: movq xmm1,rdi
DB      102,72,15,110,209               ; bytes encode: movq xmm2,rcx
DB      067h                            ; address-size prefix byte emitted in front of the next mov
        mov     rbp,rcx
DB      102,73,15,110,218               ; bytes encode: movq xmm3,r10
        call    sqr8x_reduction         ; defined elsewhere in this file

        pxor    xmm0,xmm0
        lea     rax,QWORD PTR[48+rsp]
        mov     rsi,QWORD PTR[40+rsp]   ; rsi = entry rsp saved before $L$from_body
        jmp     $L$from_mont_zero

ALIGN   32
$L$from_mont_zero::
        ; Zero the work area: 64 bytes per iteration while r9 drops by 32,
        ; so 2*r9 bytes are cleared starting at [48+rsp] (assuming r9 still
        ; holds arg6*8 after the call - confirm in sqr8x_reduction).
        movdqa  XMMWORD PTR[rax],xmm0
        movdqa  XMMWORD PTR[16+rax],xmm0
        movdqa  XMMWORD PTR[32+rax],xmm0
        movdqa  XMMWORD PTR[48+rax],xmm0
        lea     rax,QWORD PTR[64+rax]
        sub     r9,32
        jnz     $L$from_mont_zero

        mov     rax,1                   ; return value
        ; reload the six registers pushed in the prologue (they sit below entry rsp)
        mov     r15,QWORD PTR[((-48))+rsi]
        mov     r14,QWORD PTR[((-40))+rsi]
        mov     r13,QWORD PTR[((-32))+rsi]
        mov     r12,QWORD PTR[((-24))+rsi]
        mov     rbp,QWORD PTR[((-16))+rsi]
        mov     rbx,QWORD PTR[((-8))+rsi]
        lea     rsp,QWORD PTR[rsi]
$L$from_epilogue::
        mov     rdi,QWORD PTR[8+rsp]    ;WIN64 epilogue
        mov     rsi,QWORD PTR[16+rsp]
DB      0F3h,0C3h               ;repret
$L$SEH_end_bn_from_mont8x::
bn_from_mont8x  ENDP
PUBLIC  bn_scatter5

;-----------------------------------------------------------------------
; bn_scatter5 (PUBLIC, leaf, no stack use)
; In:    rcx = source of qwords, edx = number of qwords,
;        r8  = table base, r9 = slot index
; Does:  for i in 0..edx-1: qword[r8 + r9*8 + i*256] = qword[rcx + i*8]
;        Returns immediately when edx == 0.
; Clobb: rax, rcx, edx, r8, flags
;-----------------------------------------------------------------------
ALIGN   16
bn_scatter5     PROC PUBLIC
        cmp     edx,0
        jz      $L$scatter_epilogue     ; nothing to store
        lea     r8,QWORD PTR[r9*8+r8]   ; r8 = address of the first destination qword
$L$scatter::
        mov     rax,QWORD PTR[rcx]
        lea     rcx,QWORD PTR[8+rcx]    ; next source qword
        mov     QWORD PTR[r8],rax
        lea     r8,QWORD PTR[256+r8]    ; destinations are 256 bytes apart
        sub     edx,1
        jnz     $L$scatter
$L$scatter_epilogue::
DB      0F3h,0C3h               ;repret
bn_scatter5     ENDP

PUBLIC  bn_gather5

;-----------------------------------------------------------------------
; bn_gather5 (PUBLIC)
; In:    rcx = destination of qwords, edx = number of qwords,
;        r8  = table base, r9d = slot index
; Does:  per output qword, loads four candidates 64 bytes apart, masks them
;        with xmm4..xmm7 and ORs the results.  Exactly one of the four masks
;        is all-ones (see $L$magic_masks), so the stored qword is the one at
;        table + index*8 + i*256 for index values 0..31.
; Uses xmm6/xmm7 (callee-saved on Win64), hence the save/restore and the
; unwind information for this procedure.
; NOTE(review): there is no edx == 0 check on this path, whereas bn_scatter5
; above returns early on a zero count - confirm callers never pass 0.
;-----------------------------------------------------------------------
ALIGN   16
bn_gather5      PROC PUBLIC
$L$SEH_begin_bn_gather5::

; The prologue is emitted as raw bytes, which fixes its instruction lengths
; at 4, 4 and 5 bytes.
DB      048h,083h,0ech,028h             ; bytes encode: sub rsp,0x28
DB      00fh,029h,034h,024h             ; bytes encode: movaps [rsp],xmm6
DB      00fh,029h,07ch,024h,010h        ; bytes encode: movaps [rsp+0x10],xmm7
        mov     r11d,r9d
        shr     r9d,3
        and     r11,7                   ; r11 = index & 7
        not     r9d
        lea     rax,QWORD PTR[$L$magic_masks]
        and     r9d,3                   ; r9 = 3 - ((index >> 3) & 3)
        lea     r8,QWORD PTR[128+r11*8+r8]      ; bias by 128 so the four candidates sit at -128, -64, 0, +64
        movq    xmm4,QWORD PTR[r9*8+rax]        ; four consecutive mask qwords starting at index r9
        movq    xmm5,QWORD PTR[8+r9*8+rax]
        movq    xmm6,QWORD PTR[16+r9*8+rax]
        movq    xmm7,QWORD PTR[24+r9*8+rax]
        jmp     $L$gather
ALIGN   16
$L$gather::
        movq    xmm0,QWORD PTR[(((-128)))+r8]
        movq    xmm1,QWORD PTR[((-64))+r8]
        pand    xmm0,xmm4
        movq    xmm2,QWORD PTR[r8]
        pand    xmm1,xmm5
        movq    xmm3,QWORD PTR[64+r8]
; ---- bn_gather5: remainder of the $L$gather loop body ----
        pand    xmm2,xmm6
        por     xmm0,xmm1
        pand    xmm3,xmm7
DB      067h,067h                       ; two address-size prefix bytes emitted in front of the next instruction
        por     xmm0,xmm2
        lea     r8,QWORD PTR[256+r8]    ; next group of candidates is 256 bytes further on
        por     xmm0,xmm3               ; xmm0 = OR of the four masked candidates

        movq    QWORD PTR[rcx],xmm0     ; store the selected qword
        lea     rcx,QWORD PTR[8+rcx]
        sub     edx,1                   ; the count is decremented before it is tested
        jnz     $L$gather
        movaps  xmm6,XMMWORD PTR[rsp]   ; reload xmm6/xmm7 from the 40-byte frame
        movaps  xmm7,XMMWORD PTR[16+rsp]
        lea     rsp,QWORD PTR[40+rsp]   ; release the frame
DB      0F3h,0C3h               ;repret
$L$SEH_end_bn_gather5::
bn_gather5      ENDP
;-----------------------------------------------------------------------
; $L$magic_masks: 16 dwords = 8 qwords.  Only qword 3 (dwords 6 and 7) is
; all-ones; every other qword is zero.  The gather code in this file reads
; four consecutive qwords starting at an index in 0..3, so exactly one of
; the four values it reads is the all-ones mask.
;-----------------------------------------------------------------------
ALIGN   64
$L$magic_masks::
        DD      0,0,0,0,0,0,-1,-1
        DD      0,0,0,0,0,0,0,0
; NUL-terminated ASCII: "Montgomery Multiplication with scatter/gather for
; x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB      77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB      112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
DB      99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
DB      114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
DB      71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
DB      112,101,110,115,115,108,46,111,114,103,62,0
EXTERN  __imp_RtlVirtualUnwind:NEAR

;-----------------------------------------------------------------------
; mul_handler (PRIVATE) - handler named by the .xdata records of this file.
; In (as used below): r8 = pointer to a register-context record,
;                     r9 = pointer to a dispatcher record.
; NOTE(review): the numeric offsets presumably follow the Windows x64
; CONTEXT and DISPATCHER_CONTEXT layouts (e.g. 120 = Rax, 152 = Rsp,
; 192 = R9, 248 = Rip; 8 = ImageBase, 56 = HandlerData) - verify against
; winnt.h.
; Flow: compare the address at [248+r8] against the two image-relative
; labels stored in the handler data to decide whether a frame was already
; set up; if so, locate the frame base and copy the saved registers into
; the context record.
; Stack: 9 pushes + 64 bytes keep rsp 16-byte aligned for the call made
; later in this procedure and provide its shadow/argument slots.
;-----------------------------------------------------------------------
ALIGN   16
mul_handler     PROC PRIVATE
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD PTR[120+r8]   ; default frame value (used when still before the body label)
        mov     rbx,QWORD PTR[248+r8]   ; address being classified

        mov     rsi,QWORD PTR[8+r9]     ; base added to the image-relative labels
        mov     r11,QWORD PTR[56+r9]    ; handler data: two DWORD offsets

        mov     r10d,DWORD PTR[r11]     ; first offset (body label in the .xdata records)
        lea     r10,QWORD PTR[r10*1+rsi]
        cmp     rbx,r10
        jb      $L$common_seh_tail      ; address < body label: frame not set up yet

        mov     rax,QWORD PTR[152+r8]

        mov     r10d,DWORD PTR[4+r11]   ; second offset (epilogue label in the .xdata records)
        lea     r10,QWORD PTR[r10*1+rsi]
        cmp     rbx,r10
        jae     $L$common_seh_tail      ; address >= epilogue label: frame already torn down

        ; Two frame layouts: addresses below $L$mul_epilogue keep the saved
        ; frame base at [8 + n*8 + rsp] with n taken from [192+r8]; all other
        ; covered procedures keep it at [40+rsp].
        lea     r10,QWORD PTR[$L$mul_epilogue]
        cmp     rbx,r10
        jb      $L$body_40

        mov     r10,QWORD PTR[192+r8]
        mov     rax,QWORD PTR[8+r10*8+rax]
        jmp     $L$body_proceed

$L$body_40::
        mov     rax,QWORD PTR[40+rax]
$L$body_proceed::

        ; rax = frame base; the saved xmm and integer registers sit below it
        movaps  xmm0,XMMWORD PTR[((-88))+rax]
        movaps  xmm1,XMMWORD PTR[((-72))+rax]

        mov     rbx,QWORD PTR[((-8))+rax]
        mov     rbp,QWORD PTR[((-16))+rax]
        mov     r12,QWORD PTR[((-24))+rax]
        mov     r13,QWORD PTR[((-32))+rax]
; ---- mul_handler, continued: finish reloading the saved registers from the
; ---- frame at rax and publish them into the context record at r8.
; NOTE(review): offsets into r8 presumably follow the Windows x64 CONTEXT
; layout (144 = Rbx, 160 = Rbp, 216..240 = R12..R15, 512/528 = Xmm6/Xmm7,
; 152 = Rsp, 168 = Rsi, 176 = Rdi) - verify against winnt.h.
        mov     r14,QWORD PTR[((-40))+rax]
        mov     r15,QWORD PTR[((-48))+rax]
        mov     QWORD PTR[144+r8],rbx
        mov     QWORD PTR[160+r8],rbp
        mov     QWORD PTR[216+r8],r12
        mov     QWORD PTR[224+r8],r13
        mov     QWORD PTR[232+r8],r14
        mov     QWORD PTR[240+r8],r15
        movups  XMMWORD PTR[512+r8],xmm0
        movups  XMMWORD PTR[528+r8],xmm1

$L$common_seh_tail::
        ; [8+rax] and [16+rax] are the slots the ";WIN64 prologue" lines of
        ; the covered procedures use to park rdi and rsi.
        mov     rdi,QWORD PTR[8+rax]
        mov     rsi,QWORD PTR[16+rax]
        mov     QWORD PTR[152+r8],rax
        mov     QWORD PTR[168+r8],rsi
        mov     QWORD PTR[176+r8],rdi

        ; copy 154 qwords from the record at r8 to the buffer at [40+r9]
        mov     rdi,QWORD PTR[40+r9]
        mov     rsi,r8
        mov     ecx,154
        DD      0a548f3fch              ; bytes FC F3 48 A5 encode: cld ; rep movsq

        ; Call RtlVirtualUnwind: four register arguments plus four stack
        ; arguments placed above the 32-byte shadow area.
        mov     rsi,r9
        xor     rcx,rcx                 ; arg1 = 0
        mov     rdx,QWORD PTR[8+rsi]    ; arg2 = [8+r9]
        mov     r8,QWORD PTR[rsi]       ; arg3 = [r9]
        mov     r9,QWORD PTR[16+rsi]    ; arg4 = [16+r9]
        mov     r10,QWORD PTR[40+rsi]
        lea     r11,QWORD PTR[56+rsi]
        lea     r12,QWORD PTR[24+rsi]
        mov     QWORD PTR[32+rsp],r10   ; arg5 = [40+r9] (the buffer filled above)
        mov     QWORD PTR[40+rsp],r11   ; arg6 = address of the field at 56
        mov     QWORD PTR[48+rsp],r12   ; arg7 = address of the field at 24
        mov     QWORD PTR[56+rsp],rcx   ; arg8 = 0
        call    QWORD PTR[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; return value (presumably ExceptionContinueSearch - confirm)
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
DB      0F3h,0C3h               ;repret
mul_handler     ENDP

.text$  ENDS
; .pdata: one record of three image-relative addresses (begin label, end
; label, unwind-info label) per covered procedure.
.pdata  SEGMENT READONLY ALIGN(4)
ALIGN   4
        DD      imagerel $L$SEH_begin_bn_mul_mont_gather5
        DD      imagerel $L$SEH_end_bn_mul_mont_gather5
        DD      imagerel $L$SEH_info_bn_mul_mont_gather5

        DD      imagerel $L$SEH_begin_bn_mul4x_mont_gather5
        DD      imagerel $L$SEH_end_bn_mul4x_mont_gather5
        DD      imagerel $L$SEH_info_bn_mul4x_mont_gather5

        DD      imagerel $L$SEH_begin_bn_power5
        DD      imagerel $L$SEH_end_bn_power5
        DD      imagerel $L$SEH_info_bn_power5

        DD      imagerel $L$SEH_begin_bn_from_mont8x
        DD      imagerel $L$SEH_end_bn_from_mont8x
        DD      imagerel $L$SEH_info_bn_from_mont8x
        DD      imagerel $L$SEH_begin_bn_gather5
        DD      imagerel $L$SEH_end_bn_gather5
        DD      imagerel $L$SEH_info_bn_gather5

.pdata  ENDS
; .xdata: the first four records share one shape - a 4-byte header
; (9,0,0,0), the image-relative address of mul_handler, then two
; image-relative labels (body, epilogue).  mul_handler reads those two labels
; as DWORDs and compares them with the address it is classifying.
; NOTE(review): header byte 9 presumably means UNWIND_INFO version 1 with the
; exception-handler flag set - confirm against the x64 unwind data format.
.xdata  SEGMENT READONLY ALIGN(8)
ALIGN   8
$L$SEH_info_bn_mul_mont_gather5::
DB      9,0,0,0
        DD      imagerel mul_handler
        DD      imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN   8
$L$SEH_info_bn_mul4x_mont_gather5::
DB      9,0,0,0
        DD      imagerel mul_handler
        DD      imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN   8
$L$SEH_info_bn_power5::
DB      9,0,0,0
        DD      imagerel mul_handler
        DD      imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN   8
$L$SEH_info_bn_from_mont8x::
DB      9,0,0,0
        DD      imagerel mul_handler
        DD      imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN   8
; bn_gather5 uses plain unwind codes instead of a handler.  The code offsets
; 004h, 008h and 00dh equal the cumulative lengths (4, 8, 13 bytes) of the
; three byte-encoded prologue instructions of bn_gather5.
; NOTE(review): operation decoding below follows the x64 UNWIND_CODE format
; as I read it - confirm against the Microsoft documentation.
$L$SEH_info_bn_gather5::
DB      001h,00dh,005h,000h             ; header: version 1, prologue size 0x0d, 5 code slots
DB      00dh,078h,001h,000h             ; at 0x0d: save xmm7 at [rsp+0x10]
DB      008h,068h,000h,000h             ; at 0x08: save xmm6 at [rsp]
DB      004h,042h,000h,000h             ; at 0x04: allocate 0x28 bytes; trailing pair is padding
ALIGN   8

.xdata  ENDS
END