; ---------------------------------------------------------------------------
; x86-64 big-number multiply / square routines with table gather.
; Syntax: NASM (Intel operand order).  ABI: Microsoft x64 entry points
; (see the ";WIN64 prologue" sequences below).
;
; NOTE(review): the routine names suggest Montgomery multiplication with a
; 5-bit window gather; confirm the exact C prototypes against the callers.
; $L$inc (a constant table read below) is defined outside this view.
; ---------------------------------------------------------------------------
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont_gather5

; ---------------------------------------------------------------------------
; bn_mul_mont_gather5
; Win64 in:  rcx, rdx, r8, r9, [rsp+40], [rsp+48], DWORD [rsp+56]
; After the prologue shuffle the body uses:
;   rdi = arg1 (result vector, written in $L$sub/$L$copy)
;   rsi = arg2 (first operand vector)
;   rdx = arg3 (base of the gather table)
;   rcx = arg4 (vector subtracted in $L$sub and multiplied by rbp)
;   r8  = arg5 (pointer; dereferenced once to a 64-bit constant)
;   r9  = arg6 (word count, 32-bit, zero-extended)
;   DWORD[56+rsp] = arg7 (table index, broadcast into xmm5)
; Out: rax = 1.  rbx, rbp, r12-r15, rdi, rsi are restored before return.
; ---------------------------------------------------------------------------
ALIGN	64
bn_mul_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont_gather5:
	; Move the Win64 argument registers/stack slots into
	; rdi, rsi, rdx, rcx, r8, r9.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	r9d,r9d			; zero-extend the 32-bit word count
	mov	rax,rsp			; rax = entry rsp, saved in the frame later

	; Word counts that are not a multiple of 8 use the generic loop;
	; multiples of 8 continue in bn_mul4x_mont_gather5 at $L$mul4x_enter.
	test	r9d,7
	jnz	NEAR $L$mul_enter
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	movd	xmm5,DWORD[56+rsp]	; xmm5 = arg7 (index), read before rsp moves
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15


	; r10 = (rsp - 280 - 8*r9) rounded down to a 1024-byte boundary:
	; the bottom of the new frame.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-280))+r9*8+rsp]
	neg	r9
	and	r10,-1024

	; Lower rsp to r10 in steps, reading one qword per 4096-byte step,
	; so every page between the old and new rsp is touched in order.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

	lea	r10,[$L$inc]
	mov	QWORD[8+r9*8+rsp],rax	; save entry rsp above the r9-word temp vector

$L$mul_body:

	; Build the compare masks.  xmm0/xmm1 start from the two 16-byte
	; constants at $L$inc, xmm5 holds the index in all four dwords; each
	; paddd/pcmpeqd step yields the next mask, stored at [112+r10]..[352+r10].
	lea	r12,[128+rdx]		; r12 = table base + 128
	movdqa	xmm0,XMMWORD[r10]
	movdqa	xmm1,XMMWORD[16+r10]
	lea	r10,[((24-112))+r9*8+rsp]
	and	r10,-16			; 16-byte align for the movdqa stores

	pshufd	xmm5,xmm5,0		; broadcast the index to all four dwords
	movdqa	xmm4,xmm1
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	DB	0x67			; prefix byte for the next instruction (presumably padding)
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
	DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

	; First gather: AND every 16-byte slot in [r12-128, r12+128) with its
	; mask and OR the results together.  The addresses read are fixed
	; offsets from r12, so they do not depend on the index value.
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e		; swap the 64-bit halves
	por	xmm0,xmm1		; fold both halves into the low qword
	lea	r12,[256+r12]		; advance to the next 256-byte table row
	DB	102,72,15,126,195	; movq rbx,xmm0 (rbx = gathered word)

	mov	r8,QWORD[r8]		; r8 = 64-bit constant pointed to by arg5
	mov	rax,QWORD[rsi]

	xor	r14,r14			; r14 = outer index
	xor	r15,r15			; r15 = inner index

	; First pass: temp[] (at rsp) is produced from [rsi]*rbx and [rcx]*rbp,
	; where rbp = r8 * low word of the first product.
	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st


	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx	; top carry word of temp[]

	lea	r14,[1+r14]
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
	; Gather the next multiplier word: same masked read of a full
	; 256-byte row, using the masks stored during $L$mul_body.
	lea	rdx,[((24+128))+r9*8+rsp]
	and	rdx,-16
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]

	mov	rax,QWORD[rsi]
	DB	102,72,15,126,195	; movq rbx,xmm0

	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]		; temp[0]

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

	add	r13,rax
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r9*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

	; Final step: [rdi] = temp[] - [rcx] with borrow propagation.
	; CF is clear on loop entry (xor r14,r14); mov/lea/jmp leave flags intact.
	xor	r14,r14
	mov	rax,QWORD[rsp]
	lea	rsi,[rsp]
	mov	r15,r9
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:
	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsi]
	lea	r14,[1+r14]
	dec	r15			; dec does not modify CF, so the borrow chain survives
	jnz	NEAR $L$sub

	; rax = top word minus the final borrow.  The and/not/or sequence uses
	; it as a mask to pick rsi = temp[] or rsi = [rdi] without a branch
	; (this relies on rax being 0 or all-ones here).
	sbb	rax,0
	xor	r14,r14
	and	rsi,rax
	not	rax
	mov	rcx,rdi
	and	rcx,rax
	mov	r15,r9
	or	rsi,rcx
ALIGN	16
$L$copy:
	; Copy the selected vector to [rdi] and overwrite each temp[] word
	; with the loop index, so the intermediate values do not remain on
	; the stack.
	mov	rax,QWORD[r14*8+rsi]
	mov	QWORD[r14*8+rsp],r14
	mov	QWORD[r14*8+rdi],rax
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved before $L$mul_body

	mov	rax,1			; return value

	; Restore the registers pushed at $L$mul_enter.
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul_mont_gather5:

; ---------------------------------------------------------------------------
; bn_mul4x_mont_gather5
; Same Win64 argument layout as bn_mul_mont_gather5.  Also entered at
; $L$mul4x_enter from bn_mul_mont_gather5 when the word count is a multiple
; of 8.  Allocates the frame, then calls mul4x_internal to do the work.
; Out: rax = 1.
; ---------------------------------------------------------------------------
ALIGN	32
bn_mul4x_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	DB	0x67
	mov	rax,rsp

$L$mul4x_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$mul4x_prologue:

	DB	0x67
	shl	r9d,3			; r9 = word count * 8 (bytes)
	lea	r10,[r9*2+r9]		; r10 = 3 * bytes
	neg	r9


	; Pick the frame base rbp from the distance, modulo 4096, between the
	; candidate frame and rdi.
	; NOTE(review): presumably this keeps the frame from aliasing the
	; output buffer within a page -- confirm against the generator source.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$mul4xsp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$mul4xsp_done

ALIGN	32
$L$mul4xsp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov (not xor) so CF from the sub survives
	cmovc	r11,r10			; clamp to 0 when the subtraction borrowed
	sub	rbp,r11
$L$mul4xsp_done:
	and	rbp,-64			; 64-byte align the frame
	; Lower rsp to rbp, touching each 4096-byte step on the way down.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

	neg	r9			; r9 = +bytes again

	mov	QWORD[40+rsp],rax	; save entry rsp for the epilogue

$L$mul4x_body:

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp

	mov	rax,1

	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul4x_mont_gather5:


; ---------------------------------------------------------------------------
; mul4x_internal (file-local; called from bn_mul4x_mont_gather5 and bn_power5)
; In:  rax = caller's entry rsp (the index is read from DWORD[56+rax])
;      rdi, rsi, rdx, rcx = result, operand, table base, second vector
;      r8  = pointer to the 64-bit constant, r9 = length in bytes
; Frame offsets are written as (N+8) because of the return address pushed
; by the call.  Does not return directly: it ends with a jump to
; $L$sqr4x_sub_entry.
; ---------------------------------------------------------------------------
ALIGN	32
mul4x_internal:
	shl	r9,5
	movd	xmm5,DWORD[56+rax]	; xmm5 = table index
	lea	rax,[$L$inc]
	lea	r13,[128+r9*1+rdx]	; r13 = end bound for r12, checked after $L$inner4x
	shr	r9,5
	movdqa	xmm0,XMMWORD[rax]
	movdqa	xmm1,XMMWORD[16+rax]
	lea	r10,[((88-112))+r9*1+rsp]
	lea	r12,[128+rdx]

	; Build the compare masks at [112+r10]..[352+r10], as in $L$mul_body.
	pshufd	xmm5,xmm5,0
	movdqa	xmm4,xmm1
	DB	0x67,0x67
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	DB	0x67
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
	DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

	; First gather (masked read of the whole 256-byte row at r12-128).
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e
	por	xmm0,xmm1
	lea	r12,[256+r12]
	DB	102,72,15,126,195	; movq rbx,xmm0

	mov	QWORD[((16+8))+rsp],r13	; save the r12 end bound
	mov	QWORD[((56+8))+rsp],rdi	; save the result pointer (rdi is reused below)

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]
	lea	rsi,[r9*1+rsi]		; rsi = end of operand; indexed with negative r9/r15
	neg	r9

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	lea	r14,[((64+8))+rsp]	; r14 = write cursor into the temp vector
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]		; r15 = negative byte counter, +32 per iteration
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x

ALIGN	32
$L$1st4x:
	; First pass, four words per iteration.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$1st4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	lea	rcx,[r9*1+rcx]		; rewind rcx to the start of its vector

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0			; rdi = top carry
	mov	QWORD[((-8))+r14],r13

	jmp	NEAR $L$outer4x

ALIGN	32
$L$outer4x:
	; Gather the next multiplier word (masks live just above the temp vector).
	lea	rdx,[((16+128))+r14]
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]
	DB	102,72,15,126,195	; movq rbx,xmm0

	mov	r10,QWORD[r9*1+r14]	; first word of the temp vector (r9 is negative)
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx
	mov	QWORD[r14],rdi		; store the top carry from the previous pass

	lea	r14,[r9*1+r14]		; rewind the temp cursor

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	r13,rdx
	jmp	NEAR $L$inner4x

ALIGN	32
$L$inner4x:
	; Accumulating pass, four words per iteration: adds the existing
	; temp words into the running sums before storing them back.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	add	r10,QWORD[r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,rbp			; swap roles: rax = multiplier, rbp = last [rcx] word
	mov	rbp,QWORD[((-8))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mov	QWORD[((-16))+r14],rdi
	lea	rcx,[r9*1+rcx]

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r14]
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	cmp	r12,QWORD[((16+8))+rsp]	; reached the saved end bound?
	jb	NEAR $L$outer4x

	; rax = 0 - (rdi | borrow-derived bit): the mask consumed by the
	; subtraction loop at $L$sqr4x_sub_entry (registers are set up here
	; the same way __bn_post4x_internal sets them up).
	xor	rax,rax
	sub	rbp,r13
	adc	r15,r15
	or	rdi,r15
	sub	rax,rdi
	lea	rbx,[r9*1+r14]		; rbx = start of the temp vector
	mov	r12,QWORD[rcx]
	lea	rbp,[rcx]
	mov	rcx,r9
	sar	rcx,3+2			; rcx = -(bytes/32): negative 4-word group count
	mov	rdi,QWORD[((56+8))+rsp]	; reload the result pointer
	dec	r12
	xor	r10,r10
	mov	r13,QWORD[8+rbp]
	mov	r14,QWORD[16+rbp]
	mov	r15,QWORD[24+rbp]
	jmp	NEAR $L$sqr4x_sub_entry

global	bn_power5

; ---------------------------------------------------------------------------
; bn_power5
; Same Win64 argument layout as bn_mul_mont_gather5.  The body performs five
; __bn_sqr8x_internal + __bn_post4x_internal rounds followed by one
; mul4x_internal call (which gathers from the table using the index read
; from DWORD[56+entry rsp]).
; Out: rax = 1.
; ---------------------------------------------------------------------------
ALIGN	32
bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	rax,rsp

	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$power5_prologue:

	shl	r9d,3			; r9 = length in bytes
	lea	r10d,[r9*2+r9]		; r10 = 3 * bytes
	neg	r9
	mov	r8,QWORD[r8]		; r8 = 64-bit constant pointed to by arg5


	; Frame placement and page walk: same scheme as bn_mul4x_mont_gather5.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov keeps CF from the sub for cmovc
	cmovc	r11,r10
	sub	rbp,r11
$L$pwr_sp_done:
	and	rbp,-64
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
	jmp	NEAR $L$pwr_page_walk_done

$L$pwr_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
$L$pwr_page_walk_done:

	mov	r10,r9			; r10 = -bytes
	neg	r9			; r9  = +bytes


	mov	QWORD[32+rsp],r8	; constant, re-read by the callees at [(32+8)+rsp]
	mov	QWORD[40+rsp],rax	; entry rsp

$L$power5_body:
	; Park values in xmm registers across the internal calls.
	DB	102,72,15,110,207	; movq xmm1,rdi
	DB	102,72,15,110,209	; movq xmm2,rcx
	DB	102,73,15,110,218	; movq xmm3,r10
	DB	102,72,15,110,226	; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

	DB	102,72,15,126,209	; movq rcx,xmm2
	DB	102,72,15,126,226	; movq rdx,xmm4
	mov	rdi,rsi
	mov	rax,QWORD[40+rsp]	; rax = entry rsp, as mul4x_internal expects
	lea	r8,[32+rsp]		; r8 -> saved constant

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_power5:

global	bn_sqr8x_internal

; ---------------------------------------------------------------------------
; bn_sqr8x_internal / __bn_sqr8x_internal
; Register inputs as used by the code below:
;   rsi = operand, r9 = length in bytes, r10 = -length in bytes,
;   xmm2 = pointer reloaded into rbp before the reduction,
;   [(32+8)+rsp] = 64-bit constant used by the reduction.
; Phases: off-diagonal products ($L$sqr4x_1st/_outer/_inner), doubling plus
; squared diagonal terms ($L$sqr4x_shift_n_add), then falls through into
; __bn_sqr8x_reduction.  The working vector lives at [(48+8)+rsp].
; ---------------------------------------------------------------------------
ALIGN	32
bn_sqr8x_internal:
__bn_sqr8x_internal:

	lea	rbp,[32+r10]		; rbp = negative byte index, counts up to 0
	lea	rsi,[r9*1+rsi]		; rsi = end of operand

	mov	rcx,r9


	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	mov	QWORD[((-24))+rbp*1+rdi],r10

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-16))+rbp*1+rdi],r11
	mov	r10,rdx


	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	mov	r12,rax
	mov	rax,rbx
	mov	r13,rdx

	lea	rcx,[rbp]
	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10
	jmp	NEAR $L$sqr4x_1st

ALIGN	32
$L$sqr4x_1st:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	rbx,QWORD[16+rcx*1+rsi]
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0

	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	QWORD[8+rcx*1+rdi],r10
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[24+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[16+rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0
	lea	rcx,[32+rcx]

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_1st

	mul	r15
	add	r13,rax
	lea	rbp,[16+rbp]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx
	jmp	NEAR $L$sqr4x_outer

ALIGN	32
$L$sqr4x_outer:
	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,QWORD[((-24))+rbp*1+rdi]
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-24))+rbp*1+rdi],r10
	mov	r11,rdx

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	add	r11,QWORD[((-16))+rbp*1+rdi]
	mov	r10,rdx
	adc	r10,0
	mov	QWORD[((-16))+rbp*1+rdi],r11

	xor	r12,r12

	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	add	r12,rax
	mov	rax,rbx
	adc	rdx,0
	add	r12,QWORD[((-8))+rbp*1+rdi]
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rbp*1+rdi],r10

	lea	rcx,[rbp]
	jmp	NEAR $L$sqr4x_inner

ALIGN	32
$L$sqr4x_inner:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0
	add	r13,QWORD[rcx*1+rdi]
	adc	r12,0

	DB	0x67
	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	QWORD[rcx*1+rdi],r11
	mov	rax,rbx
	mov	r13,rdx
	adc	r13,0
	add	r12,QWORD[8+rcx*1+rdi]
	lea	rcx,[16+rcx]
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_inner

	DB	0x67
	mul	r15
	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	add	rbp,16
	jnz	NEAR $L$sqr4x_outer


	; Last four operand words (rbp is 0 here).
	mov	r14,QWORD[((-32))+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rsi]
	mov	r15,rax

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	QWORD[((-24))+rdi],r10
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	mov	rbx,QWORD[((-8))+rsi]
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[((-16))+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rdi],r10

	mul	r15
	add	r13,rax
	mov	rax,QWORD[((-16))+rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	mul	rbx
	add	rbp,16
	xor	r14,r14
	sub	rbp,r9			; rbp = 16 - bytes: index for the shift-and-add pass
	xor	r15,r15

	add	rax,r12
	adc	rdx,0
	mov	QWORD[8+rdi],rax
	mov	QWORD[16+rdi],rdx
	mov	QWORD[24+rdi],r15

	; Shift-and-add pass: each stored word is doubled (lea x*2 plus the
	; bit shifted out of the previous word) and the square of one operand
	; word (mul rax) is added in.  The carry between steps is parked in
	; r15 (sbb r15,r15) and restored to CF with neg r15.
	mov	rax,QWORD[((-16))+rbp*1+rsi]
	lea	rdi,[((48+8))+rsp]
	xor	r10,r10
	mov	r11,QWORD[8+rdi]

	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]		; rcx is 0 here (both loops above exit on rcx == 0)
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	lea	rbp,[16+rbp]
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	jmp	NEAR $L$sqr4x_shift_n_add

ALIGN	32
$L$sqr4x_shift_n_add:
	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[8+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[((-16))+rdi],rbx
	adc	r8,rdx

	lea	r12,[r10*2+r14]
	mov	QWORD[((-8))+rdi],r8
	sbb	r15,r15
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[8+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[16+rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	add	rbp,32
	jnz	NEAR $L$sqr4x_shift_n_add

	lea	r12,[r10*2+r14]
	DB	0x67
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mul	rax
	neg	r15
	adc	rbx,rax
	adc	r8,rdx
	mov	QWORD[((-16))+rdi],rbx
	mov	QWORD[((-8))+rdi],r8
	DB	102,72,15,126,213	; movq rbp,xmm2

; ---------------------------------------------------------------------------
; __bn_sqr8x_reduction (entered by fall-through from the squaring code)
; In:  rbp = pointer to the vector multiplied by rbx below, r9 = length in
;      bytes, [(32+8)+rsp] = 64-bit constant, working vector at [(48+8)+rsp].
; Frame slots: [(0+8)+rsp] = end of the rbp vector, [(8+8)+rsp] = end of the
; working vector.  Processes the working vector 8 words per outer iteration.
; ---------------------------------------------------------------------------
__bn_sqr8x_reduction:
	xor	rax,rax
	lea	rcx,[rbp*1+r9]
	lea	rdx,[((48+8))+r9*2+rsp]
	mov	QWORD[((0+8))+rsp],rcx
	lea	rdi,[((48+8))+r9*1+rsp]
	mov	QWORD[((8+8))+rsp],rdx
	neg	r9
	jmp	NEAR $L$8x_reduction_loop

ALIGN	32
$L$8x_reduction_loop:
	lea	rdi,[r9*1+rdi]
	DB	0x66
	mov	rbx,QWORD[rdi]
	mov	r9,QWORD[8+rdi]
	mov	r10,QWORD[16+rdi]
	mov	r11,QWORD[24+rdi]
	mov	r12,QWORD[32+rdi]
	mov	r13,QWORD[40+rdi]
	mov	r14,QWORD[48+rdi]
	mov	r15,QWORD[56+rdi]
	mov	QWORD[rdx],rax		; store the carry word from the previous round
	lea	rdi,[64+rdi]

	DB	0x67
	mov	r8,rbx
	imul	rbx,QWORD[((32+8))+rsp]	; rbx = low word * constant
	mov	rax,QWORD[rbp]
	mov	ecx,8
	jmp	NEAR $L$8x_reduce

ALIGN	32
$L$8x_reduce:
	mul	rbx
	mov	rax,QWORD[8+rbp]
	neg	r8			; CF = (r8 != 0); the low product word itself is not used
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[16+rbp]
	adc	rdx,0
	add	r8,r9
	mov	QWORD[((48-8+8))+rcx*8+rsp],rbx	; remember this multiplier for $L$8x_tail
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[24+rbp]
	adc	rdx,0
	add	r9,r10
	mov	rsi,QWORD[((32+8))+rsp]
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	imul	rsi,r8			; next multiplier = constant * new low word
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[40+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[56+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,rsi
	add	r15,rax
	mov	rax,QWORD[rbp]
	adc	rdx,0
	add	r14,r15
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_reduce

	lea	rbp,[64+rbp]
	xor	rax,rax
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_no_tail	; the rbp vector is only 8 words long

	DB	0x66
	add	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi			; rsi = -carry, restored with neg rsi later

	mov	rbx,QWORD[((48+56+8))+rsp]	; first multiplier saved by $L$8x_reduce
	mov	ecx,8
	mov	rax,QWORD[rbp]
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail:
	mul	rbx
	add	r8,rax
	mov	rax,QWORD[8+rbp]
	mov	QWORD[rdi],r8
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[16+rbp]
	adc	rdx,0
	add	r8,r9
	lea	rdi,[8+rdi]
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[24+rbp]
	adc	rdx,0
	add	r9,r10
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[40+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[56+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,QWORD[((48-16+8))+rcx*8+rsp]	; next saved multiplier
	add	r15,rax
	adc	rdx,0
	add	r14,r15
	mov	rax,QWORD[rbp]
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_tail

	lea	rbp,[64+rbp]
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_tail_done

	mov	rbx,QWORD[((48+56+8))+rsp]
	neg	rsi			; CF = saved carry
	mov	rax,QWORD[rbp]
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi

	mov	ecx,8
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail_done:
	xor	rax,rax
	add	r8,QWORD[rdx]		; add the carry word stored at the vector end
	adc	r9,0
	adc	r10,0
	adc	r11,0
	adc	r12,0
	adc	r13,0
	adc	r14,0
	adc	r15,0
	adc	rax,0

	neg	rsi
$L$8x_no_tail:
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	adc	rax,0			; rax = carry out of this round
	mov	rcx,QWORD[((-8))+rbp]
	xor	rsi,rsi

	DB	102,72,15,126,213	; movq rbp,xmm2 (rewind the vector pointer)

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
	DB	102,73,15,126,217	; movq r9,xmm3
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11
	mov	QWORD[32+rdi],r12
	mov	QWORD[40+rdi],r13
	mov	QWORD[48+rdi],r14
	mov	QWORD[56+rdi],r15
	lea	rdi,[64+rdi]

	cmp	rdi,rdx
	jb	NEAR $L$8x_reduction_loop
	DB	0F3h,0C3h		;repret


; ---------------------------------------------------------------------------
; __bn_post4x_internal
; In:  rbp = vector pointer, rdi + r9 = source vector (r9 = byte length),
;      rax = value negated into the and-mask, xmm1 = destination pointer.
; Each step computes dst = src + (~vec & mask) with the carry chained
; through r10, four words at a time; with an all-ones mask and the
; "dec r12" on the first word this amounts to dst = src - vec.
; NOTE(review): the loop tail lies beyond the end of this view; the final
; instruction below is reproduced exactly as it appears (truncated).
; ---------------------------------------------------------------------------
ALIGN	32
__bn_post4x_internal:
	mov	r12,QWORD[rbp]
	lea	rbx,[r9*1+rdi]		; rbx = source cursor
	mov	rcx,r9
	DB	102,72,15,126,207	; movq rdi,xmm1
	neg	rax
	DB	102,72,15,126,206	; movq rsi,xmm1
	sar	rcx,3+2			; rcx = signed group counter, incremented until 0
	dec	r12
	xor	r10,r10
	mov	r13,QWORD[8+rbp]
	mov	r14,QWORD[16+rbp]
	mov	r15,QWORD[24+rbp]
	jmp	NEAR $L$sqr4x_sub_entry

ALIGN	16
$L$sqr4x_sub:
	mov	r12,QWORD[rbp]
	mov	r13,QWORD[8+rbp]
	mov	r14,QWORD[16+rbp]
	mov	r15,QWORD[24+rbp]
$L$sqr4x_sub_entry:
	lea	rbp,[32+rbp]
	not	r12
	not	r13
	not	r14
	not	r15
	and	r12,rax
	and	r13,rax
	and	r14,rax
	and	r15,rax

	neg	r10			; CF = carry saved by the previous group
	adc	r12,QWORD[rbx]
	adc	r13,QWORD[8+rbx]
	adc	r14,QWORD[16+rbx]
	adc	r15,QWORD[24+rbx]
	mov	QWORD[rdi],r12
	lea	rbx,[32+rbx]
	mov	QWORD[8+rdi],r13
	sbb	r10,r10			; save carry for the next group
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	lea	rdi,[32+rdi]

	inc	rcx
	jnz	NEAR
$L$sqr4x_sub 2079 2080 mov r10,r9 2081 neg r9 2082 DB 0F3h,0C3h ;repret 2083 2084 global bn_from_montgomery 2085 2086 ALIGN 32 2087 bn_from_montgomery: 2088 test DWORD[48+rsp],7 2089 jz NEAR bn_from_mont8x 2090 xor eax,eax 2091 DB 0F3h,0C3h ;repret 2092 2093 2094 2095 ALIGN 32 2096 bn_from_mont8x: 2097 mov QWORD[8+rsp],rdi ;WIN64 prologue 2098 mov QWORD[16+rsp],rsi 2099 mov rax,rsp 2100 $L$SEH_begin_bn_from_mont8x: 2101 mov rdi,rcx 2102 mov rsi,rdx 2103 mov rdx,r8 2104 mov rcx,r9 2105 mov r8,QWORD[40+rsp] 2106 mov r9,QWORD[48+rsp] 2107 2108 2109 2110 DB 0x67 2111 mov rax,rsp 2112 2113 push rbx 2114 2115 push rbp 2116 2117 push r12 2118 2119 push r13 2120 2121 push r14 2122 2123 push r15 2124 2125 $L$from_prologue: 2126 2127 shl r9d,3 2128 lea r10,[r9*2+r9] 2129 neg r9 2130 mov r8,QWORD[r8] 2131 2132 2133 2134 2135 2136 2137 2138 2139 lea r11,[((-320))+r9*2+rsp] 2140 mov rbp,rsp 2141 sub r11,rdi 2142 and r11,4095 2143 cmp r10,r11 2144 jb NEAR $L$from_sp_alt 2145 sub rbp,r11 2146 lea rbp,[((-320))+r9*2+rbp] 2147 jmp NEAR $L$from_sp_done 2148 2149 ALIGN 32 2150 $L$from_sp_alt: 2151 lea r10,[((4096-320))+r9*2] 2152 lea rbp,[((-320))+r9*2+rbp] 2153 sub r11,r10 2154 mov r10,0 2155 cmovc r11,r10 2156 sub rbp,r11 2157 $L$from_sp_done: 2158 and rbp,-64 2159 mov r11,rsp 2160 sub r11,rbp 2161 and r11,-4096 2162 lea rsp,[rbp*1+r11] 2163 mov r10,QWORD[rsp] 2164 cmp rsp,rbp 2165 ja NEAR $L$from_page_walk 2166 jmp NEAR $L$from_page_walk_done 2167 2168 $L$from_page_walk: 2169 lea rsp,[((-4096))+rsp] 2170 mov r10,QWORD[rsp] 2171 cmp rsp,rbp 2172 ja NEAR $L$from_page_walk 2173 $L$from_page_walk_done: 2174 2175 mov r10,r9 2176 neg r9 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 mov QWORD[32+rsp],r8 2188 mov QWORD[40+rsp],rax 2189 2190 $L$from_body: 2191 mov r11,r9 2192 lea rax,[48+rsp] 2193 pxor xmm0,xmm0 2194 jmp NEAR $L$mul_by_1 2195 2196 ALIGN 32 2197 $L$mul_by_1: 2198 movdqu xmm1,XMMWORD[rsi] 2199 movdqu xmm2,XMMWORD[16+rsi] 2200 movdqu xmm3,XMMWORD[32+rsi] 2201 movdqa 
XMMWORD[r9*1+rax],xmm0 2202 movdqu xmm4,XMMWORD[48+rsi] 2203 movdqa XMMWORD[16+r9*1+rax],xmm0 2204 DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2205 movdqa XMMWORD[rax],xmm1 2206 movdqa XMMWORD[32+r9*1+rax],xmm0 2207 movdqa XMMWORD[16+rax],xmm2 2208 movdqa XMMWORD[48+r9*1+rax],xmm0 2209 movdqa XMMWORD[32+rax],xmm3 2210 movdqa XMMWORD[48+rax],xmm4 2211 lea rax,[64+rax] 2212 sub r11,64 2213 jnz NEAR $L$mul_by_1 2214 2215 DB 102,72,15,110,207 2216 DB 102,72,15,110,209 2217 DB 0x67 2218 mov rbp,rcx 2219 DB 102,73,15,110,218 2220 call __bn_sqr8x_reduction 2221 call __bn_post4x_internal 2222 2223 pxor xmm0,xmm0 2224 lea rax,[48+rsp] 2225 jmp NEAR $L$from_mont_zero 2226 2227 ALIGN 32 2228 $L$from_mont_zero: 2229 mov rsi,QWORD[40+rsp] 2230 2231 movdqa XMMWORD[rax],xmm0 2232 movdqa XMMWORD[16+rax],xmm0 2233 movdqa XMMWORD[32+rax],xmm0 2234 movdqa XMMWORD[48+rax],xmm0 2235 lea rax,[64+rax] 2236 sub r9,32 2237 jnz NEAR $L$from_mont_zero 2238 2239 mov rax,1 2240 mov r15,QWORD[((-48))+rsi] 2241 2242 mov r14,QWORD[((-40))+rsi] 2243 2244 mov r13,QWORD[((-32))+rsi] 2245 2246 mov r12,QWORD[((-24))+rsi] 2247 2248 mov rbp,QWORD[((-16))+rsi] 2249 2250 mov rbx,QWORD[((-8))+rsi] 2251 2252 lea rsp,[rsi] 2253 2254 $L$from_epilogue: 2255 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2256 mov rsi,QWORD[16+rsp] 2257 DB 0F3h,0C3h ;repret 2258 2259 $L$SEH_end_bn_from_mont8x: 2260 global bn_scatter5 2261 2262 ALIGN 16 2263 bn_scatter5: 2264 cmp edx,0 2265 jz NEAR $L$scatter_epilogue 2266 lea r8,[r9*8+r8] 2267 $L$scatter: 2268 mov rax,QWORD[rcx] 2269 lea rcx,[8+rcx] 2270 mov QWORD[r8],rax 2271 lea r8,[256+r8] 2272 sub edx,1 2273 jnz NEAR $L$scatter 2274 $L$scatter_epilogue: 2275 DB 0F3h,0C3h ;repret 2276 2277 2278 global bn_gather5 2279 2280 ALIGN 32 2281 bn_gather5: 2282 $L$SEH_begin_bn_gather5: 2283 2284 DB 0x4c,0x8d,0x14,0x24 2285 DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 2286 lea rax,[$L$inc] 2287 and rsp,-16 2288 2289 movd xmm5,r9d 2290 movdqa xmm0,XMMWORD[rax] 2291 movdqa xmm1,XMMWORD[16+rax] 2292 lea 
r11,[128+r8] 2293 lea rax,[128+rsp] 2294 2295 pshufd xmm5,xmm5,0 2296 movdqa xmm4,xmm1 2297 movdqa xmm2,xmm1 2298 paddd xmm1,xmm0 2299 pcmpeqd xmm0,xmm5 2300 movdqa xmm3,xmm4 2301 2302 paddd xmm2,xmm1 2303 pcmpeqd xmm1,xmm5 2304 movdqa XMMWORD[(-128)+rax],xmm0 2305 movdqa xmm0,xmm4 2306 2307 paddd xmm3,xmm2 2308 pcmpeqd xmm2,xmm5 2309 movdqa XMMWORD[(-112)+rax],xmm1 2310 movdqa xmm1,xmm4 2311 2312 paddd xmm0,xmm3 2313 pcmpeqd xmm3,xmm5 2314 movdqa XMMWORD[(-96)+rax],xmm2 2315 movdqa xmm2,xmm4 2316 paddd xmm1,xmm0 2317 pcmpeqd xmm0,xmm5 2318 movdqa XMMWORD[(-80)+rax],xmm3 2319 movdqa xmm3,xmm4 2320 2321 paddd xmm2,xmm1 2322 pcmpeqd xmm1,xmm5 2323 movdqa XMMWORD[(-64)+rax],xmm0 2324 movdqa xmm0,xmm4 2325 2326 paddd xmm3,xmm2 2327 pcmpeqd xmm2,xmm5 2328 movdqa XMMWORD[(-48)+rax],xmm1 2329 movdqa xmm1,xmm4 2330 2331 paddd xmm0,xmm3 2332 pcmpeqd xmm3,xmm5 2333 movdqa XMMWORD[(-32)+rax],xmm2 2334 movdqa xmm2,xmm4 2335 paddd xmm1,xmm0 2336 pcmpeqd xmm0,xmm5 2337 movdqa XMMWORD[(-16)+rax],xmm3 2338 movdqa xmm3,xmm4 2339 2340 paddd xmm2,xmm1 2341 pcmpeqd xmm1,xmm5 2342 movdqa XMMWORD[rax],xmm0 2343 movdqa xmm0,xmm4 2344 2345 paddd xmm3,xmm2 2346 pcmpeqd xmm2,xmm5 2347 movdqa XMMWORD[16+rax],xmm1 2348 movdqa xmm1,xmm4 2349 2350 paddd xmm0,xmm3 2351 pcmpeqd xmm3,xmm5 2352 movdqa XMMWORD[32+rax],xmm2 2353 movdqa xmm2,xmm4 2354 paddd xmm1,xmm0 2355 pcmpeqd xmm0,xmm5 2356 movdqa XMMWORD[48+rax],xmm3 2357 movdqa xmm3,xmm4 2358 2359 paddd xmm2,xmm1 2360 pcmpeqd xmm1,xmm5 2361 movdqa XMMWORD[64+rax],xmm0 2362 movdqa xmm0,xmm4 2363 2364 paddd xmm3,xmm2 2365 pcmpeqd xmm2,xmm5 2366 movdqa XMMWORD[80+rax],xmm1 2367 movdqa xmm1,xmm4 2368 2369 paddd xmm0,xmm3 2370 pcmpeqd xmm3,xmm5 2371 movdqa XMMWORD[96+rax],xmm2 2372 movdqa xmm2,xmm4 2373 movdqa XMMWORD[112+rax],xmm3 2374 jmp NEAR $L$gather 2375 2376 ALIGN 32 2377 $L$gather: 2378 pxor xmm4,xmm4 2379 pxor xmm5,xmm5 2380 movdqa xmm0,XMMWORD[((-128))+r11] 2381 movdqa xmm1,XMMWORD[((-112))+r11] 2382 movdqa xmm2,XMMWORD[((-96))+r11] 2383 
pand xmm0,XMMWORD[((-128))+rax] 2384 movdqa xmm3,XMMWORD[((-80))+r11] 2385 pand xmm1,XMMWORD[((-112))+rax] 2386 por xmm4,xmm0 2387 pand xmm2,XMMWORD[((-96))+rax] 2388 por xmm5,xmm1 2389 pand xmm3,XMMWORD[((-80))+rax] 2390 por xmm4,xmm2 2391 por xmm5,xmm3 2392 movdqa xmm0,XMMWORD[((-64))+r11] 2393 movdqa xmm1,XMMWORD[((-48))+r11] 2394 movdqa xmm2,XMMWORD[((-32))+r11] 2395 pand xmm0,XMMWORD[((-64))+rax] 2396 movdqa xmm3,XMMWORD[((-16))+r11] 2397 pand xmm1,XMMWORD[((-48))+rax] 2398 por xmm4,xmm0 2399 pand xmm2,XMMWORD[((-32))+rax] 2400 por xmm5,xmm1 2401 pand xmm3,XMMWORD[((-16))+rax] 2402 por xmm4,xmm2 2403 por xmm5,xmm3 2404 movdqa xmm0,XMMWORD[r11] 2405 movdqa xmm1,XMMWORD[16+r11] 2406 movdqa xmm2,XMMWORD[32+r11] 2407 pand xmm0,XMMWORD[rax] 2408 movdqa xmm3,XMMWORD[48+r11] 2409 pand xmm1,XMMWORD[16+rax] 2410 por xmm4,xmm0 2411 pand xmm2,XMMWORD[32+rax] 2412 por xmm5,xmm1 2413 pand xmm3,XMMWORD[48+rax] 2414 por xmm4,xmm2 2415 por xmm5,xmm3 2416 movdqa xmm0,XMMWORD[64+r11] 2417 movdqa xmm1,XMMWORD[80+r11] 2418 movdqa xmm2,XMMWORD[96+r11] 2419 pand xmm0,XMMWORD[64+rax] 2420 movdqa xmm3,XMMWORD[112+r11] 2421 pand xmm1,XMMWORD[80+rax] 2422 por xmm4,xmm0 2423 pand xmm2,XMMWORD[96+rax] 2424 por xmm5,xmm1 2425 pand xmm3,XMMWORD[112+rax] 2426 por xmm4,xmm2 2427 por xmm5,xmm3 2428 por xmm4,xmm5 2429 lea r11,[256+r11] 2430 pshufd xmm0,xmm4,0x4e 2431 por xmm0,xmm4 2432 movq QWORD[rcx],xmm0 2433 lea rcx,[8+rcx] 2434 sub edx,1 2435 jnz NEAR $L$gather 2436 2437 lea rsp,[r10] 2438 DB 0F3h,0C3h ;repret 2439 $L$SEH_end_bn_gather5: 2440 2441 ALIGN 64 2442 $L$inc: 2443 DD 0,0,1,1 2444 DD 2,2,2,2 2445 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 2446 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 2447 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 2448 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 2449 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 2450 DB 112,101,110,115,115,108,46,111,114,103,62,0 2451 EXTERN 
__imp_RtlVirtualUnwind 2452 2453 ALIGN 16 2454 mul_handler: 2455 push rsi 2456 push rdi 2457 push rbx 2458 push rbp 2459 push r12 2460 push r13 2461 push r14 2462 push r15 2463 pushfq 2464 sub rsp,64 2465 2466 mov rax,QWORD[120+r8] 2467 mov rbx,QWORD[248+r8] 2468 2469 mov rsi,QWORD[8+r9] 2470 mov r11,QWORD[56+r9] 2471 2472 mov r10d,DWORD[r11] 2473 lea r10,[r10*1+rsi] 2474 cmp rbx,r10 2475 jb NEAR $L$common_seh_tail 2476 2477 mov r10d,DWORD[4+r11] 2478 lea r10,[r10*1+rsi] 2479 cmp rbx,r10 2480 jb NEAR $L$common_pop_regs 2481 2482 mov rax,QWORD[152+r8] 2483 2484 mov r10d,DWORD[8+r11] 2485 lea r10,[r10*1+rsi] 2486 cmp rbx,r10 2487 jae NEAR $L$common_seh_tail 2488 2489 lea r10,[$L$mul_epilogue] 2490 cmp rbx,r10 2491 ja NEAR $L$body_40 2492 2493 mov r10,QWORD[192+r8] 2494 mov rax,QWORD[8+r10*8+rax] 2495 2496 jmp NEAR $L$common_pop_regs 2497 2498 $L$body_40: 2499 mov rax,QWORD[40+rax] 2500 $L$common_pop_regs: 2501 mov rbx,QWORD[((-8))+rax] 2502 mov rbp,QWORD[((-16))+rax] 2503 mov r12,QWORD[((-24))+rax] 2504 mov r13,QWORD[((-32))+rax] 2505 mov r14,QWORD[((-40))+rax] 2506 mov r15,QWORD[((-48))+rax] 2507 mov QWORD[144+r8],rbx 2508 mov QWORD[160+r8],rbp 2509 mov QWORD[216+r8],r12 2510 mov QWORD[224+r8],r13 2511 mov QWORD[232+r8],r14 2512 mov QWORD[240+r8],r15 2513 2514 $L$common_seh_tail: 2515 mov rdi,QWORD[8+rax] 2516 mov rsi,QWORD[16+rax] 2517 mov QWORD[152+r8],rax 2518 mov QWORD[168+r8],rsi 2519 mov QWORD[176+r8],rdi 2520 2521 mov rdi,QWORD[40+r9] 2522 mov rsi,r8 2523 mov ecx,154 2524 DD 0xa548f3fc 2525 2526 mov rsi,r9 2527 xor rcx,rcx 2528 mov rdx,QWORD[8+rsi] 2529 mov r8,QWORD[rsi] 2530 mov r9,QWORD[16+rsi] 2531 mov r10,QWORD[40+rsi] 2532 lea r11,[56+rsi] 2533 lea r12,[24+rsi] 2534 mov QWORD[32+rsp],r10 2535 mov QWORD[40+rsp],r11 2536 mov QWORD[48+rsp],r12 2537 mov QWORD[56+rsp],rcx 2538 call QWORD[__imp_RtlVirtualUnwind] 2539 2540 mov eax,1 2541 add rsp,64 2542 popfq 2543 pop r15 2544 pop r14 2545 pop r13 2546 pop r12 2547 pop rbp 2548 pop rbx 2549 pop rdi 2550 pop rsi 
2551 DB 0F3h,0C3h ;repret 2552 2553 2554 section .pdata rdata align=4 2555 ALIGN 4 2556 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase 2557 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase 2558 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase 2559 2560 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase 2561 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase 2562 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase 2563 2564 DD $L$SEH_begin_bn_power5 wrt ..imagebase 2565 DD $L$SEH_end_bn_power5 wrt ..imagebase 2566 DD $L$SEH_info_bn_power5 wrt ..imagebase 2567 2568 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase 2569 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 2570 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 2571 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 2572 DD $L$SEH_end_bn_gather5 wrt ..imagebase 2573 DD $L$SEH_info_bn_gather5 wrt ..imagebase 2574 2575 section .xdata rdata align=8 2576 ALIGN 8 2577 $L$SEH_info_bn_mul_mont_gather5: 2578 DB 9,0,0,0 2579 DD mul_handler wrt ..imagebase 2580 DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase 2581 ALIGN 8 2582 $L$SEH_info_bn_mul4x_mont_gather5: 2583 DB 9,0,0,0 2584 DD mul_handler wrt ..imagebase 2585 DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase 2586 ALIGN 8 2587 $L$SEH_info_bn_power5: 2588 DB 9,0,0,0 2589 DD mul_handler wrt ..imagebase 2590 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase 2591 ALIGN 8 2592 $L$SEH_info_bn_from_mont8x: 2593 DB 9,0,0,0 2594 DD mul_handler wrt ..imagebase 2595 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 2596 ALIGN 8 2597 $L$SEH_info_bn_gather5: 2598 DB 0x01,0x0b,0x03,0x0a 2599 DB 0x0b,0x01,0x21,0x00 2600 DB 0x04,0xa3,0x00,0x00 2601 ALIGN 8 2602