Home | History | Annotate | Download | only in bn
      1 default	rel
        ; "default rel" makes NASM encode memory operands without a base
        ; register as RIP-relative.
        ; The three size keywords are defined as empty so that operands written
        ; as XMMWORD[...] (see rsaz_512_mul_by_one) assemble under NASM.
      2 %define XMMWORD
      3 %define YMMWORD
      4 %define ZMMWORD
      5 section	.text code align=64
      6 
      7 
        ; Declared as external; no instruction in the code visible here reads it.
      8 EXTERN	OPENSSL_ia32cap_P
      9 
     10 global	rsaz_512_sqr
     11 
        ;-----------------------------------------------------------------------
        ; rsaz_512_sqr
        ; Squares an 8-limb (512-bit) value, reduces the 16-limb square with
        ; __rsaz_512_reduce / __rsaz_512_subtract, and repeats on the result
        ; for the requested number of iterations.
        ; ABI:  Win64 entry; the prologue copies the arguments into the
        ;       registers the body works with.
        ; In:   arg1 rcx      -> rdi      result buffer, 8 qwords
        ;       arg2 rdx      -> rsi      input, 8 qwords
        ;       arg3 r8       -> rdx,rbp  8-qword operand consumed by the reduce
        ;                                 and subtract helpers (presumably the
        ;                                 modulus - confirm against the caller)
        ;       arg4 r9       -> rcx      multiplier read by __rsaz_512_reduce
        ;                                 (presumably the Montgomery constant)
        ;       arg5 [40+rsp] -> r8       iteration count, low 32 bits used
        ; Frame: [rsp..127+rsp] 16-limb square, [128+rsp] arg4,
        ;        [136+rsp] remaining iteration count (dword).
        ; NOTE(review): the count is tested only after the first pass, so a
        ; count of 0 wraps at "dec r8d"; callers presumably pass >= 1.
        ;-----------------------------------------------------------------------
     12 ALIGN	32
     13 rsaz_512_sqr:
     14 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
     15 	mov	QWORD[16+rsp],rsi
     16 	mov	rax,rsp
        ; rax holds the entry rsp through the prologue; se_handler uses the
        ; saved Rax as the frame base when the fault is before $L$sqr_body.
     17 $L$SEH_begin_rsaz_512_sqr:
     18 	mov	rdi,rcx
     19 	mov	rsi,rdx
     20 	mov	rdx,r8
     21 	mov	rcx,r9
     22 	mov	r8,QWORD[40+rsp]
     23 
     24 
     25 	push	rbx
     26 	push	rbp
     27 	push	r12
     28 	push	r13
     29 	push	r14
     30 	push	r15
     31 
     32 	sub	rsp,128+24
     33 $L$sqr_body:
     34 	mov	rbp,rdx
     35 	mov	rdx,QWORD[rsi]
     36 	mov	rax,QWORD[8+rsi]
     37 	mov	QWORD[128+rsp],rcx
     38 	jmp	NEAR $L$oop_sqr
     39 
     40 ALIGN	32
        ; Loop entry invariant: rdx = limb 0 and rax = limb 1 of the value at
        ; rsi, r8d = iterations still to run (including this one).
     41 $L$oop_sqr:
     42 	mov	DWORD[((128+8))+rsp],r8d
     43 
        ; Row 0: r8..r15 = a[0] * a[1..7] (a[] = the 8 limbs at rsi).
     44 	mov	rbx,rdx
     45 	mul	rdx
     46 	mov	r8,rax
     47 	mov	rax,QWORD[16+rsi]
     48 	mov	r9,rdx
     49 
     50 	mul	rbx
     51 	add	r9,rax
     52 	mov	rax,QWORD[24+rsi]
     53 	mov	r10,rdx
     54 	adc	r10,0
     55 
     56 	mul	rbx
     57 	add	r10,rax
     58 	mov	rax,QWORD[32+rsi]
     59 	mov	r11,rdx
     60 	adc	r11,0
     61 
     62 	mul	rbx
     63 	add	r11,rax
     64 	mov	rax,QWORD[40+rsi]
     65 	mov	r12,rdx
     66 	adc	r12,0
     67 
     68 	mul	rbx
     69 	add	r12,rax
     70 	mov	rax,QWORD[48+rsi]
     71 	mov	r13,rdx
     72 	adc	r13,0
     73 
     74 	mul	rbx
     75 	add	r13,rax
     76 	mov	rax,QWORD[56+rsi]
     77 	mov	r14,rdx
     78 	adc	r14,0
     79 
     80 	mul	rbx
     81 	add	r14,rax
     82 	mov	rax,rbx
     83 	mov	r15,rdx
     84 	adc	r15,0
     85 
        ; Double the two lowest cross-product limbs and add a[0]^2. rcx keeps
        ; the undoubled r9 so that its top bit can be fed into the next limb.
     86 	add	r8,r8
     87 	mov	rcx,r9
     88 	adc	r9,r9
     89 
     90 	mul	rax
     91 	mov	QWORD[rsp],rax
     92 	add	r8,rdx
     93 	adc	r9,0
        ; NOTE(review): r9 was doubled just above; a carry out of this adc is
        ; overwritten by the following shr and never consumed. Confirm it cannot
        ; be set - compare the upstream rsaz_512_sqr fix for CVE-2019-1551. The
        ; same double-then-"adc reg,0" shape recurs in the rows below.
     94 
     95 	mov	QWORD[8+rsp],r8
     96 	shr	rcx,63
     97 
     98 
        ; Row 1: accumulate a[1] * a[2..7] into r10..r15, new top limb in r8.
     99 	mov	r8,QWORD[8+rsi]
    100 	mov	rax,QWORD[16+rsi]
    101 	mul	r8
    102 	add	r10,rax
    103 	mov	rax,QWORD[24+rsi]
    104 	mov	rbx,rdx
    105 	adc	rbx,0
    106 
    107 	mul	r8
    108 	add	r11,rax
    109 	mov	rax,QWORD[32+rsi]
    110 	adc	rdx,0
    111 	add	r11,rbx
    112 	mov	rbx,rdx
    113 	adc	rbx,0
    114 
    115 	mul	r8
    116 	add	r12,rax
    117 	mov	rax,QWORD[40+rsi]
    118 	adc	rdx,0
    119 	add	r12,rbx
    120 	mov	rbx,rdx
    121 	adc	rbx,0
    122 
    123 	mul	r8
    124 	add	r13,rax
    125 	mov	rax,QWORD[48+rsi]
    126 	adc	rdx,0
    127 	add	r13,rbx
    128 	mov	rbx,rdx
    129 	adc	rbx,0
    130 
    131 	mul	r8
    132 	add	r14,rax
    133 	mov	rax,QWORD[56+rsi]
    134 	adc	rdx,0
    135 	add	r14,rbx
    136 	mov	rbx,rdx
    137 	adc	rbx,0
    138 
    139 	mul	r8
    140 	add	r15,rax
    141 	mov	rax,r8
    142 	adc	rdx,0
    143 	add	r15,rbx
    144 	mov	r8,rdx
    145 	mov	rdx,r10
    146 	adc	r8,0
    147 
        ; "add rdx,rdx" on a copy of r10 only produces CF = top bit of r10 for
        ; the adc into r11; lea doubles r10 and inserts the bit saved in rcx
        ; without touching flags.
    148 	add	rdx,rdx
    149 	lea	r10,[r10*2+rcx]
    150 	mov	rbx,r11
    151 	adc	r11,r11
    152 
        ; Add a[1]^2 and store result limbs 2 and 3.
    153 	mul	rax
    154 	add	r9,rax
    155 	adc	r10,rdx
    156 	adc	r11,0
    157 
    158 	mov	QWORD[16+rsp],r9
    159 	mov	QWORD[24+rsp],r10
    160 	shr	rbx,63
    161 
    162 
        ; Row 2: accumulate a[2] * a[3..7] into r12..r15,r8; new top limb in r9.
    163 	mov	r9,QWORD[16+rsi]
    164 	mov	rax,QWORD[24+rsi]
    165 	mul	r9
    166 	add	r12,rax
    167 	mov	rax,QWORD[32+rsi]
    168 	mov	rcx,rdx
    169 	adc	rcx,0
    170 
    171 	mul	r9
    172 	add	r13,rax
    173 	mov	rax,QWORD[40+rsi]
    174 	adc	rdx,0
    175 	add	r13,rcx
    176 	mov	rcx,rdx
    177 	adc	rcx,0
    178 
    179 	mul	r9
    180 	add	r14,rax
    181 	mov	rax,QWORD[48+rsi]
    182 	adc	rdx,0
    183 	add	r14,rcx
    184 	mov	rcx,rdx
    185 	adc	rcx,0
    186 
    187 	mul	r9
    188 	mov	r10,r12
    189 	lea	r12,[r12*2+rbx]
    190 	add	r15,rax
    191 	mov	rax,QWORD[56+rsi]
    192 	adc	rdx,0
    193 	add	r15,rcx
    194 	mov	rcx,rdx
    195 	adc	rcx,0
    196 
    197 	mul	r9
    198 	shr	r10,63
    199 	add	r8,rax
    200 	mov	rax,r9
    201 	adc	rdx,0
    202 	add	r8,rcx
    203 	mov	r9,rdx
    204 	adc	r9,0
    205 
    206 	mov	rcx,r13
    207 	lea	r13,[r13*2+r10]
    208 
        ; Add a[2]^2 and store result limbs 4 and 5.
    209 	mul	rax
    210 	add	r11,rax
    211 	adc	r12,rdx
    212 	adc	r13,0
    213 
    214 	mov	QWORD[32+rsp],r11
    215 	mov	QWORD[40+rsp],r12
    216 	shr	rcx,63
    217 
    218 
        ; Row 3: accumulate a[3] * a[4..7] into r14,r15,r8,r9; new top limb r10.
    219 	mov	r10,QWORD[24+rsi]
    220 	mov	rax,QWORD[32+rsi]
    221 	mul	r10
    222 	add	r14,rax
    223 	mov	rax,QWORD[40+rsi]
    224 	mov	rbx,rdx
    225 	adc	rbx,0
    226 
    227 	mul	r10
    228 	add	r15,rax
    229 	mov	rax,QWORD[48+rsi]
    230 	adc	rdx,0
    231 	add	r15,rbx
    232 	mov	rbx,rdx
    233 	adc	rbx,0
    234 
    235 	mul	r10
    236 	mov	r12,r14
    237 	lea	r14,[r14*2+rcx]
    238 	add	r8,rax
    239 	mov	rax,QWORD[56+rsi]
    240 	adc	rdx,0
    241 	add	r8,rbx
    242 	mov	rbx,rdx
    243 	adc	rbx,0
    244 
    245 	mul	r10
    246 	shr	r12,63
    247 	add	r9,rax
    248 	mov	rax,r10
    249 	adc	rdx,0
    250 	add	r9,rbx
    251 	mov	r10,rdx
    252 	adc	r10,0
    253 
    254 	mov	rbx,r15
    255 	lea	r15,[r15*2+r12]
    256 
        ; Add a[3]^2 and store result limbs 6 and 7.
    257 	mul	rax
    258 	add	r13,rax
    259 	adc	r14,rdx
    260 	adc	r15,0
    261 
    262 	mov	QWORD[48+rsp],r13
    263 	mov	QWORD[56+rsp],r14
    264 	shr	rbx,63
    265 
    266 
        ; Row 4: accumulate a[4] * a[5..7] into r8,r9,r10; new top limb r11.
    267 	mov	r11,QWORD[32+rsi]
    268 	mov	rax,QWORD[40+rsi]
    269 	mul	r11
    270 	add	r8,rax
    271 	mov	rax,QWORD[48+rsi]
    272 	mov	rcx,rdx
    273 	adc	rcx,0
    274 
    275 	mul	r11
    276 	add	r9,rax
    277 	mov	rax,QWORD[56+rsi]
    278 	adc	rdx,0
    279 	mov	r12,r8
    280 	lea	r8,[r8*2+rbx]
    281 	add	r9,rcx
    282 	mov	rcx,rdx
    283 	adc	rcx,0
    284 
    285 	mul	r11
    286 	shr	r12,63
    287 	add	r10,rax
    288 	mov	rax,r11
    289 	adc	rdx,0
    290 	add	r10,rcx
    291 	mov	r11,rdx
    292 	adc	r11,0
    293 
    294 	mov	rcx,r9
    295 	lea	r9,[r9*2+r12]
    296 
        ; Add a[4]^2 and store result limbs 8 and 9.
    297 	mul	rax
    298 	add	r15,rax
    299 	adc	r8,rdx
    300 	adc	r9,0
    301 
    302 	mov	QWORD[64+rsp],r15
    303 	mov	QWORD[72+rsp],r8
    304 	shr	rcx,63
    305 
    306 
        ; Row 5: accumulate a[5] * a[6..7] into r10,r11; new top limb r12.
    307 	mov	r12,QWORD[40+rsi]
    308 	mov	rax,QWORD[48+rsi]
    309 	mul	r12
    310 	add	r10,rax
    311 	mov	rax,QWORD[56+rsi]
    312 	mov	rbx,rdx
    313 	adc	rbx,0
    314 
    315 	mul	r12
    316 	add	r11,rax
    317 	mov	rax,r12
    318 	mov	r15,r10
    319 	lea	r10,[r10*2+rcx]
    320 	adc	rdx,0
    321 	shr	r15,63
    322 	add	r11,rbx
    323 	mov	r12,rdx
    324 	adc	r12,0
    325 
    326 	mov	rbx,r11
    327 	lea	r11,[r11*2+r15]
    328 
        ; Add a[5]^2 and store result limbs 10 and 11.
    329 	mul	rax
    330 	add	r9,rax
    331 	adc	r10,rdx
    332 	adc	r11,0
    333 
    334 	mov	QWORD[80+rsp],r9
    335 	mov	QWORD[88+rsp],r10
    336 
    337 
        ; Row 6: a[6] * a[7] into r12 (plus carry limb r13).
    338 	mov	r13,QWORD[48+rsi]
    339 	mov	rax,QWORD[56+rsi]
    340 	mul	r13
    341 	add	r12,rax
    342 	mov	rax,r13
    343 	mov	r13,rdx
    344 	adc	r13,0
    345 
        ; "shl rbx,1" moves the top bit of the undoubled r11 into CF, then
        ; r12:r13 are doubled through the carry chain with overflow into r14.
    346 	xor	r14,r14
    347 	shl	rbx,1
    348 	adc	r12,r12
    349 	adc	r13,r13
    350 	adc	r14,r14
    351 
        ; Add a[6]^2 and store result limbs 12 and 13.
    352 	mul	rax
    353 	add	r11,rax
    354 	adc	r12,rdx
    355 	adc	r13,0
    356 
    357 	mov	QWORD[96+rsp],r11
    358 	mov	QWORD[104+rsp],r12
    359 
    360 
        ; Add a[7]^2 and store result limbs 14 and 15.
    361 	mov	rax,QWORD[56+rsi]
    362 	mul	rax
    363 	add	r13,rax
    364 	adc	rdx,0
    365 
    366 	add	r14,rdx
    367 
    368 	mov	QWORD[112+rsp],r13
    369 	mov	QWORD[120+rsp],r14
    370 
        ; Reload the low 8 limbs of the square and run the reduction on them.
    371 	mov	r8,QWORD[rsp]
    372 	mov	r9,QWORD[8+rsp]
    373 	mov	r10,QWORD[16+rsp]
    374 	mov	r11,QWORD[24+rsp]
    375 	mov	r12,QWORD[32+rsp]
    376 	mov	r13,QWORD[40+rsp]
    377 	mov	r14,QWORD[48+rsp]
    378 	mov	r15,QWORD[56+rsp]
    379 
    380 	call	__rsaz_512_reduce
    381 
        ; Add the high 8 limbs of the square; rcx becomes 0 or all-ones from
        ; the final carry and is the mask for __rsaz_512_subtract.
    382 	add	r8,QWORD[64+rsp]
    383 	adc	r9,QWORD[72+rsp]
    384 	adc	r10,QWORD[80+rsp]
    385 	adc	r11,QWORD[88+rsp]
    386 	adc	r12,QWORD[96+rsp]
    387 	adc	r13,QWORD[104+rsp]
    388 	adc	r14,QWORD[112+rsp]
    389 	adc	r15,QWORD[120+rsp]
    390 	sbb	rcx,rcx
    391 
    392 	call	__rsaz_512_subtract
    393 
        ; The subtract helper leaves result limbs 0 and 1 in r8/r9; they seed
        ; rdx/rax for the next pass, which squares the buffer just written.
    394 	mov	rdx,r8
    395 	mov	rax,r9
    396 	mov	r8d,DWORD[((128+8))+rsp]
    397 	mov	rsi,rdi
    398 
    399 	dec	r8d
    400 	jnz	NEAR $L$oop_sqr
    401 
        ; rax = rsp at function entry (152-byte frame + six pushes).
    402 	lea	rax,[((128+24+48))+rsp]
    403 	mov	r15,QWORD[((-48))+rax]
    404 	mov	r14,QWORD[((-40))+rax]
    405 	mov	r13,QWORD[((-32))+rax]
    406 	mov	r12,QWORD[((-24))+rax]
    407 	mov	rbp,QWORD[((-16))+rax]
    408 	mov	rbx,QWORD[((-8))+rax]
    409 	lea	rsp,[rax]
    410 $L$sqr_epilogue:
    411 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    412 	mov	rsi,QWORD[16+rsp]
    413 	DB	0F3h,0C3h		;repret
    414 $L$SEH_end_rsaz_512_sqr:
    415 global	rsaz_512_mul
    416 
        ;-----------------------------------------------------------------------
        ; rsaz_512_mul
        ; Multiplies two 8-limb values with __rsaz_512_mul, then reduces the
        ; 16-limb product with __rsaz_512_reduce / __rsaz_512_subtract and
        ; writes the 8-limb result.
        ; ABI:  Win64 entry; the prologue copies the arguments into the
        ;       registers the body works with.
        ; In:   arg1 rcx      -> rdi  result buffer, 8 qwords
        ;       arg2 rdx      -> rsi  first factor, 8 qwords
        ;       arg3 r8       -> rdx  second factor, 8 qwords
        ;       arg4 r9       -> rcx  8-qword operand for reduce/subtract
        ;                             (presumably the modulus)
        ;       arg5 [40+rsp] -> r8   multiplier read by __rsaz_512_reduce
        ; Frame: [rsp..127+rsp] 16-limb product, [128+rsp] arg5.
        ; Uses xmm0/xmm1 to carry rdi and arg4 across the helper call.
        ;-----------------------------------------------------------------------
    417 ALIGN	32
    418 rsaz_512_mul:
    419 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    420 	mov	QWORD[16+rsp],rsi
    421 	mov	rax,rsp
    422 $L$SEH_begin_rsaz_512_mul:
    423 	mov	rdi,rcx
    424 	mov	rsi,rdx
    425 	mov	rdx,r8
    426 	mov	rcx,r9
    427 	mov	r8,QWORD[40+rsp]
    428 
    429 
    430 	push	rbx
    431 	push	rbp
    432 	push	r12
    433 	push	r13
    434 	push	r14
    435 	push	r15
    436 
    437 	sub	rsp,128+24
    438 $L$mul_body:
    439 DB	102,72,15,110,199	;movq xmm0,rdi
    440 DB	102,72,15,110,201	;movq xmm1,rcx
    441 	mov	QWORD[128+rsp],r8
        ; __rsaz_512_mul expects rbx = first limb of the factor at rbp.
    442 	mov	rbx,QWORD[rdx]
    443 	mov	rbp,rdx
    444 	call	__rsaz_512_mul
    445 
        ; The helper clobbered rdi and rbp: restore the result pointer and
        ; point rbp at arg4 for the reduce/subtract helpers.
    446 DB	102,72,15,126,199	;movq rdi,xmm0
    447 DB	102,72,15,126,205	;movq rbp,xmm1
    448 
    449 	mov	r8,QWORD[rsp]
    450 	mov	r9,QWORD[8+rsp]
    451 	mov	r10,QWORD[16+rsp]
    452 	mov	r11,QWORD[24+rsp]
    453 	mov	r12,QWORD[32+rsp]
    454 	mov	r13,QWORD[40+rsp]
    455 	mov	r14,QWORD[48+rsp]
    456 	mov	r15,QWORD[56+rsp]
    457 
    458 	call	__rsaz_512_reduce
        ; Add the high half of the product; rcx = 0 or all-ones from the carry.
    459 	add	r8,QWORD[64+rsp]
    460 	adc	r9,QWORD[72+rsp]
    461 	adc	r10,QWORD[80+rsp]
    462 	adc	r11,QWORD[88+rsp]
    463 	adc	r12,QWORD[96+rsp]
    464 	adc	r13,QWORD[104+rsp]
    465 	adc	r14,QWORD[112+rsp]
    466 	adc	r15,QWORD[120+rsp]
    467 	sbb	rcx,rcx
    468 
    469 	call	__rsaz_512_subtract
    470 
        ; rax = rsp at function entry (152-byte frame + six pushes).
    471 	lea	rax,[((128+24+48))+rsp]
    472 	mov	r15,QWORD[((-48))+rax]
    473 	mov	r14,QWORD[((-40))+rax]
    474 	mov	r13,QWORD[((-32))+rax]
    475 	mov	r12,QWORD[((-24))+rax]
    476 	mov	rbp,QWORD[((-16))+rax]
    477 	mov	rbx,QWORD[((-8))+rax]
    478 	lea	rsp,[rax]
    479 $L$mul_epilogue:
    480 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    481 	mov	rsi,QWORD[16+rsp]
    482 	DB	0F3h,0C3h		;repret
    483 $L$SEH_end_rsaz_512_mul:
    484 global	rsaz_512_mul_gather4
    485 
        ;-----------------------------------------------------------------------
        ; rsaz_512_mul_gather4
        ; Multiplies an 8-limb value by an 8-limb entry gathered limb-by-limb
        ; from an interleaved table, then reduces the product and stores the
        ; 8-limb result.
        ; ABI:  Win64 entry; the prologue copies the arguments into the
        ;       registers the body works with.
        ; In:   arg1 rcx      -> rdi  result buffer, 8 qwords
        ;       arg2 rdx      -> rsi  factor, 8 qwords
        ;       arg3 r8       -> rdx  table base
        ;       arg4 r9       -> rcx  8-qword operand for reduce/subtract
        ;                             (presumably the modulus)
        ;       arg5 [40+rsp] -> r8   multiplier read by __rsaz_512_reduce
        ;       arg6 [48+rsp] -> r9   table column index, low 32 bits used
        ; Table access: limb i is read as two dwords at
        ;       table + 128*i + 4*index  and  table + 128*i + 64 + 4*index.
        ; Frame: [rsp..127+rsp] 16-limb product, [128+rsp] arg5.
        ; NOTE(review): the load addresses depend on the index; if the index
        ; is secret this is observable through the cache - confirm the
        ; caller's requirements.
        ;-----------------------------------------------------------------------
    486 ALIGN	32
    487 rsaz_512_mul_gather4:
    488 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    489 	mov	QWORD[16+rsp],rsi
    490 	mov	rax,rsp
    491 $L$SEH_begin_rsaz_512_mul_gather4:
    492 	mov	rdi,rcx
    493 	mov	rsi,rdx
    494 	mov	rdx,r8
    495 	mov	rcx,r9
    496 	mov	r8,QWORD[40+rsp]
    497 	mov	r9,QWORD[48+rsp]
    498 
    499 
    500 	push	rbx
    501 	push	rbp
    502 	push	r12
    503 	push	r13
    504 	push	r14
    505 	push	r15
    506 
        ; Zero-extend the index so it can be used in 64-bit addressing.
    507 	mov	r9d,r9d
    508 	sub	rsp,128+24
    509 $L$mul_gather4_body:
        ; Gather table limb 0 into rbx (high dword via eax, low dword via ebx).
    510 	mov	eax,DWORD[64+r9*4+rdx]
    511 DB	102,72,15,110,199	;movq xmm0,rdi
    512 	mov	ebx,DWORD[r9*4+rdx]
    513 DB	102,72,15,110,201	;movq xmm1,rcx
    514 	mov	QWORD[128+rsp],r8
    515 
    516 	shl	rax,32
    517 	or	rbx,rax
    518 	mov	rax,QWORD[rsi]
    519 	mov	rcx,QWORD[8+rsi]
        ; rbp -> the two dwords of table limb 1 for this column.
    520 	lea	rbp,[128+r9*4+rdx]
        ; First row: factor * table limb 0. The SSE instructions interleaved
        ; below assemble the next table limb in xmm4 while the multiplies run.
    521 	mul	rbx
    522 	mov	QWORD[rsp],rax
    523 	mov	rax,rcx
    524 	mov	r8,rdx
    525 
    526 	mul	rbx
    527 	movd	xmm4,DWORD[rbp]
    528 	add	r8,rax
    529 	mov	rax,QWORD[16+rsi]
    530 	mov	r9,rdx
    531 	adc	r9,0
    532 
    533 	mul	rbx
    534 	movd	xmm5,DWORD[64+rbp]
    535 	add	r9,rax
    536 	mov	rax,QWORD[24+rsi]
    537 	mov	r10,rdx
    538 	adc	r10,0
    539 
    540 	mul	rbx
    541 	pslldq	xmm5,4
    542 	add	r10,rax
    543 	mov	rax,QWORD[32+rsi]
    544 	mov	r11,rdx
    545 	adc	r11,0
    546 
    547 	mul	rbx
    548 	por	xmm4,xmm5
    549 	add	r11,rax
    550 	mov	rax,QWORD[40+rsi]
    551 	mov	r12,rdx
    552 	adc	r12,0
    553 
    554 	mul	rbx
    555 	add	r12,rax
    556 	mov	rax,QWORD[48+rsi]
    557 	mov	r13,rdx
    558 	adc	r13,0
    559 
    560 	mul	rbx
    561 	lea	rbp,[128+rbp]
    562 	add	r13,rax
    563 	mov	rax,QWORD[56+rsi]
    564 	mov	r14,rdx
    565 	adc	r14,0
    566 
    567 	mul	rbx
    568 DB	102,72,15,126,227	;movq rbx,xmm4 - next table limb
    569 	add	r14,rax
    570 	mov	rax,QWORD[rsi]
    571 	mov	r15,rdx
    572 	adc	r15,0
    573 
        ; rdi walks the product buffer (rdi itself was saved in xmm0); seven
        ; more rows follow.
    574 	lea	rdi,[8+rsp]
    575 	mov	ecx,7
    576 	jmp	NEAR $L$oop_mul_gather
    577 
    578 ALIGN	32
        ; Each pass multiplies the factor by the limb in rbx, stores one
        ; finished product limb at [rdi], and gathers the following table limb.
        ; NOTE(review): the seventh pass still loads from rbp/64+rbp, i.e. a
        ; ninth limb that is never multiplied; confirm the table allocation
        ; covers that read.
    579 $L$oop_mul_gather:
    580 	mul	rbx
    581 	add	r8,rax
    582 	mov	rax,QWORD[8+rsi]
    583 	mov	QWORD[rdi],r8
    584 	mov	r8,rdx
    585 	adc	r8,0
    586 
    587 	mul	rbx
    588 	movd	xmm4,DWORD[rbp]
    589 	add	r9,rax
    590 	mov	rax,QWORD[16+rsi]
    591 	adc	rdx,0
    592 	add	r8,r9
    593 	mov	r9,rdx
    594 	adc	r9,0
    595 
    596 	mul	rbx
    597 	movd	xmm5,DWORD[64+rbp]
    598 	add	r10,rax
    599 	mov	rax,QWORD[24+rsi]
    600 	adc	rdx,0
    601 	add	r9,r10
    602 	mov	r10,rdx
    603 	adc	r10,0
    604 
    605 	mul	rbx
    606 	pslldq	xmm5,4
    607 	add	r11,rax
    608 	mov	rax,QWORD[32+rsi]
    609 	adc	rdx,0
    610 	add	r10,r11
    611 	mov	r11,rdx
    612 	adc	r11,0
    613 
    614 	mul	rbx
    615 	por	xmm4,xmm5
    616 	add	r12,rax
    617 	mov	rax,QWORD[40+rsi]
    618 	adc	rdx,0
    619 	add	r11,r12
    620 	mov	r12,rdx
    621 	adc	r12,0
    622 
    623 	mul	rbx
    624 	add	r13,rax
    625 	mov	rax,QWORD[48+rsi]
    626 	adc	rdx,0
    627 	add	r12,r13
    628 	mov	r13,rdx
    629 	adc	r13,0
    630 
    631 	mul	rbx
    632 	add	r14,rax
    633 	mov	rax,QWORD[56+rsi]
    634 	adc	rdx,0
    635 	add	r13,r14
    636 	mov	r14,rdx
    637 	adc	r14,0
    638 
    639 	mul	rbx
    640 DB	102,72,15,126,227	;movq rbx,xmm4 - next table limb
    641 	add	r15,rax
    642 	mov	rax,QWORD[rsi]
    643 	adc	rdx,0
    644 	add	r14,r15
    645 	mov	r15,rdx
    646 	adc	r15,0
    647 
    648 	lea	rbp,[128+rbp]
    649 	lea	rdi,[8+rdi]
    650 
    651 	dec	ecx
    652 	jnz	NEAR $L$oop_mul_gather
    653 
        ; rdi now equals 64+rsp: store the high 8 limbs of the product.
    654 	mov	QWORD[rdi],r8
    655 	mov	QWORD[8+rdi],r9
    656 	mov	QWORD[16+rdi],r10
    657 	mov	QWORD[24+rdi],r11
    658 	mov	QWORD[32+rdi],r12
    659 	mov	QWORD[40+rdi],r13
    660 	mov	QWORD[48+rdi],r14
    661 	mov	QWORD[56+rdi],r15
    662 
    663 DB	102,72,15,126,199	;movq rdi,xmm0 - result pointer
    664 DB	102,72,15,126,205	;movq rbp,xmm1 - arg4 for reduce/subtract
    665 
    666 	mov	r8,QWORD[rsp]
    667 	mov	r9,QWORD[8+rsp]
    668 	mov	r10,QWORD[16+rsp]
    669 	mov	r11,QWORD[24+rsp]
    670 	mov	r12,QWORD[32+rsp]
    671 	mov	r13,QWORD[40+rsp]
    672 	mov	r14,QWORD[48+rsp]
    673 	mov	r15,QWORD[56+rsp]
    674 
    675 	call	__rsaz_512_reduce
        ; Add the high half of the product; rcx = 0 or all-ones from the carry.
    676 	add	r8,QWORD[64+rsp]
    677 	adc	r9,QWORD[72+rsp]
    678 	adc	r10,QWORD[80+rsp]
    679 	adc	r11,QWORD[88+rsp]
    680 	adc	r12,QWORD[96+rsp]
    681 	adc	r13,QWORD[104+rsp]
    682 	adc	r14,QWORD[112+rsp]
    683 	adc	r15,QWORD[120+rsp]
    684 	sbb	rcx,rcx
    685 
    686 	call	__rsaz_512_subtract
    687 
        ; rax = rsp at function entry (152-byte frame + six pushes).
    688 	lea	rax,[((128+24+48))+rsp]
    689 	mov	r15,QWORD[((-48))+rax]
    690 	mov	r14,QWORD[((-40))+rax]
    691 	mov	r13,QWORD[((-32))+rax]
    692 	mov	r12,QWORD[((-24))+rax]
    693 	mov	rbp,QWORD[((-16))+rax]
    694 	mov	rbx,QWORD[((-8))+rax]
    695 	lea	rsp,[rax]
    696 $L$mul_gather4_epilogue:
    697 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    698 	mov	rsi,QWORD[16+rsp]
    699 	DB	0F3h,0C3h		;repret
    700 $L$SEH_end_rsaz_512_mul_gather4:
    701 global	rsaz_512_mul_scatter4
    702 
        ;-----------------------------------------------------------------------
        ; rsaz_512_mul_scatter4
        ; Multiplies the 8-limb value in the arg1 buffer by the 8-limb value at
        ; arg2, reduces the product, writes the result back to the arg1 buffer
        ; and also scatters it into one column of an interleaved table.
        ; ABI:  Win64 entry; the prologue copies the arguments into the
        ;       registers the body works with.
        ; In:   arg1 rcx      -> rdi  in/out buffer, 8 qwords
        ;       arg2 rdx      -> rsi  factor, 8 qwords
        ;       arg3 r8       -> rdx  8-qword operand for reduce/subtract
        ;                             (presumably the modulus)
        ;       arg4 r9       -> rcx  multiplier read by __rsaz_512_reduce
        ;       arg5 [40+rsp] -> r8   table base
        ;       arg6 [48+rsp] -> r9   table column index, low 32 bits used
        ; Table layout written: limb i -> dwords at table + 128*i + 4*index
        ;       (low half) and table + 128*i + 64 + 4*index (high half).
        ; Frame: [rsp..127+rsp] 16-limb product, [128+rsp] arg4.
        ;-----------------------------------------------------------------------
    703 ALIGN	32
    704 rsaz_512_mul_scatter4:
    705 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    706 	mov	QWORD[16+rsp],rsi
    707 	mov	rax,rsp
    708 $L$SEH_begin_rsaz_512_mul_scatter4:
    709 	mov	rdi,rcx
    710 	mov	rsi,rdx
    711 	mov	rdx,r8
    712 	mov	rcx,r9
    713 	mov	r8,QWORD[40+rsp]
    714 	mov	r9,QWORD[48+rsp]
    715 
    716 
    717 	push	rbx
    718 	push	rbp
    719 	push	r12
    720 	push	r13
    721 	push	r14
    722 	push	r15
    723 
        ; Zero-extend the index so it can be used in 64-bit addressing.
    724 	mov	r9d,r9d
    725 	sub	rsp,128+24
    726 $L$mul_scatter4_body:
        ; r8 = address of the selected table column.
    727 	lea	r8,[r9*4+r8]
    728 DB	102,72,15,110,199	;movq xmm0,rdi
    729 DB	102,72,15,110,202	;movq xmm1,rdx
    730 DB	102,73,15,110,208	;movq xmm2,r8
    731 	mov	QWORD[128+rsp],rcx
    732 
        ; The second factor for __rsaz_512_mul is the arg1 buffer itself.
    733 	mov	rbp,rdi
    734 	mov	rbx,QWORD[rdi]
    735 	call	__rsaz_512_mul
    736 
    737 DB	102,72,15,126,199	;movq rdi,xmm0 - result pointer
    738 DB	102,72,15,126,205	;movq rbp,xmm1 - arg3 for reduce/subtract
    739 
    740 	mov	r8,QWORD[rsp]
    741 	mov	r9,QWORD[8+rsp]
    742 	mov	r10,QWORD[16+rsp]
    743 	mov	r11,QWORD[24+rsp]
    744 	mov	r12,QWORD[32+rsp]
    745 	mov	r13,QWORD[40+rsp]
    746 	mov	r14,QWORD[48+rsp]
    747 	mov	r15,QWORD[56+rsp]
    748 
    749 	call	__rsaz_512_reduce
        ; Add the high half of the product. The movq in the middle of the
        ; carry chain does not modify flags, so sbb still sees the last carry.
    750 	add	r8,QWORD[64+rsp]
    751 	adc	r9,QWORD[72+rsp]
    752 	adc	r10,QWORD[80+rsp]
    753 	adc	r11,QWORD[88+rsp]
    754 	adc	r12,QWORD[96+rsp]
    755 	adc	r13,QWORD[104+rsp]
    756 	adc	r14,QWORD[112+rsp]
    757 	adc	r15,QWORD[120+rsp]
    758 DB	102,72,15,126,214	;movq rsi,xmm2 - table column address
    759 	sbb	rcx,rcx
    760 
    761 	call	__rsaz_512_subtract
    762 
        ; r8..r15 hold the final result: write low dwords, shift, then write
        ; the high dwords 64 bytes further on.
    763 	mov	DWORD[rsi],r8d
    764 	shr	r8,32
    765 	mov	DWORD[128+rsi],r9d
    766 	shr	r9,32
    767 	mov	DWORD[256+rsi],r10d
    768 	shr	r10,32
    769 	mov	DWORD[384+rsi],r11d
    770 	shr	r11,32
    771 	mov	DWORD[512+rsi],r12d
    772 	shr	r12,32
    773 	mov	DWORD[640+rsi],r13d
    774 	shr	r13,32
    775 	mov	DWORD[768+rsi],r14d
    776 	shr	r14,32
    777 	mov	DWORD[896+rsi],r15d
    778 	shr	r15,32
    779 	mov	DWORD[64+rsi],r8d
    780 	mov	DWORD[192+rsi],r9d
    781 	mov	DWORD[320+rsi],r10d
    782 	mov	DWORD[448+rsi],r11d
    783 	mov	DWORD[576+rsi],r12d
    784 	mov	DWORD[704+rsi],r13d
    785 	mov	DWORD[832+rsi],r14d
    786 	mov	DWORD[960+rsi],r15d
    787 
        ; rax = rsp at function entry (152-byte frame + six pushes).
    788 	lea	rax,[((128+24+48))+rsp]
    789 	mov	r15,QWORD[((-48))+rax]
    790 	mov	r14,QWORD[((-40))+rax]
    791 	mov	r13,QWORD[((-32))+rax]
    792 	mov	r12,QWORD[((-24))+rax]
    793 	mov	rbp,QWORD[((-16))+rax]
    794 	mov	rbx,QWORD[((-8))+rax]
    795 	lea	rsp,[rax]
    796 $L$mul_scatter4_epilogue:
    797 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    798 	mov	rsi,QWORD[16+rsp]
    799 	DB	0F3h,0C3h		;repret
    800 $L$SEH_end_rsaz_512_mul_scatter4:
    801 global	rsaz_512_mul_by_one
    802 
        ;-----------------------------------------------------------------------
        ; rsaz_512_mul_by_one
        ; Runs __rsaz_512_reduce directly on an 8-limb input and stores the
        ; 8-limb output; no multiply and no final conditional subtraction.
        ; ABI:  Win64 entry; the prologue copies the arguments into the
        ;       registers the body works with.
        ; In:   arg1 rcx -> rdi  result buffer, 8 qwords
        ;       arg2 rdx -> rsi  input, 8 qwords
        ;       arg3 r8  -> rdx  8-qword operand for reduce (presumably the
        ;                        modulus)
        ;       arg4 r9  -> rcx  multiplier read by __rsaz_512_reduce
        ; Frame: [rsp..111+rsp] zero-filled, [128+rsp] arg4.
        ;-----------------------------------------------------------------------
    803 ALIGN	32
    804 rsaz_512_mul_by_one:
    805 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    806 	mov	QWORD[16+rsp],rsi
    807 	mov	rax,rsp
    808 $L$SEH_begin_rsaz_512_mul_by_one:
    809 	mov	rdi,rcx
    810 	mov	rsi,rdx
    811 	mov	rdx,r8
    812 	mov	rcx,r9
    813 
    814 
    815 	push	rbx
    816 	push	rbp
    817 	push	r12
    818 	push	r13
    819 	push	r14
    820 	push	r15
    821 
    822 	sub	rsp,128+24
    823 $L$mul_by_one_body:
    824 	mov	rbp,rdx
    825 	mov	QWORD[128+rsp],rcx
    826 
    827 	mov	r8,QWORD[rsi]
    828 	pxor	xmm0,xmm0
    829 	mov	r9,QWORD[8+rsi]
    830 	mov	r10,QWORD[16+rsi]
    831 	mov	r11,QWORD[24+rsi]
    832 	mov	r12,QWORD[32+rsi]
    833 	mov	r13,QWORD[40+rsi]
    834 	mov	r14,QWORD[48+rsi]
    835 	mov	r15,QWORD[56+rsi]
    836 
        ; Zero 112 bytes of the frame with aligned stores (rsp is 16-byte
        ; aligned here when it was at entry per the ABI: return address, six
        ; pushes and 152 bytes). Nothing in the code visible here reads this
        ; area back.
    837 	movdqa	XMMWORD[rsp],xmm0
    838 	movdqa	XMMWORD[16+rsp],xmm0
    839 	movdqa	XMMWORD[32+rsp],xmm0
    840 	movdqa	XMMWORD[48+rsp],xmm0
    841 	movdqa	XMMWORD[64+rsp],xmm0
    842 	movdqa	XMMWORD[80+rsp],xmm0
    843 	movdqa	XMMWORD[96+rsp],xmm0
    844 	call	__rsaz_512_reduce
    845 	mov	QWORD[rdi],r8
    846 	mov	QWORD[8+rdi],r9
    847 	mov	QWORD[16+rdi],r10
    848 	mov	QWORD[24+rdi],r11
    849 	mov	QWORD[32+rdi],r12
    850 	mov	QWORD[40+rdi],r13
    851 	mov	QWORD[48+rdi],r14
    852 	mov	QWORD[56+rdi],r15
    853 
        ; rax = rsp at function entry (152-byte frame + six pushes).
    854 	lea	rax,[((128+24+48))+rsp]
    855 	mov	r15,QWORD[((-48))+rax]
    856 	mov	r14,QWORD[((-40))+rax]
    857 	mov	r13,QWORD[((-32))+rax]
    858 	mov	r12,QWORD[((-24))+rax]
    859 	mov	rbp,QWORD[((-16))+rax]
    860 	mov	rbx,QWORD[((-8))+rax]
    861 	lea	rsp,[rax]
    862 $L$mul_by_one_epilogue:
    863 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    864 	mov	rsi,QWORD[16+rsp]
    865 	DB	0F3h,0C3h		;repret
    866 $L$SEH_end_rsaz_512_mul_by_one:
    867 
    868 ALIGN	32
        ;-----------------------------------------------------------------------
        ; __rsaz_512_reduce (internal helper, register calling convention)
        ; Eight rounds; each round picks m = r8 * k, adds m * operand to the
        ; running value r8..r15 and shifts it down by one limb. This matches a
        ; word-by-word Montgomery reduction if k is the usual Montgomery
        ; constant - TODO confirm against the callers.
        ; In:   r8..r15   = 8-limb value (least significant limb in r8)
        ;       rbp       = pointer to the 8-qword operand
        ;       [136+rsp] = k, i.e. the caller's [128+rsp] seen past the
        ;                   return address
        ; Out:  r8..r15   = 8-limb result
        ; Clobb: rax, rbx, rcx, rdx, rsi, flags
        ;-----------------------------------------------------------------------
    869 __rsaz_512_reduce:
    870 	mov	rbx,r8
    871 	imul	rbx,QWORD[((128+8))+rsp]
    872 	mov	rax,QWORD[rbp]
    873 	mov	ecx,8
    874 	jmp	NEAR $L$reduction_loop
    875 
    876 ALIGN	32
    877 $L$reduction_loop:
        ; The low half of operand[0]*m is never added explicitly: "neg r8"
        ; sets CF exactly when r8 != 0, and that CF is used as the carry out
        ; of the discarded lowest limb (valid when m cancels r8, see header).
    878 	mul	rbx
    879 	mov	rax,QWORD[8+rbp]
    880 	neg	r8
    881 	mov	r8,rdx
    882 	adc	r8,0
    883 
    884 	mul	rbx
    885 	add	r9,rax
    886 	mov	rax,QWORD[16+rbp]
    887 	adc	rdx,0
    888 	add	r8,r9
    889 	mov	r9,rdx
    890 	adc	r9,0
    891 
    892 	mul	rbx
    893 	add	r10,rax
    894 	mov	rax,QWORD[24+rbp]
    895 	adc	rdx,0
    896 	add	r9,r10
    897 	mov	r10,rdx
    898 	adc	r10,0
    899 
    900 	mul	rbx
    901 	add	r11,rax
    902 	mov	rax,QWORD[32+rbp]
    903 	adc	rdx,0
    904 	add	r10,r11
        ; Start computing the next round's multiplier in rsi; mov leaves the
        ; carry from the add above intact for the adc below.
    905 	mov	rsi,QWORD[((128+8))+rsp]
    906 
    907 
    908 	adc	rdx,0
    909 	mov	r11,rdx
    910 
    911 	mul	rbx
    912 	add	r12,rax
    913 	mov	rax,QWORD[40+rbp]
    914 	adc	rdx,0
        ; r8 already holds its final value for this round: rsi = k * r8.
    915 	imul	rsi,r8
    916 	add	r11,r12
    917 	mov	r12,rdx
    918 	adc	r12,0
    919 
    920 	mul	rbx
    921 	add	r13,rax
    922 	mov	rax,QWORD[48+rbp]
    923 	adc	rdx,0
    924 	add	r12,r13
    925 	mov	r13,rdx
    926 	adc	r13,0
    927 
    928 	mul	rbx
    929 	add	r14,rax
    930 	mov	rax,QWORD[56+rbp]
    931 	adc	rdx,0
    932 	add	r13,r14
    933 	mov	r14,rdx
    934 	adc	r14,0
    935 
    936 	mul	rbx
    937 	mov	rbx,rsi
    938 	add	r15,rax
    939 	mov	rax,QWORD[rbp]
    940 	adc	rdx,0
    941 	add	r14,r15
    942 	mov	r15,rdx
    943 	adc	r15,0
    944 
    945 	dec	ecx
    946 	jne	NEAR $L$reduction_loop
    947 
    948 	DB	0F3h,0C3h		;repret
    949 
    950 
    951 ALIGN	32
        ;-----------------------------------------------------------------------
        ; __rsaz_512_subtract (internal helper, register calling convention)
        ; Stores r8..r15 to the result buffer, then adds (mask AND negated
        ; operand) to it and stores again - i.e. subtracts the operand when the
        ; mask is all-ones and leaves the value unchanged when it is zero,
        ; without branching on the mask.
        ; In:   r8..r15 = 8-limb value, rdi = result buffer (8 qwords),
        ;       rbp = pointer to the 8-qword operand, rcx = 0 or all-ones mask
        ; Out:  [rdi] and r8..r15 = final 8-limb value
        ; Clobb: flags
        ; The negation is "neg" on limb 0 and "not" on limbs 1..7, which equals
        ; the two's complement only when limb 0 of the operand is non-zero
        ; (presumably guaranteed because the operand is an odd modulus).
        ;-----------------------------------------------------------------------
    952 __rsaz_512_subtract:
    953 	mov	QWORD[rdi],r8
    954 	mov	QWORD[8+rdi],r9
    955 	mov	QWORD[16+rdi],r10
    956 	mov	QWORD[24+rdi],r11
    957 	mov	QWORD[32+rdi],r12
    958 	mov	QWORD[40+rdi],r13
    959 	mov	QWORD[48+rdi],r14
    960 	mov	QWORD[56+rdi],r15
    961 
    962 	mov	r8,QWORD[rbp]
    963 	mov	r9,QWORD[8+rbp]
    964 	neg	r8
    965 	not	r9
    966 	and	r8,rcx
    967 	mov	r10,QWORD[16+rbp]
    968 	and	r9,rcx
    969 	not	r10
    970 	mov	r11,QWORD[24+rbp]
    971 	and	r10,rcx
    972 	not	r11
    973 	mov	r12,QWORD[32+rbp]
    974 	and	r11,rcx
    975 	not	r12
    976 	mov	r13,QWORD[40+rbp]
    977 	and	r12,rcx
    978 	not	r13
    979 	mov	r14,QWORD[48+rbp]
    980 	and	r13,rcx
    981 	not	r14
    982 	mov	r15,QWORD[56+rbp]
    983 	and	r14,rcx
    984 	not	r15
    985 	and	r15,rcx
    986 
        ; value + (mask & -operand); the carry out of the top limb is dropped.
    987 	add	r8,QWORD[rdi]
    988 	adc	r9,QWORD[8+rdi]
    989 	adc	r10,QWORD[16+rdi]
    990 	adc	r11,QWORD[24+rdi]
    991 	adc	r12,QWORD[32+rdi]
    992 	adc	r13,QWORD[40+rdi]
    993 	adc	r14,QWORD[48+rdi]
    994 	adc	r15,QWORD[56+rdi]
    995 
    996 	mov	QWORD[rdi],r8
    997 	mov	QWORD[8+rdi],r9
    998 	mov	QWORD[16+rdi],r10
    999 	mov	QWORD[24+rdi],r11
   1000 	mov	QWORD[32+rdi],r12
   1001 	mov	QWORD[40+rdi],r13
   1002 	mov	QWORD[48+rdi],r14
   1003 	mov	QWORD[56+rdi],r15
   1004 
   1005 	DB	0F3h,0C3h		;repret
   1006 
   1007 
   1008 ALIGN	32
        ;-----------------------------------------------------------------------
        ; __rsaz_512_mul (internal helper, register calling convention)
        ; Schoolbook 8x8-limb multiply; writes the 16-limb product to the
        ; bottom of the caller's frame.
        ; In:   rsi = first factor (8 qwords)
        ;       rbp = second factor (8 qwords), rbx = its first limb
        ; Out:  [8+rsp .. 135+rsp] as seen here (the caller's
        ;       [rsp .. 127+rsp]) = product, least significant limb first
        ; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
        ;-----------------------------------------------------------------------
   1009 __rsaz_512_mul:
        ; Skip the return address: rdi -> the caller's [rsp].
   1010 	lea	rdi,[8+rsp]
   1011 
        ; First row: factor1 * rbx; limb 0 is stored, the rest stays in r8..r15.
   1012 	mov	rax,QWORD[rsi]
   1013 	mul	rbx
   1014 	mov	QWORD[rdi],rax
   1015 	mov	rax,QWORD[8+rsi]
   1016 	mov	r8,rdx
   1017 
   1018 	mul	rbx
   1019 	add	r8,rax
   1020 	mov	rax,QWORD[16+rsi]
   1021 	mov	r9,rdx
   1022 	adc	r9,0
   1023 
   1024 	mul	rbx
   1025 	add	r9,rax
   1026 	mov	rax,QWORD[24+rsi]
   1027 	mov	r10,rdx
   1028 	adc	r10,0
   1029 
   1030 	mul	rbx
   1031 	add	r10,rax
   1032 	mov	rax,QWORD[32+rsi]
   1033 	mov	r11,rdx
   1034 	adc	r11,0
   1035 
   1036 	mul	rbx
   1037 	add	r11,rax
   1038 	mov	rax,QWORD[40+rsi]
   1039 	mov	r12,rdx
   1040 	adc	r12,0
   1041 
   1042 	mul	rbx
   1043 	add	r12,rax
   1044 	mov	rax,QWORD[48+rsi]
   1045 	mov	r13,rdx
   1046 	adc	r13,0
   1047 
   1048 	mul	rbx
   1049 	add	r13,rax
   1050 	mov	rax,QWORD[56+rsi]
   1051 	mov	r14,rdx
   1052 	adc	r14,0
   1053 
   1054 	mul	rbx
   1055 	add	r14,rax
   1056 	mov	rax,QWORD[rsi]
   1057 	mov	r15,rdx
   1058 	adc	r15,0
   1059 
   1060 	lea	rbp,[8+rbp]
   1061 	lea	rdi,[8+rdi]
   1062 
   1063 	mov	ecx,7
   1064 	jmp	NEAR $L$oop_mul
   1065 
   1066 ALIGN	32
        ; Seven more rows: each pass multiplies factor1 by the next limb of
        ; factor2, adds it to r8..r15 and retires one product limb to [rdi].
   1067 $L$oop_mul:
   1068 	mov	rbx,QWORD[rbp]
   1069 	mul	rbx
   1070 	add	r8,rax
   1071 	mov	rax,QWORD[8+rsi]
   1072 	mov	QWORD[rdi],r8
   1073 	mov	r8,rdx
   1074 	adc	r8,0
   1075 
   1076 	mul	rbx
   1077 	add	r9,rax
   1078 	mov	rax,QWORD[16+rsi]
   1079 	adc	rdx,0
   1080 	add	r8,r9
   1081 	mov	r9,rdx
   1082 	adc	r9,0
   1083 
   1084 	mul	rbx
   1085 	add	r10,rax
   1086 	mov	rax,QWORD[24+rsi]
   1087 	adc	rdx,0
   1088 	add	r9,r10
   1089 	mov	r10,rdx
   1090 	adc	r10,0
   1091 
   1092 	mul	rbx
   1093 	add	r11,rax
   1094 	mov	rax,QWORD[32+rsi]
   1095 	adc	rdx,0
   1096 	add	r10,r11
   1097 	mov	r11,rdx
   1098 	adc	r11,0
   1099 
   1100 	mul	rbx
   1101 	add	r12,rax
   1102 	mov	rax,QWORD[40+rsi]
   1103 	adc	rdx,0
   1104 	add	r11,r12
   1105 	mov	r12,rdx
   1106 	adc	r12,0
   1107 
   1108 	mul	rbx
   1109 	add	r13,rax
   1110 	mov	rax,QWORD[48+rsi]
   1111 	adc	rdx,0
   1112 	add	r12,r13
   1113 	mov	r13,rdx
   1114 	adc	r13,0
   1115 
   1116 	mul	rbx
   1117 	add	r14,rax
   1118 	mov	rax,QWORD[56+rsi]
   1119 	adc	rdx,0
   1120 	add	r13,r14
   1121 	mov	r14,rdx
        ; lea does not touch flags, so the carry from the add above survives.
   1122 	lea	rbp,[8+rbp]
   1123 	adc	r14,0
   1124 
   1125 	mul	rbx
   1126 	add	r15,rax
   1127 	mov	rax,QWORD[rsi]
   1128 	adc	rdx,0
   1129 	add	r14,r15
   1130 	mov	r15,rdx
   1131 	adc	r15,0
   1132 
   1133 	lea	rdi,[8+rdi]
   1134 
   1135 	dec	ecx
   1136 	jnz	NEAR $L$oop_mul
   1137 
        ; Store the remaining high 8 limbs of the product.
   1138 	mov	QWORD[rdi],r8
   1139 	mov	QWORD[8+rdi],r9
   1140 	mov	QWORD[16+rdi],r10
   1141 	mov	QWORD[24+rdi],r11
   1142 	mov	QWORD[32+rdi],r12
   1143 	mov	QWORD[40+rdi],r13
   1144 	mov	QWORD[48+rdi],r14
   1145 	mov	QWORD[56+rdi],r15
   1146 
   1147 	DB	0F3h,0C3h		;repret
   1148 
   1149 global	rsaz_512_scatter4
   1150 
        ;-----------------------------------------------------------------------
        ; rsaz_512_scatter4
        ; Stores an 8-qword value into one column of an interleaved table.
        ; ABI:  Win64 leaf function; arguments are used in their incoming
        ;       registers.
        ; In:   rcx = table base
        ;       rdx = source, 8 qwords
        ;       r8d = column index (only the low 32 bits are used)
        ; Layout written: limb i -> low dword at table + 128*i + 4*index,
        ;       high dword 64 bytes after it.
        ; Clobb: rax, rcx, rdx, r8, r9, flags
        ;-----------------------------------------------------------------------
   1151 ALIGN	16
   1152 rsaz_512_scatter4:
        ; The index is used in 64-bit addressing; the Win64 ABI leaves the
        ; upper half of a 32-bit register argument undefined, so clear it here
        ; the same way the mul_gather4/mul_scatter4 routines do with r9d.
        	mov	r8d,r8d
   1153 	lea	rcx,[r8*4+rcx]
   1154 	mov	r9d,8
   1155 	jmp	NEAR $L$oop_scatter
   1156 ALIGN	16
        ; One limb per pass: split the qword into two dwords 64 bytes apart,
        ; then step 128 bytes to the next limb's rows.
   1157 $L$oop_scatter:
   1158 	mov	rax,QWORD[rdx]
   1159 	lea	rdx,[8+rdx]
   1160 	mov	DWORD[rcx],eax
   1161 	shr	rax,32
   1162 	mov	DWORD[64+rcx],eax
   1163 	lea	rcx,[128+rcx]
   1164 	dec	r9d
   1165 	jnz	NEAR $L$oop_scatter
   1166 	DB	0F3h,0C3h		;repret
   1167 
   1168 
   1169 global	rsaz_512_gather4
   1170 
        ;-----------------------------------------------------------------------
        ; rsaz_512_gather4
        ; Reads one column of an interleaved table back into 8 contiguous
        ; qwords (the inverse of rsaz_512_scatter4's layout).
        ; ABI:  Win64 leaf function; arguments are used in their incoming
        ;       registers.
        ; In:   rcx = destination, 8 qwords
        ;       rdx = table base
        ;       r8d = column index (only the low 32 bits are used)
        ; Layout read: limb i <- low dword at table + 128*i + 4*index,
        ;       high dword 64 bytes after it.
        ; Clobb: rax, rcx, rdx, r8, r9, flags
        ; NOTE(review): the load addresses depend on the index; if the index
        ; is secret this is observable through the cache - confirm the
        ; caller's requirements.
        ;-----------------------------------------------------------------------
   1171 ALIGN	16
   1172 rsaz_512_gather4:
        ; The index is used in 64-bit addressing; the Win64 ABI leaves the
        ; upper half of a 32-bit register argument undefined, so clear it here
        ; the same way the mul_gather4/mul_scatter4 routines do with r9d.
        	mov	r8d,r8d
   1173 	lea	rdx,[r8*4+rdx]
   1174 	mov	r9d,8
   1175 	jmp	NEAR $L$oop_gather
   1176 ALIGN	16
        ; One limb per pass: both 32-bit loads zero-extend, so shifting the
        ; high half up and OR-ing rebuilds the qword. r8 is free as scratch
        ; once the column address has been formed.
   1177 $L$oop_gather:
   1178 	mov	eax,DWORD[rdx]
   1179 	mov	r8d,DWORD[64+rdx]
   1180 	lea	rdx,[128+rdx]
   1181 	shl	r8,32
   1182 	or	rax,r8
   1183 	mov	QWORD[rcx],rax
   1184 	lea	rcx,[8+rcx]
   1185 	dec	r9d
   1186 	jnz	NEAR $L$oop_gather
   1187 	DB	0F3h,0C3h		;repret
   1188 
   1189 EXTERN	__imp_RtlVirtualUnwind
   1190 
        ;-----------------------------------------------------------------------
        ; se_handler
        ; Win64 language-specific exception handler shared by the five
        ; rsaz_512_* functions listed in .pdata/.xdata. It rebuilds the
        ; caller's non-volatile registers in the CONTEXT record, because those
        ; functions have no unwind codes, then continues the search.
        ; In:   rcx = EXCEPTION_RECORD*, rdx = establisher frame,
        ;       r8  = CONTEXT*, r9 = DISPATCHER_CONTEXT*
        ; Out:  eax = 1 (ExceptionContinueSearch)
        ; HandlerData holds two image-relative offsets: the function's body
        ; label and its epilogue label.
        ;-----------------------------------------------------------------------
   1191 ALIGN	16
   1192 se_handler:
   1193 	push	rsi
   1194 	push	rdi
   1195 	push	rbx
   1196 	push	rbp
   1197 	push	r12
   1198 	push	r13
   1199 	push	r14
   1200 	push	r15
   1201 	pushfq
   1202 	sub	rsp,64
   1203 
   1204 	mov	rax,QWORD[120+r8]	;context->Rax
   1205 	mov	rbx,QWORD[248+r8]	;context->Rip
   1206 
   1207 	mov	rsi,QWORD[8+r9]		;disp->ImageBase
   1208 	mov	r11,QWORD[56+r9]	;disp->HandlerData
   1209 
        ; Rip before the body label: still in the prologue, where rax holds
        ; the function's entry rsp.
   1210 	mov	r10d,DWORD[r11]
   1211 	lea	r10,[r10*1+rsi]
   1212 	cmp	rbx,r10
   1213 	jb	NEAR $L$common_seh_tail
   1214 
   1215 	mov	rax,QWORD[152+r8]	;context->Rsp
   1216 
        ; Rip at or past the epilogue label: rsp and the pushed registers are
        ; already restored, so Rsp is the entry rsp.
   1217 	mov	r10d,DWORD[4+r11]
   1218 	lea	r10,[r10*1+rsi]
   1219 	cmp	rbx,r10
   1220 	jae	NEAR $L$common_seh_tail
   1221 
        ; Inside the body: step over the 152-byte frame and the six pushes,
        ; then copy the saved registers into the context.
   1222 	lea	rax,[((128+24+48))+rax]
   1223 
   1224 	mov	rbx,QWORD[((-8))+rax]
   1225 	mov	rbp,QWORD[((-16))+rax]
   1226 	mov	r12,QWORD[((-24))+rax]
   1227 	mov	r13,QWORD[((-32))+rax]
   1228 	mov	r14,QWORD[((-40))+rax]
   1229 	mov	r15,QWORD[((-48))+rax]
   1230 	mov	QWORD[144+r8],rbx	;context->Rbx
   1231 	mov	QWORD[160+r8],rbp	;context->Rbp
   1232 	mov	QWORD[216+r8],r12	;context->R12
   1233 	mov	QWORD[224+r8],r13	;context->R13
   1234 	mov	QWORD[232+r8],r14	;context->R14
   1235 	mov	QWORD[240+r8],r15	;context->R15
   1236 
        ; rax = entry rsp of the interrupted function; its prologue stored
        ; rdi/rsi in the home slots at [8+rsp] and [16+rsp].
   1237 $L$common_seh_tail:
   1238 	mov	rdi,QWORD[8+rax]
   1239 	mov	rsi,QWORD[16+rax]
   1240 	mov	QWORD[152+r8],rax	;context->Rsp
   1241 	mov	QWORD[168+r8],rsi	;context->Rsi
   1242 	mov	QWORD[176+r8],rdi	;context->Rdi
   1243 
        ; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
   1244 	mov	rdi,QWORD[40+r9]
   1245 	mov	rsi,r8
   1246 	mov	ecx,154
   1247 	DD	0xa548f3fc		;cld; rep movsq
   1248 
        ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
   1249 	mov	rsi,r9
   1250 	xor	rcx,rcx
   1251 	mov	rdx,QWORD[8+rsi]
   1252 	mov	r8,QWORD[rsi]
   1253 	mov	r9,QWORD[16+rsi]
   1254 	mov	r10,QWORD[40+rsi]
   1255 	lea	r11,[56+rsi]
   1256 	lea	r12,[24+rsi]
   1257 	mov	QWORD[32+rsp],r10
   1258 	mov	QWORD[40+rsp],r11
   1259 	mov	QWORD[48+rsp],r12
   1260 	mov	QWORD[56+rsp],rcx
   1261 	call	QWORD[__imp_RtlVirtualUnwind]
   1262 
   1263 	mov	eax,1
   1264 	add	rsp,64
   1265 	popfq
   1266 	pop	r15
   1267 	pop	r14
   1268 	pop	r13
   1269 	pop	r12
   1270 	pop	rbp
   1271 	pop	rbx
   1272 	pop	rdi
   1273 	pop	rsi
   1274 	DB	0F3h,0C3h		;repret
   1275 
   1276 
   1277 section	.pdata rdata align=4
        ; Function table: one entry per function with a frame, each made of
        ; three image-relative offsets - start label, end label and the
        ; matching unwind-info record in .xdata.
   1278 ALIGN	4
   1279 	DD	$L$SEH_begin_rsaz_512_sqr wrt ..imagebase
   1280 	DD	$L$SEH_end_rsaz_512_sqr wrt ..imagebase
   1281 	DD	$L$SEH_info_rsaz_512_sqr wrt ..imagebase
   1282 
   1283 	DD	$L$SEH_begin_rsaz_512_mul wrt ..imagebase
   1284 	DD	$L$SEH_end_rsaz_512_mul wrt ..imagebase
   1285 	DD	$L$SEH_info_rsaz_512_mul wrt ..imagebase
   1286 
   1287 	DD	$L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
   1288 	DD	$L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
   1289 	DD	$L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
   1290 
   1291 	DD	$L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
   1292 	DD	$L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
   1293 	DD	$L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
   1294 
   1295 	DD	$L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
   1296 	DD	$L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
   1297 	DD	$L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
   1298 
   1299 section	.xdata rdata align=8
        ; Unwind-info records referenced from .pdata. Each record is:
        ;   DB 9,0,0,0 - header byte 9 (version 1 with the exception-handler
        ;                flag), then zero prologue size, zero unwind codes and
        ;                no frame register;
        ;   DD handler - image-relative offset of se_handler;
        ;   DD a,b     - handler data read by se_handler: the function's body
        ;                label and epilogue label.
   1300 ALIGN	8
   1301 $L$SEH_info_rsaz_512_sqr:
   1302 DB	9,0,0,0
   1303 	DD	se_handler wrt ..imagebase
   1304 	DD	$L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
   1305 $L$SEH_info_rsaz_512_mul:
   1306 DB	9,0,0,0
   1307 	DD	se_handler wrt ..imagebase
   1308 	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
   1309 $L$SEH_info_rsaz_512_mul_gather4:
   1310 DB	9,0,0,0
   1311 	DD	se_handler wrt ..imagebase
   1312 	DD	$L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
   1313 $L$SEH_info_rsaz_512_mul_scatter4:
   1314 DB	9,0,0,0
   1315 	DD	se_handler wrt ..imagebase
   1316 	DD	$L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
   1317 $L$SEH_info_rsaz_512_mul_by_one:
   1318 DB	9,0,0,0
   1319 	DD	se_handler wrt ..imagebase
   1320 	DD	$L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase
   1321