Home | History | Annotate | Download | only in fipsmodule
      1 default	rel
        ; "default rel": memory operands that name a symbol are assembled as
        ; RIP-relative unless stated otherwise.
      2 %define XMMWORD
      3 %define YMMWORD
      4 %define ZMMWORD
        ; The three size keywords above are defined as empty, so an operand
        ; written as XMMWORD[...] in this file assembles as a plain [...].
      5 section	.text code align=64
      6 
      7 
      8 EXTERN	OPENSSL_ia32cap_P
        ; NOTE(review): OPENSSL_ia32cap_P is declared but no instruction in the
        ; visible part of this file references it.
      9 
     10 global	bn_mul_mont
        ; bn_mul_mont is the only symbol exported with "global" in the visible
        ; part of this file.
     11 
     12 ALIGN	16
        ;-----------------------------------------------------------------------
        ; bn_mul_mont -- Montgomery multiplication, Win64 entry point.
        ;
        ; ABI:  Microsoft x64 on entry. The prologue parks rdi/rsi in the
        ;       caller-provided slots at [8+rsp]/[16+rsp] and then shuffles the
        ;       arguments into the register positions used by the body:
        ;         rdi = arg1 (rcx)      result vector, num words (written)
        ;         rsi = arg2 (rdx)      first multiplicand a[]
        ;         rdx = arg3 (r8)       second multiplicand b[] (kept in r12)
        ;         rcx = arg4 (r9)       vector n[] that is multiplied by m and
        ;                               subtracted at the end (the modulus)
        ;         r8  = arg5 [40+rsp]   pointer to the 64-bit constant n0
        ;         r9  = arg6 [48+rsp]   num, word count (only low 32 bits used)
        ;       NOTE(review): presumably the C prototype is
        ;       int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the
        ;       C header; it is not visible in this file.
        ; Out:  rax = 1 on every path through this file.
        ;
        ; Dispatch (after the argument shuffle):
        ;   num & 3 != 0 or num < 8        -> generic path at $L$mul_enter
        ;   otherwise, b != a              -> $L$mul4x_enter
        ;   otherwise, num & 7 == 0        -> $L$sqr8x_enter
        ;   otherwise                      -> $L$mul4x_enter
        ;
        ; Generic path stack frame (rsp after the page walk):
        ;   [rsp + 8*j], j = 0..num   temporary vector tp[0..num]
        ;   [rsp + 8*num + 8]         caller's rsp as seen at function entry
        ;-----------------------------------------------------------------------
     13 bn_mul_mont:
     14 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
     15 	mov	QWORD[16+rsp],rsi
     16 	mov	rax,rsp
     17 $L$SEH_begin_bn_mul_mont:
     18 	mov	rdi,rcx
     19 	mov	rsi,rdx
     20 	mov	rdx,r8
     21 	mov	rcx,r9
     22 	mov	r8,QWORD[40+rsp]
     23 	mov	r9,QWORD[48+rsp]
     24 
     25 
     26 
     27 	mov	r9d,r9d	; zero-extend num to 64 bits
     28 	mov	rax,rsp	; rax = entry rsp, stored in the frame later
     29 
        ; Choose an implementation based on num and on whether a == b.
     30 	test	r9d,3
     31 	jnz	NEAR $L$mul_enter
     32 	cmp	r9d,8
     33 	jb	NEAR $L$mul_enter
     34 	cmp	rdx,rsi
     35 	jne	NEAR $L$mul4x_enter
     36 	test	r9d,7
     37 	jz	NEAR $L$sqr8x_enter
     38 	jmp	NEAR $L$mul4x_enter
     39 
     40 ALIGN	16
     41 $L$mul_enter:
        ; Save the callee-saved registers this path uses. They land at
        ; [-8..-48 + entry rsp], which is where the epilogue reloads them from.
     42 	push	rbx
     43 
     44 	push	rbp
     45 
     46 	push	r12
     47 
     48 	push	r13
     49 
     50 	push	r14
     51 
     52 	push	r15
     53 
     54 
        ; r10 = (rsp - 8*(num+2)) rounded down to a multiple of 1024: the
        ; final stack pointer for the frame.
     55 	neg	r9
     56 	mov	r11,rsp
     57 	lea	r10,[((-16))+r9*8+rsp]
     58 	neg	r9
     59 	and	r10,-1024
     60 
     61 
     62 
     63 
     64 
     65 
     66 
     67 
     68 
        ; Move rsp to r10 plus a whole number of 4096-byte steps, then walk it
        ; down one 4096-byte step at a time, reading one word at each step,
        ; until rsp == r10.
     69 	sub	r11,r10
     70 	and	r11,-4096
     71 	lea	rsp,[r11*1+r10]
     72 	mov	r11,QWORD[rsp]
     73 	cmp	rsp,r10
     74 	ja	NEAR $L$mul_page_walk
     75 	jmp	NEAR $L$mul_page_walk_done
     76 
     77 ALIGN	16
     78 $L$mul_page_walk:
     79 	lea	rsp,[((-4096))+rsp]
     80 	mov	r11,QWORD[rsp]
     81 	cmp	rsp,r10
     82 	ja	NEAR $L$mul_page_walk
     83 $L$mul_page_walk_done:
     84 
     85 	mov	QWORD[8+r9*8+rsp],rax	; tp[num+1] = entry rsp
     86 
     87 $L$mul_body:
        ; Register roles from here on:
        ;   r12 = b[]     rbx = b[i]     rbp = m (per-pass multiplier)
        ;   r8  = n0      r14 = i        r15 = j
        ;   r10, r11, r13, rdx = running carries / partial sums
        ;
        ; First pass (i = 0):
        ;   m  = low64(a[0]*b[0]) * n0
        ;   tp = (a[] * b[0] + n[] * m) >> 64   (the lowest word is only added
        ;                                        for its carry and then dropped)
     88 	mov	r12,rdx
     89 	mov	r8,QWORD[r8]
     90 	mov	rbx,QWORD[r12]
     91 	mov	rax,QWORD[rsi]
     92 
     93 	xor	r14,r14
     94 	xor	r15,r15
     95 
     96 	mov	rbp,r8
     97 	mul	rbx
     98 	mov	r10,rax
     99 	mov	rax,QWORD[rcx]
    100 
    101 	imul	rbp,r10	; rbp = m
    102 	mov	r11,rdx
    103 
    104 	mul	rbp
    105 	add	r10,rax	; low word discarded; only the carry is kept
    106 	mov	rax,QWORD[8+rsi]
    107 	adc	rdx,0
    108 	mov	r13,rdx
    109 
    110 	lea	r15,[1+r15]
    111 	jmp	NEAR $L$1st_enter
    112 
    113 ALIGN	16
    114 $L$1st:
    115 	add	r13,rax
    116 	mov	rax,QWORD[r15*8+rsi]
    117 	adc	rdx,0
    118 	add	r13,r11
    119 	mov	r11,r10
    120 	adc	rdx,0
    121 	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2]
    122 	mov	r13,rdx
    123 
    124 $L$1st_enter:
    125 	mul	rbx	; a[j] * b[0]
    126 	add	r11,rax
    127 	mov	rax,QWORD[r15*8+rcx]
    128 	adc	rdx,0
    129 	lea	r15,[1+r15]
    130 	mov	r10,rdx
    131 
    132 	mul	rbp	; n[j] * m
    133 	cmp	r15,r9
    134 	jne	NEAR $L$1st
    135 
        ; Loop tail: fold in the last products and write tp[num-2],
        ; tp[num-1] and the top carry word tp[num].
    136 	add	r13,rax
    137 	mov	rax,QWORD[rsi]
    138 	adc	rdx,0
    139 	add	r13,r11
    140 	adc	rdx,0
    141 	mov	QWORD[((-16))+r15*8+rsp],r13
    142 	mov	r13,rdx
    143 	mov	r11,r10
    144 
    145 	xor	rdx,rdx
    146 	add	r13,r11
    147 	adc	rdx,0
    148 	mov	QWORD[((-8))+r9*8+rsp],r13
    149 	mov	QWORD[r9*8+rsp],rdx
    150 
    151 	lea	r14,[1+r14]
    152 	jmp	NEAR $L$outer
    153 ALIGN	16
    154 $L$outer:
        ; Passes i = 1 .. num-1:
        ;   m  = low64(tp[0] + a[0]*b[i]) * n0
        ;   tp = (tp + a[] * b[i] + n[] * m) >> 64
     155 	mov	rbx,QWORD[r14*8+r12]
    156 	xor	r15,r15
    157 	mov	rbp,r8
    158 	mov	r10,QWORD[rsp]
    159 	mul	rbx
    160 	add	r10,rax
    161 	mov	rax,QWORD[rcx]
    162 	adc	rdx,0
    163 
    164 	imul	rbp,r10	; rbp = m for this pass
    165 	mov	r11,rdx
    166 
    167 	mul	rbp
    168 	add	r10,rax	; low word discarded; only the carry is kept
    169 	mov	rax,QWORD[8+rsi]
    170 	adc	rdx,0
    171 	mov	r10,QWORD[8+rsp]
    172 	mov	r13,rdx
    173 
    174 	lea	r15,[1+r15]
    175 	jmp	NEAR $L$inner_enter
    176 
    177 ALIGN	16
    178 $L$inner:
    179 	add	r13,rax
    180 	mov	rax,QWORD[r15*8+rsi]
    181 	adc	rdx,0
    182 	add	r13,r10
    183 	mov	r10,QWORD[r15*8+rsp]
    184 	adc	rdx,0
    185 	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-2]
    186 	mov	r13,rdx
    187 
    188 $L$inner_enter:
    189 	mul	rbx	; a[j] * b[i]
    190 	add	r11,rax
    191 	mov	rax,QWORD[r15*8+rcx]
    192 	adc	rdx,0
    193 	add	r10,r11
    194 	mov	r11,rdx
    195 	adc	r11,0
    196 	lea	r15,[1+r15]
    197 
    198 	mul	rbp	; n[j] * m
    199 	cmp	r15,r9
    200 	jne	NEAR $L$inner
    201 
        ; Loop tail: as in the first pass, but the previous top word tp[num]
        ; (loaded into r10) is added in as well.
    202 	add	r13,rax
    203 	mov	rax,QWORD[rsi]
    204 	adc	rdx,0
    205 	add	r13,r10
    206 	mov	r10,QWORD[r15*8+rsp]
    207 	adc	rdx,0
    208 	mov	QWORD[((-16))+r15*8+rsp],r13
    209 	mov	r13,rdx
    210 
    211 	xor	rdx,rdx
    212 	add	r13,r11
    213 	adc	rdx,0
    214 	add	r13,r10
    215 	adc	rdx,0
    216 	mov	QWORD[((-8))+r9*8+rsp],r13
    217 	mov	QWORD[r9*8+rsp],rdx
    218 
    219 	lea	r14,[1+r14]
    220 	cmp	r14,r9
    221 	jb	NEAR $L$outer
    222 
        ; Final reduction, step 1: result[] = tp[] - n[] with borrow chain.
        ; The xor below clears CF for the first sbb; the mov/lea/dec/jnz
        ; instructions inside the loop leave CF untouched.
    223 	xor	r14,r14
    224 	mov	rax,QWORD[rsp]
    225 	lea	rsi,[rsp]
    226 	mov	r15,r9
    227 	jmp	NEAR $L$sub
    228 ALIGN	16
    229 $L$sub:
    230 	sbb	rax,QWORD[r14*8+rcx]
    231 	mov	QWORD[r14*8+rdi],rax
    232 	mov	rax,QWORD[8+r14*8+rsi]
    233 	lea	r14,[1+r14]
    234 	dec	r15
    235 	jnz	NEAR $L$sub
    236 
        ; Step 2: rax = tp[num] - borrow is used as a mask (expected to be 0
        ; or all-ones). The copy source is picked without a branch:
        ;   rsi = (tp & mask) | (result & ~mask)
        ; i.e. tp when the subtraction borrowed, else the difference that was
        ; just written to the result.
    237 	sbb	rax,0
    238 	xor	r14,r14
    239 	and	rsi,rax
    240 	not	rax
    241 	mov	rcx,rdi
    242 	and	rcx,rax
    243 	mov	r15,r9
    244 	or	rsi,rcx
    245 ALIGN	16
    246 $L$copy:
        ; Copy the selected source into the result and overwrite each tp word
        ; (with the loop index) so the intermediate value does not stay on the
        ; stack.
    247 	mov	rax,QWORD[r14*8+rsi]
    248 	mov	QWORD[r14*8+rsp],r14
    249 	mov	QWORD[r14*8+rdi],rax
    250 	lea	r14,[1+r14]
    251 	sub	r15,1
    252 	jnz	NEAR $L$copy
    253 
    254 	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved in the frame
    255 
    256 	mov	rax,1	; return value
    257 	mov	r15,QWORD[((-48))+rsi]
    258 
    259 	mov	r14,QWORD[((-40))+rsi]
    260 
    261 	mov	r13,QWORD[((-32))+rsi]
    262 
    263 	mov	r12,QWORD[((-24))+rsi]
    264 
    265 	mov	rbp,QWORD[((-16))+rsi]
    266 
    267 	mov	rbx,QWORD[((-8))+rsi]
    268 
    269 	lea	rsp,[rsi]
    270 
    271 $L$mul_epilogue:
    272 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    273 	mov	rsi,QWORD[16+rsp]
    274 	DB	0F3h,0C3h		;repret
    275 
    276 $L$SEH_end_bn_mul_mont:
    277 
    278 ALIGN	16
        ;-----------------------------------------------------------------------
        ; bn_mul4x_mont -- Montgomery multiplication, inner loops unrolled to
        ; four words per iteration.
        ;
        ; Not exported with "global" in the visible part of this file; reached
        ; from bn_mul_mont by a jump to $L$mul4x_enter. The prologue above that
        ; label performs the same Win64 argument shuffle as bn_mul_mont.
        ;
        ; Registers at $L$mul4x_enter:
        ;   rdi = result, rsi = a[], rdx = b[], rcx = n[],
        ;   r8 = pointer to n0, r9 = num (zero-extended), rax = entry rsp.
        ;   The dispatcher in bn_mul_mont only jumps here when num is a
        ;   multiple of 4 and at least 8.
        ; Out:  rax = 1.
        ;
        ; Stack frame (rsp after the page walk, 1024-byte aligned):
        ;   [rsp + 8*j], j = 0..num   temporary vector tp[0..num]
        ;   [rsp + 8*num + 8]         entry rsp
        ;   [rsp + 8*num + 16]        result pointer (rdi is reused as an
        ;                             accumulator inside the loops)
        ;-----------------------------------------------------------------------
    279 bn_mul4x_mont:
    280 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    281 	mov	QWORD[16+rsp],rsi
    282 	mov	rax,rsp
    283 $L$SEH_begin_bn_mul4x_mont:
    284 	mov	rdi,rcx
    285 	mov	rsi,rdx
    286 	mov	rdx,r8
    287 	mov	rcx,r9
    288 	mov	r8,QWORD[40+rsp]
    289 	mov	r9,QWORD[48+rsp]
    290 
    291 
    292 
    293 	mov	r9d,r9d	; zero-extend num to 64 bits
    294 	mov	rax,rsp
    295 
    296 $L$mul4x_enter:
    297 	push	rbx
    298 
    299 	push	rbp
    300 
    301 	push	r12
    302 
    303 	push	r13
    304 
    305 	push	r14
    306 
    307 	push	r15
    308 
    309 
        ; r10 = (rsp - 8*(num+4)) rounded down to a multiple of 1024.
    310 	neg	r9
    311 	mov	r11,rsp
    312 	lea	r10,[((-32))+r9*8+rsp]
    313 	neg	r9
    314 	and	r10,-1024
    315 
        ; Step rsp down to r10 in 4096-byte strides, reading one word at each
        ; stride.
    316 	sub	r11,r10
    317 	and	r11,-4096
    318 	lea	rsp,[r11*1+r10]
    319 	mov	r11,QWORD[rsp]
    320 	cmp	rsp,r10
    321 	ja	NEAR $L$mul4x_page_walk
    322 	jmp	NEAR $L$mul4x_page_walk_done
    323 
    324 $L$mul4x_page_walk:
    325 	lea	rsp,[((-4096))+rsp]
    326 	mov	r11,QWORD[rsp]
    327 	cmp	rsp,r10
    328 	ja	NEAR $L$mul4x_page_walk
    329 $L$mul4x_page_walk_done:
    330 
    331 	mov	QWORD[8+r9*8+rsp],rax	; save entry rsp
    332 
    333 $L$mul4x_body:
        ; Register roles:
        ;   r12 = b[]   rbx = b[i]   rbp = m   r8 = n0   r14 = i   r15 = j
        ;   r10/r11 alternate as the a[j]*b[i] carry; r13/rdi alternate as the
        ;   n[j]*m accumulator.
        ;
        ; First pass (i = 0): m = low64(a[0]*b[0]) * n0,
        ;                     tp = (a[] * b[0] + n[] * m) >> 64.
    334 	mov	QWORD[16+r9*8+rsp],rdi	; save result pointer
    335 	mov	r12,rdx
    336 	mov	r8,QWORD[r8]
    337 	mov	rbx,QWORD[r12]
    338 	mov	rax,QWORD[rsi]
    339 
    340 	xor	r14,r14
    341 	xor	r15,r15
    342 
    343 	mov	rbp,r8
    344 	mul	rbx
    345 	mov	r10,rax
    346 	mov	rax,QWORD[rcx]
    347 
    348 	imul	rbp,r10	; rbp = m
    349 	mov	r11,rdx
    350 
    351 	mul	rbp
    352 	add	r10,rax	; low word discarded; only the carry is kept
    353 	mov	rax,QWORD[8+rsi]
    354 	adc	rdx,0
    355 	mov	rdi,rdx
    356 
    357 	mul	rbx
    358 	add	r11,rax
    359 	mov	rax,QWORD[8+rcx]
    360 	adc	rdx,0
    361 	mov	r10,rdx
    362 
    363 	mul	rbp
    364 	add	rdi,rax
    365 	mov	rax,QWORD[16+rsi]
    366 	adc	rdx,0
    367 	add	rdi,r11
    368 	lea	r15,[4+r15]
    369 	adc	rdx,0
    370 	mov	QWORD[rsp],rdi	; tp[0]
    371 	mov	r13,rdx
    372 	jmp	NEAR $L$1st4x
    373 ALIGN	16
    374 $L$1st4x:
        ; Four (a[j]*b[0], n[j]*m) product pairs per iteration; each pair
        ; stores one tp word.
    375 	mul	rbx
    376 	add	r10,rax
    377 	mov	rax,QWORD[((-16))+r15*8+rcx]
    378 	adc	rdx,0
    379 	mov	r11,rdx
    380 
    381 	mul	rbp
    382 	add	r13,rax
    383 	mov	rax,QWORD[((-8))+r15*8+rsi]
    384 	adc	rdx,0
    385 	add	r13,r10
    386 	adc	rdx,0
    387 	mov	QWORD[((-24))+r15*8+rsp],r13
    388 	mov	rdi,rdx
    389 
    390 	mul	rbx
    391 	add	r11,rax
    392 	mov	rax,QWORD[((-8))+r15*8+rcx]
    393 	adc	rdx,0
    394 	mov	r10,rdx
    395 
    396 	mul	rbp
    397 	add	rdi,rax
    398 	mov	rax,QWORD[r15*8+rsi]
    399 	adc	rdx,0
    400 	add	rdi,r11
    401 	adc	rdx,0
    402 	mov	QWORD[((-16))+r15*8+rsp],rdi
    403 	mov	r13,rdx
    404 
    405 	mul	rbx
    406 	add	r10,rax
    407 	mov	rax,QWORD[r15*8+rcx]
    408 	adc	rdx,0
    409 	mov	r11,rdx
    410 
    411 	mul	rbp
    412 	add	r13,rax
    413 	mov	rax,QWORD[8+r15*8+rsi]
    414 	adc	rdx,0
    415 	add	r13,r10
    416 	adc	rdx,0
    417 	mov	QWORD[((-8))+r15*8+rsp],r13
    418 	mov	rdi,rdx
    419 
    420 	mul	rbx
    421 	add	r11,rax
    422 	mov	rax,QWORD[8+r15*8+rcx]
    423 	adc	rdx,0
    424 	lea	r15,[4+r15]
    425 	mov	r10,rdx
    426 
    427 	mul	rbp
    428 	add	rdi,rax
    429 	mov	rax,QWORD[((-16))+r15*8+rsi]
    430 	adc	rdx,0
    431 	add	rdi,r11
    432 	adc	rdx,0
    433 	mov	QWORD[((-32))+r15*8+rsp],rdi
    434 	mov	r13,rdx
    435 	cmp	r15,r9
    436 	jb	NEAR $L$1st4x
    437 
        ; Tail of the first pass: the last two product pairs, then the top
        ; words tp[num-1] and tp[num] (carry).
    438 	mul	rbx
    439 	add	r10,rax
    440 	mov	rax,QWORD[((-16))+r15*8+rcx]
    441 	adc	rdx,0
    442 	mov	r11,rdx
    443 
    444 	mul	rbp
    445 	add	r13,rax
    446 	mov	rax,QWORD[((-8))+r15*8+rsi]
    447 	adc	rdx,0
    448 	add	r13,r10
    449 	adc	rdx,0
    450 	mov	QWORD[((-24))+r15*8+rsp],r13
    451 	mov	rdi,rdx
    452 
    453 	mul	rbx
    454 	add	r11,rax
    455 	mov	rax,QWORD[((-8))+r15*8+rcx]
    456 	adc	rdx,0
    457 	mov	r10,rdx
    458 
    459 	mul	rbp
    460 	add	rdi,rax
    461 	mov	rax,QWORD[rsi]	; reload a[0] for the next pass
    462 	adc	rdx,0
    463 	add	rdi,r11
    464 	adc	rdx,0
    465 	mov	QWORD[((-16))+r15*8+rsp],rdi
    466 	mov	r13,rdx
    467 
    468 	xor	rdi,rdi
    469 	add	r13,r10
    470 	adc	rdi,0
    471 	mov	QWORD[((-8))+r15*8+rsp],r13
    472 	mov	QWORD[r15*8+rsp],rdi
    473 
    474 	lea	r14,[1+r14]
    475 ALIGN	4
    476 $L$outer4x:
        ; Passes i = 1 .. num-1: m = low64(tp[0] + a[0]*b[i]) * n0,
        ;                        tp = (tp + a[] * b[i] + n[] * m) >> 64.
    477 	mov	rbx,QWORD[r14*8+r12]
    478 	xor	r15,r15
    479 	mov	r10,QWORD[rsp]
    480 	mov	rbp,r8
    481 	mul	rbx
    482 	add	r10,rax
    483 	mov	rax,QWORD[rcx]
    484 	adc	rdx,0
    485 
    486 	imul	rbp,r10	; rbp = m for this pass
    487 	mov	r11,rdx
    488 
    489 	mul	rbp
    490 	add	r10,rax	; low word discarded; only the carry is kept
    491 	mov	rax,QWORD[8+rsi]
    492 	adc	rdx,0
    493 	mov	rdi,rdx
    494 
    495 	mul	rbx
    496 	add	r11,rax
    497 	mov	rax,QWORD[8+rcx]
    498 	adc	rdx,0
    499 	add	r11,QWORD[8+rsp]
    500 	adc	rdx,0
    501 	mov	r10,rdx
    502 
    503 	mul	rbp
    504 	add	rdi,rax
    505 	mov	rax,QWORD[16+rsi]
    506 	adc	rdx,0
    507 	add	rdi,r11
    508 	lea	r15,[4+r15]
    509 	adc	rdx,0
    510 	mov	QWORD[rsp],rdi
    511 	mov	r13,rdx
    512 	jmp	NEAR $L$inner4x
    513 ALIGN	16
    514 $L$inner4x:
        ; Same shape as $L$1st4x, with the previous tp word added to each
        ; a[j]*b[i] partial sum.
    515 	mul	rbx
    516 	add	r10,rax
    517 	mov	rax,QWORD[((-16))+r15*8+rcx]
    518 	adc	rdx,0
    519 	add	r10,QWORD[((-16))+r15*8+rsp]
    520 	adc	rdx,0
    521 	mov	r11,rdx
    522 
    523 	mul	rbp
    524 	add	r13,rax
    525 	mov	rax,QWORD[((-8))+r15*8+rsi]
    526 	adc	rdx,0
    527 	add	r13,r10
    528 	adc	rdx,0
    529 	mov	QWORD[((-24))+r15*8+rsp],r13
    530 	mov	rdi,rdx
    531 
    532 	mul	rbx
    533 	add	r11,rax
    534 	mov	rax,QWORD[((-8))+r15*8+rcx]
    535 	adc	rdx,0
    536 	add	r11,QWORD[((-8))+r15*8+rsp]
    537 	adc	rdx,0
    538 	mov	r10,rdx
    539 
    540 	mul	rbp
    541 	add	rdi,rax
    542 	mov	rax,QWORD[r15*8+rsi]
    543 	adc	rdx,0
    544 	add	rdi,r11
    545 	adc	rdx,0
    546 	mov	QWORD[((-16))+r15*8+rsp],rdi
    547 	mov	r13,rdx
    548 
    549 	mul	rbx
    550 	add	r10,rax
    551 	mov	rax,QWORD[r15*8+rcx]
    552 	adc	rdx,0
    553 	add	r10,QWORD[r15*8+rsp]
    554 	adc	rdx,0
    555 	mov	r11,rdx
    556 
    557 	mul	rbp
    558 	add	r13,rax
    559 	mov	rax,QWORD[8+r15*8+rsi]
    560 	adc	rdx,0
    561 	add	r13,r10
    562 	adc	rdx,0
    563 	mov	QWORD[((-8))+r15*8+rsp],r13
    564 	mov	rdi,rdx
    565 
    566 	mul	rbx
    567 	add	r11,rax
    568 	mov	rax,QWORD[8+r15*8+rcx]
    569 	adc	rdx,0
    570 	add	r11,QWORD[8+r15*8+rsp]
    571 	adc	rdx,0
    572 	lea	r15,[4+r15]
    573 	mov	r10,rdx
    574 
    575 	mul	rbp
    576 	add	rdi,rax
    577 	mov	rax,QWORD[((-16))+r15*8+rsi]
    578 	adc	rdx,0
    579 	add	rdi,r11
    580 	adc	rdx,0
    581 	mov	QWORD[((-32))+r15*8+rsp],rdi
    582 	mov	r13,rdx
    583 	cmp	r15,r9
    584 	jb	NEAR $L$inner4x
    585 
        ; Tail of the pass: last two product pairs, then the top words, with
        ; the previous top carry tp[num] added in. i is advanced here.
    586 	mul	rbx
    587 	add	r10,rax
    588 	mov	rax,QWORD[((-16))+r15*8+rcx]
    589 	adc	rdx,0
    590 	add	r10,QWORD[((-16))+r15*8+rsp]
    591 	adc	rdx,0
    592 	mov	r11,rdx
    593 
    594 	mul	rbp
    595 	add	r13,rax
    596 	mov	rax,QWORD[((-8))+r15*8+rsi]
    597 	adc	rdx,0
    598 	add	r13,r10
    599 	adc	rdx,0
    600 	mov	QWORD[((-24))+r15*8+rsp],r13
    601 	mov	rdi,rdx
    602 
    603 	mul	rbx
    604 	add	r11,rax
    605 	mov	rax,QWORD[((-8))+r15*8+rcx]
    606 	adc	rdx,0
    607 	add	r11,QWORD[((-8))+r15*8+rsp]
    608 	adc	rdx,0
    609 	lea	r14,[1+r14]
    610 	mov	r10,rdx
    611 
    612 	mul	rbp
    613 	add	rdi,rax
    614 	mov	rax,QWORD[rsi]	; reload a[0] for the next pass
    615 	adc	rdx,0
    616 	add	rdi,r11
    617 	adc	rdx,0
    618 	mov	QWORD[((-16))+r15*8+rsp],rdi
    619 	mov	r13,rdx
    620 
    621 	xor	rdi,rdi
    622 	add	r13,r10
    623 	adc	rdi,0
    624 	add	r13,QWORD[r9*8+rsp]
    625 	adc	rdi,0
    626 	mov	QWORD[((-8))+r15*8+rsp],r13
    627 	mov	QWORD[r15*8+rsp],rdi
    628 
    629 	cmp	r14,r9
    630 	jb	NEAR $L$outer4x
        ; Final reduction, step 1: result[] = tp[] - n[], four words per
        ; iteration. r15 = (num-4)/4 loop iterations; the first two words are
        ; subtracted before the loop and the last two after it. No instruction
        ; between the sub/sbb pairs changes CF, so the borrow chain is intact.
    631 	mov	rdi,QWORD[16+r9*8+rsp]	; reload the saved result pointer
    632 	lea	r15,[((-4))+r9]
    633 	mov	rax,QWORD[rsp]
    634 	pxor	xmm0,xmm0	; xmm0 = 0, used to wipe tp below
    635 	mov	rdx,QWORD[8+rsp]
    636 	shr	r15,2
    637 	lea	rsi,[rsp]
    638 	xor	r14,r14
    639 
    640 	sub	rax,QWORD[rcx]
    641 	mov	rbx,QWORD[16+rsi]
    642 	mov	rbp,QWORD[24+rsi]
    643 	sbb	rdx,QWORD[8+rcx]
    644 	jmp	NEAR $L$sub4x
    645 ALIGN	16
    646 $L$sub4x:
    647 	mov	QWORD[r14*8+rdi],rax
    648 	mov	QWORD[8+r14*8+rdi],rdx
    649 	sbb	rbx,QWORD[16+r14*8+rcx]
    650 	mov	rax,QWORD[32+r14*8+rsi]
    651 	mov	rdx,QWORD[40+r14*8+rsi]
    652 	sbb	rbp,QWORD[24+r14*8+rcx]
    653 	mov	QWORD[16+r14*8+rdi],rbx
    654 	mov	QWORD[24+r14*8+rdi],rbp
    655 	sbb	rax,QWORD[32+r14*8+rcx]
    656 	mov	rbx,QWORD[48+r14*8+rsi]
    657 	mov	rbp,QWORD[56+r14*8+rsi]
    658 	sbb	rdx,QWORD[40+r14*8+rcx]
    659 	lea	r14,[4+r14]
    660 	dec	r15
    661 	jnz	NEAR $L$sub4x
    662 
    663 	mov	QWORD[r14*8+rdi],rax
    664 	mov	rax,QWORD[32+r14*8+rsi]	; rax = tp[num], the top carry word
    665 	sbb	rbx,QWORD[16+r14*8+rcx]
    666 	mov	QWORD[8+r14*8+rdi],rdx
    667 	sbb	rbp,QWORD[24+r14*8+rcx]
    668 	mov	QWORD[16+r14*8+rdi],rbx
    669 
        ; Step 2: rax = tp[num] - borrow is used as a mask (expected to be 0
        ; or all-ones); rsi = (tp & mask) | (result & ~mask) selects the copy
        ; source without a branch.
    670 	sbb	rax,0
    671 	mov	QWORD[24+r14*8+rdi],rbp
    672 	xor	r14,r14
    673 	and	rsi,rax
    674 	not	rax
    675 	mov	rcx,rdi
    676 	and	rcx,rax
    677 	lea	r15,[((-4))+r9]
    678 	or	rsi,rcx
    679 	shr	r15,2
    680 
        ; Copy num*8 bytes from the selected source to the result, 16 bytes at
        ; a time, zeroing tp behind it. r14 is a byte offset here. The aligned
        ; stores to [rsp] rely on the 1024-byte frame alignment set up above.
    681 	movdqu	xmm1,XMMWORD[rsi]
    682 	movdqa	XMMWORD[rsp],xmm0
    683 	movdqu	XMMWORD[rdi],xmm1
    684 	jmp	NEAR $L$copy4x
    685 ALIGN	16
    686 $L$copy4x:
    687 	movdqu	xmm2,XMMWORD[16+r14*1+rsi]
    688 	movdqu	xmm1,XMMWORD[32+r14*1+rsi]
    689 	movdqa	XMMWORD[16+r14*1+rsp],xmm0
    690 	movdqu	XMMWORD[16+r14*1+rdi],xmm2
    691 	movdqa	XMMWORD[32+r14*1+rsp],xmm0
    692 	movdqu	XMMWORD[32+r14*1+rdi],xmm1
    693 	lea	r14,[32+r14]
    694 	dec	r15
    695 	jnz	NEAR $L$copy4x
    696 
    697 	movdqu	xmm2,XMMWORD[16+r14*1+rsi]
    698 	movdqa	XMMWORD[16+r14*1+rsp],xmm0
    699 	movdqu	XMMWORD[16+r14*1+rdi],xmm2
    700 	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved in the frame
    701 
    702 	mov	rax,1	; return value
    703 	mov	r15,QWORD[((-48))+rsi]
    704 
    705 	mov	r14,QWORD[((-40))+rsi]
    706 
    707 	mov	r13,QWORD[((-32))+rsi]
    708 
    709 	mov	r12,QWORD[((-24))+rsi]
    710 
    711 	mov	rbp,QWORD[((-16))+rsi]
    712 
    713 	mov	rbx,QWORD[((-8))+rsi]
    714 
    715 	lea	rsp,[rsi]
    716 
    717 $L$mul4x_epilogue:
    718 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    719 	mov	rsi,QWORD[16+rsp]
    720 	DB	0F3h,0C3h		;repret
    721 
    722 $L$SEH_end_bn_mul4x_mont:
    723 EXTERN	bn_sqr8x_internal
        ; bn_sqr8x_internal is defined outside this file; everything stated
        ; below about its inputs and outputs is inferred from how this wrapper
        ; uses it.
    724 
    725 
    726 ALIGN	32
        ;-----------------------------------------------------------------------
        ; bn_sqr8x_mont -- stack set-up, final subtraction and conditional copy
        ; around a call to bn_sqr8x_internal.
        ;
        ; Not exported with "global" in the visible part of this file; reached
        ; from bn_mul_mont by a jump to $L$sqr8x_enter, which is taken only
        ; when both multiplicand pointers are equal and num is a multiple of 8.
        ;
        ; Registers at $L$sqr8x_enter:
        ;   rdi = result, rsi = a[], rcx = n[], r8 = pointer to n0,
        ;   r9 = num, rax = entry rsp.
        ; Out:  rax = 1.
        ;
        ; Frame slots written here (rsp after the page walk, 64-byte aligned):
        ;   [32+rsp] = n0 value        [40+rsp] = entry rsp
        ;-----------------------------------------------------------------------
    727 bn_sqr8x_mont:
    728 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    729 	mov	QWORD[16+rsp],rsi
    730 	mov	rax,rsp
    731 $L$SEH_begin_bn_sqr8x_mont:
    732 	mov	rdi,rcx
    733 	mov	rsi,rdx
    734 	mov	rdx,r8
    735 	mov	rcx,r9
    736 	mov	r8,QWORD[40+rsp]
    737 	mov	r9,QWORD[48+rsp]
    738 
    739 
    740 
    741 	mov	rax,rsp
    742 
    743 $L$sqr8x_enter:
    744 	push	rbx
    745 
    746 	push	rbp
    747 
    748 	push	r12
    749 
    750 	push	r13
    751 
    752 	push	r14
    753 
    754 	push	r15
    755 
    756 $L$sqr8x_prologue:
    757 
        ; r10 = 32*num, r9 = -8*num (the 32-bit shl zero-extends num first).
    758 	mov	r10d,r9d
    759 	shl	r9d,3
    760 	shl	r10,3+2
    761 	neg	r9
    762 
    763 
    764 
    765 
    766 
    767 
        ; Pick the frame base rbp. The candidate frame is 64 + 16*num bytes
        ; below rsp; r11 = (candidate - rsi) mod 4096 is its distance from the
        ; input a[] within a 4096-byte window, and rbp is lowered further
        ; depending on how r11 compares with 32*num.
        ; NOTE(review): presumably this controls how the frame aliases a[]
        ; modulo 4096 -- confirm against the generator script.
    768 	lea	r11,[((-64))+r9*2+rsp]
    769 	mov	rbp,rsp
    770 	mov	r8,QWORD[r8]	; r8 = n0 value
    771 	sub	r11,rsi
    772 	and	r11,4095
    773 	cmp	r10,r11
    774 	jb	NEAR $L$sqr8x_sp_alt
    775 	sub	rbp,r11
    776 	lea	rbp,[((-64))+r9*2+rbp]
    777 	jmp	NEAR $L$sqr8x_sp_done
    778 
    779 ALIGN	32
    780 $L$sqr8x_sp_alt:
        ; r11 = max(r11 - (4096 - 64 - 16*num), 0): cmovc zeroes it when the
        ; subtraction borrows.
    781 	lea	r10,[((4096-64))+r9*2]
    782 	lea	rbp,[((-64))+r9*2+rbp]
    783 	sub	r11,r10
    784 	mov	r10,0
    785 	cmovc	r11,r10
    786 	sub	rbp,r11
    787 $L$sqr8x_sp_done:
        ; Align the frame to 64 bytes, then step rsp down to it in 4096-byte
        ; strides, reading one word at each stride.
    788 	and	rbp,-64
    789 	mov	r11,rsp
    790 	sub	r11,rbp
    791 	and	r11,-4096
    792 	lea	rsp,[rbp*1+r11]
    793 	mov	r10,QWORD[rsp]
    794 	cmp	rsp,rbp
    795 	ja	NEAR $L$sqr8x_page_walk
    796 	jmp	NEAR $L$sqr8x_page_walk_done
    797 
    798 ALIGN	16
    799 $L$sqr8x_page_walk:
    800 	lea	rsp,[((-4096))+rsp]
    801 	mov	r10,QWORD[rsp]
    802 	cmp	rsp,rbp
    803 	ja	NEAR $L$sqr8x_page_walk
    804 $L$sqr8x_page_walk_done:
    805 
    806 	mov	r10,r9	; r10 = -8*num
    807 	neg	r9	; r9  =  8*num
    808 
    809 	mov	QWORD[32+rsp],r8	; n0 value
    810 	mov	QWORD[40+rsp],rax	; entry rsp
    811 
    812 $L$sqr8x_body:
    813 
        ; The DB sequences below are hand-encoded SSE2 movq instructions
        ; (66 REX.W 0F 6E /r and 66 REX.W 0F 7E /r).
    814 DB	102,72,15,110,209	; movq xmm2,rcx  (n[] pointer)
    815 	pxor	xmm0,xmm0
    816 DB	102,72,15,110,207	; movq xmm1,rdi  (result pointer)
    817 DB	102,73,15,110,218	; movq xmm3,r10  (-8*num)
    818 	call	bn_sqr8x_internal
    819 
    820 
    821 
    822 
        ; NOTE(review): the code below relies on register state left by
        ; bn_sqr8x_internal: rax is used as the top carry, rbp as the pointer
        ; subtracted from the product, rdi as a pointer into the product, and
        ; r9 must be negative (the loops count rcx and r9 up to zero). Confirm
        ; against bn_sqr8x_internal.
    823 	lea	rbx,[r9*1+rdi]
    824 	mov	rcx,r9
    825 	mov	rdx,r9
    826 DB	102,72,15,126,207	; movq rdi,xmm1  (restore result pointer)
    827 	sar	rcx,3+2	; rcx = r9/32: one count per 4-word iteration
    828 	jmp	NEAR $L$sqr8x_sub
    829 
    830 ALIGN	32
    831 $L$sqr8x_sub:
        ; result[] = [rbx][] - [rbp][] with borrow chain, four words per
        ; iteration. lea/mov/inc leave CF untouched between iterations.
    832 	mov	r12,QWORD[rbx]
    833 	mov	r13,QWORD[8+rbx]
    834 	mov	r14,QWORD[16+rbx]
    835 	mov	r15,QWORD[24+rbx]
    836 	lea	rbx,[32+rbx]
    837 	sbb	r12,QWORD[rbp]
    838 	sbb	r13,QWORD[8+rbp]
    839 	sbb	r14,QWORD[16+rbp]
    840 	sbb	r15,QWORD[24+rbp]
    841 	lea	rbp,[32+rbp]
    842 	mov	QWORD[rdi],r12
    843 	mov	QWORD[8+rdi],r13
    844 	mov	QWORD[16+rdi],r14
    845 	mov	QWORD[24+rdi],r15
    846 	lea	rdi,[32+rdi]
    847 	inc	rcx
    848 	jnz	NEAR $L$sqr8x_sub
    849 
        ; rax = rax - borrow becomes the selection mask; rewind both pointers
        ; by adding r9.
    850 	sbb	rax,0
    851 	lea	rbx,[r9*1+rbx]
    852 	lea	rdi,[r9*1+rdi]
    853 
    854 DB	102,72,15,110,200	; movq xmm1,rax
    855 	pxor	xmm0,xmm0
    856 	pshufd	xmm1,xmm1,0	; broadcast the low dword of the mask to all lanes
    857 	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp saved in the frame
    858 
    859 	jmp	NEAR $L$sqr8x_cond_copy
    860 
    861 ALIGN	32
    862 $L$sqr8x_cond_copy:
        ; Branch-free select, 32 bytes per iteration:
        ;   result = ([rbx] & mask) | (result & (mask == 0 ? ~0 : 0))
        ; The words just read from [rbx], and the words rdx bytes away from
        ; them, are zeroed as the loop advances.
    863 	movdqa	xmm2,XMMWORD[rbx]
    864 	movdqa	xmm3,XMMWORD[16+rbx]
    865 	lea	rbx,[32+rbx]
    866 	movdqu	xmm4,XMMWORD[rdi]
    867 	movdqu	xmm5,XMMWORD[16+rdi]
    868 	lea	rdi,[32+rdi]
    869 	movdqa	XMMWORD[(-32)+rbx],xmm0
    870 	movdqa	XMMWORD[(-16)+rbx],xmm0
    871 	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0
    872 	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
    873 	pcmpeqd	xmm0,xmm1	; xmm0 (zero) == mask -> all-ones where mask is 0
    874 	pand	xmm2,xmm1
    875 	pand	xmm3,xmm1
    876 	pand	xmm4,xmm0
    877 	pand	xmm5,xmm0
    878 	pxor	xmm0,xmm0	; restore xmm0 = 0 for the next iteration's stores
    879 	por	xmm4,xmm2
    880 	por	xmm5,xmm3
    881 	movdqu	XMMWORD[(-32)+rdi],xmm4
    882 	movdqu	XMMWORD[(-16)+rdi],xmm5
    883 	add	r9,32
    884 	jnz	NEAR $L$sqr8x_cond_copy
    885 
    886 	mov	rax,1	; return value
    887 	mov	r15,QWORD[((-48))+rsi]
    888 
    889 	mov	r14,QWORD[((-40))+rsi]
    890 
    891 	mov	r13,QWORD[((-32))+rsi]
    892 
    893 	mov	r12,QWORD[((-24))+rsi]
    894 
    895 	mov	rbp,QWORD[((-16))+rsi]
    896 
    897 	mov	rbx,QWORD[((-8))+rsi]
    898 
    899 	lea	rsp,[rsi]
    900 
    901 $L$sqr8x_epilogue:
    902 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    903 	mov	rsi,QWORD[16+rsp]
    904 	DB	0F3h,0C3h		;repret
    905 
    906 $L$SEH_end_bn_sqr8x_mont:
        ; NUL-terminated ASCII identification string embedded in .text. The
        ; bytes decode to "Montgomery Multiplication for x86_64, CRYPTOGAMS by"
        ; followed by the e-mail address appro@openssl.org in angle brackets.
    907 DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
    908 DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
    909 DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
    910 DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
    911 DB	115,108,46,111,114,103,62,0
    912 ALIGN	16
        ; Import-table slot for RtlVirtualUnwind; called indirectly from the
        ; shared SEH tail ($L$common_seh_tail).
    913 EXTERN	__imp_RtlVirtualUnwind
    914 
    915 ALIGN	16
        ;-----------------------------------------------------------------------
        ; mul_handler -- exception handler named in the .xdata records for
        ; bn_mul_mont and bn_mul4x_mont.
        ;
        ; In:   r8 = context record, r9 = dispatcher context (the only two
        ;       arguments this code reads).
        ; NOTE(review): the field names used in the comments below (Rax, Rsp,
        ; Rip, R9, ImageBase, HandlerData) follow the Win64 CONTEXT and
        ; DISPATCHER_CONTEXT layouts; the offsets are not defined in this
        ; file -- confirm against winnt.h.
        ;
        ; HandlerData for these functions is two image-relative labels:
        ; [0] = body label, [4] = epilogue label (see the .xdata section).
        ;   Rip <  body        rax = context Rax (the function copies its entry
        ;                      rsp into rax before the body); go to the tail
        ;                      without reloading callee-saved registers.
        ;   Rip >= epilogue    rax = context Rsp; go to the tail.
        ;   otherwise          rax = entry rsp read back from the frame slot
        ;                      [8 + 8*num + Rsp]; reload callee-saved
        ;                      registers via $L$common_pop_regs.
        ; This handler has no return of its own: every path continues into
        ; labels that live inside sqr_handler.
        ;-----------------------------------------------------------------------
    916 mul_handler:
    917 	push	rsi
    918 	push	rdi
    919 	push	rbx
    920 	push	rbp
    921 	push	r12
    922 	push	r13
    923 	push	r14
    924 	push	r15
    925 	pushfq
    926 	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call
    927 
    928 	mov	rax,QWORD[120+r8]	; context Rax
    929 	mov	rbx,QWORD[248+r8]	; context Rip
    930 
    931 	mov	rsi,QWORD[8+r9]	; dispatcher ImageBase
    932 	mov	r11,QWORD[56+r9]	; dispatcher HandlerData
    933 
    934 	mov	r10d,DWORD[r11]	; HandlerData[0]
    935 	lea	r10,[r10*1+rsi]	; make it an absolute address
    936 	cmp	rbx,r10
    937 	jb	NEAR $L$common_seh_tail
    938 
    939 	mov	rax,QWORD[152+r8]	; context Rsp
    940 
    941 	mov	r10d,DWORD[4+r11]	; HandlerData[1]
    942 	lea	r10,[r10*1+rsi]
    943 	cmp	rbx,r10
    944 	jae	NEAR $L$common_seh_tail
    945 
    946 	mov	r10,QWORD[192+r8]	; context R9 = num inside the function body
    947 	mov	rax,QWORD[8+r10*8+rax]	; entry rsp saved in the frame
    948 
    949 	jmp	NEAR $L$common_pop_regs
    950 
    951 
    952 
    953 ALIGN	16
        ;-----------------------------------------------------------------------
        ; sqr_handler -- exception handler named in the .xdata record for
        ; bn_sqr8x_mont. It also contains the tail shared with mul_handler
        ; ($L$common_pop_regs and $L$common_seh_tail).
        ;
        ; In:   r8 = context record, r9 = dispatcher context.
        ; Out:  eax = 1.
        ; NOTE(review): field names in the comments (Rax, Rsp, Rip, Rbx, ...)
        ; follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts, and the
        ; argument roles given for RtlVirtualUnwind follow its documented
        ; prototype; none of these are defined in this file -- confirm against
        ; winnt.h.
        ;
        ; HandlerData is three image-relative labels:
        ; [0] = prologue label, [4] = body label, [8] = epilogue label.
        ;   Rip <  prologue    rax = context Rax; tail only.
        ;   Rip <  body        rax = context Rax; reload callee-saved
        ;                      registers from below that address.
        ;   Rip >= epilogue    rax = context Rsp; tail only.
        ;   otherwise          rax = entry rsp read from frame slot [40 + Rsp];
        ;                      reload callee-saved registers.
        ;-----------------------------------------------------------------------
    954 sqr_handler:
    955 	push	rsi
    956 	push	rdi
    957 	push	rbx
    958 	push	rbp
    959 	push	r12
    960 	push	r13
    961 	push	r14
    962 	push	r15
    963 	pushfq
    964 	sub	rsp,64	; outgoing-argument area for the RtlVirtualUnwind call
    965 
    966 	mov	rax,QWORD[120+r8]	; context Rax
    967 	mov	rbx,QWORD[248+r8]	; context Rip
    968 
    969 	mov	rsi,QWORD[8+r9]	; dispatcher ImageBase
    970 	mov	r11,QWORD[56+r9]	; dispatcher HandlerData
    971 
    972 	mov	r10d,DWORD[r11]	; HandlerData[0]
    973 	lea	r10,[r10*1+rsi]
    974 	cmp	rbx,r10
    975 	jb	NEAR $L$common_seh_tail
    976 
    977 	mov	r10d,DWORD[4+r11]	; HandlerData[1]
    978 	lea	r10,[r10*1+rsi]
    979 	cmp	rbx,r10
    980 	jb	NEAR $L$common_pop_regs
    981 
    982 	mov	rax,QWORD[152+r8]	; context Rsp
    983 
    984 	mov	r10d,DWORD[8+r11]	; HandlerData[2]
    985 	lea	r10,[r10*1+rsi]
    986 	cmp	rbx,r10
    987 	jae	NEAR $L$common_seh_tail
    988 
    989 	mov	rax,QWORD[40+rax]	; entry rsp saved in the frame
    990 
    991 $L$common_pop_regs:
        ; rax = the interrupted function's entry rsp. Its six pushes sit just
        ; below that address; read them and write them into the context
        ; record.
    992 	mov	rbx,QWORD[((-8))+rax]
    993 	mov	rbp,QWORD[((-16))+rax]
    994 	mov	r12,QWORD[((-24))+rax]
    995 	mov	r13,QWORD[((-32))+rax]
    996 	mov	r14,QWORD[((-40))+rax]
    997 	mov	r15,QWORD[((-48))+rax]
    998 	mov	QWORD[144+r8],rbx	; context Rbx
    999 	mov	QWORD[160+r8],rbp	; context Rbp
   1000 	mov	QWORD[216+r8],r12	; context R12
   1001 	mov	QWORD[224+r8],r13	; context R13
   1002 	mov	QWORD[232+r8],r14	; context R14
   1003 	mov	QWORD[240+r8],r15	; context R15
   1004 
   1005 $L$common_seh_tail:
        ; Recover rdi/rsi from the slots the WIN64 prologue stored them in
        ; ([8+rsp] and [16+rsp] at function entry) and publish rsp/rsi/rdi in
        ; the context record.
   1006 	mov	rdi,QWORD[8+rax]
   1007 	mov	rsi,QWORD[16+rax]
   1008 	mov	QWORD[152+r8],rax	; context Rsp
   1009 	mov	QWORD[168+r8],rsi	; context Rsi
   1010 	mov	QWORD[176+r8],rdi	; context Rdi
   1011 
        ; Copy the updated context record over the one referenced by the
        ; dispatcher context: 154 qwords = 1232 bytes (presumably
        ; sizeof(CONTEXT)).
   1012 	mov	rdi,QWORD[40+r9]
   1013 	mov	rsi,r8
   1014 	mov	ecx,154
   1015 	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld ; rep movsq
   1016 
        ; Call RtlVirtualUnwind with four register arguments and four stack
        ; arguments above the 32-byte shadow area.
   1017 	mov	rsi,r9
   1018 	xor	rcx,rcx	; arg1 = 0
   1019 	mov	rdx,QWORD[8+rsi]	; arg2 = dispatcher ImageBase
   1020 	mov	r8,QWORD[rsi]	; arg3 = dispatcher field at offset 0
   1021 	mov	r9,QWORD[16+rsi]	; arg4 = dispatcher field at offset 16
   1022 	mov	r10,QWORD[40+rsi]	; dispatcher's context record pointer
   1023 	lea	r11,[56+rsi]	; address of the HandlerData field
   1024 	lea	r12,[24+rsi]	; address of the dispatcher field at offset 24
   1025 	mov	QWORD[32+rsp],r10	; arg5
   1026 	mov	QWORD[40+rsp],r11	; arg6
   1027 	mov	QWORD[48+rsp],r12	; arg7
   1028 	mov	QWORD[56+rsp],rcx	; arg8 = 0
   1029 	call	QWORD[__imp_RtlVirtualUnwind]
   1030 
        ; Return 1 and restore everything the handler prologue pushed.
        ; NOTE(review): 1 is presumably ExceptionContinueSearch -- confirm.
   1031 	mov	eax,1
   1032 	add	rsp,64
   1033 	popfq
   1034 	pop	r15
   1035 	pop	r14
   1036 	pop	r13
   1037 	pop	r12
   1038 	pop	rbp
   1039 	pop	rbx
   1040 	pop	rdi
   1041 	pop	rsi
   1042 	DB	0F3h,0C3h		;repret
   1043 
   1044 
   1045 section	.pdata rdata align=4
        ; One three-dword record per function: begin label, end label and the
        ; matching .xdata record, each emitted image-relative
        ; ("wrt ..imagebase").
   1046 ALIGN	4
   1047 	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase
   1048 	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
   1049 	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase
   1050 
   1051 	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
   1052 	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
   1053 	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase
   1054 
   1055 	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
   1056 	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
   1057 	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
   1058 section	.xdata rdata align=8
        ; Each record: a four-byte header (9,0,0,0), the handler's
        ; image-relative address, then the image-relative labels the handler
        ; reads through HandlerData and compares against the faulting Rip.
        ; NOTE(review): header byte 9 is presumably UNWIND_INFO version 1 with
        ; the exception-handler flag set and no unwind codes -- confirm
        ; against the x64 unwind-data documentation.
   1059 ALIGN	8
   1060 $L$SEH_info_bn_mul_mont:
   1061 DB	9,0,0,0
   1062 	DD	mul_handler wrt ..imagebase
   1063 	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
   1064 $L$SEH_info_bn_mul4x_mont:
   1065 DB	9,0,0,0
   1066 	DD	mul_handler wrt ..imagebase
   1067 	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
   1068 $L$SEH_info_bn_sqr8x_mont:
   1069 DB	9,0,0,0
   1070 	DD	sqr_handler wrt ..imagebase
   1071 	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
   1072 ALIGN	8
   1073