Home | History | Annotate | Download | only in asm
      1 .text
      2 
      3 .type	MULADD_128x512,@function
      4 .align	16
        # MULADD_128x512
        # Multiplies the eight 64-bit words at (%rsi) by a two-word multiplier and
        # adds the product into a running accumulator held in registers.
        #   In:   %rbp              first multiplier word (must already be loaded
        #                           by the caller; it is used before any load here)
        #         8(%rdi)           second multiplier word
        #         %rsi              pointer to the eight multiplicand words
        #         %r8..%r15         accumulator, least-significant word in %r8
        #         %rcx              where the two lowest result words are written
        #   Out:  0(%rcx), 8(%rcx)  two lowest result words
        #         %r10..%r15,%r8,%r9  remaining eight words, least significant in %r10
        #   Clobbers: %rax, %rdx, %rbx, %rbp and the flags.
        #   %rsi, %rdi and %rcx are not modified.
      5 MULADD_128x512:
        # Pass 1: accumulator += (%rsi)[0..7] * %rbp.  %rbx carries the high half
        # of each product into the next column.
      6 	movq	0(%rsi),%rax
      7 	mulq	%rbp
      8 	addq	%rax,%r8
      9 	adcq	$0,%rdx
     10 	movq	%r8,0(%rcx)
     11 	movq	%rdx,%rbx
     12 
     13 	movq	8(%rsi),%rax
     14 	mulq	%rbp
     15 	addq	%rax,%r9
     16 	adcq	$0,%rdx
     17 	addq	%rbx,%r9
     18 	adcq	$0,%rdx
     19 	movq	%rdx,%rbx
     20 
     21 	movq	16(%rsi),%rax
     22 	mulq	%rbp
     23 	addq	%rax,%r10
     24 	adcq	$0,%rdx
     25 	addq	%rbx,%r10
     26 	adcq	$0,%rdx
     27 	movq	%rdx,%rbx
     28 
     29 	movq	24(%rsi),%rax
     30 	mulq	%rbp
     31 	addq	%rax,%r11
     32 	adcq	$0,%rdx
     33 	addq	%rbx,%r11
     34 	adcq	$0,%rdx
     35 	movq	%rdx,%rbx
     36 
     37 	movq	32(%rsi),%rax
     38 	mulq	%rbp
     39 	addq	%rax,%r12
     40 	adcq	$0,%rdx
     41 	addq	%rbx,%r12
     42 	adcq	$0,%rdx
     43 	movq	%rdx,%rbx
     44 
     45 	movq	40(%rsi),%rax
     46 	mulq	%rbp
     47 	addq	%rax,%r13
     48 	adcq	$0,%rdx
     49 	addq	%rbx,%r13
     50 	adcq	$0,%rdx
     51 	movq	%rdx,%rbx
     52 
     53 	movq	48(%rsi),%rax
     54 	mulq	%rbp
     55 	addq	%rax,%r14
     56 	adcq	$0,%rdx
     57 	addq	%rbx,%r14
     58 	adcq	$0,%rdx
     59 	movq	%rdx,%rbx
     60 
     61 	movq	56(%rsi),%rax
     62 	mulq	%rbp
     63 	addq	%rax,%r15
     64 	adcq	$0,%rdx
     65 	addq	%rbx,%r15
     66 	adcq	$0,%rdx
        # %r8 was already written to 0(%rcx); reuse it for the new top word.
     67 	movq	%rdx,%r8
        # Pass 2: same as pass 1 with the second multiplier word, shifted one
        # column up (accumulator now runs %r9..%r15,%r8).
     68 	movq	8(%rdi),%rbp
     69 	movq	0(%rsi),%rax
     70 	mulq	%rbp
     71 	addq	%rax,%r9
     72 	adcq	$0,%rdx
     73 	movq	%r9,8(%rcx)
     74 	movq	%rdx,%rbx
     75 
     76 	movq	8(%rsi),%rax
     77 	mulq	%rbp
     78 	addq	%rax,%r10
     79 	adcq	$0,%rdx
     80 	addq	%rbx,%r10
     81 	adcq	$0,%rdx
     82 	movq	%rdx,%rbx
     83 
     84 	movq	16(%rsi),%rax
     85 	mulq	%rbp
     86 	addq	%rax,%r11
     87 	adcq	$0,%rdx
     88 	addq	%rbx,%r11
     89 	adcq	$0,%rdx
     90 	movq	%rdx,%rbx
     91 
     92 	movq	24(%rsi),%rax
     93 	mulq	%rbp
     94 	addq	%rax,%r12
     95 	adcq	$0,%rdx
     96 	addq	%rbx,%r12
     97 	adcq	$0,%rdx
     98 	movq	%rdx,%rbx
     99 
    100 	movq	32(%rsi),%rax
    101 	mulq	%rbp
    102 	addq	%rax,%r13
    103 	adcq	$0,%rdx
    104 	addq	%rbx,%r13
    105 	adcq	$0,%rdx
    106 	movq	%rdx,%rbx
    107 
    108 	movq	40(%rsi),%rax
    109 	mulq	%rbp
    110 	addq	%rax,%r14
    111 	adcq	$0,%rdx
    112 	addq	%rbx,%r14
    113 	adcq	$0,%rdx
    114 	movq	%rdx,%rbx
    115 
    116 	movq	48(%rsi),%rax
    117 	mulq	%rbp
    118 	addq	%rax,%r15
    119 	adcq	$0,%rdx
    120 	addq	%rbx,%r15
    121 	adcq	$0,%rdx
    122 	movq	%rdx,%rbx
    123 
    124 	movq	56(%rsi),%rax
    125 	mulq	%rbp
    126 	addq	%rax,%r8
    127 	adcq	$0,%rdx
    128 	addq	%rbx,%r8
    129 	adcq	$0,%rdx
        # %r9 was already written to 8(%rcx); it now receives the top word.
    130 	movq	%rdx,%r9
        # Bytes F3 C3 encode "rep ret".
    131 	.byte	0xf3,0xc3
    132 .size	MULADD_128x512,.-MULADD_128x512
    133 .type	mont_reduce,@function
    134 .align	16
        # mont_reduce
        # Consumes the sixteen 64-bit words at 520(%rsp)..640(%rsp) and writes eight
        # result words through the pointer stored at 144(%rsp).
        # NOTE(review): the name suggests a Montgomery reduction; what the words in
        # the data block mean is decided by the caller - confirm against the
        # code that builds that block.
        #
        # Stack slots used (offsets as seen inside this function, i.e. with the
        # return address already pushed):
        #    32(%rsp)   pointer to a data block ("base" below).  This function reads
        #               eight words each at base+512, base+576 and base+640, two
        #               words at base+704, and a table entry selected by
        #               base + 8*index + 64*j (index 0..7, j 0..7).
        #   144(%rsp)   pointer to the eight-word destination
        #   192(%rsp)   scratch words (multipliers for MULADD_128x512)
        #   296(%rsp)   scratch words (low result words and partial sums)
        #   384(%rsp)   accumulated carry bits
        #   520(%rsp)   sixteen-word input
        # Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, %rbp, %r8-%r15, flags.
    135 mont_reduce:
    136 	leaq	192(%rsp),%rdi
    137 	movq	32(%rsp),%rsi
    138 	addq	$576,%rsi
    139 	leaq	520(%rsp),%rcx
    140 
        # Stage 1: four multiply-accumulate passes.  Input words 12..15
        # (96..120(%rcx)) multiply the eight words at base+576; the accumulator is
        # seeded from input words 0..7 on the first pass.  The lowest word of each
        # pass is written to 0/8/16/24(%rdi).
    141 	movq	96(%rcx),%rbp
    142 	movq	0(%rsi),%rax
    143 	mulq	%rbp
    144 	movq	(%rcx),%r8
    145 	addq	%rax,%r8
    146 	adcq	$0,%rdx
    147 	movq	%r8,0(%rdi)
    148 	movq	%rdx,%rbx
    149 
    150 	movq	8(%rsi),%rax
    151 	mulq	%rbp
    152 	movq	8(%rcx),%r9
    153 	addq	%rax,%r9
    154 	adcq	$0,%rdx
    155 	addq	%rbx,%r9
    156 	adcq	$0,%rdx
    157 	movq	%rdx,%rbx
    158 
    159 	movq	16(%rsi),%rax
    160 	mulq	%rbp
    161 	movq	16(%rcx),%r10
    162 	addq	%rax,%r10
    163 	adcq	$0,%rdx
    164 	addq	%rbx,%r10
    165 	adcq	$0,%rdx
    166 	movq	%rdx,%rbx
    167 
    168 	movq	24(%rsi),%rax
    169 	mulq	%rbp
    170 	movq	24(%rcx),%r11
    171 	addq	%rax,%r11
    172 	adcq	$0,%rdx
    173 	addq	%rbx,%r11
    174 	adcq	$0,%rdx
    175 	movq	%rdx,%rbx
    176 
    177 	movq	32(%rsi),%rax
    178 	mulq	%rbp
    179 	movq	32(%rcx),%r12
    180 	addq	%rax,%r12
    181 	adcq	$0,%rdx
    182 	addq	%rbx,%r12
    183 	adcq	$0,%rdx
    184 	movq	%rdx,%rbx
    185 
    186 	movq	40(%rsi),%rax
    187 	mulq	%rbp
    188 	movq	40(%rcx),%r13
    189 	addq	%rax,%r13
    190 	adcq	$0,%rdx
    191 	addq	%rbx,%r13
    192 	adcq	$0,%rdx
    193 	movq	%rdx,%rbx
    194 
    195 	movq	48(%rsi),%rax
    196 	mulq	%rbp
    197 	movq	48(%rcx),%r14
    198 	addq	%rax,%r14
    199 	adcq	$0,%rdx
    200 	addq	%rbx,%r14
    201 	adcq	$0,%rdx
    202 	movq	%rdx,%rbx
    203 
    204 	movq	56(%rsi),%rax
    205 	mulq	%rbp
    206 	movq	56(%rcx),%r15
    207 	addq	%rax,%r15
    208 	adcq	$0,%rdx
    209 	addq	%rbx,%r15
    210 	adcq	$0,%rdx
    211 	movq	%rdx,%r8
        # Pass 2 of stage 1: multiplier is input word 13.
    212 	movq	104(%rcx),%rbp
    213 	movq	0(%rsi),%rax
    214 	mulq	%rbp
    215 	addq	%rax,%r9
    216 	adcq	$0,%rdx
    217 	movq	%r9,8(%rdi)
    218 	movq	%rdx,%rbx
    219 
    220 	movq	8(%rsi),%rax
    221 	mulq	%rbp
    222 	addq	%rax,%r10
    223 	adcq	$0,%rdx
    224 	addq	%rbx,%r10
    225 	adcq	$0,%rdx
    226 	movq	%rdx,%rbx
    227 
    228 	movq	16(%rsi),%rax
    229 	mulq	%rbp
    230 	addq	%rax,%r11
    231 	adcq	$0,%rdx
    232 	addq	%rbx,%r11
    233 	adcq	$0,%rdx
    234 	movq	%rdx,%rbx
    235 
    236 	movq	24(%rsi),%rax
    237 	mulq	%rbp
    238 	addq	%rax,%r12
    239 	adcq	$0,%rdx
    240 	addq	%rbx,%r12
    241 	adcq	$0,%rdx
    242 	movq	%rdx,%rbx
    243 
    244 	movq	32(%rsi),%rax
    245 	mulq	%rbp
    246 	addq	%rax,%r13
    247 	adcq	$0,%rdx
    248 	addq	%rbx,%r13
    249 	adcq	$0,%rdx
    250 	movq	%rdx,%rbx
    251 
    252 	movq	40(%rsi),%rax
    253 	mulq	%rbp
    254 	addq	%rax,%r14
    255 	adcq	$0,%rdx
    256 	addq	%rbx,%r14
    257 	adcq	$0,%rdx
    258 	movq	%rdx,%rbx
    259 
    260 	movq	48(%rsi),%rax
    261 	mulq	%rbp
    262 	addq	%rax,%r15
    263 	adcq	$0,%rdx
    264 	addq	%rbx,%r15
    265 	adcq	$0,%rdx
    266 	movq	%rdx,%rbx
    267 
    268 	movq	56(%rsi),%rax
    269 	mulq	%rbp
    270 	addq	%rax,%r8
    271 	adcq	$0,%rdx
    272 	addq	%rbx,%r8
    273 	adcq	$0,%rdx
    274 	movq	%rdx,%r9
        # Pass 3 of stage 1: multiplier is input word 14.
    275 	movq	112(%rcx),%rbp
    276 	movq	0(%rsi),%rax
    277 	mulq	%rbp
    278 	addq	%rax,%r10
    279 	adcq	$0,%rdx
    280 	movq	%r10,16(%rdi)
    281 	movq	%rdx,%rbx
    282 
    283 	movq	8(%rsi),%rax
    284 	mulq	%rbp
    285 	addq	%rax,%r11
    286 	adcq	$0,%rdx
    287 	addq	%rbx,%r11
    288 	adcq	$0,%rdx
    289 	movq	%rdx,%rbx
    290 
    291 	movq	16(%rsi),%rax
    292 	mulq	%rbp
    293 	addq	%rax,%r12
    294 	adcq	$0,%rdx
    295 	addq	%rbx,%r12
    296 	adcq	$0,%rdx
    297 	movq	%rdx,%rbx
    298 
    299 	movq	24(%rsi),%rax
    300 	mulq	%rbp
    301 	addq	%rax,%r13
    302 	adcq	$0,%rdx
    303 	addq	%rbx,%r13
    304 	adcq	$0,%rdx
    305 	movq	%rdx,%rbx
    306 
    307 	movq	32(%rsi),%rax
    308 	mulq	%rbp
    309 	addq	%rax,%r14
    310 	adcq	$0,%rdx
    311 	addq	%rbx,%r14
    312 	adcq	$0,%rdx
    313 	movq	%rdx,%rbx
    314 
    315 	movq	40(%rsi),%rax
    316 	mulq	%rbp
    317 	addq	%rax,%r15
    318 	adcq	$0,%rdx
    319 	addq	%rbx,%r15
    320 	adcq	$0,%rdx
    321 	movq	%rdx,%rbx
    322 
    323 	movq	48(%rsi),%rax
    324 	mulq	%rbp
    325 	addq	%rax,%r8
    326 	adcq	$0,%rdx
    327 	addq	%rbx,%r8
    328 	adcq	$0,%rdx
    329 	movq	%rdx,%rbx
    330 
    331 	movq	56(%rsi),%rax
    332 	mulq	%rbp
    333 	addq	%rax,%r9
    334 	adcq	$0,%rdx
    335 	addq	%rbx,%r9
    336 	adcq	$0,%rdx
    337 	movq	%rdx,%r10
        # Pass 4 of stage 1: multiplier is input word 15.
    338 	movq	120(%rcx),%rbp
    339 	movq	0(%rsi),%rax
    340 	mulq	%rbp
    341 	addq	%rax,%r11
    342 	adcq	$0,%rdx
    343 	movq	%r11,24(%rdi)
    344 	movq	%rdx,%rbx
    345 
    346 	movq	8(%rsi),%rax
    347 	mulq	%rbp
    348 	addq	%rax,%r12
    349 	adcq	$0,%rdx
    350 	addq	%rbx,%r12
    351 	adcq	$0,%rdx
    352 	movq	%rdx,%rbx
    353 
    354 	movq	16(%rsi),%rax
    355 	mulq	%rbp
    356 	addq	%rax,%r13
    357 	adcq	$0,%rdx
    358 	addq	%rbx,%r13
    359 	adcq	$0,%rdx
    360 	movq	%rdx,%rbx
    361 
    362 	movq	24(%rsi),%rax
    363 	mulq	%rbp
    364 	addq	%rax,%r14
    365 	adcq	$0,%rdx
    366 	addq	%rbx,%r14
    367 	adcq	$0,%rdx
    368 	movq	%rdx,%rbx
    369 
    370 	movq	32(%rsi),%rax
    371 	mulq	%rbp
    372 	addq	%rax,%r15
    373 	adcq	$0,%rdx
    374 	addq	%rbx,%r15
    375 	adcq	$0,%rdx
    376 	movq	%rdx,%rbx
    377 
    378 	movq	40(%rsi),%rax
    379 	mulq	%rbp
    380 	addq	%rax,%r8
    381 	adcq	$0,%rdx
    382 	addq	%rbx,%r8
    383 	adcq	$0,%rdx
    384 	movq	%rdx,%rbx
    385 
    386 	movq	48(%rsi),%rax
    387 	mulq	%rbp
    388 	addq	%rax,%r9
    389 	adcq	$0,%rdx
    390 	addq	%rbx,%r9
    391 	adcq	$0,%rdx
    392 	movq	%rdx,%rbx
    393 
    394 	movq	56(%rsi),%rax
    395 	mulq	%rbp
    396 	addq	%rax,%r10
    397 	adcq	$0,%rdx
    398 	addq	%rbx,%r10
    399 	adcq	$0,%rdx
    400 	movq	%rdx,%r11
        # Fold input words 8..11 into the top four accumulator words; %rax
        # collects the carry out (0 or 1).
    401 	xorq	%rax,%rax
    402 
    403 	addq	64(%rcx),%r8
    404 	adcq	72(%rcx),%r9
    405 	adcq	80(%rcx),%r10
    406 	adcq	88(%rcx),%r11
    407 	adcq	$0,%rax
    408 
    409 
    410 
    411 
        # Save the four top words.  The third is kept in %rbp instead of being
        # stored: it becomes the first multiplier word for MULADD_128x512, and the
        # fourth (88(%rdi)) becomes 8(%rdi) once %rdi is advanced by 80.
    412 	movq	%r8,64(%rdi)
    413 	movq	%r9,72(%rdi)
    414 	movq	%r10,%rbp
    415 	movq	%r11,88(%rdi)
    416 
    417 	movq	%rax,384(%rsp)
    418 
        # Reload the four low words written during stage 1 as the low part of the
        # accumulator; %r12..%r15 still hold the next four words.
    419 	movq	0(%rdi),%r8
    420 	movq	8(%rdi),%r9
    421 	movq	16(%rdi),%r10
    422 	movq	24(%rdi),%r11
    423 
    424 
    425 
    426 
    427 
    428 
    429 
    430 
        # Stage 2: accumulate (eight words at base+640) * (two-word multiplier).
        # The two lowest result words go to 296(%rsp) and 304(%rsp).
    431 	addq	$80,%rdi
    432 
    433 	addq	$64,%rsi
    434 	leaq	296(%rsp),%rcx
    435 
    436 	call	MULADD_128x512
    437 
    438 	movq	384(%rsp),%rax
    439 
    440 
        # Add the two words saved at 64/72 of the first scratch area (now at
        # -16/-8 from %rdi) into the two top result words and park the sums.
    441 	addq	-16(%rdi),%r8
    442 	adcq	-8(%rdi),%r9
    443 	movq	%r8,64(%rcx)
    444 	movq	%r9,72(%rcx)
    445 
        # %rax = 2*%rax + carry: shift the new carry bit into the carry record.
    446 	adcq	%rax,%rax
    447 	movq	%rax,384(%rsp)
    448 
    449 	leaq	192(%rsp),%rdi
    450 	addq	$64,%rsi
    451 
    452 
    453 
    454 
    455 
        # Stage 3: compute the low 128 bits of
        #   (two words at (%rcx)) * (two words at base+704).
        # Low word stays in %rbp, second word is written to 8(%rdi); together
        # they are the multiplier for the next MULADD_128x512 call.
    456 	movq	(%rsi),%r8
    457 	movq	8(%rsi),%rbx
    458 
    459 	movq	(%rcx),%rax
    460 	mulq	%r8
    461 	movq	%rax,%rbp
    462 	movq	%rdx,%r9
    463 
    464 	movq	8(%rcx),%rax
    465 	mulq	%r8
    466 	addq	%rax,%r9
    467 
    468 	movq	(%rcx),%rax
    469 	mulq	%rbx
    470 	addq	%rax,%r9
    471 
    472 	movq	%r9,8(%rdi)
    473 
    474 
        # %rsi: base+704 -> base+512.
    475 	subq	$192,%rsi
    476 
    477 	movq	(%rcx),%r8
    478 	movq	8(%rcx),%r9
    479 
    480 	call	MULADD_128x512
    481 
    482 
    483 
    484 
        # Preload the first four words at base+512 for the conditional subtract
        # below (%rsi is unchanged by MULADD_128x512).
    485 	movq	0(%rsi),%rax
    486 	movq	8(%rsi),%rbx
    487 	movq	16(%rsi),%rdi
    488 	movq	24(%rsi),%rdx
    489 
    490 
    491 	movq	384(%rsp),%rbp
    492 
    493 	addq	64(%rcx),%r8
    494 	adcq	72(%rcx),%r9
    495 
    496 
        # %rbp = 2*%rbp + carry: third carry bit, giving an index in 0..7.
    497 	adcq	%rbp,%rbp
    498 
    499 
    500 
        # Turn the index into a byte offset and point %rbp at base + 8*index.
    501 	shlq	$3,%rbp
    502 	movq	32(%rsp),%rcx
    503 	addq	%rcx,%rbp
    504 
    505 
    506 	xorq	%rsi,%rsi
    507 
        # Add the selected table entry; its eight words are 64 bytes apart.
    508 	addq	0(%rbp),%r10
    509 	adcq	64(%rbp),%r11
    510 	adcq	128(%rbp),%r12
    511 	adcq	192(%rbp),%r13
    512 	adcq	256(%rbp),%r14
    513 	adcq	320(%rbp),%r15
    514 	adcq	384(%rbp),%r8
    515 	adcq	448(%rbp),%r9
    516 
    517 
    518 
        # %rsi = 0 - carry: all ones if the addition carried out, else zero.
    519 	sbbq	$0,%rsi
    520 
    521 
        # Branch-free conditional subtract of the eight words at base+512: the
        # mask in %rsi turns the subtrahend into zero when no carry occurred.
    522 	andq	%rsi,%rax
    523 	andq	%rsi,%rbx
    524 	andq	%rsi,%rdi
    525 	andq	%rsi,%rdx
    526 
    527 	movq	$1,%rbp
    528 	subq	%rax,%r10
    529 	sbbq	%rbx,%r11
    530 	sbbq	%rdi,%r12
    531 	sbbq	%rdx,%r13
    532 
    533 
    534 
    535 
        # Save the borrow as %rbp = 1 - borrow, because the addq/andq below
        # overwrite the flags.
    536 	sbbq	$0,%rbp
    537 
    538 
    539 
    540 	addq	$512,%rcx
    541 	movq	32(%rcx),%rax
    542 	movq	40(%rcx),%rbx
    543 	movq	48(%rcx),%rdi
    544 	movq	56(%rcx),%rdx
    545 
    546 
    547 
    548 	andq	%rsi,%rax
    549 	andq	%rsi,%rbx
    550 	andq	%rsi,%rdi
    551 	andq	%rsi,%rdx
    552 
    553 
    554 
        # Recreate the saved borrow in CF: (1 - borrow) - 1 borrows iff borrow was 1.
    555 	subq	$1,%rbp
    556 
    557 	sbbq	%rax,%r14
    558 	sbbq	%rbx,%r15
    559 	sbbq	%rdi,%r8
    560 	sbbq	%rdx,%r9
    561 
    562 
    563 
        # Write the eight result words to the destination.
    564 	movq	144(%rsp),%rsi
    565 	movq	%r10,0(%rsi)
    566 	movq	%r11,8(%rsi)
    567 	movq	%r12,16(%rsi)
    568 	movq	%r13,24(%rsi)
    569 	movq	%r14,32(%rsi)
    570 	movq	%r15,40(%rsi)
    571 	movq	%r8,48(%rsi)
    572 	movq	%r9,56(%rsi)
    573 
        # Bytes F3 C3 encode "rep ret".  On return %r10..%r15,%r8,%r9 still hold
        # the result words and %rsi holds the destination pointer.
    574 	.byte	0xf3,0xc3
    575 .size	mont_reduce,.-mont_reduce
    576 .type	mont_mul_a3b,@function
    577 .align	16
        # mont_mul_a3b
        # Unrolled 8-word by 8-word multiply.  The 16-word product is written to
        # 520(%rsp)..640(%rsp), then control is transferred to mont_reduce with a
        # jmp, so mont_reduce's return goes straight back to this routine's caller.
        #   In:   %rdi   pointer to eight multiplier words (0..56(%rdi))
        #         %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9
        #                eight multiplicand words, used for the first row only
        #         %rsi   pointer to eight multiplicand words, used for rows 1..7
        #                NOTE(review): presumably the same value the registers
        #                hold - confirm at the call sites.
        #   Clobbers: %rax, %rdx, %rbx, %rbp, %r8-%r15, flags (plus whatever
        #             mont_reduce clobbers).
    578 mont_mul_a3b:
    579 
    580 
    581 
    582 
        # Row 0: register operand * 0(%rdi).  Each register is overwritten with a
        # product word as soon as its input value has been consumed.
    583 	movq	0(%rdi),%rbp
    584 
    585 	movq	%r10,%rax
    586 	mulq	%rbp
    587 	movq	%rax,520(%rsp)
    588 	movq	%rdx,%r10
    589 	movq	%r11,%rax
    590 	mulq	%rbp
    591 	addq	%rax,%r10
    592 	adcq	$0,%rdx
    593 	movq	%rdx,%r11
    594 	movq	%r12,%rax
    595 	mulq	%rbp
    596 	addq	%rax,%r11
    597 	adcq	$0,%rdx
    598 	movq	%rdx,%r12
    599 	movq	%r13,%rax
    600 	mulq	%rbp
    601 	addq	%rax,%r12
    602 	adcq	$0,%rdx
    603 	movq	%rdx,%r13
    604 	movq	%r14,%rax
    605 	mulq	%rbp
    606 	addq	%rax,%r13
    607 	adcq	$0,%rdx
    608 	movq	%rdx,%r14
    609 	movq	%r15,%rax
    610 	mulq	%rbp
    611 	addq	%rax,%r14
    612 	adcq	$0,%rdx
    613 	movq	%rdx,%r15
    614 	movq	%r8,%rax
    615 	mulq	%rbp
    616 	addq	%rax,%r15
    617 	adcq	$0,%rdx
    618 	movq	%rdx,%r8
    619 	movq	%r9,%rax
    620 	mulq	%rbp
    621 	addq	%rax,%r8
    622 	adcq	$0,%rdx
    623 	movq	%rdx,%r9
        # Rows 1..7: accumulate (%rsi)[0..7] * k*8(%rdi).  Each row stores its
        # lowest word to the next product slot and rotates the register window up
        # by one; %rbx carries the high half between columns.
        # Row 1 -> 528(%rsp).
    624 	movq	8(%rdi),%rbp
    625 	movq	0(%rsi),%rax
    626 	mulq	%rbp
    627 	addq	%rax,%r10
    628 	adcq	$0,%rdx
    629 	movq	%r10,528(%rsp)
    630 	movq	%rdx,%rbx
    631 
    632 	movq	8(%rsi),%rax
    633 	mulq	%rbp
    634 	addq	%rax,%r11
    635 	adcq	$0,%rdx
    636 	addq	%rbx,%r11
    637 	adcq	$0,%rdx
    638 	movq	%rdx,%rbx
    639 
    640 	movq	16(%rsi),%rax
    641 	mulq	%rbp
    642 	addq	%rax,%r12
    643 	adcq	$0,%rdx
    644 	addq	%rbx,%r12
    645 	adcq	$0,%rdx
    646 	movq	%rdx,%rbx
    647 
    648 	movq	24(%rsi),%rax
    649 	mulq	%rbp
    650 	addq	%rax,%r13
    651 	adcq	$0,%rdx
    652 	addq	%rbx,%r13
    653 	adcq	$0,%rdx
    654 	movq	%rdx,%rbx
    655 
    656 	movq	32(%rsi),%rax
    657 	mulq	%rbp
    658 	addq	%rax,%r14
    659 	adcq	$0,%rdx
    660 	addq	%rbx,%r14
    661 	adcq	$0,%rdx
    662 	movq	%rdx,%rbx
    663 
    664 	movq	40(%rsi),%rax
    665 	mulq	%rbp
    666 	addq	%rax,%r15
    667 	adcq	$0,%rdx
    668 	addq	%rbx,%r15
    669 	adcq	$0,%rdx
    670 	movq	%rdx,%rbx
    671 
    672 	movq	48(%rsi),%rax
    673 	mulq	%rbp
    674 	addq	%rax,%r8
    675 	adcq	$0,%rdx
    676 	addq	%rbx,%r8
    677 	adcq	$0,%rdx
    678 	movq	%rdx,%rbx
    679 
    680 	movq	56(%rsi),%rax
    681 	mulq	%rbp
    682 	addq	%rax,%r9
    683 	adcq	$0,%rdx
    684 	addq	%rbx,%r9
    685 	adcq	$0,%rdx
    686 	movq	%rdx,%r10
        # Row 2 -> 536(%rsp).
    687 	movq	16(%rdi),%rbp
    688 	movq	0(%rsi),%rax
    689 	mulq	%rbp
    690 	addq	%rax,%r11
    691 	adcq	$0,%rdx
    692 	movq	%r11,536(%rsp)
    693 	movq	%rdx,%rbx
    694 
    695 	movq	8(%rsi),%rax
    696 	mulq	%rbp
    697 	addq	%rax,%r12
    698 	adcq	$0,%rdx
    699 	addq	%rbx,%r12
    700 	adcq	$0,%rdx
    701 	movq	%rdx,%rbx
    702 
    703 	movq	16(%rsi),%rax
    704 	mulq	%rbp
    705 	addq	%rax,%r13
    706 	adcq	$0,%rdx
    707 	addq	%rbx,%r13
    708 	adcq	$0,%rdx
    709 	movq	%rdx,%rbx
    710 
    711 	movq	24(%rsi),%rax
    712 	mulq	%rbp
    713 	addq	%rax,%r14
    714 	adcq	$0,%rdx
    715 	addq	%rbx,%r14
    716 	adcq	$0,%rdx
    717 	movq	%rdx,%rbx
    718 
    719 	movq	32(%rsi),%rax
    720 	mulq	%rbp
    721 	addq	%rax,%r15
    722 	adcq	$0,%rdx
    723 	addq	%rbx,%r15
    724 	adcq	$0,%rdx
    725 	movq	%rdx,%rbx
    726 
    727 	movq	40(%rsi),%rax
    728 	mulq	%rbp
    729 	addq	%rax,%r8
    730 	adcq	$0,%rdx
    731 	addq	%rbx,%r8
    732 	adcq	$0,%rdx
    733 	movq	%rdx,%rbx
    734 
    735 	movq	48(%rsi),%rax
    736 	mulq	%rbp
    737 	addq	%rax,%r9
    738 	adcq	$0,%rdx
    739 	addq	%rbx,%r9
    740 	adcq	$0,%rdx
    741 	movq	%rdx,%rbx
    742 
    743 	movq	56(%rsi),%rax
    744 	mulq	%rbp
    745 	addq	%rax,%r10
    746 	adcq	$0,%rdx
    747 	addq	%rbx,%r10
    748 	adcq	$0,%rdx
    749 	movq	%rdx,%r11
        # Row 3 -> 544(%rsp).
    750 	movq	24(%rdi),%rbp
    751 	movq	0(%rsi),%rax
    752 	mulq	%rbp
    753 	addq	%rax,%r12
    754 	adcq	$0,%rdx
    755 	movq	%r12,544(%rsp)
    756 	movq	%rdx,%rbx
    757 
    758 	movq	8(%rsi),%rax
    759 	mulq	%rbp
    760 	addq	%rax,%r13
    761 	adcq	$0,%rdx
    762 	addq	%rbx,%r13
    763 	adcq	$0,%rdx
    764 	movq	%rdx,%rbx
    765 
    766 	movq	16(%rsi),%rax
    767 	mulq	%rbp
    768 	addq	%rax,%r14
    769 	adcq	$0,%rdx
    770 	addq	%rbx,%r14
    771 	adcq	$0,%rdx
    772 	movq	%rdx,%rbx
    773 
    774 	movq	24(%rsi),%rax
    775 	mulq	%rbp
    776 	addq	%rax,%r15
    777 	adcq	$0,%rdx
    778 	addq	%rbx,%r15
    779 	adcq	$0,%rdx
    780 	movq	%rdx,%rbx
    781 
    782 	movq	32(%rsi),%rax
    783 	mulq	%rbp
    784 	addq	%rax,%r8
    785 	adcq	$0,%rdx
    786 	addq	%rbx,%r8
    787 	adcq	$0,%rdx
    788 	movq	%rdx,%rbx
    789 
    790 	movq	40(%rsi),%rax
    791 	mulq	%rbp
    792 	addq	%rax,%r9
    793 	adcq	$0,%rdx
    794 	addq	%rbx,%r9
    795 	adcq	$0,%rdx
    796 	movq	%rdx,%rbx
    797 
    798 	movq	48(%rsi),%rax
    799 	mulq	%rbp
    800 	addq	%rax,%r10
    801 	adcq	$0,%rdx
    802 	addq	%rbx,%r10
    803 	adcq	$0,%rdx
    804 	movq	%rdx,%rbx
    805 
    806 	movq	56(%rsi),%rax
    807 	mulq	%rbp
    808 	addq	%rax,%r11
    809 	adcq	$0,%rdx
    810 	addq	%rbx,%r11
    811 	adcq	$0,%rdx
    812 	movq	%rdx,%r12
        # Row 4 -> 552(%rsp).
    813 	movq	32(%rdi),%rbp
    814 	movq	0(%rsi),%rax
    815 	mulq	%rbp
    816 	addq	%rax,%r13
    817 	adcq	$0,%rdx
    818 	movq	%r13,552(%rsp)
    819 	movq	%rdx,%rbx
    820 
    821 	movq	8(%rsi),%rax
    822 	mulq	%rbp
    823 	addq	%rax,%r14
    824 	adcq	$0,%rdx
    825 	addq	%rbx,%r14
    826 	adcq	$0,%rdx
    827 	movq	%rdx,%rbx
    828 
    829 	movq	16(%rsi),%rax
    830 	mulq	%rbp
    831 	addq	%rax,%r15
    832 	adcq	$0,%rdx
    833 	addq	%rbx,%r15
    834 	adcq	$0,%rdx
    835 	movq	%rdx,%rbx
    836 
    837 	movq	24(%rsi),%rax
    838 	mulq	%rbp
    839 	addq	%rax,%r8
    840 	adcq	$0,%rdx
    841 	addq	%rbx,%r8
    842 	adcq	$0,%rdx
    843 	movq	%rdx,%rbx
    844 
    845 	movq	32(%rsi),%rax
    846 	mulq	%rbp
    847 	addq	%rax,%r9
    848 	adcq	$0,%rdx
    849 	addq	%rbx,%r9
    850 	adcq	$0,%rdx
    851 	movq	%rdx,%rbx
    852 
    853 	movq	40(%rsi),%rax
    854 	mulq	%rbp
    855 	addq	%rax,%r10
    856 	adcq	$0,%rdx
    857 	addq	%rbx,%r10
    858 	adcq	$0,%rdx
    859 	movq	%rdx,%rbx
    860 
    861 	movq	48(%rsi),%rax
    862 	mulq	%rbp
    863 	addq	%rax,%r11
    864 	adcq	$0,%rdx
    865 	addq	%rbx,%r11
    866 	adcq	$0,%rdx
    867 	movq	%rdx,%rbx
    868 
    869 	movq	56(%rsi),%rax
    870 	mulq	%rbp
    871 	addq	%rax,%r12
    872 	adcq	$0,%rdx
    873 	addq	%rbx,%r12
    874 	adcq	$0,%rdx
    875 	movq	%rdx,%r13
        # Row 5 -> 560(%rsp).
    876 	movq	40(%rdi),%rbp
    877 	movq	0(%rsi),%rax
    878 	mulq	%rbp
    879 	addq	%rax,%r14
    880 	adcq	$0,%rdx
    881 	movq	%r14,560(%rsp)
    882 	movq	%rdx,%rbx
    883 
    884 	movq	8(%rsi),%rax
    885 	mulq	%rbp
    886 	addq	%rax,%r15
    887 	adcq	$0,%rdx
    888 	addq	%rbx,%r15
    889 	adcq	$0,%rdx
    890 	movq	%rdx,%rbx
    891 
    892 	movq	16(%rsi),%rax
    893 	mulq	%rbp
    894 	addq	%rax,%r8
    895 	adcq	$0,%rdx
    896 	addq	%rbx,%r8
    897 	adcq	$0,%rdx
    898 	movq	%rdx,%rbx
    899 
    900 	movq	24(%rsi),%rax
    901 	mulq	%rbp
    902 	addq	%rax,%r9
    903 	adcq	$0,%rdx
    904 	addq	%rbx,%r9
    905 	adcq	$0,%rdx
    906 	movq	%rdx,%rbx
    907 
    908 	movq	32(%rsi),%rax
    909 	mulq	%rbp
    910 	addq	%rax,%r10
    911 	adcq	$0,%rdx
    912 	addq	%rbx,%r10
    913 	adcq	$0,%rdx
    914 	movq	%rdx,%rbx
    915 
    916 	movq	40(%rsi),%rax
    917 	mulq	%rbp
    918 	addq	%rax,%r11
    919 	adcq	$0,%rdx
    920 	addq	%rbx,%r11
    921 	adcq	$0,%rdx
    922 	movq	%rdx,%rbx
    923 
    924 	movq	48(%rsi),%rax
    925 	mulq	%rbp
    926 	addq	%rax,%r12
    927 	adcq	$0,%rdx
    928 	addq	%rbx,%r12
    929 	adcq	$0,%rdx
    930 	movq	%rdx,%rbx
    931 
    932 	movq	56(%rsi),%rax
    933 	mulq	%rbp
    934 	addq	%rax,%r13
    935 	adcq	$0,%rdx
    936 	addq	%rbx,%r13
    937 	adcq	$0,%rdx
    938 	movq	%rdx,%r14
        # Row 6 -> 568(%rsp).
    939 	movq	48(%rdi),%rbp
    940 	movq	0(%rsi),%rax
    941 	mulq	%rbp
    942 	addq	%rax,%r15
    943 	adcq	$0,%rdx
    944 	movq	%r15,568(%rsp)
    945 	movq	%rdx,%rbx
    946 
    947 	movq	8(%rsi),%rax
    948 	mulq	%rbp
    949 	addq	%rax,%r8
    950 	adcq	$0,%rdx
    951 	addq	%rbx,%r8
    952 	adcq	$0,%rdx
    953 	movq	%rdx,%rbx
    954 
    955 	movq	16(%rsi),%rax
    956 	mulq	%rbp
    957 	addq	%rax,%r9
    958 	adcq	$0,%rdx
    959 	addq	%rbx,%r9
    960 	adcq	$0,%rdx
    961 	movq	%rdx,%rbx
    962 
    963 	movq	24(%rsi),%rax
    964 	mulq	%rbp
    965 	addq	%rax,%r10
    966 	adcq	$0,%rdx
    967 	addq	%rbx,%r10
    968 	adcq	$0,%rdx
    969 	movq	%rdx,%rbx
    970 
    971 	movq	32(%rsi),%rax
    972 	mulq	%rbp
    973 	addq	%rax,%r11
    974 	adcq	$0,%rdx
    975 	addq	%rbx,%r11
    976 	adcq	$0,%rdx
    977 	movq	%rdx,%rbx
    978 
    979 	movq	40(%rsi),%rax
    980 	mulq	%rbp
    981 	addq	%rax,%r12
    982 	adcq	$0,%rdx
    983 	addq	%rbx,%r12
    984 	adcq	$0,%rdx
    985 	movq	%rdx,%rbx
    986 
    987 	movq	48(%rsi),%rax
    988 	mulq	%rbp
    989 	addq	%rax,%r13
    990 	adcq	$0,%rdx
    991 	addq	%rbx,%r13
    992 	adcq	$0,%rdx
    993 	movq	%rdx,%rbx
    994 
    995 	movq	56(%rsi),%rax
    996 	mulq	%rbp
    997 	addq	%rax,%r14
    998 	adcq	$0,%rdx
    999 	addq	%rbx,%r14
   1000 	adcq	$0,%rdx
   1001 	movq	%rdx,%r15
        # Row 7 -> 576(%rsp).
   1002 	movq	56(%rdi),%rbp
   1003 	movq	0(%rsi),%rax
   1004 	mulq	%rbp
   1005 	addq	%rax,%r8
   1006 	adcq	$0,%rdx
   1007 	movq	%r8,576(%rsp)
   1008 	movq	%rdx,%rbx
   1009 
   1010 	movq	8(%rsi),%rax
   1011 	mulq	%rbp
   1012 	addq	%rax,%r9
   1013 	adcq	$0,%rdx
   1014 	addq	%rbx,%r9
   1015 	adcq	$0,%rdx
   1016 	movq	%rdx,%rbx
   1017 
   1018 	movq	16(%rsi),%rax
   1019 	mulq	%rbp
   1020 	addq	%rax,%r10
   1021 	adcq	$0,%rdx
   1022 	addq	%rbx,%r10
   1023 	adcq	$0,%rdx
   1024 	movq	%rdx,%rbx
   1025 
   1026 	movq	24(%rsi),%rax
   1027 	mulq	%rbp
   1028 	addq	%rax,%r11
   1029 	adcq	$0,%rdx
   1030 	addq	%rbx,%r11
   1031 	adcq	$0,%rdx
   1032 	movq	%rdx,%rbx
   1033 
   1034 	movq	32(%rsi),%rax
   1035 	mulq	%rbp
   1036 	addq	%rax,%r12
   1037 	adcq	$0,%rdx
   1038 	addq	%rbx,%r12
   1039 	adcq	$0,%rdx
   1040 	movq	%rdx,%rbx
   1041 
   1042 	movq	40(%rsi),%rax
   1043 	mulq	%rbp
   1044 	addq	%rax,%r13
   1045 	adcq	$0,%rdx
   1046 	addq	%rbx,%r13
   1047 	adcq	$0,%rdx
   1048 	movq	%rdx,%rbx
   1049 
   1050 	movq	48(%rsi),%rax
   1051 	mulq	%rbp
   1052 	addq	%rax,%r14
   1053 	adcq	$0,%rdx
   1054 	addq	%rbx,%r14
   1055 	adcq	$0,%rdx
   1056 	movq	%rdx,%rbx
   1057 
   1058 	movq	56(%rsi),%rax
   1059 	mulq	%rbp
   1060 	addq	%rax,%r15
   1061 	adcq	$0,%rdx
   1062 	addq	%rbx,%r15
   1063 	adcq	$0,%rdx
   1064 	movq	%rdx,%r8
        # Flush the upper eight product words to 584(%rsp)..640(%rsp).
   1065 	movq	%r9,584(%rsp)
   1066 	movq	%r10,592(%rsp)
   1067 	movq	%r11,600(%rsp)
   1068 	movq	%r12,608(%rsp)
   1069 	movq	%r13,616(%rsp)
   1070 	movq	%r14,624(%rsp)
   1071 	movq	%r15,632(%rsp)
   1072 	movq	%r8,640(%rsp)
   1073 
   1074 
   1075 
   1076 
   1077 
        # Tail jump (no call): %rsp is unchanged, so mont_reduce sees the same
        # stack offsets and returns directly to this routine's caller.
   1078 	jmp	mont_reduce
   1079 
   1080 
   1081 .size	mont_mul_a3b,.-mont_mul_a3b
   1082 .type	sqr_reduce,@function
   1083 .align	16
        # sqr_reduce
        # Unrolled squaring of an eight-word value.  The 16-word square is written
        # to 520(%rsp)..640(%rsp), then control is transferred to mont_reduce with
        # a jmp, so mont_reduce's return goes straight back to this routine's caller.
        #   In:   16(%rsp)  pointer to the eight operand words; loaded into %rcx
        #         %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9
        #                   eight operand words held in registers
        #         NOTE(review): the code reads some words from the registers and
        #         others from (%rcx) within the same product rows, so it appears
        #         to assume both hold the same value (word 0 in %r10 ... word 6 in
        #         %r8, word 7 in %r9) - confirm at the call sites.
        #   Method: first the products of distinct word pairs are accumulated
        #   (each pair once), then that sum is doubled and the squares of the
        #   individual words are added in.
        #   Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, %rbp, %r8-%r15, flags
        #             (plus whatever mont_reduce clobbers).
   1084 sqr_reduce:
   1085 	movq	16(%rsp),%rcx
   1086 
   1087 
   1088 
        # Cross products, row 0: %r10 times %r11,%r12,%r13,%r14,%r15,%r8,%r9.
        # The lowest word goes to 528(%rsp); the running sum stays in
        # %r10..%r15,%rsi.
   1089 	movq	%r10,%rbx
   1090 
   1091 	movq	%r11,%rax
   1092 	mulq	%rbx
   1093 	movq	%rax,528(%rsp)
   1094 	movq	%rdx,%r10
   1095 	movq	%r12,%rax
   1096 	mulq	%rbx
   1097 	addq	%rax,%r10
   1098 	adcq	$0,%rdx
   1099 	movq	%rdx,%r11
   1100 	movq	%r13,%rax
   1101 	mulq	%rbx
   1102 	addq	%rax,%r11
   1103 	adcq	$0,%rdx
   1104 	movq	%rdx,%r12
   1105 	movq	%r14,%rax
   1106 	mulq	%rbx
   1107 	addq	%rax,%r12
   1108 	adcq	$0,%rdx
   1109 	movq	%rdx,%r13
   1110 	movq	%r15,%rax
   1111 	mulq	%rbx
   1112 	addq	%rax,%r13
   1113 	adcq	$0,%rdx
   1114 	movq	%rdx,%r14
   1115 	movq	%r8,%rax
   1116 	mulq	%rbx
   1117 	addq	%rax,%r14
   1118 	adcq	$0,%rdx
   1119 	movq	%rdx,%r15
   1120 	movq	%r9,%rax
   1121 	mulq	%rbx
   1122 	addq	%rax,%r15
   1123 	adcq	$0,%rdx
   1124 	movq	%rdx,%rsi
   1125 
   1126 	movq	%r10,536(%rsp)
   1127 
   1128 
   1129 
   1130 
   1131 
        # Row 1: 8(%rcx) times 16..40(%rcx), %r8, %r9.  %r10 carries the high half
        # between columns.  Finished words go to 544(%rsp) and 552(%rsp).
   1132 	movq	8(%rcx),%rbx
   1133 
   1134 	movq	16(%rcx),%rax
   1135 	mulq	%rbx
   1136 	addq	%rax,%r11
   1137 	adcq	$0,%rdx
   1138 	movq	%r11,544(%rsp)
   1139 
   1140 	movq	%rdx,%r10
   1141 	movq	24(%rcx),%rax
   1142 	mulq	%rbx
   1143 	addq	%rax,%r12
   1144 	adcq	$0,%rdx
   1145 	addq	%r10,%r12
   1146 	adcq	$0,%rdx
   1147 	movq	%r12,552(%rsp)
   1148 
   1149 	movq	%rdx,%r10
   1150 	movq	32(%rcx),%rax
   1151 	mulq	%rbx
   1152 	addq	%rax,%r13
   1153 	adcq	$0,%rdx
   1154 	addq	%r10,%r13
   1155 	adcq	$0,%rdx
   1156 
   1157 	movq	%rdx,%r10
   1158 	movq	40(%rcx),%rax
   1159 	mulq	%rbx
   1160 	addq	%rax,%r14
   1161 	adcq	$0,%rdx
   1162 	addq	%r10,%r14
   1163 	adcq	$0,%rdx
   1164 
   1165 	movq	%rdx,%r10
   1166 	movq	%r8,%rax
   1167 	mulq	%rbx
   1168 	addq	%rax,%r15
   1169 	adcq	$0,%rdx
   1170 	addq	%r10,%r15
   1171 	adcq	$0,%rdx
   1172 
   1173 	movq	%rdx,%r10
   1174 	movq	%r9,%rax
   1175 	mulq	%rbx
   1176 	addq	%rax,%rsi
   1177 	adcq	$0,%rdx
   1178 	addq	%r10,%rsi
   1179 	adcq	$0,%rdx
   1180 
   1181 	movq	%rdx,%r11
   1182 
   1183 
   1184 
   1185 
        # Row 2: 16(%rcx) times 24..40(%rcx), %r8, %r9.
        # Finished words go to 560(%rsp) and 568(%rsp).
   1186 	movq	16(%rcx),%rbx
   1187 
   1188 	movq	24(%rcx),%rax
   1189 	mulq	%rbx
   1190 	addq	%rax,%r13
   1191 	adcq	$0,%rdx
   1192 	movq	%r13,560(%rsp)
   1193 
   1194 	movq	%rdx,%r10
   1195 	movq	32(%rcx),%rax
   1196 	mulq	%rbx
   1197 	addq	%rax,%r14
   1198 	adcq	$0,%rdx
   1199 	addq	%r10,%r14
   1200 	adcq	$0,%rdx
   1201 	movq	%r14,568(%rsp)
   1202 
   1203 	movq	%rdx,%r10
   1204 	movq	40(%rcx),%rax
   1205 	mulq	%rbx
   1206 	addq	%rax,%r15
   1207 	adcq	$0,%rdx
   1208 	addq	%r10,%r15
   1209 	adcq	$0,%rdx
   1210 
   1211 	movq	%rdx,%r10
   1212 	movq	%r8,%rax
   1213 	mulq	%rbx
   1214 	addq	%rax,%rsi
   1215 	adcq	$0,%rdx
   1216 	addq	%r10,%rsi
   1217 	adcq	$0,%rdx
   1218 
   1219 	movq	%rdx,%r10
   1220 	movq	%r9,%rax
   1221 	mulq	%rbx
   1222 	addq	%rax,%r11
   1223 	adcq	$0,%rdx
   1224 	addq	%r10,%r11
   1225 	adcq	$0,%rdx
   1226 
   1227 	movq	%rdx,%r12
   1228 
   1229 
   1230 
   1231 
   1232 
        # Row 3: 24(%rcx) times 32(%rcx), 40(%rcx), %r8, %r9.
        # Finished words go to 576(%rsp) and 584(%rsp).
   1233 	movq	24(%rcx),%rbx
   1234 
   1235 	movq	32(%rcx),%rax
   1236 	mulq	%rbx
   1237 	addq	%rax,%r15
   1238 	adcq	$0,%rdx
   1239 	movq	%r15,576(%rsp)
   1240 
   1241 	movq	%rdx,%r10
   1242 	movq	40(%rcx),%rax
   1243 	mulq	%rbx
   1244 	addq	%rax,%rsi
   1245 	adcq	$0,%rdx
   1246 	addq	%r10,%rsi
   1247 	adcq	$0,%rdx
   1248 	movq	%rsi,584(%rsp)
   1249 
   1250 	movq	%rdx,%r10
   1251 	movq	%r8,%rax
   1252 	mulq	%rbx
   1253 	addq	%rax,%r11
   1254 	adcq	$0,%rdx
   1255 	addq	%r10,%r11
   1256 	adcq	$0,%rdx
   1257 
   1258 	movq	%rdx,%r10
   1259 	movq	%r9,%rax
   1260 	mulq	%rbx
   1261 	addq	%rax,%r12
   1262 	adcq	$0,%rdx
   1263 	addq	%r10,%r12
   1264 	adcq	$0,%rdx
   1265 
   1266 	movq	%rdx,%r15
   1267 
   1268 
   1269 
   1270 
        # Row 4: 32(%rcx) times 40(%rcx), %r8, %r9.
        # Finished words go to 592(%rsp) and 600(%rsp).
   1271 	movq	32(%rcx),%rbx
   1272 
   1273 	movq	40(%rcx),%rax
   1274 	mulq	%rbx
   1275 	addq	%rax,%r11
   1276 	adcq	$0,%rdx
   1277 	movq	%r11,592(%rsp)
   1278 
   1279 	movq	%rdx,%r10
   1280 	movq	%r8,%rax
   1281 	mulq	%rbx
   1282 	addq	%rax,%r12
   1283 	adcq	$0,%rdx
   1284 	addq	%r10,%r12
   1285 	adcq	$0,%rdx
   1286 	movq	%r12,600(%rsp)
   1287 
   1288 	movq	%rdx,%r10
   1289 	movq	%r9,%rax
   1290 	mulq	%rbx
   1291 	addq	%rax,%r15
   1292 	adcq	$0,%rdx
   1293 	addq	%r10,%r15
   1294 	adcq	$0,%rdx
   1295 
   1296 	movq	%rdx,%r11
   1297 
   1298 
   1299 
   1300 
        # Row 5: 40(%rcx) times %r8, %r9.
        # Finished words go to 608(%rsp) and 616(%rsp).
   1301 	movq	40(%rcx),%rbx
   1302 
   1303 	movq	%r8,%rax
   1304 	mulq	%rbx
   1305 	addq	%rax,%r15
   1306 	adcq	$0,%rdx
   1307 	movq	%r15,608(%rsp)
   1308 
   1309 	movq	%rdx,%r10
   1310 	movq	%r9,%rax
   1311 	mulq	%rbx
   1312 	addq	%rax,%r11
   1313 	adcq	$0,%rdx
   1314 	addq	%r10,%r11
   1315 	adcq	$0,%rdx
   1316 	movq	%r11,616(%rsp)
   1317 
   1318 	movq	%rdx,%r12
   1319 
   1320 
   1321 
   1322 
        # Row 6: %r8 times %r9.  Last two cross-product words go to 624(%rsp)
        # and 632(%rsp).
   1323 	movq	%r8,%rbx
   1324 
   1325 	movq	%r9,%rax
   1326 	mulq	%rbx
   1327 	addq	%rax,%r12
   1328 	adcq	$0,%rdx
   1329 	movq	%r12,624(%rsp)
   1330 
   1331 	movq	%rdx,632(%rsp)
   1332 
   1333 
        # Doubling and diagonal, lower half: reload cross-product words 1..6.
   1334 	movq	528(%rsp),%r10
   1335 	movq	536(%rsp),%r11
   1336 	movq	544(%rsp),%r12
   1337 	movq	552(%rsp),%r13
   1338 	movq	560(%rsp),%r14
   1339 	movq	568(%rsp),%r15
   1340 
        # Square of 24(%rcx): low half in %rdi, high half in %r8.
   1341 	movq	24(%rcx),%rax
   1342 	mulq	%rax
   1343 	movq	%rax,%rdi
   1344 	movq	%rdx,%r8
   1345 
        # Double words 1..6; the bit shifted out of the top is added into %r8.
   1346 	addq	%r10,%r10
   1347 	adcq	%r11,%r11
   1348 	adcq	%r12,%r12
   1349 	adcq	%r13,%r13
   1350 	adcq	%r14,%r14
   1351 	adcq	%r15,%r15
   1352 	adcq	$0,%r8
   1353 
        # Square of 0(%rcx): low half is result word 0, high half carried in %rbx.
   1354 	movq	0(%rcx),%rax
   1355 	mulq	%rax
   1356 	movq	%rax,520(%rsp)
   1357 	movq	%rdx,%rbx
   1358 
        # Square of 8(%rcx) added at words 2..3, together with the carry from
        # the previous square.
   1359 	movq	8(%rcx),%rax
   1360 	mulq	%rax
   1361 
   1362 	addq	%rbx,%r10
   1363 	adcq	%rax,%r11
   1364 	adcq	$0,%rdx
   1365 
   1366 	movq	%rdx,%rbx
   1367 	movq	%r10,528(%rsp)
   1368 	movq	%r11,536(%rsp)
   1369 
        # Square of 16(%rcx) added at words 4..5.
   1370 	movq	16(%rcx),%rax
   1371 	mulq	%rax
   1372 
   1373 	addq	%rbx,%r12
   1374 	adcq	%rax,%r13
   1375 	adcq	$0,%rdx
   1376 
   1377 	movq	%rdx,%rbx
   1378 
   1379 	movq	%r12,544(%rsp)
   1380 	movq	%r13,552(%rsp)
   1381 
        # Low half of the square of 24(%rcx) (in %rdi) goes into word 6; %rbp
        # records the carry out of the lower half.
   1382 	xorq	%rbp,%rbp
   1383 	addq	%rbx,%r14
   1384 	adcq	%rdi,%r15
   1385 	adcq	$0,%rbp
   1386 
   1387 	movq	%r14,560(%rsp)
   1388 	movq	%r15,568(%rsp)
   1389 
   1390 
   1391 
   1392 
        # Upper half: reload cross-product words 7..14.
   1393 	movq	576(%rsp),%r10
   1394 	movq	584(%rsp),%r11
   1395 	movq	592(%rsp),%r12
   1396 	movq	600(%rsp),%r13
   1397 	movq	608(%rsp),%r14
   1398 	movq	616(%rsp),%r15
   1399 	movq	624(%rsp),%rdi
   1400 	movq	632(%rsp),%rsi
   1401 
        # Square of %r9: low half replaces %r9, high half in %rbx.
   1402 	movq	%r9,%rax
   1403 	mulq	%rax
   1404 	movq	%rax,%r9
   1405 	movq	%rdx,%rbx
   1406 
        # Double words 7..14; the bit shifted out of the top is added into %rbx.
   1407 	addq	%r10,%r10
   1408 	adcq	%r11,%r11
   1409 	adcq	%r12,%r12
   1410 	adcq	%r13,%r13
   1411 	adcq	%r14,%r14
   1412 	adcq	%r15,%r15
   1413 	adcq	%rdi,%rdi
   1414 	adcq	%rsi,%rsi
   1415 	adcq	$0,%rbx
   1416 
        # Add the carry recorded from the lower half.  The flags from this addq
        # are overwritten by the mulq below before anything reads them.
   1417 	addq	%rbp,%r10
   1418 
        # Square of 32(%rcx) added at words 8..9; %r8 (high half of the square of
        # 24(%rcx) plus the lower-half doubling carry) is added into word 7.
   1419 	movq	32(%rcx),%rax
   1420 	mulq	%rax
   1421 
   1422 	addq	%r8,%r10
   1423 	adcq	%rax,%r11
   1424 	adcq	$0,%rdx
   1425 
   1426 	movq	%rdx,%rbp
   1427 
   1428 	movq	%r10,576(%rsp)
   1429 	movq	%r11,584(%rsp)
   1430 
        # Square of 40(%rcx) added at words 10..11.
   1431 	movq	40(%rcx),%rax
   1432 	mulq	%rax
   1433 
   1434 	addq	%rbp,%r12
   1435 	adcq	%rax,%r13
   1436 	adcq	$0,%rdx
   1437 
   1438 	movq	%rdx,%rbp
   1439 
   1440 	movq	%r12,592(%rsp)
   1441 	movq	%r13,600(%rsp)
   1442 
        # Square of 48(%rcx) added at words 12..13.
   1443 	movq	48(%rcx),%rax
   1444 	mulq	%rax
   1445 
   1446 	addq	%rbp,%r14
   1447 	adcq	%rax,%r15
   1448 	adcq	$0,%rdx
   1449 
   1450 	movq	%r14,608(%rsp)
   1451 	movq	%r15,616(%rsp)
   1452 
        # Square of the register word (low half in %r9, high half in %rbx) added
        # at words 14..15.
   1453 	addq	%rdx,%rdi
   1454 	adcq	%r9,%rsi
   1455 	adcq	$0,%rbx
   1456 
   1457 	movq	%rdi,624(%rsp)
   1458 	movq	%rsi,632(%rsp)
   1459 	movq	%rbx,640(%rsp)
   1460 
        # Tail jump (no call): %rsp is unchanged, so mont_reduce sees the same
        # stack offsets and returns directly to this routine's caller.
   1461 	jmp	mont_reduce
   1462 
   1463 
   1464 .size	sqr_reduce,.-sqr_reduce
   1465 .globl	mod_exp_512
   1466 .type	mod_exp_512,@function
        # mod_exp_512
        #
        # 512-bit exponentiation using a 32-entry table and 5-bit exponent
        # windows, built on the helpers mont_reduce, mont_mul_a3b and
        # sqr_reduce.  What those helpers compute, and which registers and
        # stack slots they read or write, is NOT visible from this routine;
        # every statement about them below is an assumption to confirm.
        #
        # Register inputs, as used by the code below:
        #   %rdi  pointer to a 64-byte buffer; used as the running accumulator
        #         and holding the final value on return
        #   %rsi  pointer to 64 bytes read once on entry (presumably the base)
        #   %rdx  pointer to 64 bytes read once on entry and consumed as the
        #         exponent, most significant bits first
        #   %rcx  pointer to a context block; this routine itself reads only
        #         the eight qwords at byte offset 512 (presumably the modulus;
        #         the helpers may read other fields -- confirm)
        #
        # %rbx, %rbp and %r12-%r15 are pushed on entry and reloaded on exit.
        #
        # Frame, as offsets from the 64-byte-aligned %rsp:
        #      0  value of %rsp right after the six pushes
        #      8  saved %rdi       16  saved %rsi       24  saved %rcx
        #     32  init_loop down-counter
        #     40  address of the table entry most recently filled
        #     48  bit index of the next exponent window
        #     64  64-byte copy of the exponent
        #    128  qword zeroed during setup
        #    136  pointer slot rewritten before helper calls (presumably the
        #         helpers' output address -- confirm)
        #    384  64-byte buffer (presumably receives the first mont_reduce result)
        #    448  64-byte operand buffer
        #    512  128-byte double-width buffer prepared before each mont_reduce call
        #    640  2048-byte table: 32 entries stored as interleaved 16-bit words
   1467 mod_exp_512:
        # Save callee-saved registers.  In memory, from the lowest address up,
        # they end up as r15, r14, r13, r12, rbx, rbp -- the order in which the
        # epilogue reloads them.
   1468 	pushq	%rbp
   1469 	pushq	%rbx
   1470 	pushq	%r12
   1471 	pushq	%r13
   1472 	pushq	%r14
   1473 	pushq	%r15
   1474 
   1475 
        # Carve out the frame: at least 2688 bytes, with %rsp rounded down to a
        # multiple of 64 so the movdqa accesses below are aligned.
   1476 	movq	%rsp,%r8
   1477 	subq	$2688,%rsp
   1478 	andq	$-64,%rsp
   1479 
   1480 
        # Remember the pre-frame %rsp and the pointer arguments.
   1481 	movq	%r8,0(%rsp)
   1482 	movq	%rdi,8(%rsp)
   1483 	movq	%rsi,16(%rsp)
   1484 	movq	%rcx,24(%rsp)
   1485 .Lbody:
   1486 
   1487 
   1488 
        # Build a 128-byte value at 512(%rsp): the 64 bytes from (%rsi) placed
        # at byte offset 32 with 32 zero bytes on either side, i.e. the input
        # shifted left by 256 bits when read as little-endian limbs.
   1489 	pxor	%xmm4,%xmm4
   1490 	movdqu	0(%rsi),%xmm0
   1491 	movdqu	16(%rsi),%xmm1
   1492 	movdqu	32(%rsi),%xmm2
   1493 	movdqu	48(%rsi),%xmm3
   1494 	movdqa	%xmm4,512(%rsp)
   1495 	movdqa	%xmm4,528(%rsp)
   1496 	movdqa	%xmm4,608(%rsp)
   1497 	movdqa	%xmm4,624(%rsp)
   1498 	movdqa	%xmm0,544(%rsp)
   1499 	movdqa	%xmm1,560(%rsp)
   1500 	movdqa	%xmm2,576(%rsp)
   1501 	movdqa	%xmm3,592(%rsp)
   1502 
   1503 
        # Load the 64 exponent bytes into xmm0-xmm3.  They are not spilled to
        # the frame until after init_loop, so this relies on the helpers called
        # in between leaving xmm0-xmm3 untouched -- NOTE(review): confirm.
   1504 	movdqu	0(%rdx),%xmm0
   1505 	movdqu	16(%rdx),%xmm1
   1506 	movdqu	32(%rdx),%xmm2
   1507 	movdqu	48(%rdx),%xmm3
   1508 
        # Publish 384(%rsp) in the pointer slot and call mont_reduce
        # (presumably reduces the 128-byte value at 512(%rsp) into 384(%rsp)).
   1509 	leaq	384(%rsp),%rbx
   1510 	movq	%rbx,136(%rsp)
   1511 	call	mont_reduce
   1512 
   1513 
        # Seed the operand buffer at 448(%rsp): every limb zero except limb 2,
        # which is 1 -- the value 2^128 in little-endian limbs (presumably how
        # "one" is represented in the helpers' domain -- confirm).
        # The qword at 128(%rsp), directly above the exponent copy, is zeroed
        # as well; the 32-bit load at 126(%rsp) further down reaches into its
        # first two bytes.
   1514 	leaq	448(%rsp),%rcx
   1515 	xorq	%rax,%rax
   1516 	movq	%rax,0(%rcx)
   1517 	movq	%rax,8(%rcx)
   1518 	movq	%rax,24(%rcx)
   1519 	movq	%rax,32(%rcx)
   1520 	movq	%rax,40(%rcx)
   1521 	movq	%rax,48(%rcx)
   1522 	movq	%rax,56(%rcx)
   1523 	movq	%rax,128(%rsp)
   1524 	movq	$1,16(%rcx)
   1525 
        # %rbp = table base, %rsi = operand buffer (kept for after the loop),
        # %rdi = write cursor for table entry 0, %rax = qword count.
   1526 	leaq	640(%rsp),%rbp
   1527 	movq	%rcx,%rsi
   1528 	movq	%rbp,%rdi
   1529 	movq	$8,%rax
        # Scatter the eight qwords at (%rcx) into table entry 0.  Each qword is
        # split into four 16-bit words written 64 bytes apart and the cursor
        # advances 256 bytes per qword, so an entry is 32 words at a 64-byte
        # stride and entry k begins at 640(%rsp) + 2*k.
   1530 loop_0:
   1531 	movq	(%rcx),%rbx
   1532 	movw	%bx,(%rdi)
   1533 	shrq	$16,%rbx
   1534 	movw	%bx,64(%rdi)
   1535 	shrq	$16,%rbx
   1536 	movw	%bx,128(%rdi)
   1537 	shrq	$16,%rbx
   1538 	movw	%bx,192(%rdi)
   1539 	leaq	8(%rcx),%rcx
   1540 	leaq	256(%rdi),%rdi
   1541 	decq	%rax
   1542 	jnz	loop_0
        # 31 entries remain to be filled; 40(%rsp) tracks the entry address.
   1543 	movq	$31,%rax
   1544 	movq	%rax,32(%rsp)
   1545 	movq	%rbp,40(%rsp)
   1546 
        # Point the pointer slot at 448(%rsp) and load that buffer's eight
        # limbs into r10-r15, r8, r9 (presumably the register operand expected
        # by mont_mul_a3b -- confirm against the helper).
   1547 	movq	%rsi,136(%rsp)
   1548 	movq	0(%rsi),%r10
   1549 	movq	8(%rsi),%r11
   1550 	movq	16(%rsi),%r12
   1551 	movq	24(%rsi),%r13
   1552 	movq	32(%rsi),%r14
   1553 	movq	40(%rsi),%r15
   1554 	movq	48(%rsi),%r8
   1555 	movq	56(%rsi),%r9
        # Fill table entries 1..31.  Each pass calls mont_mul_a3b with
        # %rdi = 384(%rsp), then scatters the 64 bytes at 448(%rsp) into the
        # next entry.  Presumably entry k ends up holding the k-th power of the
        # base; that depends on the helper leaving its product both at
        # 448(%rsp) and in the operand registers -- NOTE(review): confirm.
   1556 init_loop:
   1557 	leaq	384(%rsp),%rdi
   1558 	call	mont_mul_a3b
   1559 	leaq	448(%rsp),%rsi
        # Step to the next table entry (entries are 2 bytes apart).
   1560 	movq	40(%rsp),%rbp
   1561 	addq	$2,%rbp
   1562 	movq	%rbp,40(%rsp)
   1563 	movq	%rsi,%rcx
   1564 	movq	$8,%rax
        # Same scatter pattern as loop_0, writing through %rbp.
   1565 loop_1:
   1566 	movq	(%rcx),%rbx
   1567 	movw	%bx,(%rbp)
   1568 	shrq	$16,%rbx
   1569 	movw	%bx,64(%rbp)
   1570 	shrq	$16,%rbx
   1571 	movw	%bx,128(%rbp)
   1572 	shrq	$16,%rbx
   1573 	movw	%bx,192(%rbp)
   1574 	leaq	8(%rcx),%rcx
   1575 	leaq	256(%rbp),%rbp
   1576 	decq	%rax
   1577 	jnz	loop_1
        # Decrement the entry counter; the movq store does not alter flags, so
        # jne still tests the result of the subq.
   1578 	movq	32(%rsp),%rax
   1579 	subq	$1,%rax
   1580 	movq	%rax,32(%rsp)
   1581 	jne	init_loop
   1582 
   1583 
   1584 
        # Spill the exponent (still expected in xmm0-xmm3) to 64(%rsp).
   1585 	movdqa	%xmm0,64(%rsp)
   1586 	movdqa	%xmm1,80(%rsp)
   1587 	movdqa	%xmm2,96(%rsp)
   1588 	movdqa	%xmm3,112(%rsp)
   1589 
   1590 
   1591 
   1592 
   1593 
        # First window.  The 32-bit load at 126(%rsp) covers the exponent's top
        # 16-bit word plus the two bytes above it.  Shifting right by 11 leaves
        # exponent bits 507-511 as the table index, provided those two extra
        # bytes are still zero.  The low 11 bits are written back, which clears
        # bits 507-511 in the stored copy.
   1594 	movl	126(%rsp),%eax
   1595 	movq	%rax,%rdx
   1596 	shrq	$11,%rax
   1597 	andl	$2047,%edx
   1598 	movl	%edx,126(%rsp)
        # %rsi = selected table entry, %rdx = accumulator buffer (saved %rdi).
   1599 	leaq	640(%rsp,%rax,2),%rsi
   1600 	movq	8(%rsp),%rdx
   1601 	movq	$4,%rbp
        # Gather: the inverse of the scatter above.  Each pass rebuilds two
        # qwords from eight 16-bit words spaced 64 bytes apart (highest word
        # first, shifted up 16 bits at a time) and stores them to (%rdx).
   1602 loop_2:
   1603 	movzwq	192(%rsi),%rbx
   1604 	movzwq	448(%rsi),%rax
   1605 	shlq	$16,%rbx
   1606 	shlq	$16,%rax
   1607 	movw	128(%rsi),%bx
   1608 	movw	384(%rsi),%ax
   1609 	shlq	$16,%rbx
   1610 	shlq	$16,%rax
   1611 	movw	64(%rsi),%bx
   1612 	movw	320(%rsi),%ax
   1613 	shlq	$16,%rbx
   1614 	shlq	$16,%rax
   1615 	movw	0(%rsi),%bx
   1616 	movw	256(%rsi),%ax
   1617 	movq	%rbx,0(%rdx)
   1618 	movq	%rax,8(%rdx)
   1619 	leaq	512(%rsi),%rsi
   1620 	leaq	16(%rdx),%rdx
   1621 	subq	$1,%rbp
   1622 	jnz	loop_2
        # The remaining windows start at bit 505, 500, ..., 0.  The window at
        # 505 covers bits 505-509, but 507-509 were cleared above, so it only
        # contributes two bits; accordingly the loop is entered at sqr_2, which
        # runs two sqr_reduce calls instead of five.
   1623 	movq	$505,48(%rsp)
   1624 
        # Point the pointer slot at the accumulator and load its limbs into
        # r10-r15, r8, r9 (presumably the register operand expected by
        # sqr_reduce -- confirm against the helper).
   1625 	movq	8(%rsp),%rcx
   1626 	movq	%rcx,136(%rsp)
   1627 	movq	0(%rcx),%r10
   1628 	movq	8(%rcx),%r11
   1629 	movq	16(%rcx),%r12
   1630 	movq	24(%rcx),%r13
   1631 	movq	32(%rcx),%r14
   1632 	movq	40(%rcx),%r15
   1633 	movq	48(%rcx),%r8
   1634 	movq	56(%rcx),%r9
   1635 	jmp	sqr_2
   1636 
        # Main loop: five sqr_reduce calls per window (presumably one modular
        # squaring of the accumulator each), then one table multiply.
   1637 main_loop_a3b:
   1638 	call	sqr_reduce
   1639 	call	sqr_reduce
   1640 	call	sqr_reduce
   1641 sqr_2:
   1642 	call	sqr_reduce
   1643 	call	sqr_reduce
   1644 
   1645 
   1646 
        # Extract the 5-bit window at the bit index stored in 48(%rsp):
        # load 32 bits from the 16-bit word containing that bit
        # (index >> 4, scaled by 2), shift right by index & 15, keep 5 bits.
        # The movl zero-extends, so the 64-bit shift pulls in no stale bits.
   1647 	movq	48(%rsp),%rcx
   1648 	movq	%rcx,%rax
   1649 	shrq	$4,%rax
   1650 	movl	64(%rsp,%rax,2),%edx
   1651 	andq	$15,%rcx
   1652 	shrq	%cl,%rdx
   1653 	andq	$31,%rdx
   1654 
        # %rsi = selected table entry; %rdx and %rdi = operand buffer 448(%rsp).
   1655 	leaq	640(%rsp,%rdx,2),%rsi
   1656 	leaq	448(%rsp),%rdx
   1657 	movq	%rdx,%rdi
   1658 	movq	$4,%rbp
        # Gather the selected entry into 448(%rsp); same pattern as loop_2.
   1659 loop_3:
   1660 	movzwq	192(%rsi),%rbx
   1661 	movzwq	448(%rsi),%rax
   1662 	shlq	$16,%rbx
   1663 	shlq	$16,%rax
   1664 	movw	128(%rsi),%bx
   1665 	movw	384(%rsi),%ax
   1666 	shlq	$16,%rbx
   1667 	shlq	$16,%rax
   1668 	movw	64(%rsi),%bx
   1669 	movw	320(%rsi),%ax
   1670 	shlq	$16,%rbx
   1671 	shlq	$16,%rax
   1672 	movw	0(%rsi),%bx
   1673 	movw	256(%rsi),%ax
   1674 	movq	%rbx,0(%rdx)
   1675 	movq	%rax,8(%rdx)
   1676 	leaq	512(%rsi),%rsi
   1677 	leaq	16(%rdx),%rdx
   1678 	subq	$1,%rbp
   1679 	jnz	loop_3
        # Call mont_mul_a3b with %rdi = gathered entry at 448(%rsp) and
        # %rsi = accumulator (presumably accumulator *= entry -- confirm).
   1680 	movq	8(%rsp),%rsi
   1681 	call	mont_mul_a3b
   1682 
   1683 
   1684 
        # Move down one window.  The store does not alter flags, so jge tests
        # the subq result as a signed value: the loop runs for indices
        # 505, 500, ..., 0 and exits once the index goes negative.
   1685 	movq	48(%rsp),%rcx
   1686 	subq	$5,%rcx
   1687 	movq	%rcx,48(%rsp)
   1688 	jge	main_loop_a3b
   1689 
   1690 
   1691 
   1692 end_main_loop_a3b:
   1693 
   1694 
        # Copy the accumulator into the low 64 bytes of the double-width buffer
        # at 512(%rsp), zero the high 64 bytes, and call mont_reduce
        # (presumably converts the accumulator out of the helpers' scaled
        # domain, writing through the pointer slot that was last set to the
        # accumulator address -- NOTE(review): confirm).
   1695 	movq	8(%rsp),%rdx
   1696 	pxor	%xmm4,%xmm4
   1697 	movdqu	0(%rdx),%xmm0
   1698 	movdqu	16(%rdx),%xmm1
   1699 	movdqu	32(%rdx),%xmm2
   1700 	movdqu	48(%rdx),%xmm3
   1701 	movdqa	%xmm4,576(%rsp)
   1702 	movdqa	%xmm4,592(%rsp)
   1703 	movdqa	%xmm4,608(%rsp)
   1704 	movdqa	%xmm4,624(%rsp)
   1705 	movdqa	%xmm0,512(%rsp)
   1706 	movdqa	%xmm1,528(%rsp)
   1707 	movdqa	%xmm2,544(%rsp)
   1708 	movdqa	%xmm3,560(%rsp)
   1709 	call	mont_reduce
   1710 
   1711 
   1712 
        # Final conditional subtraction.  Load the eight result limbs...
   1713 	movq	8(%rsp),%rax
   1714 	movq	0(%rax),%r8
   1715 	movq	8(%rax),%r9
   1716 	movq	16(%rax),%r10
   1717 	movq	24(%rax),%r11
   1718 	movq	32(%rax),%r12
   1719 	movq	40(%rax),%r13
   1720 	movq	48(%rax),%r14
   1721 	movq	56(%rax),%r15
   1722 
   1723 
        # ...and subtract the eight limbs found 512 bytes into the context
        # block (presumably the modulus), propagating the borrow upward.
   1724 	movq	24(%rsp),%rbx
   1725 	addq	$512,%rbx
   1726 
   1727 	subq	0(%rbx),%r8
   1728 	sbbq	8(%rbx),%r9
   1729 	sbbq	16(%rbx),%r10
   1730 	sbbq	24(%rbx),%r11
   1731 	sbbq	32(%rbx),%r12
   1732 	sbbq	40(%rbx),%r13
   1733 	sbbq	48(%rbx),%r14
   1734 	sbbq	56(%rbx),%r15
   1735 
   1736 
        # Select without branching: CF from the last sbbq survives the movq
        # loads and stores.  With no borrow (CF clear) the difference replaces
        # the result limb; with a borrow the original limb is written back.
   1737 	movq	0(%rax),%rsi
   1738 	movq	8(%rax),%rdi
   1739 	movq	16(%rax),%rcx
   1740 	movq	24(%rax),%rdx
   1741 	cmovncq	%r8,%rsi
   1742 	cmovncq	%r9,%rdi
   1743 	cmovncq	%r10,%rcx
   1744 	cmovncq	%r11,%rdx
   1745 	movq	%rsi,0(%rax)
   1746 	movq	%rdi,8(%rax)
   1747 	movq	%rcx,16(%rax)
   1748 	movq	%rdx,24(%rax)
   1749 
        # Upper four limbs, same selection on the same carry flag.
   1750 	movq	32(%rax),%rsi
   1751 	movq	40(%rax),%rdi
   1752 	movq	48(%rax),%rcx
   1753 	movq	56(%rax),%rdx
   1754 	cmovncq	%r12,%rsi
   1755 	cmovncq	%r13,%rdi
   1756 	cmovncq	%r14,%rcx
   1757 	cmovncq	%r15,%rdx
   1758 	movq	%rsi,32(%rax)
   1759 	movq	%rdi,40(%rax)
   1760 	movq	%rcx,48(%rax)
   1761 	movq	%rdx,56(%rax)
   1762 
        # Epilogue: fetch the pre-frame %rsp, reload the six callee-saved
        # registers from where the pushes put them, and drop the 48 bytes they
        # occupied so %rsp points at the return address.
   1763 	movq	0(%rsp),%rsi
   1764 	movq	0(%rsi),%r15
   1765 	movq	8(%rsi),%r14
   1766 	movq	16(%rsi),%r13
   1767 	movq	24(%rsi),%r12
   1768 	movq	32(%rsi),%rbx
   1769 	movq	40(%rsi),%rbp
   1770 	leaq	48(%rsi),%rsp
   1771 .Lepilogue:
        # 0xf3,0xc3 is the two-byte "rep ret" encoding of a return.
   1772 	.byte	0xf3,0xc3
   1773 .size	mod_exp_512, . - mod_exp_512
   1774