Home | History | Annotate | Download | only in bn
      1 #if defined(__x86_64__)
      2 .text
      3 
      4 .type	MULADD_128x512,@function
      5 .align	16
        /*
         * MULADD_128x512: multiply the eight qwords at (%rsi) by two 64-bit
         * multiplier words and add the products into an eight-word register
         * accumulator (two row passes of a schoolbook multiply-accumulate).
         *
         * In:   %rsi = pointer to eight qwords (multiplicand, read only)
         *       %rbp = first multiplier word, used as found on entry
         *              (it is NOT loaded from 0(%rdi) here)
         *       %rdi = pointer; the qword at 8(%rdi) is the second multiplier word
         *       %rcx = pointer; 0(%rcx) and 8(%rcx) receive the two lowest result words
         *       %r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15 = accumulator, lowest word first
         * Out:  0(%rcx), 8(%rcx) = two lowest words of the result
         *       %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9 = remaining eight words,
         *       lowest word first (note the rotated register order)
         * Clobbers: %rax, %rdx, %rbx, %rbp, flags.
         * %rsi, %rdi and %rcx are left unchanged; %rsp is not referenced.
         *
         * Every step below has the same shape: mulq leaves the 128-bit product
         * in %rdx:%rax, the low half is added to the current accumulator word,
         * the carry-in from the previous step (%rbx) is added, and the high half
         * plus both carries becomes the carry-out for the next step.
         */
      6 MULADD_128x512:
        /* Pass 1: multiplier = %rbp. The finished lowest word goes to 0(%rcx). */
      7 	movq	0(%rsi),%rax
      8 	mulq	%rbp
      9 	addq	%rax,%r8
     10 	adcq	$0,%rdx
     11 	movq	%r8,0(%rcx)
     12 	movq	%rdx,%rbx
     13 
     14 	movq	8(%rsi),%rax
     15 	mulq	%rbp
     16 	addq	%rax,%r9
     17 	adcq	$0,%rdx
     18 	addq	%rbx,%r9
     19 	adcq	$0,%rdx
     20 	movq	%rdx,%rbx
     21 
     22 	movq	16(%rsi),%rax
     23 	mulq	%rbp
     24 	addq	%rax,%r10
     25 	adcq	$0,%rdx
     26 	addq	%rbx,%r10
     27 	adcq	$0,%rdx
     28 	movq	%rdx,%rbx
     29 
     30 	movq	24(%rsi),%rax
     31 	mulq	%rbp
     32 	addq	%rax,%r11
     33 	adcq	$0,%rdx
     34 	addq	%rbx,%r11
     35 	adcq	$0,%rdx
     36 	movq	%rdx,%rbx
     37 
     38 	movq	32(%rsi),%rax
     39 	mulq	%rbp
     40 	addq	%rax,%r12
     41 	adcq	$0,%rdx
     42 	addq	%rbx,%r12
     43 	adcq	$0,%rdx
     44 	movq	%rdx,%rbx
     45 
     46 	movq	40(%rsi),%rax
     47 	mulq	%rbp
     48 	addq	%rax,%r13
     49 	adcq	$0,%rdx
     50 	addq	%rbx,%r13
     51 	adcq	$0,%rdx
     52 	movq	%rdx,%rbx
     53 
     54 	movq	48(%rsi),%rax
     55 	mulq	%rbp
     56 	addq	%rax,%r14
     57 	adcq	$0,%rdx
     58 	addq	%rbx,%r14
     59 	adcq	$0,%rdx
     60 	movq	%rdx,%rbx
     61 
     62 	movq	56(%rsi),%rax
     63 	mulq	%rbp
     64 	addq	%rax,%r15
     65 	adcq	$0,%rdx
     66 	addq	%rbx,%r15
     67 	adcq	$0,%rdx
        /* %r8 was already stored, so it is reused as the new top accumulator word. */
     68 	movq	%rdx,%r8
        /* Pass 2: multiplier = 8(%rdi). Accumulator is now %r9..%r15,%r8; */
        /* the finished lowest word goes to 8(%rcx). */
     69 	movq	8(%rdi),%rbp
     70 	movq	0(%rsi),%rax
     71 	mulq	%rbp
     72 	addq	%rax,%r9
     73 	adcq	$0,%rdx
     74 	movq	%r9,8(%rcx)
     75 	movq	%rdx,%rbx
     76 
     77 	movq	8(%rsi),%rax
     78 	mulq	%rbp
     79 	addq	%rax,%r10
     80 	adcq	$0,%rdx
     81 	addq	%rbx,%r10
     82 	adcq	$0,%rdx
     83 	movq	%rdx,%rbx
     84 
     85 	movq	16(%rsi),%rax
     86 	mulq	%rbp
     87 	addq	%rax,%r11
     88 	adcq	$0,%rdx
     89 	addq	%rbx,%r11
     90 	adcq	$0,%rdx
     91 	movq	%rdx,%rbx
     92 
     93 	movq	24(%rsi),%rax
     94 	mulq	%rbp
     95 	addq	%rax,%r12
     96 	adcq	$0,%rdx
     97 	addq	%rbx,%r12
     98 	adcq	$0,%rdx
     99 	movq	%rdx,%rbx
    100 
    101 	movq	32(%rsi),%rax
    102 	mulq	%rbp
    103 	addq	%rax,%r13
    104 	adcq	$0,%rdx
    105 	addq	%rbx,%r13
    106 	adcq	$0,%rdx
    107 	movq	%rdx,%rbx
    108 
    109 	movq	40(%rsi),%rax
    110 	mulq	%rbp
    111 	addq	%rax,%r14
    112 	adcq	$0,%rdx
    113 	addq	%rbx,%r14
    114 	adcq	$0,%rdx
    115 	movq	%rdx,%rbx
    116 
    117 	movq	48(%rsi),%rax
    118 	mulq	%rbp
    119 	addq	%rax,%r15
    120 	adcq	$0,%rdx
    121 	addq	%rbx,%r15
    122 	adcq	$0,%rdx
    123 	movq	%rdx,%rbx
    124 
    125 	movq	56(%rsi),%rax
    126 	mulq	%rbp
    127 	addq	%rax,%r8
    128 	adcq	$0,%rdx
    129 	addq	%rbx,%r8
    130 	adcq	$0,%rdx
        /* Final carry-out becomes the top word; result is %r10..%r15,%r8,%r9. */
    131 	movq	%rdx,%r9
        /* 0xf3,0xc3 is the two-byte "rep ret" encoding of a near return. */
    132 	.byte	0xf3,0xc3
    133 .size	MULADD_128x512,.-MULADD_128x512
    134 .type	mont_reduce,@function
    135 .align	16
        /*
         * mont_reduce: fold the 16-qword value at 520(%rsp)..640(%rsp) down to
         * eight qwords using multiplications by constants from a caller-supplied
         * table, then store the result through the pointer held in 144(%rsp).
         * NOTE(review): the name suggests the reduction step of a Montgomery
         * multiplication; the meaning of the table entries is defined by whoever
         * fills the table and is not visible in this routine - confirm there.
         *
         * All %rsp offsets are as seen on entry to this routine (return address
         * at 0(%rsp)); %rsp itself is only changed by the call/ret pairs.
         *
         * Stack slots read:
         *   32(%rsp)             pointer to the constants table ("base" below)
         *   144(%rsp)            pointer to the 8-qword destination
         *   520(%rsp)..640(%rsp) 16-qword input, lowest word first
         * Stack slots written (scratch):
         *   192(%rsp)..          via %rdi, offsets 0,8,16,24,64,72,88
         *   296(%rsp)..          via %rcx, offsets 0,8,64,72
         *   384(%rsp)            packed carry bits
         * Table regions read: base+576 (8 qwords), base+640 (8 qwords),
         *   base+704 (2 qwords), base+512 (8 qwords), and eight qwords at
         *   base + 8*k + 64*i for i = 0..7, where k is the packed carry value.
         *
         * Out:  %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9 = result, lowest word
         *       first; the same eight words are stored at the 144(%rsp) pointer.
         * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, %rbp, %r8-%r15, flags.
         */
    136 mont_reduce:
        /* %rdi = scratch area, %rsi = base+576, %rcx = start of the 16-qword input. */
    137 	leaq	192(%rsp),%rdi
    138 	movq	32(%rsp),%rsi
    139 	addq	$576,%rsi
    140 	leaq	520(%rsp),%rcx
    141 
        /* Four multiply-accumulate passes: input words 12..15 (96..120(%rcx)) */
        /* times the eight qwords at (%rsi), accumulated onto input words 0..7. */
        /* Pass 1: multiplier = input word 12; accumulator words are loaded from */
        /* (%rcx) as they are first needed. Lowest result word goes to 0(%rdi). */
    142 	movq	96(%rcx),%rbp
    143 	movq	0(%rsi),%rax
    144 	mulq	%rbp
    145 	movq	(%rcx),%r8
    146 	addq	%rax,%r8
    147 	adcq	$0,%rdx
    148 	movq	%r8,0(%rdi)
    149 	movq	%rdx,%rbx
    150 
    151 	movq	8(%rsi),%rax
    152 	mulq	%rbp
    153 	movq	8(%rcx),%r9
    154 	addq	%rax,%r9
    155 	adcq	$0,%rdx
    156 	addq	%rbx,%r9
    157 	adcq	$0,%rdx
    158 	movq	%rdx,%rbx
    159 
    160 	movq	16(%rsi),%rax
    161 	mulq	%rbp
    162 	movq	16(%rcx),%r10
    163 	addq	%rax,%r10
    164 	adcq	$0,%rdx
    165 	addq	%rbx,%r10
    166 	adcq	$0,%rdx
    167 	movq	%rdx,%rbx
    168 
    169 	movq	24(%rsi),%rax
    170 	mulq	%rbp
    171 	movq	24(%rcx),%r11
    172 	addq	%rax,%r11
    173 	adcq	$0,%rdx
    174 	addq	%rbx,%r11
    175 	adcq	$0,%rdx
    176 	movq	%rdx,%rbx
    177 
    178 	movq	32(%rsi),%rax
    179 	mulq	%rbp
    180 	movq	32(%rcx),%r12
    181 	addq	%rax,%r12
    182 	adcq	$0,%rdx
    183 	addq	%rbx,%r12
    184 	adcq	$0,%rdx
    185 	movq	%rdx,%rbx
    186 
    187 	movq	40(%rsi),%rax
    188 	mulq	%rbp
    189 	movq	40(%rcx),%r13
    190 	addq	%rax,%r13
    191 	adcq	$0,%rdx
    192 	addq	%rbx,%r13
    193 	adcq	$0,%rdx
    194 	movq	%rdx,%rbx
    195 
    196 	movq	48(%rsi),%rax
    197 	mulq	%rbp
    198 	movq	48(%rcx),%r14
    199 	addq	%rax,%r14
    200 	adcq	$0,%rdx
    201 	addq	%rbx,%r14
    202 	adcq	$0,%rdx
    203 	movq	%rdx,%rbx
    204 
    205 	movq	56(%rsi),%rax
    206 	mulq	%rbp
    207 	movq	56(%rcx),%r15
    208 	addq	%rax,%r15
    209 	adcq	$0,%rdx
    210 	addq	%rbx,%r15
    211 	adcq	$0,%rdx
    212 	movq	%rdx,%r8
        /* Pass 2: multiplier = input word 13; lowest result word goes to 8(%rdi). */
    213 	movq	104(%rcx),%rbp
    214 	movq	0(%rsi),%rax
    215 	mulq	%rbp
    216 	addq	%rax,%r9
    217 	adcq	$0,%rdx
    218 	movq	%r9,8(%rdi)
    219 	movq	%rdx,%rbx
    220 
    221 	movq	8(%rsi),%rax
    222 	mulq	%rbp
    223 	addq	%rax,%r10
    224 	adcq	$0,%rdx
    225 	addq	%rbx,%r10
    226 	adcq	$0,%rdx
    227 	movq	%rdx,%rbx
    228 
    229 	movq	16(%rsi),%rax
    230 	mulq	%rbp
    231 	addq	%rax,%r11
    232 	adcq	$0,%rdx
    233 	addq	%rbx,%r11
    234 	adcq	$0,%rdx
    235 	movq	%rdx,%rbx
    236 
    237 	movq	24(%rsi),%rax
    238 	mulq	%rbp
    239 	addq	%rax,%r12
    240 	adcq	$0,%rdx
    241 	addq	%rbx,%r12
    242 	adcq	$0,%rdx
    243 	movq	%rdx,%rbx
    244 
    245 	movq	32(%rsi),%rax
    246 	mulq	%rbp
    247 	addq	%rax,%r13
    248 	adcq	$0,%rdx
    249 	addq	%rbx,%r13
    250 	adcq	$0,%rdx
    251 	movq	%rdx,%rbx
    252 
    253 	movq	40(%rsi),%rax
    254 	mulq	%rbp
    255 	addq	%rax,%r14
    256 	adcq	$0,%rdx
    257 	addq	%rbx,%r14
    258 	adcq	$0,%rdx
    259 	movq	%rdx,%rbx
    260 
    261 	movq	48(%rsi),%rax
    262 	mulq	%rbp
    263 	addq	%rax,%r15
    264 	adcq	$0,%rdx
    265 	addq	%rbx,%r15
    266 	adcq	$0,%rdx
    267 	movq	%rdx,%rbx
    268 
    269 	movq	56(%rsi),%rax
    270 	mulq	%rbp
    271 	addq	%rax,%r8
    272 	adcq	$0,%rdx
    273 	addq	%rbx,%r8
    274 	adcq	$0,%rdx
    275 	movq	%rdx,%r9
        /* Pass 3: multiplier = input word 14; lowest result word goes to 16(%rdi). */
    276 	movq	112(%rcx),%rbp
    277 	movq	0(%rsi),%rax
    278 	mulq	%rbp
    279 	addq	%rax,%r10
    280 	adcq	$0,%rdx
    281 	movq	%r10,16(%rdi)
    282 	movq	%rdx,%rbx
    283 
    284 	movq	8(%rsi),%rax
    285 	mulq	%rbp
    286 	addq	%rax,%r11
    287 	adcq	$0,%rdx
    288 	addq	%rbx,%r11
    289 	adcq	$0,%rdx
    290 	movq	%rdx,%rbx
    291 
    292 	movq	16(%rsi),%rax
    293 	mulq	%rbp
    294 	addq	%rax,%r12
    295 	adcq	$0,%rdx
    296 	addq	%rbx,%r12
    297 	adcq	$0,%rdx
    298 	movq	%rdx,%rbx
    299 
    300 	movq	24(%rsi),%rax
    301 	mulq	%rbp
    302 	addq	%rax,%r13
    303 	adcq	$0,%rdx
    304 	addq	%rbx,%r13
    305 	adcq	$0,%rdx
    306 	movq	%rdx,%rbx
    307 
    308 	movq	32(%rsi),%rax
    309 	mulq	%rbp
    310 	addq	%rax,%r14
    311 	adcq	$0,%rdx
    312 	addq	%rbx,%r14
    313 	adcq	$0,%rdx
    314 	movq	%rdx,%rbx
    315 
    316 	movq	40(%rsi),%rax
    317 	mulq	%rbp
    318 	addq	%rax,%r15
    319 	adcq	$0,%rdx
    320 	addq	%rbx,%r15
    321 	adcq	$0,%rdx
    322 	movq	%rdx,%rbx
    323 
    324 	movq	48(%rsi),%rax
    325 	mulq	%rbp
    326 	addq	%rax,%r8
    327 	adcq	$0,%rdx
    328 	addq	%rbx,%r8
    329 	adcq	$0,%rdx
    330 	movq	%rdx,%rbx
    331 
    332 	movq	56(%rsi),%rax
    333 	mulq	%rbp
    334 	addq	%rax,%r9
    335 	adcq	$0,%rdx
    336 	addq	%rbx,%r9
    337 	adcq	$0,%rdx
    338 	movq	%rdx,%r10
        /* Pass 4: multiplier = input word 15; lowest result word goes to 24(%rdi). */
    339 	movq	120(%rcx),%rbp
    340 	movq	0(%rsi),%rax
    341 	mulq	%rbp
    342 	addq	%rax,%r11
    343 	adcq	$0,%rdx
    344 	movq	%r11,24(%rdi)
    345 	movq	%rdx,%rbx
    346 
    347 	movq	8(%rsi),%rax
    348 	mulq	%rbp
    349 	addq	%rax,%r12
    350 	adcq	$0,%rdx
    351 	addq	%rbx,%r12
    352 	adcq	$0,%rdx
    353 	movq	%rdx,%rbx
    354 
    355 	movq	16(%rsi),%rax
    356 	mulq	%rbp
    357 	addq	%rax,%r13
    358 	adcq	$0,%rdx
    359 	addq	%rbx,%r13
    360 	adcq	$0,%rdx
    361 	movq	%rdx,%rbx
    362 
    363 	movq	24(%rsi),%rax
    364 	mulq	%rbp
    365 	addq	%rax,%r14
    366 	adcq	$0,%rdx
    367 	addq	%rbx,%r14
    368 	adcq	$0,%rdx
    369 	movq	%rdx,%rbx
    370 
    371 	movq	32(%rsi),%rax
    372 	mulq	%rbp
    373 	addq	%rax,%r15
    374 	adcq	$0,%rdx
    375 	addq	%rbx,%r15
    376 	adcq	$0,%rdx
    377 	movq	%rdx,%rbx
    378 
    379 	movq	40(%rsi),%rax
    380 	mulq	%rbp
    381 	addq	%rax,%r8
    382 	adcq	$0,%rdx
    383 	addq	%rbx,%r8
    384 	adcq	$0,%rdx
    385 	movq	%rdx,%rbx
    386 
    387 	movq	48(%rsi),%rax
    388 	mulq	%rbp
    389 	addq	%rax,%r9
    390 	adcq	$0,%rdx
    391 	addq	%rbx,%r9
    392 	adcq	$0,%rdx
    393 	movq	%rdx,%rbx
    394 
    395 	movq	56(%rsi),%rax
    396 	mulq	%rbp
    397 	addq	%rax,%r10
    398 	adcq	$0,%rdx
    399 	addq	%rbx,%r10
    400 	adcq	$0,%rdx
    401 	movq	%rdx,%r11
        /* Accumulator is now %r12..%r15,%r8..%r11 (lowest first), with the four */
        /* lowest result words already in 0..24(%rdi). Add input words 8..11 into */
        /* the top four words; %rax receives the carry-out (carry bit c1). */
    402 	xorq	%rax,%rax
    403 
    404 	addq	64(%rcx),%r8
    405 	adcq	72(%rcx),%r9
    406 	adcq	80(%rcx),%r10
    407 	adcq	88(%rcx),%r11
    408 	adcq	$0,%rax
    409 
    410 
    411 
    412 
        /* Spill the top four words. %r10 is kept in %rbp instead of memory: it is */
        /* the first multiplier word for the MULADD_128x512 call below, and %r11 */
        /* (at 88(%rdi)) will be read by that call as its second multiplier word. */
    413 	movq	%r8,64(%rdi)
    414 	movq	%r9,72(%rdi)
    415 	movq	%r10,%rbp
    416 	movq	%r11,88(%rdi)
    417 
        /* Save carry bit c1. */
    418 	movq	%rax,384(%rsp)
    419 
        /* Reload the four lowest words so %r8..%r15 is a contiguous accumulator. */
    420 	movq	0(%rdi),%r8
    421 	movq	8(%rdi),%r9
    422 	movq	16(%rdi),%r10
    423 	movq	24(%rdi),%r11
    424 
    425 
    426 
    427 
    428 
    429 
    430 
    431 
        /* %rdi += 80 so that 8(%rdi) addresses the word spilled at offset 88 and */
        /* -16/-8(%rdi) address the words spilled at offsets 64/72. */
    432 	addq	$80,%rdi
    433 
        /* %rsi = base+640 (next 8-qword table entry); %rcx = second scratch area. */
    434 	addq	$64,%rsi
    435 	leaq	296(%rsp),%rcx
    436 
    437 	call	MULADD_128x512
    438 
    439 	movq	384(%rsp),%rax
    440 
    441 
        /* Add the two words spilled at offsets 64/72 into the top two words of the */
        /* MULADD result and park them at 64/72(%rcx). */
    442 	addq	-16(%rdi),%r8
    443 	adcq	-8(%rdi),%r9
    444 	movq	%r8,64(%rcx)
    445 	movq	%r9,72(%rcx)
    446 
        /* Shift the new carry bit (c2) into the packed carries: %rax = 2*c1 + c2. */
    447 	adcq	%rax,%rax
    448 	movq	%rax,384(%rsp)
    449 
        /* %rdi = first scratch area again; %rsi = base+704. */
    450 	leaq	192(%rsp),%rdi
    451 	addq	$64,%rsi
    452 
    453 
    454 
    455 
    456 
        /* Low 128 bits of (two qwords at (%rcx)) * (two qwords at (%rsi)): */
        /* low word ends up in %rbp, second word in 8(%rdi). Only the low halves */
        /* of the two cross products are added, so anything above bit 127 is dropped. */
    457 	movq	(%rsi),%r8
    458 	movq	8(%rsi),%rbx
    459 
    460 	movq	(%rcx),%rax
    461 	mulq	%r8
    462 	movq	%rax,%rbp
    463 	movq	%rdx,%r9
    464 
    465 	movq	8(%rcx),%rax
    466 	mulq	%r8
    467 	addq	%rax,%r9
    468 
    469 	movq	(%rcx),%rax
    470 	mulq	%rbx
    471 	addq	%rax,%r9
    472 
    473 	movq	%r9,8(%rdi)
    474 
    475 
        /* %rsi = base+512 (704 - 192). */
    476 	subq	$192,%rsi
    477 
        /* Accumulator for the second call: the two words at (%rcx) followed by */
        /* %r10..%r15 left over from the first call. */
    478 	movq	(%rcx),%r8
    479 	movq	8(%rcx),%r9
    480 
        /* Multiply the 128-bit value just computed (%rbp, 8(%rdi)) by the eight */
        /* qwords at base+512 and accumulate. */
    481 	call	MULADD_128x512
    482 
    483 
    484 
    485 
        /* Preload the four low qwords at base+512 for the conditional subtract. */
    486 	movq	0(%rsi),%rax
    487 	movq	8(%rsi),%rbx
    488 	movq	16(%rsi),%rdi
    489 	movq	24(%rsi),%rdx
    490 
    491 
    492 	movq	384(%rsp),%rbp
    493 
        /* Add the two parked words into the top of the result. */
    494 	addq	64(%rcx),%r8
    495 	adcq	72(%rcx),%r9
    496 
    497 
        /* Shift in the third carry bit: %rbp = k = 4*c1 + 2*c2 + c3. */
    498 	adcq	%rbp,%rbp
    499 
    500 
    501 
        /* %rbp = base + 8*k: address of the k-th qword of the table. */
    502 	shlq	$3,%rbp
    503 	movq	32(%rsp),%rcx
    504 	addq	%rcx,%rbp
    505 
    506 
    507 	xorq	%rsi,%rsi
    508 
        /* Add the eight qwords found at a 64-byte stride starting at base + 8*k. */
        /* The entry is selected by address arithmetic rather than by a branch. */
    509 	addq	0(%rbp),%r10
    510 	adcq	64(%rbp),%r11
    511 	adcq	128(%rbp),%r12
    512 	adcq	192(%rbp),%r13
    513 	adcq	256(%rbp),%r14
    514 	adcq	320(%rbp),%r15
    515 	adcq	384(%rbp),%r8
    516 	adcq	448(%rbp),%r9
    517 
    518 
    519 
        /* %rsi = all-ones if the addition carried out, else zero. */
    520 	sbbq	$0,%rsi
    521 
    522 
        /* Mask the subtrahend: subtract the 8 qwords at base+512 only if the */
        /* addition above carried out (branch-free conditional subtract). */
    523 	andq	%rsi,%rax
    524 	andq	%rsi,%rbx
    525 	andq	%rsi,%rdi
    526 	andq	%rsi,%rdx
    527 
    528 	movq	$1,%rbp
    529 	subq	%rax,%r10
    530 	sbbq	%rbx,%r11
    531 	sbbq	%rdi,%r12
    532 	sbbq	%rdx,%r13
    533 
    534 
    535 
    536 
        /* Park the borrow: %rbp = 1 - borrow. The andq instructions below clear */
        /* CF, so the borrow cannot simply be left in the flags. */
    537 	sbbq	$0,%rbp
    538 
    539 
    540 
        /* Load and mask the four high qwords of the base+512 entry. */
    541 	addq	$512,%rcx
    542 	movq	32(%rcx),%rax
    543 	movq	40(%rcx),%rbx
    544 	movq	48(%rcx),%rdi
    545 	movq	56(%rcx),%rdx
    546 
    547 
    548 
    549 	andq	%rsi,%rax
    550 	andq	%rsi,%rbx
    551 	andq	%rsi,%rdi
    552 	andq	%rsi,%rdx
    553 
    554 
    555 
        /* Recreate the parked borrow in CF (0-1 borrows, 1-1 does not). */
    556 	subq	$1,%rbp
    557 
    558 	sbbq	%rax,%r14
    559 	sbbq	%rbx,%r15
    560 	sbbq	%rdi,%r8
    561 	sbbq	%rdx,%r9
    562 
    563 
    564 
        /* Store the eight result words through the destination pointer. */
    565 	movq	144(%rsp),%rsi
    566 	movq	%r10,0(%rsi)
    567 	movq	%r11,8(%rsi)
    568 	movq	%r12,16(%rsi)
    569 	movq	%r13,24(%rsi)
    570 	movq	%r14,32(%rsi)
    571 	movq	%r15,40(%rsi)
    572 	movq	%r8,48(%rsi)
    573 	movq	%r9,56(%rsi)
    574 
        /* 0xf3,0xc3 is the two-byte "rep ret" encoding of a near return. */
    575 	.byte	0xf3,0xc3
    576 .size	mont_reduce,.-mont_reduce
    577 .type	mont_mul_a3b,@function
    578 .align	16
        /*
         * mont_mul_a3b: 8x8-qword schoolbook multiply. Writes the 16-qword
         * product to 520(%rsp)..640(%rsp) (lowest word first) and then jumps to
         * mont_reduce, which consumes that buffer and performs the return.
         *
         * In:   %rdi = pointer to eight qwords; word i is the multiplier of pass i+1
         *       %rsi = pointer to eight qwords; multiplicand for passes 2..8
         *       %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9 = multiplicand for pass 1,
         *       lowest word first
         *       NOTE(review): pass 1 reads the multiplicand from registers and
         *       the later passes read it from (%rsi); presumably both hold the
         *       same value - confirm against the callers.
         *       Stack slots used by mont_reduce (32(%rsp), 144(%rsp)) must
         *       already be set up, since this routine jumps there without
         *       changing %rsp.
         * Out:  whatever mont_reduce produces.
         * Clobbers here: %rax, %rdx, %rbx, %rbp, %r8-%r15, flags; mont_reduce
         *       clobbers more.
         *
         * Pass n finishes product word n-1 and stores it at 520+8*(n-1)(%rsp);
         * the register that held it is reused as the new top accumulator word.
         */
    579 mont_mul_a3b:
    580 
    581 
    582 
    583 
        /* Pass 1: multiplier = 0(%rdi), multiplicand taken from registers. */
        /* There is nothing to accumulate onto yet, so each step only adds the */
        /* previous step's high half. */
    584 	movq	0(%rdi),%rbp
    585 
    586 	movq	%r10,%rax
    587 	mulq	%rbp
    588 	movq	%rax,520(%rsp)
    589 	movq	%rdx,%r10
    590 	movq	%r11,%rax
    591 	mulq	%rbp
    592 	addq	%rax,%r10
    593 	adcq	$0,%rdx
    594 	movq	%rdx,%r11
    595 	movq	%r12,%rax
    596 	mulq	%rbp
    597 	addq	%rax,%r11
    598 	adcq	$0,%rdx
    599 	movq	%rdx,%r12
    600 	movq	%r13,%rax
    601 	mulq	%rbp
    602 	addq	%rax,%r12
    603 	adcq	$0,%rdx
    604 	movq	%rdx,%r13
    605 	movq	%r14,%rax
    606 	mulq	%rbp
    607 	addq	%rax,%r13
    608 	adcq	$0,%rdx
    609 	movq	%rdx,%r14
    610 	movq	%r15,%rax
    611 	mulq	%rbp
    612 	addq	%rax,%r14
    613 	adcq	$0,%rdx
    614 	movq	%rdx,%r15
    615 	movq	%r8,%rax
    616 	mulq	%rbp
    617 	addq	%rax,%r15
    618 	adcq	$0,%rdx
    619 	movq	%rdx,%r8
    620 	movq	%r9,%rax
    621 	mulq	%rbp
    622 	addq	%rax,%r8
    623 	adcq	$0,%rdx
    624 	movq	%rdx,%r9
        /* Pass 2: multiplier = 8(%rdi); product word 1 goes to 528(%rsp). */
    625 	movq	8(%rdi),%rbp
    626 	movq	0(%rsi),%rax
    627 	mulq	%rbp
    628 	addq	%rax,%r10
    629 	adcq	$0,%rdx
    630 	movq	%r10,528(%rsp)
    631 	movq	%rdx,%rbx
    632 
    633 	movq	8(%rsi),%rax
    634 	mulq	%rbp
    635 	addq	%rax,%r11
    636 	adcq	$0,%rdx
    637 	addq	%rbx,%r11
    638 	adcq	$0,%rdx
    639 	movq	%rdx,%rbx
    640 
    641 	movq	16(%rsi),%rax
    642 	mulq	%rbp
    643 	addq	%rax,%r12
    644 	adcq	$0,%rdx
    645 	addq	%rbx,%r12
    646 	adcq	$0,%rdx
    647 	movq	%rdx,%rbx
    648 
    649 	movq	24(%rsi),%rax
    650 	mulq	%rbp
    651 	addq	%rax,%r13
    652 	adcq	$0,%rdx
    653 	addq	%rbx,%r13
    654 	adcq	$0,%rdx
    655 	movq	%rdx,%rbx
    656 
    657 	movq	32(%rsi),%rax
    658 	mulq	%rbp
    659 	addq	%rax,%r14
    660 	adcq	$0,%rdx
    661 	addq	%rbx,%r14
    662 	adcq	$0,%rdx
    663 	movq	%rdx,%rbx
    664 
    665 	movq	40(%rsi),%rax
    666 	mulq	%rbp
    667 	addq	%rax,%r15
    668 	adcq	$0,%rdx
    669 	addq	%rbx,%r15
    670 	adcq	$0,%rdx
    671 	movq	%rdx,%rbx
    672 
    673 	movq	48(%rsi),%rax
    674 	mulq	%rbp
    675 	addq	%rax,%r8
    676 	adcq	$0,%rdx
    677 	addq	%rbx,%r8
    678 	adcq	$0,%rdx
    679 	movq	%rdx,%rbx
    680 
    681 	movq	56(%rsi),%rax
    682 	mulq	%rbp
    683 	addq	%rax,%r9
    684 	adcq	$0,%rdx
    685 	addq	%rbx,%r9
    686 	adcq	$0,%rdx
    687 	movq	%rdx,%r10
        /* Pass 3: multiplier = 16(%rdi); product word 2 goes to 536(%rsp). */
    688 	movq	16(%rdi),%rbp
    689 	movq	0(%rsi),%rax
    690 	mulq	%rbp
    691 	addq	%rax,%r11
    692 	adcq	$0,%rdx
    693 	movq	%r11,536(%rsp)
    694 	movq	%rdx,%rbx
    695 
    696 	movq	8(%rsi),%rax
    697 	mulq	%rbp
    698 	addq	%rax,%r12
    699 	adcq	$0,%rdx
    700 	addq	%rbx,%r12
    701 	adcq	$0,%rdx
    702 	movq	%rdx,%rbx
    703 
    704 	movq	16(%rsi),%rax
    705 	mulq	%rbp
    706 	addq	%rax,%r13
    707 	adcq	$0,%rdx
    708 	addq	%rbx,%r13
    709 	adcq	$0,%rdx
    710 	movq	%rdx,%rbx
    711 
    712 	movq	24(%rsi),%rax
    713 	mulq	%rbp
    714 	addq	%rax,%r14
    715 	adcq	$0,%rdx
    716 	addq	%rbx,%r14
    717 	adcq	$0,%rdx
    718 	movq	%rdx,%rbx
    719 
    720 	movq	32(%rsi),%rax
    721 	mulq	%rbp
    722 	addq	%rax,%r15
    723 	adcq	$0,%rdx
    724 	addq	%rbx,%r15
    725 	adcq	$0,%rdx
    726 	movq	%rdx,%rbx
    727 
    728 	movq	40(%rsi),%rax
    729 	mulq	%rbp
    730 	addq	%rax,%r8
    731 	adcq	$0,%rdx
    732 	addq	%rbx,%r8
    733 	adcq	$0,%rdx
    734 	movq	%rdx,%rbx
    735 
    736 	movq	48(%rsi),%rax
    737 	mulq	%rbp
    738 	addq	%rax,%r9
    739 	adcq	$0,%rdx
    740 	addq	%rbx,%r9
    741 	adcq	$0,%rdx
    742 	movq	%rdx,%rbx
    743 
    744 	movq	56(%rsi),%rax
    745 	mulq	%rbp
    746 	addq	%rax,%r10
    747 	adcq	$0,%rdx
    748 	addq	%rbx,%r10
    749 	adcq	$0,%rdx
    750 	movq	%rdx,%r11
        /* Pass 4: multiplier = 24(%rdi); product word 3 goes to 544(%rsp). */
    751 	movq	24(%rdi),%rbp
    752 	movq	0(%rsi),%rax
    753 	mulq	%rbp
    754 	addq	%rax,%r12
    755 	adcq	$0,%rdx
    756 	movq	%r12,544(%rsp)
    757 	movq	%rdx,%rbx
    758 
    759 	movq	8(%rsi),%rax
    760 	mulq	%rbp
    761 	addq	%rax,%r13
    762 	adcq	$0,%rdx
    763 	addq	%rbx,%r13
    764 	adcq	$0,%rdx
    765 	movq	%rdx,%rbx
    766 
    767 	movq	16(%rsi),%rax
    768 	mulq	%rbp
    769 	addq	%rax,%r14
    770 	adcq	$0,%rdx
    771 	addq	%rbx,%r14
    772 	adcq	$0,%rdx
    773 	movq	%rdx,%rbx
    774 
    775 	movq	24(%rsi),%rax
    776 	mulq	%rbp
    777 	addq	%rax,%r15
    778 	adcq	$0,%rdx
    779 	addq	%rbx,%r15
    780 	adcq	$0,%rdx
    781 	movq	%rdx,%rbx
    782 
    783 	movq	32(%rsi),%rax
    784 	mulq	%rbp
    785 	addq	%rax,%r8
    786 	adcq	$0,%rdx
    787 	addq	%rbx,%r8
    788 	adcq	$0,%rdx
    789 	movq	%rdx,%rbx
    790 
    791 	movq	40(%rsi),%rax
    792 	mulq	%rbp
    793 	addq	%rax,%r9
    794 	adcq	$0,%rdx
    795 	addq	%rbx,%r9
    796 	adcq	$0,%rdx
    797 	movq	%rdx,%rbx
    798 
    799 	movq	48(%rsi),%rax
    800 	mulq	%rbp
    801 	addq	%rax,%r10
    802 	adcq	$0,%rdx
    803 	addq	%rbx,%r10
    804 	adcq	$0,%rdx
    805 	movq	%rdx,%rbx
    806 
    807 	movq	56(%rsi),%rax
    808 	mulq	%rbp
    809 	addq	%rax,%r11
    810 	adcq	$0,%rdx
    811 	addq	%rbx,%r11
    812 	adcq	$0,%rdx
    813 	movq	%rdx,%r12
        /* Pass 5: multiplier = 32(%rdi); product word 4 goes to 552(%rsp). */
    814 	movq	32(%rdi),%rbp
    815 	movq	0(%rsi),%rax
    816 	mulq	%rbp
    817 	addq	%rax,%r13
    818 	adcq	$0,%rdx
    819 	movq	%r13,552(%rsp)
    820 	movq	%rdx,%rbx
    821 
    822 	movq	8(%rsi),%rax
    823 	mulq	%rbp
    824 	addq	%rax,%r14
    825 	adcq	$0,%rdx
    826 	addq	%rbx,%r14
    827 	adcq	$0,%rdx
    828 	movq	%rdx,%rbx
    829 
    830 	movq	16(%rsi),%rax
    831 	mulq	%rbp
    832 	addq	%rax,%r15
    833 	adcq	$0,%rdx
    834 	addq	%rbx,%r15
    835 	adcq	$0,%rdx
    836 	movq	%rdx,%rbx
    837 
    838 	movq	24(%rsi),%rax
    839 	mulq	%rbp
    840 	addq	%rax,%r8
    841 	adcq	$0,%rdx
    842 	addq	%rbx,%r8
    843 	adcq	$0,%rdx
    844 	movq	%rdx,%rbx
    845 
    846 	movq	32(%rsi),%rax
    847 	mulq	%rbp
    848 	addq	%rax,%r9
    849 	adcq	$0,%rdx
    850 	addq	%rbx,%r9
    851 	adcq	$0,%rdx
    852 	movq	%rdx,%rbx
    853 
    854 	movq	40(%rsi),%rax
    855 	mulq	%rbp
    856 	addq	%rax,%r10
    857 	adcq	$0,%rdx
    858 	addq	%rbx,%r10
    859 	adcq	$0,%rdx
    860 	movq	%rdx,%rbx
    861 
    862 	movq	48(%rsi),%rax
    863 	mulq	%rbp
    864 	addq	%rax,%r11
    865 	adcq	$0,%rdx
    866 	addq	%rbx,%r11
    867 	adcq	$0,%rdx
    868 	movq	%rdx,%rbx
    869 
    870 	movq	56(%rsi),%rax
    871 	mulq	%rbp
    872 	addq	%rax,%r12
    873 	adcq	$0,%rdx
    874 	addq	%rbx,%r12
    875 	adcq	$0,%rdx
    876 	movq	%rdx,%r13
        /* Pass 6: multiplier = 40(%rdi); product word 5 goes to 560(%rsp). */
    877 	movq	40(%rdi),%rbp
    878 	movq	0(%rsi),%rax
    879 	mulq	%rbp
    880 	addq	%rax,%r14
    881 	adcq	$0,%rdx
    882 	movq	%r14,560(%rsp)
    883 	movq	%rdx,%rbx
    884 
    885 	movq	8(%rsi),%rax
    886 	mulq	%rbp
    887 	addq	%rax,%r15
    888 	adcq	$0,%rdx
    889 	addq	%rbx,%r15
    890 	adcq	$0,%rdx
    891 	movq	%rdx,%rbx
    892 
    893 	movq	16(%rsi),%rax
    894 	mulq	%rbp
    895 	addq	%rax,%r8
    896 	adcq	$0,%rdx
    897 	addq	%rbx,%r8
    898 	adcq	$0,%rdx
    899 	movq	%rdx,%rbx
    900 
    901 	movq	24(%rsi),%rax
    902 	mulq	%rbp
    903 	addq	%rax,%r9
    904 	adcq	$0,%rdx
    905 	addq	%rbx,%r9
    906 	adcq	$0,%rdx
    907 	movq	%rdx,%rbx
    908 
    909 	movq	32(%rsi),%rax
    910 	mulq	%rbp
    911 	addq	%rax,%r10
    912 	adcq	$0,%rdx
    913 	addq	%rbx,%r10
    914 	adcq	$0,%rdx
    915 	movq	%rdx,%rbx
    916 
    917 	movq	40(%rsi),%rax
    918 	mulq	%rbp
    919 	addq	%rax,%r11
    920 	adcq	$0,%rdx
    921 	addq	%rbx,%r11
    922 	adcq	$0,%rdx
    923 	movq	%rdx,%rbx
    924 
    925 	movq	48(%rsi),%rax
    926 	mulq	%rbp
    927 	addq	%rax,%r12
    928 	adcq	$0,%rdx
    929 	addq	%rbx,%r12
    930 	adcq	$0,%rdx
    931 	movq	%rdx,%rbx
    932 
    933 	movq	56(%rsi),%rax
    934 	mulq	%rbp
    935 	addq	%rax,%r13
    936 	adcq	$0,%rdx
    937 	addq	%rbx,%r13
    938 	adcq	$0,%rdx
    939 	movq	%rdx,%r14
        /* Pass 7: multiplier = 48(%rdi); product word 6 goes to 568(%rsp). */
    940 	movq	48(%rdi),%rbp
    941 	movq	0(%rsi),%rax
    942 	mulq	%rbp
    943 	addq	%rax,%r15
    944 	adcq	$0,%rdx
    945 	movq	%r15,568(%rsp)
    946 	movq	%rdx,%rbx
    947 
    948 	movq	8(%rsi),%rax
    949 	mulq	%rbp
    950 	addq	%rax,%r8
    951 	adcq	$0,%rdx
    952 	addq	%rbx,%r8
    953 	adcq	$0,%rdx
    954 	movq	%rdx,%rbx
    955 
    956 	movq	16(%rsi),%rax
    957 	mulq	%rbp
    958 	addq	%rax,%r9
    959 	adcq	$0,%rdx
    960 	addq	%rbx,%r9
    961 	adcq	$0,%rdx
    962 	movq	%rdx,%rbx
    963 
    964 	movq	24(%rsi),%rax
    965 	mulq	%rbp
    966 	addq	%rax,%r10
    967 	adcq	$0,%rdx
    968 	addq	%rbx,%r10
    969 	adcq	$0,%rdx
    970 	movq	%rdx,%rbx
    971 
    972 	movq	32(%rsi),%rax
    973 	mulq	%rbp
    974 	addq	%rax,%r11
    975 	adcq	$0,%rdx
    976 	addq	%rbx,%r11
    977 	adcq	$0,%rdx
    978 	movq	%rdx,%rbx
    979 
    980 	movq	40(%rsi),%rax
    981 	mulq	%rbp
    982 	addq	%rax,%r12
    983 	adcq	$0,%rdx
    984 	addq	%rbx,%r12
    985 	adcq	$0,%rdx
    986 	movq	%rdx,%rbx
    987 
    988 	movq	48(%rsi),%rax
    989 	mulq	%rbp
    990 	addq	%rax,%r13
    991 	adcq	$0,%rdx
    992 	addq	%rbx,%r13
    993 	adcq	$0,%rdx
    994 	movq	%rdx,%rbx
    995 
    996 	movq	56(%rsi),%rax
    997 	mulq	%rbp
    998 	addq	%rax,%r14
    999 	adcq	$0,%rdx
   1000 	addq	%rbx,%r14
   1001 	adcq	$0,%rdx
   1002 	movq	%rdx,%r15
        /* Pass 8: multiplier = 56(%rdi); product word 7 goes to 576(%rsp). */
   1003 	movq	56(%rdi),%rbp
   1004 	movq	0(%rsi),%rax
   1005 	mulq	%rbp
   1006 	addq	%rax,%r8
   1007 	adcq	$0,%rdx
   1008 	movq	%r8,576(%rsp)
   1009 	movq	%rdx,%rbx
   1010 
   1011 	movq	8(%rsi),%rax
   1012 	mulq	%rbp
   1013 	addq	%rax,%r9
   1014 	adcq	$0,%rdx
   1015 	addq	%rbx,%r9
   1016 	adcq	$0,%rdx
   1017 	movq	%rdx,%rbx
   1018 
   1019 	movq	16(%rsi),%rax
   1020 	mulq	%rbp
   1021 	addq	%rax,%r10
   1022 	adcq	$0,%rdx
   1023 	addq	%rbx,%r10
   1024 	adcq	$0,%rdx
   1025 	movq	%rdx,%rbx
   1026 
   1027 	movq	24(%rsi),%rax
   1028 	mulq	%rbp
   1029 	addq	%rax,%r11
   1030 	adcq	$0,%rdx
   1031 	addq	%rbx,%r11
   1032 	adcq	$0,%rdx
   1033 	movq	%rdx,%rbx
   1034 
   1035 	movq	32(%rsi),%rax
   1036 	mulq	%rbp
   1037 	addq	%rax,%r12
   1038 	adcq	$0,%rdx
   1039 	addq	%rbx,%r12
   1040 	adcq	$0,%rdx
   1041 	movq	%rdx,%rbx
   1042 
   1043 	movq	40(%rsi),%rax
   1044 	mulq	%rbp
   1045 	addq	%rax,%r13
   1046 	adcq	$0,%rdx
   1047 	addq	%rbx,%r13
   1048 	adcq	$0,%rdx
   1049 	movq	%rdx,%rbx
   1050 
   1051 	movq	48(%rsi),%rax
   1052 	mulq	%rbp
   1053 	addq	%rax,%r14
   1054 	adcq	$0,%rdx
   1055 	addq	%rbx,%r14
   1056 	adcq	$0,%rdx
   1057 	movq	%rdx,%rbx
   1058 
   1059 	movq	56(%rsi),%rax
   1060 	mulq	%rbp
   1061 	addq	%rax,%r15
   1062 	adcq	$0,%rdx
   1063 	addq	%rbx,%r15
   1064 	adcq	$0,%rdx
   1065 	movq	%rdx,%r8
        /* Store product words 8..15 (held in %r9..%r15,%r8) at 584..640(%rsp). */
   1066 	movq	%r9,584(%rsp)
   1067 	movq	%r10,592(%rsp)
   1068 	movq	%r11,600(%rsp)
   1069 	movq	%r12,608(%rsp)
   1070 	movq	%r13,616(%rsp)
   1071 	movq	%r14,624(%rsp)
   1072 	movq	%r15,632(%rsp)
   1073 	movq	%r8,640(%rsp)
   1074 
   1075 
   1076 
   1077 
   1078 
        /* Tail jump (no call): %rsp is unchanged, so mont_reduce sees the product */
        /* at the same 520(%rsp) offsets and its ret returns to our caller. */
   1079 	jmp	mont_reduce
   1080 
   1081 
   1082 .size	mont_mul_a3b,.-mont_mul_a3b
   1083 .type	sqr_reduce,@function
   1084 .align	16
        /*
         * sqr_reduce: square an eight-qword value a = (a0..a7), write the
         * 16-qword result to 520(%rsp)..640(%rsp) (lowest word first), then jump
         * to mont_reduce, which consumes that buffer and performs the return.
         *
         * In:   %r10,%r11,%r12,%r13,%r14,%r15,%r8,%r9 = a0..a7
         *       16(%rsp) = pointer to eight qwords; a1..a6 are re-read from there
         *       NOTE(review): the code mixes the register copy and the memory
         *       copy of the operand, so presumably both hold the same value -
         *       confirm against the callers.
         *       Stack slots used by mont_reduce (32(%rsp), 144(%rsp)) must
         *       already be set up.
         * Out:  whatever mont_reduce produces.
         * Clobbers here: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, %rbp, %r8-%r15, flags.
         *
         * Method: first accumulate the cross products a_i*a_j for i < j into
         * words 1..14 of the buffer (528..632(%rsp)), then double that sum and
         * add the diagonal squares a_i^2. %r8 and %r9 keep a6 and a7 throughout
         * the cross-product phase.
         */
   1085 sqr_reduce:
   1086 	movq	16(%rsp),%rcx
   1087 
   1088 
   1089 
        /* Row 0: a0 * (a1..a7), operands taken from registers. */
        /* Word 1 goes to 528(%rsp); the running sum continues in */
        /* %r10..%r15, with %rsi as the top word. */
   1090 	movq	%r10,%rbx
   1091 
   1092 	movq	%r11,%rax
   1093 	mulq	%rbx
   1094 	movq	%rax,528(%rsp)
   1095 	movq	%rdx,%r10
   1096 	movq	%r12,%rax
   1097 	mulq	%rbx
   1098 	addq	%rax,%r10
   1099 	adcq	$0,%rdx
   1100 	movq	%rdx,%r11
   1101 	movq	%r13,%rax
   1102 	mulq	%rbx
   1103 	addq	%rax,%r11
   1104 	adcq	$0,%rdx
   1105 	movq	%rdx,%r12
   1106 	movq	%r14,%rax
   1107 	mulq	%rbx
   1108 	addq	%rax,%r12
   1109 	adcq	$0,%rdx
   1110 	movq	%rdx,%r13
   1111 	movq	%r15,%rax
   1112 	mulq	%rbx
   1113 	addq	%rax,%r13
   1114 	adcq	$0,%rdx
   1115 	movq	%rdx,%r14
   1116 	movq	%r8,%rax
   1117 	mulq	%rbx
   1118 	addq	%rax,%r14
   1119 	adcq	$0,%rdx
   1120 	movq	%rdx,%r15
   1121 	movq	%r9,%rax
   1122 	mulq	%rbx
   1123 	addq	%rax,%r15
   1124 	adcq	$0,%rdx
   1125 	movq	%rdx,%rsi
   1126 
        /* Word 2 is final (no later row contributes to it). */
   1127 	movq	%r10,536(%rsp)
   1128 
   1129 
   1130 
   1131 
   1132 
        /* Row 1: a1 * (a2..a7). %r10 is the carry between steps in rows 1..5. */
        /* Words 3 and 4 go to 544/552(%rsp); the top carry goes to %r11. */
   1133 	movq	8(%rcx),%rbx
   1134 
   1135 	movq	16(%rcx),%rax
   1136 	mulq	%rbx
   1137 	addq	%rax,%r11
   1138 	adcq	$0,%rdx
   1139 	movq	%r11,544(%rsp)
   1140 
   1141 	movq	%rdx,%r10
   1142 	movq	24(%rcx),%rax
   1143 	mulq	%rbx
   1144 	addq	%rax,%r12
   1145 	adcq	$0,%rdx
   1146 	addq	%r10,%r12
   1147 	adcq	$0,%rdx
   1148 	movq	%r12,552(%rsp)
   1149 
   1150 	movq	%rdx,%r10
   1151 	movq	32(%rcx),%rax
   1152 	mulq	%rbx
   1153 	addq	%rax,%r13
   1154 	adcq	$0,%rdx
   1155 	addq	%r10,%r13
   1156 	adcq	$0,%rdx
   1157 
   1158 	movq	%rdx,%r10
   1159 	movq	40(%rcx),%rax
   1160 	mulq	%rbx
   1161 	addq	%rax,%r14
   1162 	adcq	$0,%rdx
   1163 	addq	%r10,%r14
   1164 	adcq	$0,%rdx
   1165 
   1166 	movq	%rdx,%r10
   1167 	movq	%r8,%rax
   1168 	mulq	%rbx
   1169 	addq	%rax,%r15
   1170 	adcq	$0,%rdx
   1171 	addq	%r10,%r15
   1172 	adcq	$0,%rdx
   1173 
   1174 	movq	%rdx,%r10
   1175 	movq	%r9,%rax
   1176 	mulq	%rbx
   1177 	addq	%rax,%rsi
   1178 	adcq	$0,%rdx
   1179 	addq	%r10,%rsi
   1180 	adcq	$0,%rdx
   1181 
   1182 	movq	%rdx,%r11
   1183 
   1184 
   1185 
   1186 
        /* Row 2: a2 * (a3..a7). Words 5 and 6 go to 560/568(%rsp); */
        /* the top carry goes to %r12. */
   1187 	movq	16(%rcx),%rbx
   1188 
   1189 	movq	24(%rcx),%rax
   1190 	mulq	%rbx
   1191 	addq	%rax,%r13
   1192 	adcq	$0,%rdx
   1193 	movq	%r13,560(%rsp)
   1194 
   1195 	movq	%rdx,%r10
   1196 	movq	32(%rcx),%rax
   1197 	mulq	%rbx
   1198 	addq	%rax,%r14
   1199 	adcq	$0,%rdx
   1200 	addq	%r10,%r14
   1201 	adcq	$0,%rdx
   1202 	movq	%r14,568(%rsp)
   1203 
   1204 	movq	%rdx,%r10
   1205 	movq	40(%rcx),%rax
   1206 	mulq	%rbx
   1207 	addq	%rax,%r15
   1208 	adcq	$0,%rdx
   1209 	addq	%r10,%r15
   1210 	adcq	$0,%rdx
   1211 
   1212 	movq	%rdx,%r10
   1213 	movq	%r8,%rax
   1214 	mulq	%rbx
   1215 	addq	%rax,%rsi
   1216 	adcq	$0,%rdx
   1217 	addq	%r10,%rsi
   1218 	adcq	$0,%rdx
   1219 
   1220 	movq	%rdx,%r10
   1221 	movq	%r9,%rax
   1222 	mulq	%rbx
   1223 	addq	%rax,%r11
   1224 	adcq	$0,%rdx
   1225 	addq	%r10,%r11
   1226 	adcq	$0,%rdx
   1227 
   1228 	movq	%rdx,%r12
   1229 
   1230 
   1231 
   1232 
   1233 
        /* Row 3: a3 * (a4..a7). Words 7 and 8 go to 576/584(%rsp); */
        /* the top carry goes to %r15. */
   1234 	movq	24(%rcx),%rbx
   1235 
   1236 	movq	32(%rcx),%rax
   1237 	mulq	%rbx
   1238 	addq	%rax,%r15
   1239 	adcq	$0,%rdx
   1240 	movq	%r15,576(%rsp)
   1241 
   1242 	movq	%rdx,%r10
   1243 	movq	40(%rcx),%rax
   1244 	mulq	%rbx
   1245 	addq	%rax,%rsi
   1246 	adcq	$0,%rdx
   1247 	addq	%r10,%rsi
   1248 	adcq	$0,%rdx
   1249 	movq	%rsi,584(%rsp)
   1250 
   1251 	movq	%rdx,%r10
   1252 	movq	%r8,%rax
   1253 	mulq	%rbx
   1254 	addq	%rax,%r11
   1255 	adcq	$0,%rdx
   1256 	addq	%r10,%r11
   1257 	adcq	$0,%rdx
   1258 
   1259 	movq	%rdx,%r10
   1260 	movq	%r9,%rax
   1261 	mulq	%rbx
   1262 	addq	%rax,%r12
   1263 	adcq	$0,%rdx
   1264 	addq	%r10,%r12
   1265 	adcq	$0,%rdx
   1266 
   1267 	movq	%rdx,%r15
   1268 
   1269 
   1270 
   1271 
        /* Row 4: a4 * (a5..a7). Words 9 and 10 go to 592/600(%rsp); */
        /* the top carry goes to %r11. */
   1272 	movq	32(%rcx),%rbx
   1273 
   1274 	movq	40(%rcx),%rax
   1275 	mulq	%rbx
   1276 	addq	%rax,%r11
   1277 	adcq	$0,%rdx
   1278 	movq	%r11,592(%rsp)
   1279 
   1280 	movq	%rdx,%r10
   1281 	movq	%r8,%rax
   1282 	mulq	%rbx
   1283 	addq	%rax,%r12
   1284 	adcq	$0,%rdx
   1285 	addq	%r10,%r12
   1286 	adcq	$0,%rdx
   1287 	movq	%r12,600(%rsp)
   1288 
   1289 	movq	%rdx,%r10
   1290 	movq	%r9,%rax
   1291 	mulq	%rbx
   1292 	addq	%rax,%r15
   1293 	adcq	$0,%rdx
   1294 	addq	%r10,%r15
   1295 	adcq	$0,%rdx
   1296 
   1297 	movq	%rdx,%r11
   1298 
   1299 
   1300 
   1301 
        /* Row 5: a5 * (a6, a7). Words 11 and 12 go to 608/616(%rsp); */
        /* the top carry goes to %r12. */
   1302 	movq	40(%rcx),%rbx
   1303 
   1304 	movq	%r8,%rax
   1305 	mulq	%rbx
   1306 	addq	%rax,%r15
   1307 	adcq	$0,%rdx
   1308 	movq	%r15,608(%rsp)
   1309 
   1310 	movq	%rdx,%r10
   1311 	movq	%r9,%rax
   1312 	mulq	%rbx
   1313 	addq	%rax,%r11
   1314 	adcq	$0,%rdx
   1315 	addq	%r10,%r11
   1316 	adcq	$0,%rdx
   1317 	movq	%r11,616(%rsp)
   1318 
   1319 	movq	%rdx,%r12
   1320 
   1321 
   1322 
   1323 
        /* Row 6: a6 * a7. Words 13 and 14 go to 624/632(%rsp). */
   1324 	movq	%r8,%rbx
   1325 
   1326 	movq	%r9,%rax
   1327 	mulq	%rbx
   1328 	addq	%rax,%r12
   1329 	adcq	$0,%rdx
   1330 	movq	%r12,624(%rsp)
   1331 
   1332 	movq	%rdx,632(%rsp)
   1333 
   1334 
        /* Lower half: reload cross-product words 1..6, double them, add squares. */
   1335 	movq	528(%rsp),%r10
   1336 	movq	536(%rsp),%r11
   1337 	movq	544(%rsp),%r12
   1338 	movq	552(%rsp),%r13
   1339 	movq	560(%rsp),%r14
   1340 	movq	568(%rsp),%r15
   1341 
        /* a3^2 is computed before the doubling: low half in %rdi, high half in */
        /* %r8, so the carry out of the doubling chain can be folded into %r8. */
   1342 	movq	24(%rcx),%rax
   1343 	mulq	%rax
   1344 	movq	%rax,%rdi
   1345 	movq	%rdx,%r8
   1346 
   1347 	addq	%r10,%r10
   1348 	adcq	%r11,%r11
   1349 	adcq	%r12,%r12
   1350 	adcq	%r13,%r13
   1351 	adcq	%r14,%r14
   1352 	adcq	%r15,%r15
   1353 	adcq	$0,%r8
   1354 
        /* a0^2: low half is result word 0; high half (%rbx) is added into word 1. */
   1355 	movq	0(%rcx),%rax
   1356 	mulq	%rax
   1357 	movq	%rax,520(%rsp)
   1358 	movq	%rdx,%rbx
   1359 
        /* a1^2 goes into words 2..3; its high half carries on in %rbx. */
   1360 	movq	8(%rcx),%rax
   1361 	mulq	%rax
   1362 
   1363 	addq	%rbx,%r10
   1364 	adcq	%rax,%r11
   1365 	adcq	$0,%rdx
   1366 
   1367 	movq	%rdx,%rbx
   1368 	movq	%r10,528(%rsp)
   1369 	movq	%r11,536(%rsp)
   1370 
        /* a2^2 goes into words 4..5. */
   1371 	movq	16(%rcx),%rax
   1372 	mulq	%rax
   1373 
   1374 	addq	%rbx,%r12
   1375 	adcq	%rax,%r13
   1376 	adcq	$0,%rdx
   1377 
   1378 	movq	%rdx,%rbx
   1379 
   1380 	movq	%r12,544(%rsp)
   1381 	movq	%r13,552(%rsp)
   1382 
        /* Low half of a3^2 (%rdi) goes into word 6; %rbp keeps the carry out of */
        /* the lower half for the upper half below. */
   1383 	xorq	%rbp,%rbp
   1384 	addq	%rbx,%r14
   1385 	adcq	%rdi,%r15
   1386 	adcq	$0,%rbp
   1387 
   1388 	movq	%r14,560(%rsp)
   1389 	movq	%r15,568(%rsp)
   1390 
   1391 
   1392 
   1393 
        /* Upper half: reload cross-product words 7..14. */
   1394 	movq	576(%rsp),%r10
   1395 	movq	584(%rsp),%r11
   1396 	movq	592(%rsp),%r12
   1397 	movq	600(%rsp),%r13
   1398 	movq	608(%rsp),%r14
   1399 	movq	616(%rsp),%r15
   1400 	movq	624(%rsp),%rdi
   1401 	movq	632(%rsp),%rsi
   1402 
        /* a7^2 from the register copy: low half in %r9, high half in %rbx */
        /* (%rbx becomes result word 15). */
   1403 	movq	%r9,%rax
   1404 	mulq	%rax
   1405 	movq	%rax,%r9
   1406 	movq	%rdx,%rbx
   1407 
   1408 	addq	%r10,%r10
   1409 	adcq	%r11,%r11
   1410 	adcq	%r12,%r12
   1411 	adcq	%r13,%r13
   1412 	adcq	%r14,%r14
   1413 	adcq	%r15,%r15
   1414 	adcq	%rdi,%rdi
   1415 	adcq	%rsi,%rsi
   1416 	adcq	$0,%rbx
   1417 
        /* Fold in the carry from the lower half. %r10 is even after the doubling */
        /* and %rbp is 0 or 1, so this add cannot carry out (the mulq below */
        /* overwrites the flags in any case). */
   1418 	addq	%rbp,%r10
   1419 
        /* a4^2 goes into words 8..9; %r8 is the high half of a3^2 plus the */
        /* carry out of the lower doubling chain. */
   1420 	movq	32(%rcx),%rax
   1421 	mulq	%rax
   1422 
   1423 	addq	%r8,%r10
   1424 	adcq	%rax,%r11
   1425 	adcq	$0,%rdx
   1426 
   1427 	movq	%rdx,%rbp
   1428 
   1429 	movq	%r10,576(%rsp)
   1430 	movq	%r11,584(%rsp)
   1431 
        /* a5^2 goes into words 10..11. */
   1432 	movq	40(%rcx),%rax
   1433 	mulq	%rax
   1434 
   1435 	addq	%rbp,%r12
   1436 	adcq	%rax,%r13
   1437 	adcq	$0,%rdx
   1438 
   1439 	movq	%rdx,%rbp
   1440 
   1441 	movq	%r12,592(%rsp)
   1442 	movq	%r13,600(%rsp)
   1443 
        /* a6^2 goes into words 12..13 (read from memory; %r8 no longer holds a6). */
   1444 	movq	48(%rcx),%rax
   1445 	mulq	%rax
   1446 
   1447 	addq	%rbp,%r14
   1448 	adcq	%rax,%r15
   1449 	adcq	$0,%rdx
   1450 
   1451 	movq	%r14,608(%rsp)
   1452 	movq	%r15,616(%rsp)
   1453 
        /* High half of a6^2 and both halves of a7^2 finish words 13..15. */
   1454 	addq	%rdx,%rdi
   1455 	adcq	%r9,%rsi
   1456 	adcq	$0,%rbx
   1457 
   1458 	movq	%rdi,624(%rsp)
   1459 	movq	%rsi,632(%rsp)
   1460 	movq	%rbx,640(%rsp)
   1461 
        /* Tail jump (no call): %rsp is unchanged, so mont_reduce sees the square */
        /* at the same 520(%rsp) offsets and its ret returns to our caller. */
   1462 	jmp	mont_reduce
   1463 
   1464 
   1465 .size	sqr_reduce,.-sqr_reduce
   1466 .globl	mod_exp_512
   1467 .hidden mod_exp_512
   1468 .type	mod_exp_512,@function
        /*
         * mod_exp_512
         *
         * Exponentiation over 64-byte (8 x 64-bit) operands using a table of 32
         * precomputed values and 5-bit exponent windows.  The arithmetic is
         * delegated to mont_reduce, mont_mul_a3b and sqr_reduce, none of which
         * is fully visible in this part of the file (judging by the names they
         * are presumably Montgomery reduce / multiply / square -- confirm).
         *
         * Register arguments as they are used below (the C prototype is not
         * visible here -- TODO confirm against the caller):
         *   %rdi  64-byte buffer; receives the result and is also used as the
         *         running accumulator during the main loop
         *   %rsi  64-byte base operand (read only)
         *   %rdx  64-byte exponent (read only, copied to the stack)
         *   %rcx  pointer to a data block; the 64 bytes at offset 512 of that
         *         block are conditionally subtracted from the result at the end
         *         (presumably the modulus -- confirm)
         *
         * %rbp, %rbx and %r12-%r15 are saved on entry and restored on exit.
         *
         * Stack frame, relative to the 64-byte aligned %rsp set up below:
         *      0        value of %rsp right after the six pushes
         *      8/16/24  saved %rdi / %rsi / %rcx
         *      32       init_loop iteration counter
         *      40       pointer to the table column being filled
         *      48       exponent bit index used by the main loop
         *      64-127   copy of the exponent; 128-135 is zeroed
         *      136      pointer slot written before helper calls (presumably
         *               where the helper stores its result -- confirm)
         *      384-447  buffer whose address is put in slot 136 for the first
         *               mont_reduce call
         *      448-511  temporary 64-byte operand
         *      512-639  128-byte buffer filled before each mont_reduce call
         *      640-2687 lookup table: 32 rows of 64 bytes
         */
   1469 mod_exp_512:
   1470 	pushq	%rbp
   1471 	pushq	%rbx
   1472 	pushq	%r12
   1473 	pushq	%r13
   1474 	pushq	%r14
   1475 	pushq	%r15
   1476 
   1477 
        	/* Remember %rsp, reserve 2688 bytes and round %rsp down to a
        	 * multiple of 64 (the movdqa stores below need 16-byte alignment). */
   1478 	movq	%rsp,%r8
   1479 	subq	$2688,%rsp
   1480 	andq	$-64,%rsp
   1481 
   1482 
        	/* Save the pre-adjustment %rsp and the pointer arguments. */
   1483 	movq	%r8,0(%rsp)
   1484 	movq	%rdi,8(%rsp)
   1485 	movq	%rsi,16(%rsp)
   1486 	movq	%rcx,24(%rsp)
   1487 .Lbody:
   1488 
   1489 
   1490 
        	/* Fill the 128-byte buffer at 512(%rsp): zeros in bytes 0-31 and
        	 * 96-127, the 64-byte base operand in bytes 32-95.  Read as a
        	 * little-endian integer this is the base shifted left by 256 bits. */
   1491 	pxor	%xmm4,%xmm4
   1492 	movdqu	0(%rsi),%xmm0
   1493 	movdqu	16(%rsi),%xmm1
   1494 	movdqu	32(%rsi),%xmm2
   1495 	movdqu	48(%rsi),%xmm3
   1496 	movdqa	%xmm4,512(%rsp)
   1497 	movdqa	%xmm4,528(%rsp)
   1498 	movdqa	%xmm4,608(%rsp)
   1499 	movdqa	%xmm4,624(%rsp)
   1500 	movdqa	%xmm0,544(%rsp)
   1501 	movdqa	%xmm1,560(%rsp)
   1502 	movdqa	%xmm2,576(%rsp)
   1503 	movdqa	%xmm3,592(%rsp)
   1504 
   1505 
        	/* Load the 64-byte exponent into %xmm0-%xmm3 while %rdx still holds
        	 * the argument.  It is only written to the stack after init_loop, so
        	 * this relies on mont_reduce and mont_mul_a3b leaving %xmm0-%xmm3
        	 * untouched (not verifiable from this chunk). */
   1506 	movdqu	0(%rdx),%xmm0
   1507 	movdqu	16(%rdx),%xmm1
   1508 	movdqu	32(%rdx),%xmm2
   1509 	movdqu	48(%rdx),%xmm3
   1510 
        	/* Slot 136 = address of the buffer at 384(%rsp), then reduce the
        	 * buffer prepared above (presumably the result lands at 384(%rsp)). */
   1511 	leaq	384(%rsp),%rbx
   1512 	movq	%rbx,136(%rsp)
   1513 	call	mont_reduce
   1514 
   1515 
        	/* Initialise the temporary at 448(%rsp): qword 2 = 1, every other
        	 * qword = 0.  Also zero the 8 bytes at 128(%rsp), directly after the
        	 * area that later holds the exponent copy. */
   1516 	leaq	448(%rsp),%rcx
   1517 	xorq	%rax,%rax
   1518 	movq	%rax,0(%rcx)
   1519 	movq	%rax,8(%rcx)
   1520 	movq	%rax,24(%rcx)
   1521 	movq	%rax,32(%rcx)
   1522 	movq	%rax,40(%rcx)
   1523 	movq	%rax,48(%rcx)
   1524 	movq	%rax,56(%rcx)
   1525 	movq	%rax,128(%rsp)
   1526 	movq	$1,16(%rcx)
   1527 
        	/* Scatter the temporary into column 0 of the table at 640(%rsp).
        	 * Each of the 8 qwords is split into four 16-bit pieces which are
        	 * written 64 bytes apart; the next qword starts 256 bytes further. */
   1528 	leaq	640(%rsp),%rbp
   1529 	movq	%rcx,%rsi
   1530 	movq	%rbp,%rdi
   1531 	movq	$8,%rax
   1532 loop_0:
   1533 	movq	(%rcx),%rbx
   1534 	movw	%bx,(%rdi)
   1535 	shrq	$16,%rbx
   1536 	movw	%bx,64(%rdi)
   1537 	shrq	$16,%rbx
   1538 	movw	%bx,128(%rdi)
   1539 	shrq	$16,%rbx
   1540 	movw	%bx,192(%rdi)
   1541 	leaq	8(%rcx),%rcx
   1542 	leaq	256(%rdi),%rdi
   1543 	decq	%rax
   1544 	jnz	loop_0
        	/* 31 more table columns follow: counter in slot 32, current column
        	 * pointer in slot 40. */
   1545 	movq	$31,%rax
   1546 	movq	%rax,32(%rsp)
   1547 	movq	%rbp,40(%rsp)
   1548 
        	/* Slot 136 = address of the temporary; load its 8 qwords into
        	 * %r10-%r15, %r8, %r9 (presumably the register operand expected by
        	 * mont_mul_a3b -- confirm against its definition). */
   1549 	movq	%rsi,136(%rsp)
   1550 	movq	0(%rsi),%r10
   1551 	movq	8(%rsi),%r11
   1552 	movq	16(%rsi),%r12
   1553 	movq	24(%rsi),%r13
   1554 	movq	32(%rsi),%r14
   1555 	movq	40(%rsi),%r15
   1556 	movq	48(%rsi),%r8
   1557 	movq	56(%rsi),%r9
        	/* Each pass calls mont_mul_a3b with %rdi = 384(%rsp), then scatters
        	 * the 64 bytes at 448(%rsp) into the next table column (2 bytes
        	 * further along each 64-byte row).  Presumably this multiplies the
        	 * temporary by the value at 384(%rsp) each time -- confirm. */
   1558 init_loop:
   1559 	leaq	384(%rsp),%rdi
   1560 	call	mont_mul_a3b
   1561 	leaq	448(%rsp),%rsi
   1562 	movq	40(%rsp),%rbp
   1563 	addq	$2,%rbp
   1564 	movq	%rbp,40(%rsp)
   1565 	movq	%rsi,%rcx
   1566 	movq	$8,%rax
   1567 loop_1:
   1568 	movq	(%rcx),%rbx
   1569 	movw	%bx,(%rbp)
   1570 	shrq	$16,%rbx
   1571 	movw	%bx,64(%rbp)
   1572 	shrq	$16,%rbx
   1573 	movw	%bx,128(%rbp)
   1574 	shrq	$16,%rbx
   1575 	movw	%bx,192(%rbp)
   1576 	leaq	8(%rcx),%rcx
   1577 	leaq	256(%rbp),%rbp
   1578 	decq	%rax
   1579 	jnz	loop_1
   1580 	movq	32(%rsp),%rax
   1581 	subq	$1,%rax
   1582 	movq	%rax,32(%rsp)
   1583 	jne	init_loop	/* mov leaves flags alone: this tests the subq result */
   1584 
   1585 
   1586 
        	/* Copy the exponent (still in %xmm0-%xmm3) to 64..127(%rsp). */
   1587 	movdqa	%xmm0,64(%rsp)
   1588 	movdqa	%xmm1,80(%rsp)
   1589 	movdqa	%xmm2,96(%rsp)
   1590 	movdqa	%xmm3,112(%rsp)
   1591 
   1592 
   1593 
   1594 
   1595 
        	/* Read the 32 bits starting at byte 62 of the exponent copy.  Bits
        	 * 11 and up of that value select the initial table column; the low
        	 * 11 bits are written back, which clears exponent bits 507 and above
        	 * in the stack copy.  The shift only yields a 5-bit index if the
        	 * bytes at 128-129(%rsp), zeroed earlier, are still zero here. */
   1596 	movl	126(%rsp),%eax
   1597 	movq	%rax,%rdx
   1598 	shrq	$11,%rax
   1599 	andl	$2047,%edx
   1600 	movl	%edx,126(%rsp)
   1601 	leaq	640(%rsp,%rax,2),%rsi
   1602 	movq	8(%rsp),%rdx
   1603 	movq	$4,%rbp
        	/* Gather the selected column into the result buffer (%rdx = saved
        	 * %rdi): each pass rebuilds two qwords from 16-bit pieces stored 64
        	 * bytes apart, i.e. the inverse of the scatter loops above. */
   1604 loop_2:
   1605 	movzwq	192(%rsi),%rbx
   1606 	movzwq	448(%rsi),%rax
   1607 	shlq	$16,%rbx
   1608 	shlq	$16,%rax
   1609 	movw	128(%rsi),%bx
   1610 	movw	384(%rsi),%ax
   1611 	shlq	$16,%rbx
   1612 	shlq	$16,%rax
   1613 	movw	64(%rsi),%bx
   1614 	movw	320(%rsi),%ax
   1615 	shlq	$16,%rbx
   1616 	shlq	$16,%rax
   1617 	movw	0(%rsi),%bx
   1618 	movw	256(%rsi),%ax
   1619 	movq	%rbx,0(%rdx)
   1620 	movq	%rax,8(%rdx)
   1621 	leaq	512(%rsi),%rsi
   1622 	leaq	16(%rdx),%rdx
   1623 	subq	$1,%rbp
   1624 	jnz	loop_2
        	/* Bit index of the first window handled by the loop.  It steps down
        	 * by 5 per pass until it goes negative. */
   1625 	movq	$505,48(%rsp)
   1626 
        	/* Slot 136 = result buffer; load the accumulator into %r10-%r15,
        	 * %r8, %r9 and enter the loop at sqr_2, so the first pass runs two
        	 * sqr_reduce calls instead of five (after the masking above only
        	 * bits 505-506 of the first window can be non-zero). */
   1627 	movq	8(%rsp),%rcx
   1628 	movq	%rcx,136(%rsp)
   1629 	movq	0(%rcx),%r10
   1630 	movq	8(%rcx),%r11
   1631 	movq	16(%rcx),%r12
   1632 	movq	24(%rcx),%r13
   1633 	movq	32(%rcx),%r14
   1634 	movq	40(%rcx),%r15
   1635 	movq	48(%rcx),%r8
   1636 	movq	56(%rcx),%r9
   1637 	jmp	sqr_2
   1638 
   1639 main_loop_a3b:
   1640 	call	sqr_reduce
   1641 	call	sqr_reduce
   1642 	call	sqr_reduce
   1643 sqr_2:
   1644 	call	sqr_reduce
   1645 	call	sqr_reduce
   1646 
   1647 
   1648 
        	/* Extract the 5-bit window at the current bit index: load 32 bits
        	 * from the 16-bit word containing that bit, shift right by
        	 * (index mod 16) and keep the low 5 bits. */
   1649 	movq	48(%rsp),%rcx
   1650 	movq	%rcx,%rax
   1651 	shrq	$4,%rax
   1652 	movl	64(%rsp,%rax,2),%edx
   1653 	andq	$15,%rcx
   1654 	shrq	%cl,%rdx
   1655 	andq	$31,%rdx
   1656 
        	/* Gather table column number %rdx into the temporary at 448(%rsp).
        	 * Every lookup reads 2 bytes from each of the 32 table rows; only
        	 * the offset inside the 64-byte row depends on the window value. */
   1657 	leaq	640(%rsp,%rdx,2),%rsi
   1658 	leaq	448(%rsp),%rdx
   1659 	movq	%rdx,%rdi
   1660 	movq	$4,%rbp
   1661 loop_3:
   1662 	movzwq	192(%rsi),%rbx
   1663 	movzwq	448(%rsi),%rax
   1664 	shlq	$16,%rbx
   1665 	shlq	$16,%rax
   1666 	movw	128(%rsi),%bx
   1667 	movw	384(%rsi),%ax
   1668 	shlq	$16,%rbx
   1669 	shlq	$16,%rax
   1670 	movw	64(%rsi),%bx
   1671 	movw	320(%rsi),%ax
   1672 	shlq	$16,%rbx
   1673 	shlq	$16,%rax
   1674 	movw	0(%rsi),%bx
   1675 	movw	256(%rsi),%ax
   1676 	movq	%rbx,0(%rdx)
   1677 	movq	%rax,8(%rdx)
   1678 	leaq	512(%rsi),%rsi
   1679 	leaq	16(%rdx),%rdx
   1680 	subq	$1,%rbp
   1681 	jnz	loop_3
        	/* %rdi = gathered table entry at 448(%rsp), %rsi = result buffer
        	 * (presumably: accumulator *= table entry -- confirm). */
   1682 	movq	8(%rsp),%rsi
   1683 	call	mont_mul_a3b
   1684 
   1685 
   1686 
        	/* Step the bit index down by 5.  The signed jge uses the flags of
        	 * the subq (the store in between does not change them), so the loop
        	 * ends once the index drops below zero. */
   1687 	movq	48(%rsp),%rcx
   1688 	subq	$5,%rcx
   1689 	movq	%rcx,48(%rsp)
   1690 	jge	main_loop_a3b
   1691 
   1692 
   1693 
   1694 end_main_loop_a3b:
   1695 
   1696 
        	/* Copy the 64-byte result into the low half of the 128-byte buffer
        	 * at 512(%rsp), zero the high half, and reduce once more. */
   1697 	movq	8(%rsp),%rdx
   1698 	pxor	%xmm4,%xmm4
   1699 	movdqu	0(%rdx),%xmm0
   1700 	movdqu	16(%rdx),%xmm1
   1701 	movdqu	32(%rdx),%xmm2
   1702 	movdqu	48(%rdx),%xmm3
   1703 	movdqa	%xmm4,576(%rsp)
   1704 	movdqa	%xmm4,592(%rsp)
   1705 	movdqa	%xmm4,608(%rsp)
   1706 	movdqa	%xmm4,624(%rsp)
   1707 	movdqa	%xmm0,512(%rsp)
   1708 	movdqa	%xmm1,528(%rsp)
   1709 	movdqa	%xmm2,544(%rsp)
   1710 	movdqa	%xmm3,560(%rsp)
   1711 	call	mont_reduce
   1712 
   1713 
   1714 
        	/* Load the result (8 qwords) into %r8-%r15. */
   1715 	movq	8(%rsp),%rax
   1716 	movq	0(%rax),%r8
   1717 	movq	8(%rax),%r9
   1718 	movq	16(%rax),%r10
   1719 	movq	24(%rax),%r11
   1720 	movq	32(%rax),%r12
   1721 	movq	40(%rax),%r13
   1722 	movq	48(%rax),%r14
   1723 	movq	56(%rax),%r15
   1724 
   1725 
        	/* %rbx = saved %rcx argument + 512; subtract the 64-byte value
        	 * stored there from the result, propagating the borrow. */
   1726 	movq	24(%rsp),%rbx
   1727 	addq	$512,%rbx
   1728 
   1729 	subq	0(%rbx),%r8
   1730 	sbbq	8(%rbx),%r9
   1731 	sbbq	16(%rbx),%r10
   1732 	sbbq	24(%rbx),%r11
   1733 	sbbq	32(%rbx),%r12
   1734 	sbbq	40(%rbx),%r13
   1735 	sbbq	48(%rbx),%r14
   1736 	sbbq	56(%rbx),%r15
   1737 
   1738 
        	/* Branch-free select: if the subtraction did not borrow (CF clear)
        	 * store the difference, otherwise keep the original value.  Only mov
        	 * and cmov are used from here on, neither of which changes the
        	 * flags, so the CF from the last sbbq is still valid for the second
        	 * group of four qwords. */
   1739 	movq	0(%rax),%rsi
   1740 	movq	8(%rax),%rdi
   1741 	movq	16(%rax),%rcx
   1742 	movq	24(%rax),%rdx
   1743 	cmovncq	%r8,%rsi
   1744 	cmovncq	%r9,%rdi
   1745 	cmovncq	%r10,%rcx
   1746 	cmovncq	%r11,%rdx
   1747 	movq	%rsi,0(%rax)
   1748 	movq	%rdi,8(%rax)
   1749 	movq	%rcx,16(%rax)
   1750 	movq	%rdx,24(%rax)
   1751 
   1752 	movq	32(%rax),%rsi
   1753 	movq	40(%rax),%rdi
   1754 	movq	48(%rax),%rcx
   1755 	movq	56(%rax),%rdx
   1756 	cmovncq	%r12,%rsi
   1757 	cmovncq	%r13,%rdi
   1758 	cmovncq	%r14,%rcx
   1759 	cmovncq	%r15,%rdx
   1760 	movq	%rsi,32(%rax)
   1761 	movq	%rdi,40(%rax)
   1762 	movq	%rcx,48(%rax)
   1763 	movq	%rdx,56(%rax)
   1764 
        	/* Epilogue: %rsi = %rsp as it was right after the six pushes.
        	 * Reload the saved registers from there (reverse push order) and
        	 * move %rsp back above them instead of using six pops. */
   1765 	movq	0(%rsp),%rsi
   1766 	movq	0(%rsi),%r15
   1767 	movq	8(%rsi),%r14
   1768 	movq	16(%rsi),%r13
   1769 	movq	24(%rsi),%r12
   1770 	movq	32(%rsi),%rbx
   1771 	movq	40(%rsi),%rbp
   1772 	leaq	48(%rsi),%rsp
   1773 .Lepilogue:
        	/* 0xf3,0xc3 is the two-byte encoding of "rep ret". */
   1774 	.byte	0xf3,0xc3
   1775 .size	mod_exp_512, . - mod_exp_512
   1776 #endif
   1777