Home | History | Annotate | Download | only in bn
      1 %ifidn __OUTPUT_FORMAT__,obj
        ; Section selection depends on the assembler output format:
        ;   obj    -> 32-bit section `code`, class=code, 64-byte aligned
        ;   win32  -> `.text`, 64-byte aligned (plus the @feat.00 handling below)
        ;   other  -> plain `.text` code section
      2 section	code	use32 class=code align=64
      3 %elifidn __OUTPUT_FORMAT__,win32
      4 %ifdef __YASM_VERSION_ID__
        ; Assembling with Yasm: refuse versions older than 1.1.0.
      5 %if __YASM_VERSION_ID__ < 01010000h
      6 %error yasm version 1.1.0 or later needed.
      7 %endif
      8 ; Yasm automatically includes @feat.00 and complains about redefining it.
      9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
     10 %else
        ; Not Yasm: define the absolute symbol @feat.00 = 1 ourselves.
        ; NOTE(review): presumably this marks the object as SafeSEH-compatible
        ; for the linker (see the URL above) -- confirm.
     11 $@feat.00 equ 1
     12 %endif
     13 section	.text	code align=64
     14 %else
     15 section	.text	code
     16 %endif
     17 global	_bn_mul_comba8
        ; -----------------------------------------------------------------
        ; _bn_mul_comba8 -- r = a * b for 8-dword operands, fully unrolled.
        ;
        ; Column-wise multiplication: for each result word k, every product
        ; a[i]*b[j] with i+j == k is accumulated, then r[k] is stored.
        ;
        ; Stack arguments (32-bit; the routine ends in a plain `ret`, so the
        ; caller removes them):
        ;   1st  r : 16 dwords are written, r[0]..r[15]
        ;   2nd  a :  8 dwords are read,    a[0]..a[7]
        ;   3rd  b :  8 dwords are read,    b[0]..b[7]
        ; Presumably the C prototype is
        ;   void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
        ; -- TODO confirm against the declaring header.
        ;
        ; Register roles:
        ;   esi = a, edi = b
        ;   ebx / ecx / ebp = three-word column accumulator; the roles
        ;       (low word, middle word, carry word) rotate by one register
        ;       for every result word
        ;   eax / edx = mul operands and 64-bit product; eax is also reloaded
        ;       with r from the stack ([20+esp]) right before each store
        ; Saved and restored: esi, edi, ebp, ebx.
        ; Clobbered: eax, ecx, edx, flags.
        ;
        ; NOTE(review): r[k] is stored while a[] and b[] are still being read
        ; for later columns, so r presumably must not overlap a or b --
        ; confirm with callers.
        ; -----------------------------------------------------------------
     18 align	16
     19 _bn_mul_comba8:
     20 L$_bn_mul_comba8_begin:
     21 	push	esi
     22 	mov	esi,DWORD [12+esp]	; esi = a (2nd argument; one register pushed so far)
     23 	push	edi
     24 	mov	edi,DWORD [20+esp]	; edi = b (3rd argument; two registers pushed so far)
     25 	push	ebp
     26 	push	ebx
     27 	xor	ebx,ebx	; clear accumulator (low word for column 0)
     28 	mov	eax,DWORD [esi]	; eax = a[0]
     29 	xor	ecx,ecx	; clear accumulator (middle word for column 0)
     30 	mov	edx,DWORD [edi]	; edx = b[0]
     31 	; ################## Calculate word 0
     32 	xor	ebp,ebp
     33 	; mul a[0]*b[0]
     34 	mul	edx
     35 	add	ebx,eax
     36 	mov	eax,DWORD [20+esp]	; eax = r (1st argument; four registers pushed)
     37 	adc	ecx,edx
     38 	mov	edx,DWORD [edi]
     39 	adc	ebp,0
     40 	mov	DWORD [eax],ebx
     41 	mov	eax,DWORD [4+esi]
     42 	; saved r[0]
     43 	; ################## Calculate word 1
     44 	xor	ebx,ebx
     45 	; mul a[1]*b[0]
     46 	mul	edx
     47 	add	ecx,eax
     48 	mov	eax,DWORD [esi]
     49 	adc	ebp,edx
     50 	mov	edx,DWORD [4+edi]
     51 	adc	ebx,0
     52 	; mul a[0]*b[1]
     53 	mul	edx
     54 	add	ecx,eax
     55 	mov	eax,DWORD [20+esp]
     56 	adc	ebp,edx
     57 	mov	edx,DWORD [edi]
     58 	adc	ebx,0
     59 	mov	DWORD [4+eax],ecx
     60 	mov	eax,DWORD [8+esi]
     61 	; saved r[1]
     62 	; ################## Calculate word 2
     63 	xor	ecx,ecx
     64 	; mul a[2]*b[0]
     65 	mul	edx
     66 	add	ebp,eax
     67 	mov	eax,DWORD [4+esi]
     68 	adc	ebx,edx
     69 	mov	edx,DWORD [4+edi]
     70 	adc	ecx,0
     71 	; mul a[1]*b[1]
     72 	mul	edx
     73 	add	ebp,eax
     74 	mov	eax,DWORD [esi]
     75 	adc	ebx,edx
     76 	mov	edx,DWORD [8+edi]
     77 	adc	ecx,0
     78 	; mul a[0]*b[2]
     79 	mul	edx
     80 	add	ebp,eax
     81 	mov	eax,DWORD [20+esp]
     82 	adc	ebx,edx
     83 	mov	edx,DWORD [edi]
     84 	adc	ecx,0
     85 	mov	DWORD [8+eax],ebp
     86 	mov	eax,DWORD [12+esi]
     87 	; saved r[2]
     88 	; ################## Calculate word 3
     89 	xor	ebp,ebp
     90 	; mul a[3]*b[0]
     91 	mul	edx
     92 	add	ebx,eax
     93 	mov	eax,DWORD [8+esi]
     94 	adc	ecx,edx
     95 	mov	edx,DWORD [4+edi]
     96 	adc	ebp,0
     97 	; mul a[2]*b[1]
     98 	mul	edx
     99 	add	ebx,eax
    100 	mov	eax,DWORD [4+esi]
    101 	adc	ecx,edx
    102 	mov	edx,DWORD [8+edi]
    103 	adc	ebp,0
    104 	; mul a[1]*b[2]
    105 	mul	edx
    106 	add	ebx,eax
    107 	mov	eax,DWORD [esi]
    108 	adc	ecx,edx
    109 	mov	edx,DWORD [12+edi]
    110 	adc	ebp,0
    111 	; mul a[0]*b[3]
    112 	mul	edx
    113 	add	ebx,eax
    114 	mov	eax,DWORD [20+esp]
    115 	adc	ecx,edx
    116 	mov	edx,DWORD [edi]
    117 	adc	ebp,0
    118 	mov	DWORD [12+eax],ebx
    119 	mov	eax,DWORD [16+esi]
    120 	; saved r[3]
    121 	; ################## Calculate word 4
    122 	xor	ebx,ebx
    123 	; mul a[4]*b[0]
    124 	mul	edx
    125 	add	ecx,eax
    126 	mov	eax,DWORD [12+esi]
    127 	adc	ebp,edx
    128 	mov	edx,DWORD [4+edi]
    129 	adc	ebx,0
    130 	; mul a[3]*b[1]
    131 	mul	edx
    132 	add	ecx,eax
    133 	mov	eax,DWORD [8+esi]
    134 	adc	ebp,edx
    135 	mov	edx,DWORD [8+edi]
    136 	adc	ebx,0
    137 	; mul a[2]*b[2]
    138 	mul	edx
    139 	add	ecx,eax
    140 	mov	eax,DWORD [4+esi]
    141 	adc	ebp,edx
    142 	mov	edx,DWORD [12+edi]
    143 	adc	ebx,0
    144 	; mul a[1]*b[3]
    145 	mul	edx
    146 	add	ecx,eax
    147 	mov	eax,DWORD [esi]
    148 	adc	ebp,edx
    149 	mov	edx,DWORD [16+edi]
    150 	adc	ebx,0
    151 	; mul a[0]*b[4]
    152 	mul	edx
    153 	add	ecx,eax
    154 	mov	eax,DWORD [20+esp]
    155 	adc	ebp,edx
    156 	mov	edx,DWORD [edi]
    157 	adc	ebx,0
    158 	mov	DWORD [16+eax],ecx
    159 	mov	eax,DWORD [20+esi]
    160 	; saved r[4]
    161 	; ################## Calculate word 5
    162 	xor	ecx,ecx
    163 	; mul a[5]*b[0]
    164 	mul	edx
    165 	add	ebp,eax
    166 	mov	eax,DWORD [16+esi]
    167 	adc	ebx,edx
    168 	mov	edx,DWORD [4+edi]
    169 	adc	ecx,0
    170 	; mul a[4]*b[1]
    171 	mul	edx
    172 	add	ebp,eax
    173 	mov	eax,DWORD [12+esi]
    174 	adc	ebx,edx
    175 	mov	edx,DWORD [8+edi]
    176 	adc	ecx,0
    177 	; mul a[3]*b[2]
    178 	mul	edx
    179 	add	ebp,eax
    180 	mov	eax,DWORD [8+esi]
    181 	adc	ebx,edx
    182 	mov	edx,DWORD [12+edi]
    183 	adc	ecx,0
    184 	; mul a[2]*b[3]
    185 	mul	edx
    186 	add	ebp,eax
    187 	mov	eax,DWORD [4+esi]
    188 	adc	ebx,edx
    189 	mov	edx,DWORD [16+edi]
    190 	adc	ecx,0
    191 	; mul a[1]*b[4]
    192 	mul	edx
    193 	add	ebp,eax
    194 	mov	eax,DWORD [esi]
    195 	adc	ebx,edx
    196 	mov	edx,DWORD [20+edi]
    197 	adc	ecx,0
    198 	; mul a[0]*b[5]
    199 	mul	edx
    200 	add	ebp,eax
    201 	mov	eax,DWORD [20+esp]
    202 	adc	ebx,edx
    203 	mov	edx,DWORD [edi]
    204 	adc	ecx,0
    205 	mov	DWORD [20+eax],ebp
    206 	mov	eax,DWORD [24+esi]
    207 	; saved r[5]
    208 	; ################## Calculate word 6
    209 	xor	ebp,ebp
    210 	; mul a[6]*b[0]
    211 	mul	edx
    212 	add	ebx,eax
    213 	mov	eax,DWORD [20+esi]
    214 	adc	ecx,edx
    215 	mov	edx,DWORD [4+edi]
    216 	adc	ebp,0
    217 	; mul a[5]*b[1]
    218 	mul	edx
    219 	add	ebx,eax
    220 	mov	eax,DWORD [16+esi]
    221 	adc	ecx,edx
    222 	mov	edx,DWORD [8+edi]
    223 	adc	ebp,0
    224 	; mul a[4]*b[2]
    225 	mul	edx
    226 	add	ebx,eax
    227 	mov	eax,DWORD [12+esi]
    228 	adc	ecx,edx
    229 	mov	edx,DWORD [12+edi]
    230 	adc	ebp,0
    231 	; mul a[3]*b[3]
    232 	mul	edx
    233 	add	ebx,eax
    234 	mov	eax,DWORD [8+esi]
    235 	adc	ecx,edx
    236 	mov	edx,DWORD [16+edi]
    237 	adc	ebp,0
    238 	; mul a[2]*b[4]
    239 	mul	edx
    240 	add	ebx,eax
    241 	mov	eax,DWORD [4+esi]
    242 	adc	ecx,edx
    243 	mov	edx,DWORD [20+edi]
    244 	adc	ebp,0
    245 	; mul a[1]*b[5]
    246 	mul	edx
    247 	add	ebx,eax
    248 	mov	eax,DWORD [esi]
    249 	adc	ecx,edx
    250 	mov	edx,DWORD [24+edi]
    251 	adc	ebp,0
    252 	; mul a[0]*b[6]
    253 	mul	edx
    254 	add	ebx,eax
    255 	mov	eax,DWORD [20+esp]
    256 	adc	ecx,edx
    257 	mov	edx,DWORD [edi]
    258 	adc	ebp,0
    259 	mov	DWORD [24+eax],ebx
    260 	mov	eax,DWORD [28+esi]
    261 	; saved r[6]
    262 	; ################## Calculate word 7
    263 	xor	ebx,ebx
    264 	; mul a[7]*b[0]
    265 	mul	edx
    266 	add	ecx,eax
    267 	mov	eax,DWORD [24+esi]
    268 	adc	ebp,edx
    269 	mov	edx,DWORD [4+edi]
    270 	adc	ebx,0
    271 	; mul a[6]*b[1]
    272 	mul	edx
    273 	add	ecx,eax
    274 	mov	eax,DWORD [20+esi]
    275 	adc	ebp,edx
    276 	mov	edx,DWORD [8+edi]
    277 	adc	ebx,0
    278 	; mul a[5]*b[2]
    279 	mul	edx
    280 	add	ecx,eax
    281 	mov	eax,DWORD [16+esi]
    282 	adc	ebp,edx
    283 	mov	edx,DWORD [12+edi]
    284 	adc	ebx,0
    285 	; mul a[4]*b[3]
    286 	mul	edx
    287 	add	ecx,eax
    288 	mov	eax,DWORD [12+esi]
    289 	adc	ebp,edx
    290 	mov	edx,DWORD [16+edi]
    291 	adc	ebx,0
    292 	; mul a[3]*b[4]
    293 	mul	edx
    294 	add	ecx,eax
    295 	mov	eax,DWORD [8+esi]
    296 	adc	ebp,edx
    297 	mov	edx,DWORD [20+edi]
    298 	adc	ebx,0
    299 	; mul a[2]*b[5]
    300 	mul	edx
    301 	add	ecx,eax
    302 	mov	eax,DWORD [4+esi]
    303 	adc	ebp,edx
    304 	mov	edx,DWORD [24+edi]
    305 	adc	ebx,0
    306 	; mul a[1]*b[6]
    307 	mul	edx
    308 	add	ecx,eax
    309 	mov	eax,DWORD [esi]
    310 	adc	ebp,edx
    311 	mov	edx,DWORD [28+edi]
    312 	adc	ebx,0
    313 	; mul a[0]*b[7]
    314 	mul	edx
    315 	add	ecx,eax
    316 	mov	eax,DWORD [20+esp]
    317 	adc	ebp,edx
    318 	mov	edx,DWORD [4+edi]
    319 	adc	ebx,0
    320 	mov	DWORD [28+eax],ecx
    321 	mov	eax,DWORD [28+esi]
    322 	; saved r[7]
    323 	; ################## Calculate word 8
    324 	xor	ecx,ecx
    325 	; mul a[7]*b[1]
    326 	mul	edx
    327 	add	ebp,eax
    328 	mov	eax,DWORD [24+esi]
    329 	adc	ebx,edx
    330 	mov	edx,DWORD [8+edi]
    331 	adc	ecx,0
    332 	; mul a[6]*b[2]
    333 	mul	edx
    334 	add	ebp,eax
    335 	mov	eax,DWORD [20+esi]
    336 	adc	ebx,edx
    337 	mov	edx,DWORD [12+edi]
    338 	adc	ecx,0
    339 	; mul a[5]*b[3]
    340 	mul	edx
    341 	add	ebp,eax
    342 	mov	eax,DWORD [16+esi]
    343 	adc	ebx,edx
    344 	mov	edx,DWORD [16+edi]
    345 	adc	ecx,0
    346 	; mul a[4]*b[4]
    347 	mul	edx
    348 	add	ebp,eax
    349 	mov	eax,DWORD [12+esi]
    350 	adc	ebx,edx
    351 	mov	edx,DWORD [20+edi]
    352 	adc	ecx,0
    353 	; mul a[3]*b[5]
    354 	mul	edx
    355 	add	ebp,eax
    356 	mov	eax,DWORD [8+esi]
    357 	adc	ebx,edx
    358 	mov	edx,DWORD [24+edi]
    359 	adc	ecx,0
    360 	; mul a[2]*b[6]
    361 	mul	edx
    362 	add	ebp,eax
    363 	mov	eax,DWORD [4+esi]
    364 	adc	ebx,edx
    365 	mov	edx,DWORD [28+edi]
    366 	adc	ecx,0
    367 	; mul a[1]*b[7]
    368 	mul	edx
    369 	add	ebp,eax
    370 	mov	eax,DWORD [20+esp]
    371 	adc	ebx,edx
    372 	mov	edx,DWORD [8+edi]
    373 	adc	ecx,0
    374 	mov	DWORD [32+eax],ebp
    375 	mov	eax,DWORD [28+esi]
    376 	; saved r[8]
    377 	; ################## Calculate word 9
    378 	xor	ebp,ebp
    379 	; mul a[7]*b[2]
    380 	mul	edx
    381 	add	ebx,eax
    382 	mov	eax,DWORD [24+esi]
    383 	adc	ecx,edx
    384 	mov	edx,DWORD [12+edi]
    385 	adc	ebp,0
    386 	; mul a[6]*b[3]
    387 	mul	edx
    388 	add	ebx,eax
    389 	mov	eax,DWORD [20+esi]
    390 	adc	ecx,edx
    391 	mov	edx,DWORD [16+edi]
    392 	adc	ebp,0
    393 	; mul a[5]*b[4]
    394 	mul	edx
    395 	add	ebx,eax
    396 	mov	eax,DWORD [16+esi]
    397 	adc	ecx,edx
    398 	mov	edx,DWORD [20+edi]
    399 	adc	ebp,0
    400 	; mul a[4]*b[5]
    401 	mul	edx
    402 	add	ebx,eax
    403 	mov	eax,DWORD [12+esi]
    404 	adc	ecx,edx
    405 	mov	edx,DWORD [24+edi]
    406 	adc	ebp,0
    407 	; mul a[3]*b[6]
    408 	mul	edx
    409 	add	ebx,eax
    410 	mov	eax,DWORD [8+esi]
    411 	adc	ecx,edx
    412 	mov	edx,DWORD [28+edi]
    413 	adc	ebp,0
    414 	; mul a[2]*b[7]
    415 	mul	edx
    416 	add	ebx,eax
    417 	mov	eax,DWORD [20+esp]
    418 	adc	ecx,edx
    419 	mov	edx,DWORD [12+edi]
    420 	adc	ebp,0
    421 	mov	DWORD [36+eax],ebx
    422 	mov	eax,DWORD [28+esi]
    423 	; saved r[9]
    424 	; ################## Calculate word 10
    425 	xor	ebx,ebx
    426 	; mul a[7]*b[3]
    427 	mul	edx
    428 	add	ecx,eax
    429 	mov	eax,DWORD [24+esi]
    430 	adc	ebp,edx
    431 	mov	edx,DWORD [16+edi]
    432 	adc	ebx,0
    433 	; mul a[6]*b[4]
    434 	mul	edx
    435 	add	ecx,eax
    436 	mov	eax,DWORD [20+esi]
    437 	adc	ebp,edx
    438 	mov	edx,DWORD [20+edi]
    439 	adc	ebx,0
    440 	; mul a[5]*b[5]
    441 	mul	edx
    442 	add	ecx,eax
    443 	mov	eax,DWORD [16+esi]
    444 	adc	ebp,edx
    445 	mov	edx,DWORD [24+edi]
    446 	adc	ebx,0
    447 	; mul a[4]*b[6]
    448 	mul	edx
    449 	add	ecx,eax
    450 	mov	eax,DWORD [12+esi]
    451 	adc	ebp,edx
    452 	mov	edx,DWORD [28+edi]
    453 	adc	ebx,0
    454 	; mul a[3]*b[7]
    455 	mul	edx
    456 	add	ecx,eax
    457 	mov	eax,DWORD [20+esp]
    458 	adc	ebp,edx
    459 	mov	edx,DWORD [16+edi]
    460 	adc	ebx,0
    461 	mov	DWORD [40+eax],ecx
    462 	mov	eax,DWORD [28+esi]
    463 	; saved r[10]
    464 	; ################## Calculate word 11
    465 	xor	ecx,ecx
    466 	; mul a[7]*b[4]
    467 	mul	edx
    468 	add	ebp,eax
    469 	mov	eax,DWORD [24+esi]
    470 	adc	ebx,edx
    471 	mov	edx,DWORD [20+edi]
    472 	adc	ecx,0
    473 	; mul a[6]*b[5]
    474 	mul	edx
    475 	add	ebp,eax
    476 	mov	eax,DWORD [20+esi]
    477 	adc	ebx,edx
    478 	mov	edx,DWORD [24+edi]
    479 	adc	ecx,0
    480 	; mul a[5]*b[6]
    481 	mul	edx
    482 	add	ebp,eax
    483 	mov	eax,DWORD [16+esi]
    484 	adc	ebx,edx
    485 	mov	edx,DWORD [28+edi]
    486 	adc	ecx,0
    487 	; mul a[4]*b[7]
    488 	mul	edx
    489 	add	ebp,eax
    490 	mov	eax,DWORD [20+esp]
    491 	adc	ebx,edx
    492 	mov	edx,DWORD [20+edi]
    493 	adc	ecx,0
    494 	mov	DWORD [44+eax],ebp
    495 	mov	eax,DWORD [28+esi]
    496 	; saved r[11]
    497 	; ################## Calculate word 12
    498 	xor	ebp,ebp
    499 	; mul a[7]*b[5]
    500 	mul	edx
    501 	add	ebx,eax
    502 	mov	eax,DWORD [24+esi]
    503 	adc	ecx,edx
    504 	mov	edx,DWORD [24+edi]
    505 	adc	ebp,0
    506 	; mul a[6]*b[6]
    507 	mul	edx
    508 	add	ebx,eax
    509 	mov	eax,DWORD [20+esi]
    510 	adc	ecx,edx
    511 	mov	edx,DWORD [28+edi]
    512 	adc	ebp,0
    513 	; mul a[5]*b[7]
    514 	mul	edx
    515 	add	ebx,eax
    516 	mov	eax,DWORD [20+esp]
    517 	adc	ecx,edx
    518 	mov	edx,DWORD [24+edi]
    519 	adc	ebp,0
    520 	mov	DWORD [48+eax],ebx
    521 	mov	eax,DWORD [28+esi]
    522 	; saved r[12]
    523 	; ################## Calculate word 13
    524 	xor	ebx,ebx
    525 	; mul a[7]*b[6]
    526 	mul	edx
    527 	add	ecx,eax
    528 	mov	eax,DWORD [24+esi]
    529 	adc	ebp,edx
    530 	mov	edx,DWORD [28+edi]
    531 	adc	ebx,0
    532 	; mul a[6]*b[7]
    533 	mul	edx
    534 	add	ecx,eax
    535 	mov	eax,DWORD [20+esp]
    536 	adc	ebp,edx
    537 	mov	edx,DWORD [28+edi]
    538 	adc	ebx,0
    539 	mov	DWORD [52+eax],ecx
    540 	mov	eax,DWORD [28+esi]
    541 	; saved r[13]
    542 	; ################## Calculate word 14
    543 	xor	ecx,ecx
    544 	; mul a[7]*b[7]
    545 	mul	edx
    546 	add	ebp,eax
    547 	mov	eax,DWORD [20+esp]
    548 	adc	ebx,edx
    549 	adc	ecx,0	; carry word is updated here but ecx is never stored
    550 	mov	DWORD [56+eax],ebp
    551 	; saved r[14]
    552 	; save r[15]
    553 	mov	DWORD [60+eax],ebx
    554 	pop	ebx	; restore saved registers in reverse push order
    555 	pop	ebp
    556 	pop	edi
    557 	pop	esi
    558 	ret
    559 global	_bn_mul_comba4
        ; -----------------------------------------------------------------
        ; _bn_mul_comba4 -- r = a * b for 4-dword operands, fully unrolled.
        ;
        ; Column-wise multiplication: for each result word k, every product
        ; a[i]*b[j] with i+j == k is accumulated, then r[k] is stored.
        ;
        ; Stack arguments (32-bit; the routine ends in a plain `ret`, so the
        ; caller removes them):
        ;   1st  r : 8 dwords are written, r[0]..r[7]
        ;   2nd  a : 4 dwords are read,    a[0]..a[3]
        ;   3rd  b : 4 dwords are read,    b[0]..b[3]
        ; Presumably the C prototype is
        ;   void bn_mul_comba4(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
        ; -- TODO confirm against the declaring header.
        ;
        ; Register roles:
        ;   esi = a, edi = b
        ;   ebx / ecx / ebp = three-word column accumulator; the roles
        ;       (low word, middle word, carry word) rotate by one register
        ;       for every result word
        ;   eax / edx = mul operands and 64-bit product; eax is also reloaded
        ;       with r from the stack ([20+esp]) right before each store
        ; Saved and restored: esi, edi, ebp, ebx.
        ; Clobbered: eax, ecx, edx, flags.
        ;
        ; NOTE(review): r[k] is stored while a[] and b[] are still being read
        ; for later columns, so r presumably must not overlap a or b --
        ; confirm with callers.
        ; -----------------------------------------------------------------
    560 align	16
    561 _bn_mul_comba4:
    562 L$_bn_mul_comba4_begin:
    563 	push	esi
    564 	mov	esi,DWORD [12+esp]	; esi = a (2nd argument; one register pushed so far)
    565 	push	edi
    566 	mov	edi,DWORD [20+esp]	; edi = b (3rd argument; two registers pushed so far)
    567 	push	ebp
    568 	push	ebx
    569 	xor	ebx,ebx	; clear accumulator (low word for column 0)
    570 	mov	eax,DWORD [esi]	; eax = a[0]
    571 	xor	ecx,ecx	; clear accumulator (middle word for column 0)
    572 	mov	edx,DWORD [edi]	; edx = b[0]
    573 	; ################## Calculate word 0
    574 	xor	ebp,ebp
    575 	; mul a[0]*b[0]
    576 	mul	edx
    577 	add	ebx,eax
    578 	mov	eax,DWORD [20+esp]	; eax = r (1st argument; four registers pushed)
    579 	adc	ecx,edx
    580 	mov	edx,DWORD [edi]
    581 	adc	ebp,0
    582 	mov	DWORD [eax],ebx
    583 	mov	eax,DWORD [4+esi]
    584 	; saved r[0]
    585 	; ################## Calculate word 1
    586 	xor	ebx,ebx
    587 	; mul a[1]*b[0]
    588 	mul	edx
    589 	add	ecx,eax
    590 	mov	eax,DWORD [esi]
    591 	adc	ebp,edx
    592 	mov	edx,DWORD [4+edi]
    593 	adc	ebx,0
    594 	; mul a[0]*b[1]
    595 	mul	edx
    596 	add	ecx,eax
    597 	mov	eax,DWORD [20+esp]
    598 	adc	ebp,edx
    599 	mov	edx,DWORD [edi]
    600 	adc	ebx,0
    601 	mov	DWORD [4+eax],ecx
    602 	mov	eax,DWORD [8+esi]
    603 	; saved r[1]
    604 	; ################## Calculate word 2
    605 	xor	ecx,ecx
    606 	; mul a[2]*b[0]
    607 	mul	edx
    608 	add	ebp,eax
    609 	mov	eax,DWORD [4+esi]
    610 	adc	ebx,edx
    611 	mov	edx,DWORD [4+edi]
    612 	adc	ecx,0
    613 	; mul a[1]*b[1]
    614 	mul	edx
    615 	add	ebp,eax
    616 	mov	eax,DWORD [esi]
    617 	adc	ebx,edx
    618 	mov	edx,DWORD [8+edi]
    619 	adc	ecx,0
    620 	; mul a[0]*b[2]
    621 	mul	edx
    622 	add	ebp,eax
    623 	mov	eax,DWORD [20+esp]
    624 	adc	ebx,edx
    625 	mov	edx,DWORD [edi]
    626 	adc	ecx,0
    627 	mov	DWORD [8+eax],ebp
    628 	mov	eax,DWORD [12+esi]
    629 	; saved r[2]
    630 	; ################## Calculate word 3
    631 	xor	ebp,ebp
    632 	; mul a[3]*b[0]
    633 	mul	edx
    634 	add	ebx,eax
    635 	mov	eax,DWORD [8+esi]
    636 	adc	ecx,edx
    637 	mov	edx,DWORD [4+edi]
    638 	adc	ebp,0
    639 	; mul a[2]*b[1]
    640 	mul	edx
    641 	add	ebx,eax
    642 	mov	eax,DWORD [4+esi]
    643 	adc	ecx,edx
    644 	mov	edx,DWORD [8+edi]
    645 	adc	ebp,0
    646 	; mul a[1]*b[2]
    647 	mul	edx
    648 	add	ebx,eax
    649 	mov	eax,DWORD [esi]
    650 	adc	ecx,edx
    651 	mov	edx,DWORD [12+edi]
    652 	adc	ebp,0
    653 	; mul a[0]*b[3]
    654 	mul	edx
    655 	add	ebx,eax
    656 	mov	eax,DWORD [20+esp]
    657 	adc	ecx,edx
    658 	mov	edx,DWORD [4+edi]
    659 	adc	ebp,0
    660 	mov	DWORD [12+eax],ebx
    661 	mov	eax,DWORD [12+esi]
    662 	; saved r[3]
    663 	; ################## Calculate word 4
    664 	xor	ebx,ebx
    665 	; mul a[3]*b[1]
    666 	mul	edx
    667 	add	ecx,eax
    668 	mov	eax,DWORD [8+esi]
    669 	adc	ebp,edx
    670 	mov	edx,DWORD [8+edi]
    671 	adc	ebx,0
    672 	; mul a[2]*b[2]
    673 	mul	edx
    674 	add	ecx,eax
    675 	mov	eax,DWORD [4+esi]
    676 	adc	ebp,edx
    677 	mov	edx,DWORD [12+edi]
    678 	adc	ebx,0
    679 	; mul a[1]*b[3]
    680 	mul	edx
    681 	add	ecx,eax
    682 	mov	eax,DWORD [20+esp]
    683 	adc	ebp,edx
    684 	mov	edx,DWORD [8+edi]
    685 	adc	ebx,0
    686 	mov	DWORD [16+eax],ecx
    687 	mov	eax,DWORD [12+esi]
    688 	; saved r[4]
    689 	; ################## Calculate word 5
    690 	xor	ecx,ecx
    691 	; mul a[3]*b[2]
    692 	mul	edx
    693 	add	ebp,eax
    694 	mov	eax,DWORD [8+esi]
    695 	adc	ebx,edx
    696 	mov	edx,DWORD [12+edi]
    697 	adc	ecx,0
    698 	; mul a[2]*b[3]
    699 	mul	edx
    700 	add	ebp,eax
    701 	mov	eax,DWORD [20+esp]
    702 	adc	ebx,edx
    703 	mov	edx,DWORD [12+edi]
    704 	adc	ecx,0
    705 	mov	DWORD [20+eax],ebp
    706 	mov	eax,DWORD [12+esi]
    707 	; saved r[5]
    708 	; ################## Calculate word 6
    709 	xor	ebp,ebp
    710 	; mul a[3]*b[3]
    711 	mul	edx
    712 	add	ebx,eax
    713 	mov	eax,DWORD [20+esp]
    714 	adc	ecx,edx
    715 	adc	ebp,0	; carry word is updated here but ebp is never stored
    716 	mov	DWORD [24+eax],ebx
    717 	; saved r[6]
    718 	; save r[7]
    719 	mov	DWORD [28+eax],ecx
    720 	pop	ebx	; restore saved registers in reverse push order
    721 	pop	ebp
    722 	pop	edi
    723 	pop	esi
    724 	ret
    725 global	_bn_sqr_comba8
        ; -----------------------------------------------------------------
        ; _bn_sqr_comba8 -- r = a * a for an 8-dword operand, fully unrolled.
        ;
        ; Column-wise squaring: for each result word k, the products
        ; a[i]*a[j] with i+j == k and i >= j are accumulated, then r[k] is
        ; stored. A cross product (i != j) is computed once and doubled
        ; before it is added:
        ;     add eax,eax / adc edx,edx / adc <carry word>,0
        ; shifts the 64-bit product left by one bit and adds the bit shifted
        ; out of edx to the accumulator's carry word. A square a[i]*a[i]
        ; (`mul eax`) is added once, undoubled.
        ;
        ; Stack arguments (32-bit; the routine ends in a plain `ret`, so the
        ; caller removes them):
        ;   1st  r : 16 dwords are written, r[0]..r[15]
        ;   2nd  a :  8 dwords are read,    a[0]..a[7]
        ; Presumably the C prototype is
        ;   void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
        ; -- TODO confirm against the declaring header.
        ;
        ; Register roles:
        ;   edi = r, esi = a
        ;   ebx / ecx / ebp = three-word column accumulator; the roles
        ;       (low word, middle word, carry word) rotate by one register
        ;       for every result word
        ;   eax / edx = mul operands and 64-bit product
        ; Saved and restored: esi, edi, ebp, ebx.
        ; Clobbered: eax, ecx, edx, flags.
        ;
        ; NOTE(review): r[k] is stored while a[] is still being read for
        ; later columns, so r presumably must not overlap a -- confirm with
        ; callers.
        ; -----------------------------------------------------------------
    726 align	16
    727 _bn_sqr_comba8:
    728 L$_bn_sqr_comba8_begin:
    729 	push	esi
    730 	push	edi
    731 	push	ebp
    732 	push	ebx
    733 	mov	edi,DWORD [20+esp]	; edi = r (1st argument; four registers pushed)
    734 	mov	esi,DWORD [24+esp]	; esi = a (2nd argument)
    735 	xor	ebx,ebx	; clear accumulator (low word for column 0)
    736 	xor	ecx,ecx	; clear accumulator (middle word for column 0)
    737 	mov	eax,DWORD [esi]	; eax = a[0]
    738 	; ############### Calculate word 0
    739 	xor	ebp,ebp
    740 	; sqr a[0]*a[0]
    741 	mul	eax
    742 	add	ebx,eax
    743 	adc	ecx,edx
    744 	mov	edx,DWORD [esi]
    745 	adc	ebp,0
    746 	mov	DWORD [edi],ebx
    747 	mov	eax,DWORD [4+esi]
    748 	; saved r[0]
    749 	; ############### Calculate word 1
    750 	xor	ebx,ebx
    751 	; sqr a[1]*a[0]
    752 	mul	edx
    753 	add	eax,eax	; double the cross product: edx:eax <<= 1 ...
    754 	adc	edx,edx
    755 	adc	ebx,0	; ... and add the bit shifted out to the carry word
    756 	add	ecx,eax
    757 	adc	ebp,edx
    758 	mov	eax,DWORD [8+esi]
    759 	adc	ebx,0
    760 	mov	DWORD [4+edi],ecx
    761 	mov	edx,DWORD [esi]
    762 	; saved r[1]
    763 	; ############### Calculate word 2
    764 	xor	ecx,ecx
    765 	; sqr a[2]*a[0]
    766 	mul	edx
    767 	add	eax,eax
    768 	adc	edx,edx
    769 	adc	ecx,0
    770 	add	ebp,eax
    771 	adc	ebx,edx
    772 	mov	eax,DWORD [4+esi]
    773 	adc	ecx,0
    774 	; sqr a[1]*a[1]
    775 	mul	eax
    776 	add	ebp,eax
    777 	adc	ebx,edx
    778 	mov	edx,DWORD [esi]
    779 	adc	ecx,0
    780 	mov	DWORD [8+edi],ebp
    781 	mov	eax,DWORD [12+esi]
    782 	; saved r[2]
    783 	; ############### Calculate word 3
    784 	xor	ebp,ebp
    785 	; sqr a[3]*a[0]
    786 	mul	edx
    787 	add	eax,eax
    788 	adc	edx,edx
    789 	adc	ebp,0
    790 	add	ebx,eax
    791 	adc	ecx,edx
    792 	mov	eax,DWORD [8+esi]
    793 	adc	ebp,0
    794 	mov	edx,DWORD [4+esi]
    795 	; sqr a[2]*a[1]
    796 	mul	edx
    797 	add	eax,eax
    798 	adc	edx,edx
    799 	adc	ebp,0
    800 	add	ebx,eax
    801 	adc	ecx,edx
    802 	mov	eax,DWORD [16+esi]
    803 	adc	ebp,0
    804 	mov	DWORD [12+edi],ebx
    805 	mov	edx,DWORD [esi]
    806 	; saved r[3]
    807 	; ############### Calculate word 4
    808 	xor	ebx,ebx
    809 	; sqr a[4]*a[0]
    810 	mul	edx
    811 	add	eax,eax
    812 	adc	edx,edx
    813 	adc	ebx,0
    814 	add	ecx,eax
    815 	adc	ebp,edx
    816 	mov	eax,DWORD [12+esi]
    817 	adc	ebx,0
    818 	mov	edx,DWORD [4+esi]
    819 	; sqr a[3]*a[1]
    820 	mul	edx
    821 	add	eax,eax
    822 	adc	edx,edx
    823 	adc	ebx,0
    824 	add	ecx,eax
    825 	adc	ebp,edx
    826 	mov	eax,DWORD [8+esi]
    827 	adc	ebx,0
    828 	; sqr a[2]*a[2]
    829 	mul	eax
    830 	add	ecx,eax
    831 	adc	ebp,edx
    832 	mov	edx,DWORD [esi]
    833 	adc	ebx,0
    834 	mov	DWORD [16+edi],ecx
    835 	mov	eax,DWORD [20+esi]
    836 	; saved r[4]
    837 	; ############### Calculate word 5
    838 	xor	ecx,ecx
    839 	; sqr a[5]*a[0]
    840 	mul	edx
    841 	add	eax,eax
    842 	adc	edx,edx
    843 	adc	ecx,0
    844 	add	ebp,eax
    845 	adc	ebx,edx
    846 	mov	eax,DWORD [16+esi]
    847 	adc	ecx,0
    848 	mov	edx,DWORD [4+esi]
    849 	; sqr a[4]*a[1]
    850 	mul	edx
    851 	add	eax,eax
    852 	adc	edx,edx
    853 	adc	ecx,0
    854 	add	ebp,eax
    855 	adc	ebx,edx
    856 	mov	eax,DWORD [12+esi]
    857 	adc	ecx,0
    858 	mov	edx,DWORD [8+esi]
    859 	; sqr a[3]*a[2]
    860 	mul	edx
    861 	add	eax,eax
    862 	adc	edx,edx
    863 	adc	ecx,0
    864 	add	ebp,eax
    865 	adc	ebx,edx
    866 	mov	eax,DWORD [24+esi]
    867 	adc	ecx,0
    868 	mov	DWORD [20+edi],ebp
    869 	mov	edx,DWORD [esi]
    870 	; saved r[5]
    871 	; ############### Calculate word 6
    872 	xor	ebp,ebp
    873 	; sqr a[6]*a[0]
    874 	mul	edx
    875 	add	eax,eax
    876 	adc	edx,edx
    877 	adc	ebp,0
    878 	add	ebx,eax
    879 	adc	ecx,edx
    880 	mov	eax,DWORD [20+esi]
    881 	adc	ebp,0
    882 	mov	edx,DWORD [4+esi]
    883 	; sqr a[5]*a[1]
    884 	mul	edx
    885 	add	eax,eax
    886 	adc	edx,edx
    887 	adc	ebp,0
    888 	add	ebx,eax
    889 	adc	ecx,edx
    890 	mov	eax,DWORD [16+esi]
    891 	adc	ebp,0
    892 	mov	edx,DWORD [8+esi]
    893 	; sqr a[4]*a[2]
    894 	mul	edx
    895 	add	eax,eax
    896 	adc	edx,edx
    897 	adc	ebp,0
    898 	add	ebx,eax
    899 	adc	ecx,edx
    900 	mov	eax,DWORD [12+esi]
    901 	adc	ebp,0
    902 	; sqr a[3]*a[3]
    903 	mul	eax
    904 	add	ebx,eax
    905 	adc	ecx,edx
    906 	mov	edx,DWORD [esi]
    907 	adc	ebp,0
    908 	mov	DWORD [24+edi],ebx
    909 	mov	eax,DWORD [28+esi]
    910 	; saved r[6]
    911 	; ############### Calculate word 7
    912 	xor	ebx,ebx
    913 	; sqr a[7]*a[0]
    914 	mul	edx
    915 	add	eax,eax
    916 	adc	edx,edx
    917 	adc	ebx,0
    918 	add	ecx,eax
    919 	adc	ebp,edx
    920 	mov	eax,DWORD [24+esi]
    921 	adc	ebx,0
    922 	mov	edx,DWORD [4+esi]
    923 	; sqr a[6]*a[1]
    924 	mul	edx
    925 	add	eax,eax
    926 	adc	edx,edx
    927 	adc	ebx,0
    928 	add	ecx,eax
    929 	adc	ebp,edx
    930 	mov	eax,DWORD [20+esi]
    931 	adc	ebx,0
    932 	mov	edx,DWORD [8+esi]
    933 	; sqr a[5]*a[2]
    934 	mul	edx
    935 	add	eax,eax
    936 	adc	edx,edx
    937 	adc	ebx,0
    938 	add	ecx,eax
    939 	adc	ebp,edx
    940 	mov	eax,DWORD [16+esi]
    941 	adc	ebx,0
    942 	mov	edx,DWORD [12+esi]
    943 	; sqr a[4]*a[3]
    944 	mul	edx
    945 	add	eax,eax
    946 	adc	edx,edx
    947 	adc	ebx,0
    948 	add	ecx,eax
    949 	adc	ebp,edx
    950 	mov	eax,DWORD [28+esi]
    951 	adc	ebx,0
    952 	mov	DWORD [28+edi],ecx
    953 	mov	edx,DWORD [4+esi]
    954 	; saved r[7]
    955 	; ############### Calculate word 8
    956 	xor	ecx,ecx
    957 	; sqr a[7]*a[1]
    958 	mul	edx
    959 	add	eax,eax
    960 	adc	edx,edx
    961 	adc	ecx,0
    962 	add	ebp,eax
    963 	adc	ebx,edx
    964 	mov	eax,DWORD [24+esi]
    965 	adc	ecx,0
    966 	mov	edx,DWORD [8+esi]
    967 	; sqr a[6]*a[2]
    968 	mul	edx
    969 	add	eax,eax
    970 	adc	edx,edx
    971 	adc	ecx,0
    972 	add	ebp,eax
    973 	adc	ebx,edx
    974 	mov	eax,DWORD [20+esi]
    975 	adc	ecx,0
    976 	mov	edx,DWORD [12+esi]
    977 	; sqr a[5]*a[3]
    978 	mul	edx
    979 	add	eax,eax
    980 	adc	edx,edx
    981 	adc	ecx,0
    982 	add	ebp,eax
    983 	adc	ebx,edx
    984 	mov	eax,DWORD [16+esi]
    985 	adc	ecx,0
    986 	; sqr a[4]*a[4]
    987 	mul	eax
    988 	add	ebp,eax
    989 	adc	ebx,edx
    990 	mov	edx,DWORD [8+esi]
    991 	adc	ecx,0
    992 	mov	DWORD [32+edi],ebp
    993 	mov	eax,DWORD [28+esi]
    994 	; saved r[8]
    995 	; ############### Calculate word 9
    996 	xor	ebp,ebp
    997 	; sqr a[7]*a[2]
    998 	mul	edx
    999 	add	eax,eax
   1000 	adc	edx,edx
   1001 	adc	ebp,0
   1002 	add	ebx,eax
   1003 	adc	ecx,edx
   1004 	mov	eax,DWORD [24+esi]
   1005 	adc	ebp,0
   1006 	mov	edx,DWORD [12+esi]
   1007 	; sqr a[6]*a[3]
   1008 	mul	edx
   1009 	add	eax,eax
   1010 	adc	edx,edx
   1011 	adc	ebp,0
   1012 	add	ebx,eax
   1013 	adc	ecx,edx
   1014 	mov	eax,DWORD [20+esi]
   1015 	adc	ebp,0
   1016 	mov	edx,DWORD [16+esi]
   1017 	; sqr a[5]*a[4]
   1018 	mul	edx
   1019 	add	eax,eax
   1020 	adc	edx,edx
   1021 	adc	ebp,0
   1022 	add	ebx,eax
   1023 	adc	ecx,edx
   1024 	mov	eax,DWORD [28+esi]
   1025 	adc	ebp,0
   1026 	mov	DWORD [36+edi],ebx
   1027 	mov	edx,DWORD [12+esi]
   1028 	; saved r[9]
   1029 	; ############### Calculate word 10
   1030 	xor	ebx,ebx
   1031 	; sqr a[7]*a[3]
   1032 	mul	edx
   1033 	add	eax,eax
   1034 	adc	edx,edx
   1035 	adc	ebx,0
   1036 	add	ecx,eax
   1037 	adc	ebp,edx
   1038 	mov	eax,DWORD [24+esi]
   1039 	adc	ebx,0
   1040 	mov	edx,DWORD [16+esi]
   1041 	; sqr a[6]*a[4]
   1042 	mul	edx
   1043 	add	eax,eax
   1044 	adc	edx,edx
   1045 	adc	ebx,0
   1046 	add	ecx,eax
   1047 	adc	ebp,edx
   1048 	mov	eax,DWORD [20+esi]
   1049 	adc	ebx,0
   1050 	; sqr a[5]*a[5]
   1051 	mul	eax
   1052 	add	ecx,eax
   1053 	adc	ebp,edx
   1054 	mov	edx,DWORD [16+esi]
   1055 	adc	ebx,0
   1056 	mov	DWORD [40+edi],ecx
   1057 	mov	eax,DWORD [28+esi]
   1058 	; saved r[10]
   1059 	; ############### Calculate word 11
   1060 	xor	ecx,ecx
   1061 	; sqr a[7]*a[4]
   1062 	mul	edx
   1063 	add	eax,eax
   1064 	adc	edx,edx
   1065 	adc	ecx,0
   1066 	add	ebp,eax
   1067 	adc	ebx,edx
   1068 	mov	eax,DWORD [24+esi]
   1069 	adc	ecx,0
   1070 	mov	edx,DWORD [20+esi]
   1071 	; sqr a[6]*a[5]
   1072 	mul	edx
   1073 	add	eax,eax
   1074 	adc	edx,edx
   1075 	adc	ecx,0
   1076 	add	ebp,eax
   1077 	adc	ebx,edx
   1078 	mov	eax,DWORD [28+esi]
   1079 	adc	ecx,0
   1080 	mov	DWORD [44+edi],ebp
   1081 	mov	edx,DWORD [20+esi]
   1082 	; saved r[11]
   1083 	; ############### Calculate word 12
   1084 	xor	ebp,ebp
   1085 	; sqr a[7]*a[5]
   1086 	mul	edx
   1087 	add	eax,eax
   1088 	adc	edx,edx
   1089 	adc	ebp,0
   1090 	add	ebx,eax
   1091 	adc	ecx,edx
   1092 	mov	eax,DWORD [24+esi]
   1093 	adc	ebp,0
   1094 	; sqr a[6]*a[6]
   1095 	mul	eax
   1096 	add	ebx,eax
   1097 	adc	ecx,edx
   1098 	mov	edx,DWORD [24+esi]
   1099 	adc	ebp,0
   1100 	mov	DWORD [48+edi],ebx
   1101 	mov	eax,DWORD [28+esi]
   1102 	; saved r[12]
   1103 	; ############### Calculate word 13
   1104 	xor	ebx,ebx
   1105 	; sqr a[7]*a[6]
   1106 	mul	edx
   1107 	add	eax,eax
   1108 	adc	edx,edx
   1109 	adc	ebx,0
   1110 	add	ecx,eax
   1111 	adc	ebp,edx
   1112 	mov	eax,DWORD [28+esi]
   1113 	adc	ebx,0
   1114 	mov	DWORD [52+edi],ecx
   1115 	; saved r[13]
   1116 	; ############### Calculate word 14
   1117 	xor	ecx,ecx
   1118 	; sqr a[7]*a[7]
   1119 	mul	eax
   1120 	add	ebp,eax
   1121 	adc	ebx,edx
   1122 	adc	ecx,0	; carry word is updated here but ecx is never stored
   1123 	mov	DWORD [56+edi],ebp
   1124 	; saved r[14]
   1125 	mov	DWORD [60+edi],ebx	; r[15] = final middle word
   1126 	pop	ebx	; restore saved registers in reverse push order
   1127 	pop	ebp
   1128 	pop	edi
   1129 	pop	esi
   1130 	ret
   1131 global	_bn_sqr_comba4
        ; -----------------------------------------------------------------
        ; _bn_sqr_comba4 -- r = a * a for a 4-dword operand, fully unrolled.
        ;
        ; Column-wise squaring: for each result word k, the products
        ; a[i]*a[j] with i+j == k and i >= j are accumulated, then r[k] is
        ; stored. A cross product (i != j) is computed once and doubled
        ; before it is added:
        ;     add eax,eax / adc edx,edx / adc <carry word>,0
        ; shifts the 64-bit product left by one bit and adds the bit shifted
        ; out of edx to the accumulator's carry word. A square a[i]*a[i]
        ; (`mul eax`) is added once, undoubled.
        ;
        ; Stack arguments (32-bit; the routine ends in a plain `ret`, so the
        ; caller removes them):
        ;   1st  r : 8 dwords are written, r[0]..r[7]
        ;   2nd  a : 4 dwords are read,    a[0]..a[3]
        ; Presumably the C prototype is
        ;   void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        ; -- TODO confirm against the declaring header.
        ;
        ; Register roles:
        ;   edi = r, esi = a
        ;   ebx / ecx / ebp = three-word column accumulator; the roles
        ;       (low word, middle word, carry word) rotate by one register
        ;       for every result word
        ;   eax / edx = mul operands and 64-bit product
        ; Saved and restored: esi, edi, ebp, ebx.
        ; Clobbered: eax, ecx, edx, flags.
        ;
        ; NOTE(review): r[k] is stored while a[] is still being read for
        ; later columns, so r presumably must not overlap a -- confirm with
        ; callers.
        ; -----------------------------------------------------------------
   1132 align	16
   1133 _bn_sqr_comba4:
   1134 L$_bn_sqr_comba4_begin:
   1135 	push	esi
   1136 	push	edi
   1137 	push	ebp
   1138 	push	ebx
   1139 	mov	edi,DWORD [20+esp]	; edi = r (1st argument; four registers pushed)
   1140 	mov	esi,DWORD [24+esp]	; esi = a (2nd argument)
   1141 	xor	ebx,ebx	; clear accumulator (low word for column 0)
   1142 	xor	ecx,ecx	; clear accumulator (middle word for column 0)
   1143 	mov	eax,DWORD [esi]	; eax = a[0]
   1144 	; ############### Calculate word 0
   1145 	xor	ebp,ebp
   1146 	; sqr a[0]*a[0]
   1147 	mul	eax
   1148 	add	ebx,eax
   1149 	adc	ecx,edx
   1150 	mov	edx,DWORD [esi]
   1151 	adc	ebp,0
   1152 	mov	DWORD [edi],ebx
   1153 	mov	eax,DWORD [4+esi]
   1154 	; saved r[0]
   1155 	; ############### Calculate word 1
   1156 	xor	ebx,ebx
   1157 	; sqr a[1]*a[0]
   1158 	mul	edx
   1159 	add	eax,eax	; double the cross product: edx:eax <<= 1 ...
   1160 	adc	edx,edx
   1161 	adc	ebx,0	; ... and add the bit shifted out to the carry word
   1162 	add	ecx,eax
   1163 	adc	ebp,edx
   1164 	mov	eax,DWORD [8+esi]
   1165 	adc	ebx,0
   1166 	mov	DWORD [4+edi],ecx
   1167 	mov	edx,DWORD [esi]
   1168 	; saved r[1]
   1169 	; ############### Calculate word 2
   1170 	xor	ecx,ecx
   1171 	; sqr a[2]*a[0]
   1172 	mul	edx
   1173 	add	eax,eax
   1174 	adc	edx,edx
   1175 	adc	ecx,0
   1176 	add	ebp,eax
   1177 	adc	ebx,edx
   1178 	mov	eax,DWORD [4+esi]
   1179 	adc	ecx,0
   1180 	; sqr a[1]*a[1]
   1181 	mul	eax
   1182 	add	ebp,eax
   1183 	adc	ebx,edx
   1184 	mov	edx,DWORD [esi]
   1185 	adc	ecx,0
   1186 	mov	DWORD [8+edi],ebp
   1187 	mov	eax,DWORD [12+esi]
   1188 	; saved r[2]
   1189 	; ############### Calculate word 3
   1190 	xor	ebp,ebp
   1191 	; sqr a[3]*a[0]
   1192 	mul	edx
   1193 	add	eax,eax
   1194 	adc	edx,edx
   1195 	adc	ebp,0
   1196 	add	ebx,eax
   1197 	adc	ecx,edx
   1198 	mov	eax,DWORD [8+esi]
   1199 	adc	ebp,0
   1200 	mov	edx,DWORD [4+esi]
   1201 	; sqr a[2]*a[1]
   1202 	mul	edx
   1203 	add	eax,eax
   1204 	adc	edx,edx
   1205 	adc	ebp,0
   1206 	add	ebx,eax
   1207 	adc	ecx,edx
   1208 	mov	eax,DWORD [12+esi]
   1209 	adc	ebp,0
   1210 	mov	DWORD [12+edi],ebx
   1211 	mov	edx,DWORD [4+esi]
   1212 	; saved r[3]
   1213 	; ############### Calculate word 4
   1214 	xor	ebx,ebx
   1215 	; sqr a[3]*a[1]
   1216 	mul	edx
   1217 	add	eax,eax
   1218 	adc	edx,edx
   1219 	adc	ebx,0
   1220 	add	ecx,eax
   1221 	adc	ebp,edx
   1222 	mov	eax,DWORD [8+esi]
   1223 	adc	ebx,0
   1224 	; sqr a[2]*a[2]
   1225 	mul	eax
   1226 	add	ecx,eax
   1227 	adc	ebp,edx
   1228 	mov	edx,DWORD [8+esi]
   1229 	adc	ebx,0
   1230 	mov	DWORD [16+edi],ecx
   1231 	mov	eax,DWORD [12+esi]
   1232 	; saved r[4]
   1233 	; ############### Calculate word 5
   1234 	xor	ecx,ecx
   1235 	; sqr a[3]*a[2]
   1236 	mul	edx
   1237 	add	eax,eax
   1238 	adc	edx,edx
   1239 	adc	ecx,0
   1240 	add	ebp,eax
   1241 	adc	ebx,edx
   1242 	mov	eax,DWORD [12+esi]
   1243 	adc	ecx,0
   1244 	mov	DWORD [20+edi],ebp
   1245 	; saved r[5]
   1246 	; ############### Calculate word 6
   1247 	xor	ebp,ebp
   1248 	; sqr a[3]*a[3]
   1249 	mul	eax
   1250 	add	ebx,eax
   1251 	adc	ecx,edx
   1252 	adc	ebp,0	; carry word is updated here but ebp is never stored
   1253 	mov	DWORD [24+edi],ebx
   1254 	; saved r[6]
   1255 	mov	DWORD [28+edi],ecx	; r[7] = final middle word
   1256 	pop	ebx	; restore saved registers in reverse push order
   1257 	pop	ebp
   1258 	pop	edi
   1259 	pop	esi
   1260 	ret
   1261