Home | History | Annotate | Download | only in asm
      1 .set	mips2
      2 .rdata
      3 .asciiz	"mips3.s, Version 1.2"
      4 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"
      5 
      6 .text
      7 .set	noat
      8 
      9 .align	5
     10 .globl	bn_mul_add_words
     11 .ent	bn_mul_add_words
        # bn_mul_add_words: exported entry stub.
        # Reads the word count from $6.  If it is > 0 control transfers to
        # bn_mul_add_words_internal; otherwise the stub returns 0 in $2.
        # Runs under .set noreorder, so each instruction that follows a
        # branch/jump is its delay slot and executes before the transfer.
     12 bn_mul_add_words:
     13 	.set	noreorder
     14 	bgtz	$6,bn_mul_add_words_internal	# count > 0: go do the work
     15 	move	$2,$0		# delay slot (both paths): carry/result = 0
     16 	jr	$31
     17 	move	$4,$2		# delay slot: result is mirrored into $4 as well
     18 .end	bn_mul_add_words
     19 
     20 .align	5
     21 .ent	bn_mul_add_words_internal
        # bn_mul_add_words_internal: multiply-accumulate a word vector.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the words that are read, updated and written back
        #   $5  pointer to the words that are multiplied
        #   $6  number of words (the entry stub only branches here when > 0)
        #   $7  32-bit multiplier
        #   $2  running carry (the entry stub sets it to 0)
        #
        # For every word i:  ($2 : mem[$4+4i]) = mem[$5+4i] * $7 + mem[$4+4i] + $2
        #
        # The main loop is unrolled four times and runs while ($6 & -4) is
        # positive; the tail then handles the remaining one to three words.
        # The final carry is returned in $2 and also copied to $4.
        # Scratch: $1 (usable because the file sets noat), $3, $8-$15, HI/LO.
        # Each multu is issued early so that loads/adds overlap its latency;
        # do not reorder instructions without re-checking HI/LO usage.
     22 bn_mul_add_words_internal:
     23 	.set	reorder
     24 	li	$3,-4		# mask that clears the low two bits of the count
     25 	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
     26 	lw	$12,0($5)
     27 	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only
     28 
     29 .L_bn_mul_add_words_loop:
     30 	multu	$12,$7
     31 	lw	$13,0($4)
     32 	lw	$14,4($5)
     33 	lw	$15,4($4)
     34 	lw	$8,2*4($5)
     35 	lw	$9,2*4($4)
     36 	addu	$13,$2
     37 	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
     38 				# values", but it seems to work fine
     39 				# even on 64-bit registers.
     40 	mflo	$1
     41 	mfhi	$12
     42 	addu	$13,$1
     43 	addu	$2,$12
     44 	 multu	$14,$7
     45 	sltu	$1,$13,$1
     46 	sw	$13,0($4)
     47 	addu	$2,$1
     48 
     49 	lw	$10,3*4($5)
     50 	lw	$11,3*4($4)
     51 	addu	$15,$2
     52 	sltu	$2,$15,$2
     53 	mflo	$1
     54 	mfhi	$14
     55 	addu	$15,$1
     56 	addu	$2,$14
     57 	 multu	$8,$7
     58 	sltu	$1,$15,$1
     59 	sw	$15,4($4)
     60 	addu	$2,$1
     61 
     62 	subu	$6,4		# four words consumed by this iteration
     63 	addu $4,4*4		# advance both pointers now; the remaining
     64 	addu $5,4*4		# stores use negative offsets
     65 	addu	$9,$2
     66 	sltu	$2,$9,$2
     67 	mflo	$1
     68 	mfhi	$8
     69 	addu	$9,$1
     70 	addu	$2,$8
     71 	 multu	$10,$7
     72 	sltu	$1,$9,$1
     73 	sw	$9,-2*4($4)
     74 	addu	$2,$1
     75 
     76 
     77 	and	$8,$6,$3	# any full group of four left?
     78 	addu	$11,$2
     79 	sltu	$2,$11,$2
     80 	mflo	$1
     81 	mfhi	$10
     82 	addu	$11,$1
     83 	addu	$2,$10
     84 	sltu	$1,$11,$1
     85 	sw	$11,-4($4)
     86 	addu	$2,$1
     87 	.set	noreorder
     88 	bgtzl	$8,.L_bn_mul_add_words_loop
     89 	lw	$12,0($5)	# branch-likely delay slot: runs only if taken
     90 
     91 	beqz	$6,.L_bn_mul_add_words_return	# nothing left for the tail
     92 	nop
     93 
     94 .L_bn_mul_add_words_tail:
     95 	.set	reorder
     96 	lw	$12,0($5)
     97 	multu	$12,$7
     98 	lw	$13,0($4)
     99 	subu	$6,1
    100 	addu	$13,$2
    101 	sltu	$2,$13,$2
    102 	mflo	$1
    103 	mfhi	$12
    104 	addu	$13,$1
    105 	addu	$2,$12
    106 	sltu	$1,$13,$1
    107 	sw	$13,0($4)
    108 	addu	$2,$1
    109 	beqz	$6,.L_bn_mul_add_words_return
    110 
    111 	lw	$12,4($5)
    112 	multu	$12,$7
    113 	lw	$13,4($4)
    114 	subu	$6,1
    115 	addu	$13,$2
    116 	sltu	$2,$13,$2
    117 	mflo	$1
    118 	mfhi	$12
    119 	addu	$13,$1
    120 	addu	$2,$12
    121 	sltu	$1,$13,$1
    122 	sw	$13,4($4)
    123 	addu	$2,$1
    124 	beqz	$6,.L_bn_mul_add_words_return
    125 
    126 	lw	$12,2*4($5)
    127 	multu	$12,$7
    128 	lw	$13,2*4($4)
    129 	addu	$13,$2
    130 	sltu	$2,$13,$2
    131 	mflo	$1
    132 	mfhi	$12
    133 	addu	$13,$1
    134 	addu	$2,$12
    135 	sltu	$1,$13,$1
    136 	sw	$13,2*4($4)
    137 	addu	$2,$1
    138 
    139 .L_bn_mul_add_words_return:
    140 	.set	noreorder
    141 	jr	$31
    142 	move	$4,$2		# delay slot: mirror the carry into $4
    143 .end	bn_mul_add_words_internal
    144 
    145 .align	5
    146 .globl	bn_mul_words
    147 .ent	bn_mul_words
        # bn_mul_words: exported entry stub.
        # Branches to bn_mul_words_internal when the word count in $6 is > 0,
        # otherwise returns 0 in $2 (and $4).  The instruction after each
        # branch/jump is its delay slot (.set noreorder).
    148 bn_mul_words:
    149 	.set	noreorder
    150 	bgtz	$6,bn_mul_words_internal
    151 	move	$2,$0		# delay slot (both paths): carry/result = 0
    152 	jr	$31
    153 	move	$4,$2		# delay slot: result is mirrored into $4 as well
    154 .end	bn_mul_words
    155 
    156 .align	5
    157 .ent	bn_mul_words_internal
        # bn_mul_words_internal: multiply a word vector by a single word.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the destination words (written, never read)
        #   $5  pointer to the source words
        #   $6  number of words (the entry stub only branches here when > 0)
        #   $7  32-bit multiplier
        #   $2  running carry (the entry stub sets it to 0)
        #
        # For every word i:  ($2 : mem[$4+4i]) = mem[$5+4i] * $7 + $2
        #
        # Main loop: four words per iteration while ($6 & -4) is positive.
        # Tail: the remaining one to three words.  The final carry is
        # returned in $2 and also copied to $4.
        # Scratch: $1, $3, $8-$15, HI/LO.
    158 bn_mul_words_internal:
    159 	.set	reorder
    160 	li	$3,-4		# mask that clears the low two bits of the count
    161 	and	$8,$6,$3
    162 	lw	$12,0($5)
    163 	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words: tail only
    164 
    165 .L_bn_mul_words_loop:
    166 	multu	$12,$7
    167 	lw	$14,4($5)
    168 	lw	$8,2*4($5)
    169 	lw	$10,3*4($5)
    170 	mflo	$1
    171 	mfhi	$12
    172 	addu	$2,$1		# low product word + incoming carry
    173 	sltu	$13,$2,$1	# carry out of that addition
    174 	 multu	$14,$7
    175 	sw	$2,0($4)
    176 	addu	$2,$13,$12	# next carry = high product word + carry out
    177 
    178 	subu	$6,4
    179 	addu $4,4*4		# pointers advance early; later stores use
    180 	addu $5,4*4		# negative offsets
    181 	mflo	$1
    182 	mfhi	$14
    183 	addu	$2,$1
    184 	sltu	$15,$2,$1
    185 	 multu	$8,$7
    186 	sw	$2,-3*4($4)
    187 	addu	$2,$15,$14
    188 
    189 	mflo	$1
    190 	mfhi	$8
    191 	addu	$2,$1
    192 	sltu	$9,$2,$1
    193 	 multu	$10,$7
    194 	sw	$2,-2*4($4)
    195 	addu	$2,$9,$8
    196 
    197 	and	$8,$6,$3	# any full group of four left?
    198 	mflo	$1
    199 	mfhi	$10
    200 	addu	$2,$1
    201 	sltu	$11,$2,$1
    202 	sw	$2,-4($4)
    203 	addu	$2,$11,$10
    204 	.set	noreorder
    205 	bgtzl	$8,.L_bn_mul_words_loop
    206 	lw	$12,0($5)	# branch-likely delay slot: runs only if taken
    207 
    208 	beqz	$6,.L_bn_mul_words_return
    209 	nop
    210 
    211 .L_bn_mul_words_tail:
    212 	.set	reorder
    213 	lw	$12,0($5)
    214 	multu	$12,$7
    215 	subu	$6,1
    216 	mflo	$1
    217 	mfhi	$12
    218 	addu	$2,$1
    219 	sltu	$13,$2,$1
    220 	sw	$2,0($4)
    221 	addu	$2,$13,$12
    222 	beqz	$6,.L_bn_mul_words_return
    223 
    224 	lw	$12,4($5)
    225 	multu	$12,$7
    226 	subu	$6,1
    227 	mflo	$1
    228 	mfhi	$12
    229 	addu	$2,$1
    230 	sltu	$13,$2,$1
    231 	sw	$2,4($4)
    232 	addu	$2,$13,$12
    233 	beqz	$6,.L_bn_mul_words_return
    234 
    235 	lw	$12,2*4($5)
    236 	multu	$12,$7
    237 	mflo	$1
    238 	mfhi	$12
    239 	addu	$2,$1
    240 	sltu	$13,$2,$1
    241 	sw	$2,2*4($4)
    242 	addu	$2,$13,$12
    243 
    244 .L_bn_mul_words_return:
    245 	.set	noreorder
    246 	jr	$31
    247 	move	$4,$2		# delay slot: mirror the carry into $4
    248 .end	bn_mul_words_internal
    249 
    250 .align	5
    251 .globl	bn_sqr_words
    252 .ent	bn_sqr_words
        # bn_sqr_words: exported entry stub.
        # Branches to bn_sqr_words_internal when the word count in $6 is > 0,
        # otherwise returns immediately with $2 = $4 = 0.  The instruction
        # after each branch/jump is its delay slot (.set noreorder).
    253 bn_sqr_words:
    254 	.set	noreorder
    255 	bgtz	$6,bn_sqr_words_internal
    256 	move	$2,$0		# delay slot (both paths): $2 = 0
    257 	jr	$31
    258 	move	$4,$2		# delay slot
    259 .end	bn_sqr_words
    260 
    261 .align	5
    262 .ent	bn_sqr_words_internal
        # bn_sqr_words_internal: square each word of a vector.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the destination (two words written per source word)
        #   $5  pointer to the source words
        #   $6  number of source words (the stub only branches here when > 0)
        #
        # For every word i the 64-bit product mem[$5+4i]^2 is stored with the
        # low half at mem[$4+8i] and the high half at mem[$4+8i+4].  No carry
        # is propagated between words.
        #
        # Main loop: four source words per iteration while ($6 & -4) is
        # positive; tail: the remaining one to three words.  $2 is not
        # modified here (the stub left it at 0) and is copied to $4 on return.
        # Scratch: $3, $8-$15, HI/LO.
    263 bn_sqr_words_internal:
    264 	.set	reorder
    265 	li	$3,-4		# mask that clears the low two bits of the count
    266 	and	$8,$6,$3
    267 	lw	$12,0($5)
    268 	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words: tail only
    269 
    270 .L_bn_sqr_words_loop:
    271 	multu	$12,$12
    272 	lw	$14,4($5)
    273 	lw	$8,2*4($5)
    274 	lw	$10,3*4($5)
    275 	mflo	$13
    276 	mfhi	$12
    277 	sw	$13,0($4)
    278 	sw	$12,4($4)
    279 
    280 	multu	$14,$14
    281 	subu	$6,4
    282 	addu $4,8*4		# destination advances twice as fast as source
    283 	addu $5,4*4
    284 	mflo	$15
    285 	mfhi	$14
    286 	sw	$15,-6*4($4)
    287 	sw	$14,-5*4($4)
    288 
    289 	multu	$8,$8
    290 	mflo	$9
    291 	mfhi	$8
    292 	sw	$9,-4*4($4)
    293 	sw	$8,-3*4($4)
    294 
    295 
    296 	multu	$10,$10
    297 	and	$8,$6,$3	# any full group of four left?
    298 	mflo	$11
    299 	mfhi	$10
    300 	sw	$11,-2*4($4)
    301 	sw	$10,-4($4)
    302 
    303 	.set	noreorder
    304 	bgtzl	$8,.L_bn_sqr_words_loop
    305 	lw	$12,0($5)	# branch-likely delay slot: runs only if taken
    306 
    307 	beqz	$6,.L_bn_sqr_words_return
    308 	nop
    309 
    310 .L_bn_sqr_words_tail:
    311 	.set	reorder
    312 	lw	$12,0($5)
    313 	multu	$12,$12
    314 	subu	$6,1
    315 	mflo	$13
    316 	mfhi	$12
    317 	sw	$13,0($4)
    318 	sw	$12,4($4)
    319 	beqz	$6,.L_bn_sqr_words_return
    320 
    321 	lw	$12,4($5)
    322 	multu	$12,$12
    323 	subu	$6,1
    324 	mflo	$13
    325 	mfhi	$12
    326 	sw	$13,2*4($4)
    327 	sw	$12,3*4($4)
    328 	beqz	$6,.L_bn_sqr_words_return
    329 
    330 	lw	$12,2*4($5)
    331 	multu	$12,$12
    332 	mflo	$13
    333 	mfhi	$12
    334 	sw	$13,4*4($4)
    335 	sw	$12,5*4($4)
    336 
    337 .L_bn_sqr_words_return:
    338 	.set	noreorder
    339 	jr	$31
    340 	move	$4,$2		# delay slot
    341 
    342 .end	bn_sqr_words_internal
    343 
    344 .align	5
    345 .globl	bn_add_words
    346 .ent	bn_add_words
        # bn_add_words: exported entry stub.
        # Here the word count is taken from $7.  If it is > 0 control
        # transfers to bn_add_words_internal; otherwise the stub returns 0
        # in $2 (and $4).  The instruction after each branch/jump is its
        # delay slot (.set noreorder).
    347 bn_add_words:
    348 	.set	noreorder
    349 	bgtz	$7,bn_add_words_internal
    350 	move	$2,$0		# delay slot (both paths): carry/result = 0
    351 	jr	$31
    352 	move	$4,$2		# delay slot: result is mirrored into $4 as well
    353 .end	bn_add_words
    354 
    355 .align	5
    356 .ent	bn_add_words_internal
        # bn_add_words_internal: word-wise addition with carry propagation.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the destination words
        #   $5  pointer to the first source vector
        #   $6  pointer to the second source vector
        #   $7  number of words (the entry stub only branches here when > 0)
        #   $2  running carry (the entry stub sets it to 0)
        #
        # For every word i:
        #   t       = mem[$6+4i] + mem[$5+4i]     carry1 = (t < mem[$5+4i])
        #   result  = t + $2                      carry2 = (result < t)
        #   mem[$4+4i] = result,  $2 = carry1 + carry2
        #
        # Main loop: four words per iteration while ($7 & -4) is positive;
        # tail: the remaining one to three words.  The final carry is
        # returned in $2 and also copied to $4.
        # Scratch: $1, $3, $8-$15, $24, $25.
    357 bn_add_words_internal:
    358 	.set	reorder
    359 	li	$3,-4		# mask that clears the low two bits of the count
    360 	and	$1,$7,$3
    361 	lw	$12,0($5)
    362 	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words: tail only
    363 
    364 .L_bn_add_words_loop:
    365 	lw	$8,0($6)
    366 	subu	$7,4
    367 	lw	$13,4($5)
    368 	and	$1,$7,$3	# loop condition for the bgtzl at the bottom
    369 	lw	$14,2*4($5)
    370 	addu $6,4*4		# all three pointers advance early; the
    371 	lw	$15,3*4($5)
    372 	addu $4,4*4		# accesses after each bump use negative
    373 	lw	$9,-3*4($6)
    374 	addu $5,4*4		# offsets
    375 	lw	$10,-2*4($6)
    376 	lw	$11,-4($6)
    377 	addu	$8,$12
    378 	sltu	$24,$8,$12	# carry out of the operand sum
    379 	addu	$12,$8,$2
    380 	sltu	$2,$12,$8	# carry out of adding the incoming carry
    381 	sw	$12,-4*4($4)
    382 	addu	$2,$24
    383 
    384 	addu	$9,$13
    385 	sltu	$25,$9,$13
    386 	addu	$13,$9,$2
    387 	sltu	$2,$13,$9
    388 	sw	$13,-3*4($4)
    389 	addu	$2,$25
    390 
    391 	addu	$10,$14
    392 	sltu	$24,$10,$14
    393 	addu	$14,$10,$2
    394 	sltu	$2,$14,$10
    395 	sw	$14,-2*4($4)
    396 	addu	$2,$24
    397 
    398 	addu	$11,$15
    399 	sltu	$25,$11,$15
    400 	addu	$15,$11,$2
    401 	sltu	$2,$15,$11
    402 	sw	$15,-4($4)
    403 	addu	$2,$25
    404 
    405 	.set	noreorder
    406 	bgtzl	$1,.L_bn_add_words_loop
    407 	lw	$12,0($5)	# branch-likely delay slot: runs only if taken
    408 
    409 	beqz	$7,.L_bn_add_words_return
    410 	nop
    411 
    412 .L_bn_add_words_tail:
    413 	.set	reorder
    414 	lw	$12,0($5)
    415 	lw	$8,0($6)
    416 	addu	$8,$12
    417 	subu	$7,1
    418 	sltu	$24,$8,$12
    419 	addu	$12,$8,$2
    420 	sltu	$2,$12,$8
    421 	sw	$12,0($4)
    422 	addu	$2,$24
    423 	beqz	$7,.L_bn_add_words_return
    424 
    425 	lw	$13,4($5)
    426 	lw	$9,4($6)
    427 	addu	$9,$13
    428 	subu	$7,1
    429 	sltu	$25,$9,$13
    430 	addu	$13,$9,$2
    431 	sltu	$2,$13,$9
    432 	sw	$13,4($4)
    433 	addu	$2,$25
    434 	beqz	$7,.L_bn_add_words_return
    435 
    436 	lw	$14,2*4($5)
    437 	lw	$10,2*4($6)
    438 	addu	$10,$14
    439 	sltu	$24,$10,$14
    440 	addu	$14,$10,$2
    441 	sltu	$2,$14,$10
    442 	sw	$14,2*4($4)
    443 	addu	$2,$24
    444 
    445 .L_bn_add_words_return:
    446 	.set	noreorder
    447 	jr	$31
    448 	move	$4,$2		# delay slot: mirror the carry into $4
    449 
    450 .end	bn_add_words_internal
    451 
    452 .align	5
    453 .globl	bn_sub_words
    454 .ent	bn_sub_words
        # bn_sub_words: exported entry stub.
        # The word count is taken from $7.  If it is > 0 control transfers to
        # bn_sub_words_internal; otherwise the stub returns 0 in $2 (and $4).
        # The instruction after each branch/jump is its delay slot
        # (.set noreorder).
    455 bn_sub_words:
    456 	.set	noreorder
    457 	bgtz	$7,bn_sub_words_internal
    458 	move	$2,$0		# delay slot (both paths): borrow/result = 0
    459 	jr	$31
    460 	move	$4,$0		# delay slot; the sibling stubs write "move $4,$2",
        				# which yields the same value since $2 is 0 here
    461 .end	bn_sub_words
    462 
    463 .align	5
    464 .ent	bn_sub_words_internal
        # bn_sub_words_internal: word-wise subtraction with borrow propagation.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the destination words
        #   $5  pointer to the minuend words
        #   $6  pointer to the subtrahend words
        #   $7  number of words (the entry stub only branches here when > 0)
        #   $2  running borrow (the entry stub sets it to 0)
        #
        # For every word i, with a = mem[$5+4i] and b = mem[$6+4i]:
        #   borrow1 = (a < b)          d = a - b
        #   result  = d - $2           borrow2 = (result > d)
        #   mem[$4+4i] = result,  $2 = borrow1 + borrow2
        #
        # Main loop: four words per iteration while ($7 & -4) is positive;
        # tail: the remaining one to three words.  The final borrow is
        # returned in $2 and also copied to $4.
        # Scratch: $1, $3, $8-$15, $24, $25.
    465 bn_sub_words_internal:
    466 	.set	reorder
    467 	li	$3,-4		# mask that clears the low two bits of the count
    468 	and	$1,$7,$3
    469 	lw	$12,0($5)
    470 	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words: tail only
    471 
    472 .L_bn_sub_words_loop:
    473 	lw	$8,0($6)
    474 	subu	$7,4
    475 	lw	$13,4($5)
    476 	and	$1,$7,$3	# loop condition for the bgtzl at the bottom
    477 	lw	$14,2*4($5)
    478 	addu $6,4*4		# all three pointers advance early; the
    479 	lw	$15,3*4($5)
    480 	addu $4,4*4		# accesses after each bump use negative
    481 	lw	$9,-3*4($6)
    482 	addu $5,4*4		# offsets
    483 	lw	$10,-2*4($6)
    484 	lw	$11,-4($6)
    485 	sltu	$24,$12,$8	# borrow out of a - b
    486 	subu	$8,$12,$8
    487 	subu	$12,$8,$2
    488 	sgtu	$2,$12,$8	# borrow out of subtracting the incoming borrow
    489 	sw	$12,-4*4($4)
    490 	addu	$2,$24
    491 
    492 	sltu	$25,$13,$9
    493 	subu	$9,$13,$9
    494 	subu	$13,$9,$2
    495 	sgtu	$2,$13,$9
    496 	sw	$13,-3*4($4)
    497 	addu	$2,$25
    498 
    499 
    500 	sltu	$24,$14,$10
    501 	subu	$10,$14,$10
    502 	subu	$14,$10,$2
    503 	sgtu	$2,$14,$10
    504 	sw	$14,-2*4($4)
    505 	addu	$2,$24
    506 
    507 	sltu	$25,$15,$11
    508 	subu	$11,$15,$11
    509 	subu	$15,$11,$2
    510 	sgtu	$2,$15,$11
    511 	sw	$15,-4($4)
    512 	addu	$2,$25
    513 
    514 	.set	noreorder
    515 	bgtzl	$1,.L_bn_sub_words_loop
    516 	lw	$12,0($5)	# branch-likely delay slot: runs only if taken
    517 
    518 	beqz	$7,.L_bn_sub_words_return
    519 	nop
    520 
    521 .L_bn_sub_words_tail:
    522 	.set	reorder
    523 	lw	$12,0($5)
    524 	lw	$8,0($6)
    525 	subu	$7,1
    526 	sltu	$24,$12,$8
    527 	subu	$8,$12,$8
    528 	subu	$12,$8,$2
    529 	sgtu	$2,$12,$8
    530 	sw	$12,0($4)
    531 	addu	$2,$24
    532 	beqz	$7,.L_bn_sub_words_return
    533 
    534 	lw	$13,4($5)
    535 	subu	$7,1
    536 	lw	$9,4($6)
    537 	sltu	$25,$13,$9
    538 	subu	$9,$13,$9
    539 	subu	$13,$9,$2
    540 	sgtu	$2,$13,$9
    541 	sw	$13,4($4)
    542 	addu	$2,$25
    543 	beqz	$7,.L_bn_sub_words_return
    544 
    545 	lw	$14,2*4($5)
    546 	lw	$10,2*4($6)
    547 	sltu	$24,$14,$10
    548 	subu	$10,$14,$10
    549 	subu	$14,$10,$2
    550 	sgtu	$2,$14,$10
    551 	sw	$14,2*4($4)
    552 	addu	$2,$24
    553 
    554 .L_bn_sub_words_return:
    555 	.set	noreorder
    556 	jr	$31
    557 	move	$4,$2		# delay slot: mirror the borrow into $4
    558 .end	bn_sub_words_internal
    559 
    560 .align 5
    561 .globl	bn_div_3_words
    562 .ent	bn_div_3_words
        # bn_div_3_words: exported entry.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer; the words at offsets 0, -4 and -8 from it are read
        #       (offset -8 is read in bn_div_3_words_internal)
        #   $5  a word that is kept in $10 and later multiplied by the
        #       quotient estimate
        #   $6  a word compared against the word at 0($4) and used as divisor
        #
        # If the word at 0($4) equals $6 the function returns -1 (all ones)
        # in $2/$4.  Otherwise it sets up $4 = word at 0, $5 = word at -4 and
        # continues in bn_div_3_words_internal, which calls
        # bn_div_words_internal with those registers.
        # NOTE(review): this looks like the three-word quotient estimate used
        # by multi-precision division -- confirm against the C caller.
    563 bn_div_3_words:
    564 	.set	noreorder
    565 	move	$7,$4		# we know that bn_div_words does not
    566 				# touch $7, $10, $11 and preserves $6
    567 				# so that we can save two arguments
    568 				# and return address in registers
    569 				# instead of stack:-)
    570 
    571 	lw	$4,($7)
    572 	move	$10,$5
    573 	bne	$4,$6,bn_div_3_words_internal
    574 	lw	$5,-4($7)	# delay slot: executed on both paths
    575 	li	$2,-1
    576 	jr	$31
    577 	move	$4,$2		# delay slot: mirror the result into $4
    578 .end	bn_div_3_words
    579 
    580 .align	5
    581 .ent	bn_div_3_words_internal
        # bn_div_3_words_internal: continuation of bn_div_3_words.
        #
        # State on entry (set up by bn_div_3_words):
        #   $4:$5  two-word dividend (high:low) for bn_div_words_internal
        #   $6     divisor word
        #   $7     the original pointer argument
        #   $10    the original second argument
        #
        # Steps:
        #   1. Save $31 in $11 and call bn_div_words_internal.  On return $2
        #      holds the quotient estimate and $5 the remainder (that routine
        #      ends with "move $5,$3").
        #   2. Form the 64-bit product $13:$12 = $10 * $2 and load the word
        #      at -8($7) into $14.
        #   3. Correction loop: leave once $13 < $5, or once $13 == $5 with
        #      $14 >= $12 (tested on the values from before this pass's
        #      update), or once $5 + $6 wrapped around.  Every further pass
        #      decrements $2, subtracts $10 from $13:$12 and adds $6 to $5.
        #
        # Returns the corrected quotient in $2 (also copied to $4).
        # Scratch: $1, $8, $12-$15, $24, $25, HI/LO.
    582 bn_div_3_words_internal:
    583 	.set	reorder
    584 	move	$11,$31		# keep the return address in a register
    585 	bal	bn_div_words_internal
    586 	move	$31,$11
    587 	multu	$10,$2
    588 	lw	$14,-2*4($7)
    589 	move	$8,$0
    590 	mfhi	$13
    591 	mflo	$12
    592 	sltu	$24,$13,$5
    593 .L_bn_div_3_words_inner_loop:
    594 	bnez	$24,.L_bn_div_3_words_inner_loop_done
    595 	sgeu	$1,$14,$12
    596 	seq	$25,$13,$5
    597 	and	$1,$25		# $1 = ($13 == $5) && ($14 >= $12)
    598 	sltu	$15,$12,$10	# borrow for the 64-bit subtraction below
    599 	addu	$5,$6
    600 	subu	$13,$15
    601 	subu	$12,$10
    602 	sltu	$24,$13,$5
    603 	sltu	$8,$5,$6	# set when $5 + $6 wrapped around
    604 	or	$24,$8
    605 	.set	noreorder
    606 	beqzl	$1,.L_bn_div_3_words_inner_loop
    607 	subu	$2,1		# branch-likely delay slot: runs only if taken
    608 	.set	reorder
    609 .L_bn_div_3_words_inner_loop_done:
    610 	.set	noreorder
    611 	jr	$31
    612 	move	$4,$2		# delay slot: mirror the result into $4
    613 .end	bn_div_3_words_internal
    614 
    615 .align	5
    616 .globl	bn_div_words
    617 .ent	bn_div_words
        # bn_div_words: exported entry stub.
        # $6 holds the divisor.  If it is non-zero control transfers to
        # bn_div_words_internal; if it is zero the stub returns -1 (all
        # ones) in $2 and $4.  The instruction after each branch/jump is its
        # delay slot (.set noreorder), so $2 = -1 is also set on the branch
        # path; bn_div_words_internal builds its own result in $2.
    618 bn_div_words:
    619 	.set	noreorder
    620 	bnez	$6,bn_div_words_internal
    621 	li	$2,-1		# I would rather signal div-by-zero
    622 				# which can be done with 'break 7'
    623 	jr	$31
    624 	move	$4,$2		# delay slot: mirror the result into $4
    625 .end	bn_div_words
    626 
    627 .align	5
    628 .ent	bn_div_words_internal
        # bn_div_words_internal: divide the two-word value $4:$5 by $6.
        #
        # Registers on entry (as used by the code below):
        #   $4  high word of the dividend
        #   $5  low word of the dividend
        #   $6  divisor (non-zero when reached through bn_div_words)
        #
        # Outline:
        #   1. Normalise: shift $6 left until its top bit is set, counting the
        #      shifts in $25.  If any of the top $25 bits of $4 are set,
        #      "break 6" is executed (overflow).  $4:$5 are shifted left by
        #      the same amount.
        #   2. If $4 >= $6, subtract $6 from $4 once.
        #   3. Produce the quotient 16 bits at a time (4*4 = 16 is the shift
        #      count): estimate a digit by dividing by the high half of the
        #      divisor ($3), or use 0xffff when the high halves are equal,
        #      then decrement the estimate in the inner loop while
        #      estimate * $6 exceeds the current partial dividend.
        #   4. Combine both digits in $2, shift the remainder back into $3 and
        #      restore $6.
        #
        # On return: $2 = quotient (also in $4), $3 = remainder (also in $5),
        # $6 restored.  $7, $10 and $11 are never written, which
        # bn_div_3_words relies on.
        # Scratch: $1, $8, $9, $12-$15, $24, $25, HI/LO.
        #
        # There is no .set directive before the first instruction; the code up
        # to the first ".set reorder" is written with explicit delay slots, as
        # for the noreorder mode left active by the bn_div_words stub above.
        # The ".-4" and ".+8" branch targets depend on exact instruction
        # sizes; do not insert instructions around them.
    629 bn_div_words_internal:
    630 	move	$3,$0
    631 	bltz	$6,.L_bn_div_words_body	# top bit already set: no shifting
    632 	move	$25,$3		# delay slot: shift count = 0
    633 	sll	$6,1
    634 	bgtz	$6,.-4		# loop back to the sll until the top bit is set
    635 	addu	$25,1		# delay slot: count one shift
    636 
    637 	.set	reorder
    638 	negu	$13,$25		# used as a shift amount below
    639 	li	$14,-1
    640 	sll	$14,$13
    641 	and	$14,$4		# bits of $4 that would be shifted out
    642 	srl	$1,$5,$13	# bits of $5 that move into $4
    643 	.set	noreorder
    644 	bnezl	$14,.+8		# likely branch: the break runs only if taken
    645 	break	6		# signal overflow
    646 	.set	reorder
    647 	sll	$4,$25
    648 	sll	$5,$25
    649 	or	$4,$1
    650 .L_bn_div_words_body:
    651 	srl	$3,$6,4*4	# bits
    652 	sgeu	$1,$4,$6
    653 	.set	noreorder
    654 	bnezl	$1,.+8		# likely branch: the subu runs only if $4 >= $6
    655 	subu	$4,$6
    656 	.set	reorder
    657 
    658 	li	$8,-1
    659 	srl	$9,$4,4*4	# bits
    660 	srl	$8,4*4	# q=0xffffffff
    661 	beq	$3,$9,.L_bn_div_words_skip_div1
    662 	divu	$0,$4,$3
    663 	mflo	$8
    664 .L_bn_div_words_skip_div1:
    665 	multu	$6,$8
    666 	sll	$15,$4,4*4	# bits
    667 	srl	$1,$5,4*4	# bits
    668 	or	$15,$1
    669 	mflo	$12
    670 	mfhi	$13
    671 .L_bn_div_words_inner_loop1:
    672 	sltu	$14,$15,$12
    673 	seq	$24,$9,$13
    674 	sltu	$1,$9,$13
    675 	and	$14,$24
    676 	sltu	$2,$12,$6
    677 	or	$1,$14		# $1 != 0: estimate still too large
    678 	.set	noreorder
    679 	beqz	$1,.L_bn_div_words_inner_loop1_done
    680 	subu	$13,$2		# delay slot: executed on both paths
    681 	subu	$12,$6
    682 	b	.L_bn_div_words_inner_loop1
    683 	subu	$8,1		# delay slot: decrement the digit estimate
    684 	.set	reorder
    685 .L_bn_div_words_inner_loop1_done:
    686 
    687 	sll	$5,4*4	# bits
    688 	subu	$4,$15,$12
    689 	sll	$2,$8,4*4	# bits
    690 
    691 	li	$8,-1
    692 	srl	$9,$4,4*4	# bits
    693 	srl	$8,4*4	# q=0xffffffff
    694 	beq	$3,$9,.L_bn_div_words_skip_div2
    695 	divu	$0,$4,$3
    696 	mflo	$8
    697 .L_bn_div_words_skip_div2:
    698 	multu	$6,$8
    699 	sll	$15,$4,4*4	# bits
    700 	srl	$1,$5,4*4	# bits
    701 	or	$15,$1
    702 	mflo	$12
    703 	mfhi	$13
    704 .L_bn_div_words_inner_loop2:
    705 	sltu	$14,$15,$12
    706 	seq	$24,$9,$13
    707 	sltu	$1,$9,$13
    708 	and	$14,$24
    709 	sltu	$3,$12,$6
    710 	or	$1,$14		# $1 != 0: estimate still too large
    711 	.set	noreorder
    712 	beqz	$1,.L_bn_div_words_inner_loop2_done
    713 	subu	$13,$3		# delay slot: executed on both paths
    714 	subu	$12,$6
    715 	b	.L_bn_div_words_inner_loop2
    716 	subu	$8,1		# delay slot: decrement the digit estimate
    717 	.set	reorder
    718 .L_bn_div_words_inner_loop2_done:
    719 
    720 	subu	$4,$15,$12
    721 	or	$2,$8
    722 	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
    723 	srl	$6,$25		# restore $6
    724 
    725 	.set	noreorder
    726 	move	$5,$3
    727 	jr	$31
    728 	move	$4,$2		# delay slot: mirror the quotient into $4
    729 .end	bn_div_words_internal
    730 
    731 .align	5
    732 .globl	bn_mul_comba8
    733 .ent	bn_mul_comba8
        # bn_mul_comba8: 8x8-word multiplication, 16-word result.
        #
        # Registers on entry (as used by the code below):
        #   $4  pointer to the result r[0..15]
        #   $5  pointer to a[0..7]
        #   $6  pointer to b[0..7]
        #
        # Result words are produced column by column: every product a[i]*b[j]
        # with i+j == k is added into a three-word accumulator, the lowest
        # accumulator word is stored as r[k], and the accumulator roles rotate
        # (the c1/c2/c3 names in the per-multiply comments).
        #
        # Register map after the loads:
        #   a[0..3] = $12,$13,$14,$15   a[4],a[5],a[6] = $16,$18,$20   a[7] = $5
        #   b[0..3] = $8,$9,$10,$11     b[4],b[5],b[6] = $17,$19,$21   b[7] = $6
        #   accumulator words: $2, $3, $7
        #   $24/$25 = low/high product halves, $1 = carry scratch
        # $5 and $6 are overwritten by a[7] and b[7] once the other words have
        # been loaded.  $16-$21 are saved in a 24-byte stack frame on entry
        # and restored before returning.
        # Each multu is issued before the additions for the previous product
        # so that they overlap its latency; keep the order intact.
    734 bn_mul_comba8:
    735 	.set	noreorder
    736 	.frame	$29,6*4,$31
    737 	.mask	0x003f0000,-4
    738 	subu $29,6*4
    739 	sw	$21,5*4($29)
    740 	sw	$20,4*4($29)
    741 	sw	$19,3*4($29)
    742 	sw	$18,2*4($29)
    743 	sw	$17,1*4($29)
    744 	sw	$16,0*4($29)
    745 
    746 	.set	reorder
    747 	lw	$12,0($5)	# If compiled with -mips3 option on
    748 				# R5000 box assembler barks on this
    749 				# line with "should not have mult/div
    750 				# as last instruction in bb (R10K
    751 				# bug)" warning. If anybody out there
    752 				# has a clue about how to circumvent
    753 				# this do send me a note.
    754 				#		<appro@fy.chalmers.se>
    755 
    756 	lw	$8,0($6)
    757 	lw	$13,4($5)
    758 	lw	$14,2*4($5)
    759 	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
    760 	lw	$15,3*4($5)
    761 	lw	$9,4($6)
    762 	lw	$10,2*4($6)
    763 	lw	$11,3*4($6)
    764 	mflo	$2
    765 	mfhi	$3
    766 
    767 	lw	$16,4*4($5)
    768 	lw	$18,5*4($5)
    769 	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
    770 	lw	$20,6*4($5)
    771 	lw	$5,7*4($5)
    772 	lw	$17,4*4($6)
    773 	lw	$19,5*4($6)
    774 	mflo	$24
    775 	mfhi	$25
    776 	addu	$3,$24
    777 	sltu	$1,$3,$24
    778 	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
    779 	addu	$7,$25,$1
    780 	lw	$21,6*4($6)
    781 	lw	$6,7*4($6)
    782 	sw	$2,0($4)	# r[0]=c1;
    783 	mflo	$24
    784 	mfhi	$25
    785 	addu	$3,$24
    786 	sltu	$1,$3,$24
    787 	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
    788 	addu	$25,$1
    789 	addu	$7,$25
    790 	sltu	$2,$7,$25
    791 	sw	$3,4($4)	# r[1]=c2;
    792 
    793 	mflo	$24
    794 	mfhi	$25
    795 	addu	$7,$24
    796 	sltu	$1,$7,$24
    797 	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
    798 	addu	$25,$1
    799 	addu	$2,$25
    800 	mflo	$24
    801 	mfhi	$25
    802 	addu	$7,$24
    803 	sltu	$1,$7,$24
    804 	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
    805 	addu	$25,$1
    806 	addu	$2,$25
    807 	sltu	$3,$2,$25
    808 	mflo	$24
    809 	mfhi	$25
    810 	addu	$7,$24
    811 	sltu	$1,$7,$24
    812 	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
    813 	addu	$25,$1
    814 	addu	$2,$25
    815 	sltu	$1,$2,$25
    816 	addu	$3,$1
    817 	sw	$7,2*4($4)	# r[2]=c3;
    818 
    819 	mflo	$24
    820 	mfhi	$25
    821 	addu	$2,$24
    822 	sltu	$1,$2,$24
    823 	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
    824 	addu	$25,$1
    825 	addu	$3,$25
    826 	sltu	$7,$3,$25
    827 	mflo	$24
    828 	mfhi	$25
    829 	addu	$2,$24
    830 	sltu	$1,$2,$24
    831 	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
    832 	addu	$25,$1
    833 	addu	$3,$25
    834 	sltu	$1,$3,$25
    835 	addu	$7,$1
    836 	mflo	$24
    837 	mfhi	$25
    838 	addu	$2,$24
    839 	sltu	$1,$2,$24
    840 	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
    841 	addu	$25,$1
    842 	addu	$3,$25
    843 	sltu	$1,$3,$25
    844 	addu	$7,$1
    845 	mflo	$24
    846 	mfhi	$25
    847 	addu	$2,$24
    848 	sltu	$1,$2,$24
    849 	 multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
    850 	addu	$25,$1
    851 	addu	$3,$25
    852 	sltu	$1,$3,$25
    853 	addu	$7,$1
    854 	sw	$2,3*4($4)	# r[3]=c1;
    855 
    856 	mflo	$24
    857 	mfhi	$25
    858 	addu	$3,$24
    859 	sltu	$1,$3,$24
    860 	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
    861 	addu	$25,$1
    862 	addu	$7,$25
    863 	sltu	$2,$7,$25
    864 	mflo	$24
    865 	mfhi	$25
    866 	addu	$3,$24
    867 	sltu	$1,$3,$24
    868 	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
    869 	addu	$25,$1
    870 	addu	$7,$25
    871 	sltu	$1,$7,$25
    872 	addu	$2,$1
    873 	mflo	$24
    874 	mfhi	$25
    875 	addu	$3,$24
    876 	sltu	$1,$3,$24
    877 	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
    878 	addu	$25,$1
    879 	addu	$7,$25
    880 	sltu	$1,$7,$25
    881 	addu	$2,$1
    882 	mflo	$24
    883 	mfhi	$25
    884 	addu	$3,$24
    885 	sltu	$1,$3,$24
    886 	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
    887 	addu	$25,$1
    888 	addu	$7,$25
    889 	sltu	$1,$7,$25
    890 	addu	$2,$1
    891 	mflo	$24
    892 	mfhi	$25
    893 	addu	$3,$24
    894 	sltu	$1,$3,$24
    895 	 multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
    896 	addu	$25,$1
    897 	addu	$7,$25
    898 	sltu	$1,$7,$25
    899 	addu	$2,$1
    900 	sw	$3,4*4($4)	# r[4]=c2;
    901 
    902 	mflo	$24
    903 	mfhi	$25
    904 	addu	$7,$24
    905 	sltu	$1,$7,$24
    906 	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
    907 	addu	$25,$1
    908 	addu	$2,$25
    909 	sltu	$3,$2,$25
    910 	mflo	$24
    911 	mfhi	$25
    912 	addu	$7,$24
    913 	sltu	$1,$7,$24
    914 	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
    915 	addu	$25,$1
    916 	addu	$2,$25
    917 	sltu	$1,$2,$25
    918 	addu	$3,$1
    919 	mflo	$24
    920 	mfhi	$25
    921 	addu	$7,$24
    922 	sltu	$1,$7,$24
    923 	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
    924 	addu	$25,$1
    925 	addu	$2,$25
    926 	sltu	$1,$2,$25
    927 	addu	$3,$1
    928 	mflo	$24
    929 	mfhi	$25
    930 	addu	$7,$24
    931 	sltu	$1,$7,$24
    932 	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
    933 	addu	$25,$1
    934 	addu	$2,$25
    935 	sltu	$1,$2,$25
    936 	addu	$3,$1
    937 	mflo	$24
    938 	mfhi	$25
    939 	addu	$7,$24
    940 	sltu	$1,$7,$24
    941 	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
    942 	addu	$25,$1
    943 	addu	$2,$25
    944 	sltu	$1,$2,$25
    945 	addu	$3,$1
    946 	mflo	$24
    947 	mfhi	$25
    948 	addu	$7,$24
    949 	sltu	$1,$7,$24
    950 	 multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
    951 	addu	$25,$1
    952 	addu	$2,$25
    953 	sltu	$1,$2,$25
    954 	addu	$3,$1
    955 	sw	$7,5*4($4)	# r[5]=c3;
    956 
    957 	mflo	$24
    958 	mfhi	$25
    959 	addu	$2,$24
    960 	sltu	$1,$2,$24
    961 	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
    962 	addu	$25,$1
    963 	addu	$3,$25
    964 	sltu	$7,$3,$25
    965 	mflo	$24
    966 	mfhi	$25
    967 	addu	$2,$24
    968 	sltu	$1,$2,$24
    969 	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
    970 	addu	$25,$1
    971 	addu	$3,$25
    972 	sltu	$1,$3,$25
    973 	addu	$7,$1
    974 	mflo	$24
    975 	mfhi	$25
    976 	addu	$2,$24
    977 	sltu	$1,$2,$24
    978 	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
    979 	addu	$25,$1
    980 	addu	$3,$25
    981 	sltu	$1,$3,$25
    982 	addu	$7,$1
    983 	mflo	$24
    984 	mfhi	$25
    985 	addu	$2,$24
    986 	sltu	$1,$2,$24
    987 	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
    988 	addu	$25,$1
    989 	addu	$3,$25
    990 	sltu	$1,$3,$25
    991 	addu	$7,$1
    992 	mflo	$24
    993 	mfhi	$25
    994 	addu	$2,$24
    995 	sltu	$1,$2,$24
    996 	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
    997 	addu	$25,$1
    998 	addu	$3,$25
    999 	sltu	$1,$3,$25
   1000 	addu	$7,$1
   1001 	mflo	$24
   1002 	mfhi	$25
   1003 	addu	$2,$24
   1004 	sltu	$1,$2,$24
   1005 	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
   1006 	addu	$25,$1
   1007 	addu	$3,$25
   1008 	sltu	$1,$3,$25
   1009 	addu	$7,$1
   1010 	mflo	$24
   1011 	mfhi	$25
   1012 	addu	$2,$24
   1013 	sltu	$1,$2,$24
   1014 	 multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
   1015 	addu	$25,$1
   1016 	addu	$3,$25
   1017 	sltu	$1,$3,$25
   1018 	addu	$7,$1
   1019 	sw	$2,6*4($4)	# r[6]=c1;
   1020 
   1021 	mflo	$24
   1022 	mfhi	$25
   1023 	addu	$3,$24
   1024 	sltu	$1,$3,$24
   1025 	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
   1026 	addu	$25,$1
   1027 	addu	$7,$25
   1028 	sltu	$2,$7,$25
   1029 	mflo	$24
   1030 	mfhi	$25
   1031 	addu	$3,$24
   1032 	sltu	$1,$3,$24
   1033 	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
   1034 	addu	$25,$1
   1035 	addu	$7,$25
   1036 	sltu	$1,$7,$25
   1037 	addu	$2,$1
   1038 	mflo	$24
   1039 	mfhi	$25
   1040 	addu	$3,$24
   1041 	sltu	$1,$3,$24
   1042 	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
   1043 	addu	$25,$1
   1044 	addu	$7,$25
   1045 	sltu	$1,$7,$25
   1046 	addu	$2,$1
   1047 	mflo	$24
   1048 	mfhi	$25
   1049 	addu	$3,$24
   1050 	sltu	$1,$3,$24
   1051 	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
   1052 	addu	$25,$1
   1053 	addu	$7,$25
   1054 	sltu	$1,$7,$25
   1055 	addu	$2,$1
   1056 	mflo	$24
   1057 	mfhi	$25
   1058 	addu	$3,$24
   1059 	sltu	$1,$3,$24
   1060 	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
   1061 	addu	$25,$1
   1062 	addu	$7,$25
   1063 	sltu	$1,$7,$25
   1064 	addu	$2,$1
   1065 	mflo	$24
   1066 	mfhi	$25
   1067 	addu	$3,$24
   1068 	sltu	$1,$3,$24
   1069 	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
   1070 	addu	$25,$1
   1071 	addu	$7,$25
   1072 	sltu	$1,$7,$25
   1073 	addu	$2,$1
   1074 	mflo	$24
   1075 	mfhi	$25
   1076 	addu	$3,$24
   1077 	sltu	$1,$3,$24
   1078 	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
   1079 	addu	$25,$1
   1080 	addu	$7,$25
   1081 	sltu	$1,$7,$25
   1082 	addu	$2,$1
   1083 	mflo	$24
   1084 	mfhi	$25
   1085 	addu	$3,$24
   1086 	sltu	$1,$3,$24
   1087 	 multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
   1088 	addu	$25,$1
   1089 	addu	$7,$25
   1090 	sltu	$1,$7,$25
   1091 	addu	$2,$1
   1092 	sw	$3,7*4($4)	# r[7]=c2;
   1093 
   1094 	mflo	$24
   1095 	mfhi	$25
   1096 	addu	$7,$24
   1097 	sltu	$1,$7,$24
   1098 	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
   1099 	addu	$25,$1
   1100 	addu	$2,$25
   1101 	sltu	$3,$2,$25
   1102 	mflo	$24
   1103 	mfhi	$25
   1104 	addu	$7,$24
   1105 	sltu	$1,$7,$24
   1106 	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
   1107 	addu	$25,$1
   1108 	addu	$2,$25
   1109 	sltu	$1,$2,$25
   1110 	addu	$3,$1
   1111 	mflo	$24
   1112 	mfhi	$25
   1113 	addu	$7,$24
   1114 	sltu	$1,$7,$24
   1115 	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
   1116 	addu	$25,$1
   1117 	addu	$2,$25
   1118 	sltu	$1,$2,$25
   1119 	addu	$3,$1
   1120 	mflo	$24
   1121 	mfhi	$25
   1122 	addu	$7,$24
   1123 	sltu	$1,$7,$24
   1124 	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
   1125 	addu	$25,$1
   1126 	addu	$2,$25
   1127 	sltu	$1,$2,$25
   1128 	addu	$3,$1
   1129 	mflo	$24
   1130 	mfhi	$25
   1131 	addu	$7,$24
   1132 	sltu	$1,$7,$24
   1133 	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
   1134 	addu	$25,$1
   1135 	addu	$2,$25
   1136 	sltu	$1,$2,$25
   1137 	addu	$3,$1
   1138 	mflo	$24
   1139 	mfhi	$25
   1140 	addu	$7,$24
   1141 	sltu	$1,$7,$24
   1142 	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
   1143 	addu	$25,$1
   1144 	addu	$2,$25
   1145 	sltu	$1,$2,$25
   1146 	addu	$3,$1
   1147 	mflo	$24
   1148 	mfhi	$25
   1149 	addu	$7,$24
   1150 	sltu	$1,$7,$24
   1151 	 multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
   1152 	addu	$25,$1
   1153 	addu	$2,$25
   1154 	sltu	$1,$2,$25
   1155 	addu	$3,$1
   1156 	sw	$7,8*4($4)	# r[8]=c3;
   1157 
   1158 	mflo	$24
   1159 	mfhi	$25
   1160 	addu	$2,$24
   1161 	sltu	$1,$2,$24
   1162 	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
   1163 	addu	$25,$1
   1164 	addu	$3,$25
   1165 	sltu	$7,$3,$25
   1166 	mflo	$24
   1167 	mfhi	$25
   1168 	addu	$2,$24
   1169 	sltu	$1,$2,$24
   1170 	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
   1171 	addu	$25,$1
   1172 	addu	$3,$25
   1173 	sltu	$1,$3,$25
   1174 	addu	$7,$1
   1175 	mflo	$24
   1176 	mfhi	$25
   1177 	addu	$2,$24
   1178 	sltu	$1,$2,$24
   1179 	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
   1180 	addu	$25,$1
   1181 	addu	$3,$25
   1182 	sltu	$1,$3,$25
   1183 	addu	$7,$1
   1184 	mflo	$24
   1185 	mfhi	$25
   1186 	addu	$2,$24
   1187 	sltu	$1,$2,$24
   1188 	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
   1189 	addu	$25,$1
   1190 	addu	$3,$25
   1191 	sltu	$1,$3,$25
   1192 	addu	$7,$1
   1193 	mflo	$24
   1194 	mfhi	$25
   1195 	addu	$2,$24
   1196 	sltu	$1,$2,$24
   1197 	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
   1198 	addu	$25,$1
   1199 	addu	$3,$25
   1200 	sltu	$1,$3,$25
   1201 	addu	$7,$1
   1202 	mflo	$24
   1203 	mfhi	$25
   1204 	addu	$2,$24
   1205 	sltu	$1,$2,$24
   1206 	 multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
   1207 	addu	$25,$1
   1208 	addu	$3,$25
   1209 	sltu	$1,$3,$25
   1210 	addu	$7,$1
   1211 	sw	$2,9*4($4)	# r[9]=c1;
   1212 
   1213 	mflo	$24
   1214 	mfhi	$25
   1215 	addu	$3,$24
   1216 	sltu	$1,$3,$24
   1217 	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
   1218 	addu	$25,$1
   1219 	addu	$7,$25
   1220 	sltu	$2,$7,$25
   1221 	mflo	$24
   1222 	mfhi	$25
   1223 	addu	$3,$24
   1224 	sltu	$1,$3,$24
   1225 	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
   1226 	addu	$25,$1
   1227 	addu	$7,$25
   1228 	sltu	$1,$7,$25
   1229 	addu	$2,$1
   1230 	mflo	$24
   1231 	mfhi	$25
   1232 	addu	$3,$24
   1233 	sltu	$1,$3,$24
   1234 	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
   1235 	addu	$25,$1
   1236 	addu	$7,$25
   1237 	sltu	$1,$7,$25
   1238 	addu	$2,$1
   1239 	mflo	$24
   1240 	mfhi	$25
   1241 	addu	$3,$24
   1242 	sltu	$1,$3,$24
   1243 	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
   1244 	addu	$25,$1
   1245 	addu	$7,$25
   1246 	sltu	$1,$7,$25
   1247 	addu	$2,$1
   1248 	mflo	$24
   1249 	mfhi	$25
   1250 	addu	$3,$24
   1251 	sltu	$1,$3,$24
   1252 	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
   1253 	addu	$25,$1
   1254 	addu	$7,$25
   1255 	sltu	$1,$7,$25
   1256 	addu	$2,$1
   1257 	sw	$3,10*4($4)	# r[10]=c2;
   1258 
   1259 	mflo	$24
   1260 	mfhi	$25
   1261 	addu	$7,$24
   1262 	sltu	$1,$7,$24
   1263 	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
   1264 	addu	$25,$1
   1265 	addu	$2,$25
   1266 	sltu	$3,$2,$25
   1267 	mflo	$24
   1268 	mfhi	$25
   1269 	addu	$7,$24
   1270 	sltu	$1,$7,$24
   1271 	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
   1272 	addu	$25,$1
   1273 	addu	$2,$25
   1274 	sltu	$1,$2,$25
   1275 	addu	$3,$1
   1276 	mflo	$24
   1277 	mfhi	$25
   1278 	addu	$7,$24
   1279 	sltu	$1,$7,$24
   1280 	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
   1281 	addu	$25,$1
   1282 	addu	$2,$25
   1283 	sltu	$1,$2,$25
   1284 	addu	$3,$1
   1285 	mflo	$24
   1286 	mfhi	$25
   1287 	addu	$7,$24
   1288 	sltu	$1,$7,$24
   1289 	 multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
   1290 	addu	$25,$1
   1291 	addu	$2,$25
   1292 	sltu	$1,$2,$25
   1293 	addu	$3,$1
   1294 	sw	$7,11*4($4)	# r[11]=c3;
   1295 
   1296 	mflo	$24
   1297 	mfhi	$25
   1298 	addu	$2,$24
   1299 	sltu	$1,$2,$24
   1300 	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
   1301 	addu	$25,$1
   1302 	addu	$3,$25
   1303 	sltu	$7,$3,$25
   1304 	mflo	$24
   1305 	mfhi	$25
   1306 	addu	$2,$24
   1307 	sltu	$1,$2,$24
   1308 	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
   1309 	addu	$25,$1
   1310 	addu	$3,$25
   1311 	sltu	$1,$3,$25
   1312 	addu	$7,$1
   1313 	mflo	$24
   1314 	mfhi	$25
   1315 	addu	$2,$24
   1316 	sltu	$1,$2,$24
   1317 	 multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
   1318 	addu	$25,$1
   1319 	addu	$3,$25
   1320 	sltu	$1,$3,$25
   1321 	addu	$7,$1
   1322 	sw	$2,12*4($4)	# r[12]=c1;
   1323 
   1324 	mflo	$24
   1325 	mfhi	$25
   1326 	addu	$3,$24
   1327 	sltu	$1,$3,$24
   1328 	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
   1329 	addu	$25,$1
   1330 	addu	$7,$25
   1331 	sltu	$2,$7,$25
   1332 	mflo	$24
   1333 	mfhi	$25
   1334 	addu	$3,$24
   1335 	sltu	$1,$3,$24
   1336 	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
   1337 	addu	$25,$1
   1338 	addu	$7,$25
   1339 	sltu	$1,$7,$25
   1340 	addu	$2,$1
   1341 	sw	$3,13*4($4)	# r[13]=c2;
   1342 
   1343 	mflo	$24
   1344 	mfhi	$25
   1345 	addu	$7,$24
   1346 	sltu	$1,$7,$24
   1347 	addu	$25,$1
   1348 	addu	$2,$25
   1349 	sw	$7,14*4($4)	# r[14]=c3;
   1350 	sw	$2,15*4($4)	# r[15]=c1;
   1351 
   1352 	.set	noreorder
   1353 	lw	$21,5*4($29)
   1354 	lw	$20,4*4($29)
   1355 	lw	$19,3*4($29)
   1356 	lw	$18,2*4($29)
   1357 	lw	$17,1*4($29)
   1358 	lw	$16,0*4($29)
   1359 	jr	$31
   1360 	addu $29,6*4		# delay slot: release the stack frame
   1361 .end	bn_mul_comba8
   1362 
   1363 .align	5
   1364 .globl	bn_mul_comba4
   1365 .ent	bn_mul_comba4
   1366 bn_mul_comba4:
   1367 	.set	reorder
   1368 	lw	$12,0($5)
   1369 	lw	$8,0($6)
   1370 	lw	$13,4($5)
   1371 	lw	$14,2*4($5)
   1372 	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
   1373 	lw	$15,3*4($5)
   1374 	lw	$9,4($6)
   1375 	lw	$10,2*4($6)
   1376 	lw	$11,3*4($6)
   1377 	mflo	$2
   1378 	mfhi	$3
   1379 	sw	$2,0($4)
   1380 
   1381 	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
   1382 	mflo	$24
   1383 	mfhi	$25
   1384 	addu	$3,$24
   1385 	sltu	$1,$3,$24
   1386 	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
   1387 	addu	$7,$25,$1
   1388 	mflo	$24
   1389 	mfhi	$25
   1390 	addu	$3,$24
   1391 	sltu	$1,$3,$24
   1392 	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
   1393 	addu	$25,$1
   1394 	addu	$7,$25
   1395 	sltu	$2,$7,$25
   1396 	sw	$3,4($4)
   1397 
   1398 	mflo	$24
   1399 	mfhi	$25
   1400 	addu	$7,$24
   1401 	sltu	$1,$7,$24
   1402 	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
   1403 	addu	$25,$1
   1404 	addu	$2,$25
   1405 	mflo	$24
   1406 	mfhi	$25
   1407 	addu	$7,$24
   1408 	sltu	$1,$7,$24
   1409 	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
   1410 	addu	$25,$1
   1411 	addu	$2,$25
   1412 	sltu	$3,$2,$25
   1413 	mflo	$24
   1414 	mfhi	$25
   1415 	addu	$7,$24
   1416 	sltu	$1,$7,$24
   1417 	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
   1418 	addu	$25,$1
   1419 	addu	$2,$25
   1420 	sltu	$1,$2,$25
   1421 	addu	$3,$1
   1422 	sw	$7,2*4($4)
   1423 
   1424 	mflo	$24
   1425 	mfhi	$25
   1426 	addu	$2,$24
   1427 	sltu	$1,$2,$24
   1428 	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
   1429 	addu	$25,$1
   1430 	addu	$3,$25
   1431 	sltu	$7,$3,$25
   1432 	mflo	$24
   1433 	mfhi	$25
   1434 	addu	$2,$24
   1435 	sltu	$1,$2,$24
   1436 	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
   1437 	addu	$25,$1
   1438 	addu	$3,$25
   1439 	sltu	$1,$3,$25
   1440 	addu	$7,$1
   1441 	mflo	$24
   1442 	mfhi	$25
   1443 	addu	$2,$24
   1444 	sltu	$1,$2,$24
   1445 	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
   1446 	addu	$25,$1
   1447 	addu	$3,$25
   1448 	sltu	$1,$3,$25
   1449 	addu	$7,$1
   1450 	mflo	$24
   1451 	mfhi	$25
   1452 	addu	$2,$24
   1453 	sltu	$1,$2,$24
   1454 	 multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
   1455 	addu	$25,$1
   1456 	addu	$3,$25
   1457 	sltu	$1,$3,$25
   1458 	addu	$7,$1
   1459 	sw	$2,3*4($4)
   1460 
   1461 	mflo	$24
   1462 	mfhi	$25
   1463 	addu	$3,$24
   1464 	sltu	$1,$3,$24
   1465 	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
   1466 	addu	$25,$1
   1467 	addu	$7,$25
   1468 	sltu	$2,$7,$25
   1469 	mflo	$24
   1470 	mfhi	$25
   1471 	addu	$3,$24
   1472 	sltu	$1,$3,$24
   1473 	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
   1474 	addu	$25,$1
   1475 	addu	$7,$25
   1476 	sltu	$1,$7,$25
   1477 	addu	$2,$1
   1478 	mflo	$24
   1479 	mfhi	$25
   1480 	addu	$3,$24
   1481 	sltu	$1,$3,$24
   1482 	 multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
   1483 	addu	$25,$1
   1484 	addu	$7,$25
   1485 	sltu	$1,$7,$25
   1486 	addu	$2,$1
   1487 	sw	$3,4*4($4)
   1488 
   1489 	mflo	$24
   1490 	mfhi	$25
   1491 	addu	$7,$24
   1492 	sltu	$1,$7,$24
   1493 	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
   1494 	addu	$25,$1
   1495 	addu	$2,$25
   1496 	sltu	$3,$2,$25
   1497 	mflo	$24
   1498 	mfhi	$25
   1499 	addu	$7,$24
   1500 	sltu	$1,$7,$24
   1501 	 multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
   1502 	addu	$25,$1
   1503 	addu	$2,$25
   1504 	sltu	$1,$2,$25
   1505 	addu	$3,$1
   1506 	sw	$7,5*4($4)
   1507 
   1508 	mflo	$24
   1509 	mfhi	$25
   1510 	addu	$2,$24
   1511 	sltu	$1,$2,$24
   1512 	addu	$25,$1
   1513 	addu	$3,$25
   1514 	sw	$2,6*4($4)
   1515 	sw	$3,7*4($4)
   1516 
   1517 	.set	noreorder
   1518 	jr	$31
   1519 	nop
   1520 .end	bn_mul_comba4
   1521 
   1522 .align	5
   1523 .globl	bn_sqr_comba8
   1524 .ent	bn_sqr_comba8
   1525 bn_sqr_comba8:
   1526 	.set	reorder
   1527 	lw	$12,0($5)
   1528 	lw	$13,4($5)
   1529 	lw	$14,2*4($5)
   1530 	lw	$15,3*4($5)
   1531 
   1532 	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
   1533 	lw	$8,4*4($5)
   1534 	lw	$9,5*4($5)
   1535 	lw	$10,6*4($5)
   1536 	lw	$11,7*4($5)
   1537 	mflo	$2
   1538 	mfhi	$3
   1539 	sw	$2,0($4)
   1540 
   1541 	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
   1542 	mflo	$24
   1543 	mfhi	$25
   1544 	slt	$2,$25,$0
   1545 	sll	$25,1
   1546 	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
   1547 	slt	$6,$24,$0
   1548 	addu	$25,$6
   1549 	sll	$24,1
   1550 	addu	$3,$24
   1551 	sltu	$1,$3,$24
   1552 	addu	$7,$25,$1
   1553 	sw	$3,4($4)
   1554 
   1555 	mflo	$24
   1556 	mfhi	$25
   1557 	slt	$3,$25,$0
   1558 	sll	$25,1
   1559 	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
   1560 	slt	$6,$24,$0
   1561 	addu	$25,$6
   1562 	sll	$24,1
   1563 	addu	$7,$24
   1564 	sltu	$1,$7,$24
   1565 	addu	$25,$1
   1566 	addu	$2,$25
   1567 	sltu	$1,$2,$25
   1568 	addu	$3,$1
   1569 	mflo	$24
   1570 	mfhi	$25
   1571 	addu	$7,$24
   1572 	sltu	$1,$7,$24
   1573 	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
   1574 	addu	$25,$1
   1575 	addu	$2,$25
   1576 	sltu	$1,$2,$25
   1577 	addu	$3,$1
   1578 	sw	$7,2*4($4)
   1579 
   1580 	mflo	$24
   1581 	mfhi	$25
   1582 	slt	$7,$25,$0
   1583 	sll	$25,1
   1584 	multu	$13,$14		# mul_add_c2(a[1],b[2],c1,c2,c3);
   1585 	slt	$6,$24,$0
   1586 	addu	$25,$6
   1587 	sll	$24,1
   1588 	addu	$2,$24
   1589 	sltu	$1,$2,$24
   1590 	addu	$25,$1
   1591 	addu	$3,$25
   1592 	sltu	$1,$3,$25
   1593 	addu	$7,$1
   1594 	mflo	$24
   1595 	mfhi	$25
   1596 	slt	$1,$25,$0
   1597 	addu	$7,$1
   1598 	 multu	$8,$12		# mul_add_c2(a[4],b[0],c2,c3,c1);
   1599 	sll	$25,1
   1600 	slt	$6,$24,$0
   1601 	addu	$25,$6
   1602 	sll	$24,1
   1603 	addu	$2,$24
   1604 	sltu	$1,$2,$24
   1605 	addu	$25,$1
   1606 	addu	$3,$25
   1607 	sltu	$1,$3,$25
   1608 	addu	$7,$1
   1609 	sw	$2,3*4($4)
   1610 
   1611 	mflo	$24
   1612 	mfhi	$25
   1613 	slt	$2,$25,$0
   1614 	sll	$25,1
   1615 	multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
   1616 	slt	$6,$24,$0
   1617 	addu	$25,$6
   1618 	sll	$24,1
   1619 	addu	$3,$24
   1620 	sltu	$1,$3,$24
   1621 	addu	$25,$1
   1622 	addu	$7,$25
   1623 	sltu	$1,$7,$25
   1624 	addu	$2,$1
   1625 	mflo	$24
   1626 	mfhi	$25
   1627 	slt	$1,$25,$0
   1628 	addu	$2,$1
   1629 	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
   1630 	sll	$25,1
   1631 	slt	$6,$24,$0
   1632 	addu	$25,$6
   1633 	sll	$24,1
   1634 	addu	$3,$24
   1635 	sltu	$1,$3,$24
   1636 	addu	$25,$1
   1637 	addu	$7,$25
   1638 	sltu	$1,$7,$25
   1639 	addu	$2,$1
   1640 	mflo	$24
   1641 	mfhi	$25
   1642 	addu	$3,$24
   1643 	sltu	$1,$3,$24
   1644 	 multu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
   1645 	addu	$25,$1
   1646 	addu	$7,$25
   1647 	sltu	$1,$7,$25
   1648 	addu	$2,$1
   1649 	sw	$3,4*4($4)
   1650 
   1651 	mflo	$24
   1652 	mfhi	$25
   1653 	slt	$3,$25,$0
   1654 	sll	$25,1
   1655 	multu	$13,$8		# mul_add_c2(a[1],b[4],c3,c1,c2);
   1656 	slt	$6,$24,$0
   1657 	addu	$25,$6
   1658 	sll	$24,1
   1659 	addu	$7,$24
   1660 	sltu	$1,$7,$24
   1661 	addu	$25,$1
   1662 	addu	$2,$25
   1663 	sltu	$1,$2,$25
   1664 	addu	$3,$1
   1665 	mflo	$24
   1666 	mfhi	$25
   1667 	slt	$1,$25,$0
   1668 	addu	$3,$1
   1669 	multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
   1670 	sll	$25,1
   1671 	slt	$6,$24,$0
   1672 	addu	$25,$6
   1673 	sll	$24,1
   1674 	addu	$7,$24
   1675 	sltu	$1,$7,$24
   1676 	addu	$25,$1
   1677 	addu	$2,$25
   1678 	sltu	$1,$2,$25
   1679 	addu	$3,$1
   1680 	mflo	$24
   1681 	mfhi	$25
   1682 	slt	$1,$25,$0
   1683 	 multu	$10,$12		# mul_add_c2(a[6],b[0],c1,c2,c3);
   1684 	addu	$3,$1
   1685 	sll	$25,1
   1686 	slt	$6,$24,$0
   1687 	addu	$25,$6
   1688 	sll	$24,1
   1689 	addu	$7,$24
   1690 	sltu	$1,$7,$24
   1691 	addu	$25,$1
   1692 	addu	$2,$25
   1693 	sltu	$1,$2,$25
   1694 	addu	$3,$1
   1695 	sw	$7,5*4($4)
   1696 
   1697 	mflo	$24
   1698 	mfhi	$25
   1699 	slt	$7,$25,$0
   1700 	sll	$25,1
   1701 	multu	$9,$13		# mul_add_c2(a[5],b[1],c1,c2,c3);
   1702 	slt	$6,$24,$0
   1703 	addu	$25,$6
   1704 	sll	$24,1
   1705 	addu	$2,$24
   1706 	sltu	$1,$2,$24
   1707 	addu	$25,$1
   1708 	addu	$3,$25
   1709 	sltu	$1,$3,$25
   1710 	addu	$7,$1
   1711 	mflo	$24
   1712 	mfhi	$25
   1713 	slt	$1,$25,$0
   1714 	addu	$7,$1
   1715 	multu	$8,$14		# mul_add_c2(a[4],b[2],c1,c2,c3);
   1716 	sll	$25,1
   1717 	slt	$6,$24,$0
   1718 	addu	$25,$6
   1719 	sll	$24,1
   1720 	addu	$2,$24
   1721 	sltu	$1,$2,$24
   1722 	addu	$25,$1
   1723 	addu	$3,$25
   1724 	sltu	$1,$3,$25
   1725 	addu	$7,$1
   1726 	mflo	$24
   1727 	mfhi	$25
   1728 	slt	$1,$25,$0
   1729 	addu	$7,$1
   1730 	multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
   1731 	sll	$25,1
   1732 	slt	$6,$24,$0
   1733 	addu	$25,$6
   1734 	sll	$24,1
   1735 	addu	$2,$24
   1736 	sltu	$1,$2,$24
   1737 	addu	$25,$1
   1738 	addu	$3,$25
   1739 	sltu	$1,$3,$25
   1740 	addu	$7,$1
   1741 	mflo	$24
   1742 	mfhi	$25
   1743 	addu	$2,$24
   1744 	sltu	$1,$2,$24
   1745 	 multu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
   1746 	addu	$25,$1
   1747 	addu	$3,$25
   1748 	sltu	$1,$3,$25
   1749 	addu	$7,$1
   1750 	sw	$2,6*4($4)
   1751 
   1752 	mflo	$24
   1753 	mfhi	$25
   1754 	slt	$2,$25,$0
   1755 	sll	$25,1
   1756 	multu	$13,$10		# mul_add_c2(a[1],b[6],c2,c3,c1);
   1757 	slt	$6,$24,$0
   1758 	addu	$25,$6
   1759 	sll	$24,1
   1760 	addu	$3,$24
   1761 	sltu	$1,$3,$24
   1762 	addu	$25,$1
   1763 	addu	$7,$25
   1764 	sltu	$1,$7,$25
   1765 	addu	$2,$1
   1766 	mflo	$24
   1767 	mfhi	$25
   1768 	slt	$1,$25,$0
   1769 	addu	$2,$1
   1770 	multu	$14,$9		# mul_add_c2(a[2],b[5],c2,c3,c1);
   1771 	sll	$25,1
   1772 	slt	$6,$24,$0
   1773 	addu	$25,$6
   1774 	sll	$24,1
   1775 	addu	$3,$24
   1776 	sltu	$1,$3,$24
   1777 	addu	$25,$1
   1778 	addu	$7,$25
   1779 	sltu	$1,$7,$25
   1780 	addu	$2,$1
   1781 	mflo	$24
   1782 	mfhi	$25
   1783 	slt	$1,$25,$0
   1784 	addu	$2,$1
   1785 	multu	$15,$8		# mul_add_c2(a[3],b[4],c2,c3,c1);
   1786 	sll	$25,1
   1787 	slt	$6,$24,$0
   1788 	addu	$25,$6
   1789 	sll	$24,1
   1790 	addu	$3,$24
   1791 	sltu	$1,$3,$24
   1792 	addu	$25,$1
   1793 	addu	$7,$25
   1794 	sltu	$1,$7,$25
   1795 	addu	$2,$1
   1796 	mflo	$24
   1797 	mfhi	$25
   1798 	slt	$1,$25,$0
   1799 	addu	$2,$1
   1800 	 multu	$11,$13		# mul_add_c2(a[7],b[1],c3,c1,c2);
   1801 	sll	$25,1
   1802 	slt	$6,$24,$0
   1803 	addu	$25,$6
   1804 	sll	$24,1
   1805 	addu	$3,$24
   1806 	sltu	$1,$3,$24
   1807 	addu	$25,$1
   1808 	addu	$7,$25
   1809 	sltu	$1,$7,$25
   1810 	addu	$2,$1
   1811 	sw	$3,7*4($4)
   1812 
   1813 	mflo	$24
   1814 	mfhi	$25
   1815 	slt	$3,$25,$0
   1816 	sll	$25,1
   1817 	multu	$10,$14		# mul_add_c2(a[6],b[2],c3,c1,c2);
   1818 	slt	$6,$24,$0
   1819 	addu	$25,$6
   1820 	sll	$24,1
   1821 	addu	$7,$24
   1822 	sltu	$1,$7,$24
   1823 	addu	$25,$1
   1824 	addu	$2,$25
   1825 	sltu	$1,$2,$25
   1826 	addu	$3,$1
   1827 	mflo	$24
   1828 	mfhi	$25
   1829 	slt	$1,$25,$0
   1830 	addu	$3,$1
   1831 	multu	$9,$15		# mul_add_c2(a[5],b[3],c3,c1,c2);
   1832 	sll	$25,1
   1833 	slt	$6,$24,$0
   1834 	addu	$25,$6
   1835 	sll	$24,1
   1836 	addu	$7,$24
   1837 	sltu	$1,$7,$24
   1838 	addu	$25,$1
   1839 	addu	$2,$25
   1840 	sltu	$1,$2,$25
   1841 	addu	$3,$1
   1842 	mflo	$24
   1843 	mfhi	$25
   1844 	slt	$1,$25,$0
   1845 	addu	$3,$1
   1846 	multu	$8,$8		# mul_add_c(a[4],b[4],c3,c1,c2);
   1847 	sll	$25,1
   1848 	slt	$6,$24,$0
   1849 	addu	$25,$6
   1850 	sll	$24,1
   1851 	addu	$7,$24
   1852 	sltu	$1,$7,$24
   1853 	addu	$25,$1
   1854 	addu	$2,$25
   1855 	sltu	$1,$2,$25
   1856 	addu	$3,$1
   1857 	mflo	$24
   1858 	mfhi	$25
   1859 	addu	$7,$24
   1860 	sltu	$1,$7,$24
   1861 	 multu	$14,$11		# mul_add_c2(a[2],b[7],c1,c2,c3);
   1862 	addu	$25,$1
   1863 	addu	$2,$25
   1864 	sltu	$1,$2,$25
   1865 	addu	$3,$1
   1866 	sw	$7,8*4($4)
   1867 
   1868 	mflo	$24
   1869 	mfhi	$25
   1870 	slt	$7,$25,$0
   1871 	sll	$25,1
   1872 	multu	$15,$10		# mul_add_c2(a[3],b[6],c1,c2,c3);
   1873 	slt	$6,$24,$0
   1874 	addu	$25,$6
   1875 	sll	$24,1
   1876 	addu	$2,$24
   1877 	sltu	$1,$2,$24
   1878 	addu	$25,$1
   1879 	addu	$3,$25
   1880 	sltu	$1,$3,$25
   1881 	addu	$7,$1
   1882 	mflo	$24
   1883 	mfhi	$25
   1884 	slt	$1,$25,$0
   1885 	addu	$7,$1
   1886 	multu	$8,$9		# mul_add_c2(a[4],b[5],c1,c2,c3);
   1887 	sll	$25,1
   1888 	slt	$6,$24,$0
   1889 	addu	$25,$6
   1890 	sll	$24,1
   1891 	addu	$2,$24
   1892 	sltu	$1,$2,$24
   1893 	addu	$25,$1
   1894 	addu	$3,$25
   1895 	sltu	$1,$3,$25
   1896 	addu	$7,$1
   1897 	mflo	$24
   1898 	mfhi	$25
   1899 	slt	$1,$25,$0
   1900 	addu	$7,$1
   1901 	 multu	$11,$15		# mul_add_c2(a[7],b[3],c2,c3,c1);
   1902 	sll	$25,1
   1903 	slt	$6,$24,$0
   1904 	addu	$25,$6
   1905 	sll	$24,1
   1906 	addu	$2,$24
   1907 	sltu	$1,$2,$24
   1908 	addu	$25,$1
   1909 	addu	$3,$25
   1910 	sltu	$1,$3,$25
   1911 	addu	$7,$1
   1912 	sw	$2,9*4($4)
   1913 
   1914 	mflo	$24
   1915 	mfhi	$25
   1916 	slt	$2,$25,$0
   1917 	sll	$25,1
   1918 	multu	$10,$8		# mul_add_c2(a[6],b[4],c2,c3,c1);
   1919 	slt	$6,$24,$0
   1920 	addu	$25,$6
   1921 	sll	$24,1
   1922 	addu	$3,$24
   1923 	sltu	$1,$3,$24
   1924 	addu	$25,$1
   1925 	addu	$7,$25
   1926 	sltu	$1,$7,$25
   1927 	addu	$2,$1
   1928 	mflo	$24
   1929 	mfhi	$25
   1930 	slt	$1,$25,$0
   1931 	addu	$2,$1
   1932 	multu	$9,$9		# mul_add_c(a[5],b[5],c2,c3,c1);
   1933 	sll	$25,1
   1934 	slt	$6,$24,$0
   1935 	addu	$25,$6
   1936 	sll	$24,1
   1937 	addu	$3,$24
   1938 	sltu	$1,$3,$24
   1939 	addu	$25,$1
   1940 	addu	$7,$25
   1941 	sltu	$1,$7,$25
   1942 	addu	$2,$1
   1943 	mflo	$24
   1944 	mfhi	$25
   1945 	addu	$3,$24
   1946 	sltu	$1,$3,$24
   1947 	 multu	$8,$11		# mul_add_c2(a[4],b[7],c3,c1,c2);
   1948 	addu	$25,$1
   1949 	addu	$7,$25
   1950 	sltu	$1,$7,$25
   1951 	addu	$2,$1
   1952 	sw	$3,10*4($4)
   1953 
   1954 	mflo	$24
   1955 	mfhi	$25
   1956 	slt	$3,$25,$0
   1957 	sll	$25,1
   1958 	multu	$9,$10		# mul_add_c2(a[5],b[6],c3,c1,c2);
   1959 	slt	$6,$24,$0
   1960 	addu	$25,$6
   1961 	sll	$24,1
   1962 	addu	$7,$24
   1963 	sltu	$1,$7,$24
   1964 	addu	$25,$1
   1965 	addu	$2,$25
   1966 	sltu	$1,$2,$25
   1967 	addu	$3,$1
   1968 	mflo	$24
   1969 	mfhi	$25
   1970 	slt	$1,$25,$0
   1971 	addu	$3,$1
   1972 	 multu	$11,$9		# mul_add_c2(a[7],b[5],c1,c2,c3);
   1973 	sll	$25,1
   1974 	slt	$6,$24,$0
   1975 	addu	$25,$6
   1976 	sll	$24,1
   1977 	addu	$7,$24
   1978 	sltu	$1,$7,$24
   1979 	addu	$25,$1
   1980 	addu	$2,$25
   1981 	sltu	$1,$2,$25
   1982 	addu	$3,$1
   1983 	sw	$7,11*4($4)
   1984 
   1985 	mflo	$24
   1986 	mfhi	$25
   1987 	slt	$7,$25,$0
   1988 	sll	$25,1
   1989 	multu	$10,$10		# mul_add_c(a[6],b[6],c1,c2,c3);
   1990 	slt	$6,$24,$0
   1991 	addu	$25,$6
   1992 	sll	$24,1
   1993 	addu	$2,$24
   1994 	sltu	$1,$2,$24
   1995 	addu	$25,$1
   1996 	addu	$3,$25
   1997 	sltu	$1,$3,$25
   1998 	addu	$7,$1
   1999 	mflo	$24
   2000 	mfhi	$25
   2001 	addu	$2,$24
   2002 	sltu	$1,$2,$24
   2003 	 multu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
   2004 	addu	$25,$1
   2005 	addu	$3,$25
   2006 	sltu	$1,$3,$25
   2007 	addu	$7,$1
   2008 	sw	$2,12*4($4)
   2009 
   2010 	mflo	$24
   2011 	mfhi	$25
   2012 	slt	$2,$25,$0
   2013 	sll	$25,1
   2014 	 multu	$11,$11		# mul_add_c(a[7],b[7],c3,c1,c2);
   2015 	slt	$6,$24,$0
   2016 	addu	$25,$6
   2017 	sll	$24,1
   2018 	addu	$3,$24
   2019 	sltu	$1,$3,$24
   2020 	addu	$25,$1
   2021 	addu	$7,$25
   2022 	sltu	$1,$7,$25
   2023 	addu	$2,$1
   2024 	sw	$3,13*4($4)
   2025 
   2026 	mflo	$24
   2027 	mfhi	$25
   2028 	addu	$7,$24
   2029 	sltu	$1,$7,$24
   2030 	addu	$25,$1
   2031 	addu	$2,$25
   2032 	sw	$7,14*4($4)
   2033 	sw	$2,15*4($4)
   2034 
   2035 	.set	noreorder
   2036 	jr	$31
   2037 	nop
   2038 .end	bn_sqr_comba8
   2039 
   2040 .align	5
   2041 .globl	bn_sqr_comba4
   2042 .ent	bn_sqr_comba4
   2043 bn_sqr_comba4:
   2044 	.set	reorder
   2045 	lw	$12,0($5)
   2046 	lw	$13,4($5)
   2047 	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
   2048 	lw	$14,2*4($5)
   2049 	lw	$15,3*4($5)
   2050 	mflo	$2
   2051 	mfhi	$3
   2052 	sw	$2,0($4)
   2053 
   2054 	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
   2055 	mflo	$24
   2056 	mfhi	$25
   2057 	slt	$2,$25,$0
   2058 	sll	$25,1
   2059 	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
   2060 	slt	$6,$24,$0
   2061 	addu	$25,$6
   2062 	sll	$24,1
   2063 	addu	$3,$24
   2064 	sltu	$1,$3,$24
   2065 	addu	$7,$25,$1
   2066 	sw	$3,4($4)
   2067 
   2068 	mflo	$24
   2069 	mfhi	$25
   2070 	slt	$3,$25,$0
   2071 	sll	$25,1
   2072 	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
   2073 	slt	$6,$24,$0
   2074 	addu	$25,$6
   2075 	sll	$24,1
   2076 	addu	$7,$24
   2077 	sltu	$1,$7,$24
   2078 	addu	$25,$1
   2079 	addu	$2,$25
   2080 	sltu	$1,$2,$25
   2081 	addu	$3,$1
   2082 	mflo	$24
   2083 	mfhi	$25
   2084 	addu	$7,$24
   2085 	sltu	$1,$7,$24
   2086 	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
   2087 	addu	$25,$1
   2088 	addu	$2,$25
   2089 	sltu	$1,$2,$25
   2090 	addu	$3,$1
   2091 	sw	$7,2*4($4)
   2092 
   2093 	mflo	$24
   2094 	mfhi	$25
   2095 	slt	$7,$25,$0
   2096 	sll	$25,1
   2097 	multu	$13,$14		# mul_add_c(a2[1],b[2],c1,c2,c3);
   2098 	slt	$6,$24,$0
   2099 	addu	$25,$6
   2100 	sll	$24,1
   2101 	addu	$2,$24
   2102 	sltu	$1,$2,$24
   2103 	addu	$25,$1
   2104 	addu	$3,$25
   2105 	sltu	$1,$3,$25
   2106 	addu	$7,$1
   2107 	mflo	$24
   2108 	mfhi	$25
   2109 	slt	$1,$25,$0
   2110 	addu	$7,$1
   2111 	 multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
   2112 	sll	$25,1
   2113 	slt	$6,$24,$0
   2114 	addu	$25,$6
   2115 	sll	$24,1
   2116 	addu	$2,$24
   2117 	sltu	$1,$2,$24
   2118 	addu	$25,$1
   2119 	addu	$3,$25
   2120 	sltu	$1,$3,$25
   2121 	addu	$7,$1
   2122 	sw	$2,3*4($4)
   2123 
   2124 	mflo	$24
   2125 	mfhi	$25
   2126 	slt	$2,$25,$0
   2127 	sll	$25,1
   2128 	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
   2129 	slt	$6,$24,$0
   2130 	addu	$25,$6
   2131 	sll	$24,1
   2132 	addu	$3,$24
   2133 	sltu	$1,$3,$24
   2134 	addu	$25,$1
   2135 	addu	$7,$25
   2136 	sltu	$1,$7,$25
   2137 	addu	$2,$1
   2138 	mflo	$24
   2139 	mfhi	$25
   2140 	addu	$3,$24
   2141 	sltu	$1,$3,$24
   2142 	 multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
   2143 	addu	$25,$1
   2144 	addu	$7,$25
   2145 	sltu	$1,$7,$25
   2146 	addu	$2,$1
   2147 	sw	$3,4*4($4)
   2148 
   2149 	mflo	$24
   2150 	mfhi	$25
   2151 	slt	$3,$25,$0
   2152 	sll	$25,1
   2153 	 multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
   2154 	slt	$6,$24,$0
   2155 	addu	$25,$6
   2156 	sll	$24,1
   2157 	addu	$7,$24
   2158 	sltu	$1,$7,$24
   2159 	addu	$25,$1
   2160 	addu	$2,$25
   2161 	sltu	$1,$2,$25
   2162 	addu	$3,$1
   2163 	sw	$7,5*4($4)
   2164 
   2165 	mflo	$24
   2166 	mfhi	$25
   2167 	addu	$2,$24
   2168 	sltu	$1,$2,$24
   2169 	addu	$25,$1
   2170 	addu	$3,$25
   2171 	sw	$2,6*4($4)
   2172 	sw	$3,7*4($4)
   2173 
   2174 	.set	noreorder
   2175 	jr	$31
   2176 	nop
   2177 .end	bn_sqr_comba4
   2178