Home | History | Annotate | Download | only in asm
      1 .set	mips2
        # Select the MIPS II instruction-set level for everything below.
      2 .rdata
        # Two unlabeled identification strings in the read-only data section;
        # nothing in the visible code refers to them.
      3 .asciiz	"mips3.s, Version 1.2"
      4 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"
      5 
      6 .text
        # The routines below name $1 explicitly as a scratch register, so the
        # assembler is told not to reserve it for its own use.
      7 .set	noat
      8 
      9 .align	5
        # bn_mul_add_words -- exported entry point.
        # In:  $4 = destination word array, $5 = source word array,
        #      $6 = word count, $7 = multiplier word (roles as used by
        #      bn_mul_add_words_internal).
        # Out: $2 = carry word, also copied to $4.  When the count is not
        #      positive (signed test) nothing is read or written and 0 is
        #      returned.
     10 .globl	bn_mul_add_words
     11 .ent	bn_mul_add_words
     12 bn_mul_add_words:
     13 	.set	noreorder
        # Delay slots are hand-filled here: the instruction after each
        # branch/jump executes whether or not the transfer is taken.
     14 	bgtz	$6,bn_mul_add_words_internal	# count > 0: do the work
     15 	move	$2,$0	# delay slot: carry starts at 0
     16 	jr	$31	# count <= 0: return immediately
     17 	move	$4,$2	# delay slot: mirror the result into $4
     18 .end	bn_mul_add_words
     19 
     20 .align	5
        # bn_mul_add_words_internal -- for each of the $6 words computes
        #     {carry, r[i]} = r[i] + a[i] * w + carry
        # with r = $4, a = $5, w = $7 and the running carry held in $2.
        # Reached from bn_mul_add_words with $6 > 0 and $2 = 0.
        # The main loop processes four words per pass; the tail handles the
        # remaining one to three words.
        # Out: $2 = final carry, also copied to $4.
        # Modifies: $1, $3, $4, $5, $6, $8-$15, HI/LO.
     21 .ent	bn_mul_add_words_internal
     22 bn_mul_add_words_internal:
     23 	.set	reorder
     24 	li	$3,-4	# mask that clears the two low bits
     25 	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
     26 	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only
     27 
     28 .L_bn_mul_add_words_loop:
        # Word 0.  The same load / add-carry / add-low / add-high pattern
        # repeats for words 1..3, and each multiply is issued before the
        # previous word has been completely accumulated.
     29 	lw	$12,0($5)
     30 	multu	$12,$7	# start a[0] * w
     31 	lw	$13,0($4)
     32 	lw	$14,4($5)
     33 	lw	$15,4($4)
     34 	lw	$8,2*4($5)
     35 	lw	$9,2*4($4)
     36 	addu	$13,$2	# r[0] += incoming carry
     37 	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
     38 				# values", but it seems to work fine
     39 				# even on 64-bit registers.
     40 	mflo	$1	# low word of the product
     41 	mfhi	$12	# high word of the product
     42 	addu	$13,$1	# r[0] += low word
     43 	addu	$2,$12	# carry += high word
     44 	 multu	$14,$7
     45 	sltu	$1,$13,$1	# carry out of the low-word addition
     46 	sw	$13,0($4)
     47 	addu	$2,$1
     48 
        # Word 1.
     49 	lw	$10,3*4($5)
     50 	lw	$11,3*4($4)
     51 	addu	$15,$2
     52 	sltu	$2,$15,$2
     53 	mflo	$1
     54 	mfhi	$14
     55 	addu	$15,$1
     56 	addu	$2,$14
     57 	 multu	$8,$7
     58 	sltu	$1,$15,$1
     59 	sw	$15,4($4)
     60 	addu	$2,$1
     61 
        # Word 2.  The count and both pointers are advanced here, so the
        # remaining stores of this pass use negative offsets.
     62 	subu	$6,4
     63 	addu $4,4*4
     64 	addu $5,4*4
     65 	addu	$9,$2
     66 	sltu	$2,$9,$2
     67 	mflo	$1
     68 	mfhi	$8
     69 	addu	$9,$1
     70 	addu	$2,$8
     71 	 multu	$10,$7
     72 	sltu	$1,$9,$1
     73 	sw	$9,-2*4($4)
     74 	addu	$2,$1
     75 
     76 
        # Word 3.
     77 	and	$8,$6,$3	# non-zero while a full group of four remains
     78 	addu	$11,$2
     79 	sltu	$2,$11,$2
     80 	mflo	$1
     81 	mfhi	$10
     82 	addu	$11,$1
     83 	addu	$2,$10
     84 	sltu	$1,$11,$1
     85 	sw	$11,-4($4)
     86 	.set	noreorder
     87 	bgtz	$8,.L_bn_mul_add_words_loop
     88 	addu	$2,$1	# delay slot: finish the carry for word 3
     89 
     90 	beqz	$6,.L_bn_mul_add_words_return	# count was a multiple of 4
     91 	nop
     92 
     93 .L_bn_mul_add_words_tail:
        # One to three leftover words, one at a time; the first two steps
        # decrement the count and leave as soon as it reaches zero.
     94 	.set	reorder
     95 	lw	$12,0($5)
     96 	multu	$12,$7
     97 	lw	$13,0($4)
     98 	subu	$6,1
     99 	addu	$13,$2
    100 	sltu	$2,$13,$2
    101 	mflo	$1
    102 	mfhi	$12
    103 	addu	$13,$1
    104 	addu	$2,$12
    105 	sltu	$1,$13,$1
    106 	sw	$13,0($4)
    107 	addu	$2,$1
    108 	beqz	$6,.L_bn_mul_add_words_return
    109 
    110 	lw	$12,4($5)
    111 	multu	$12,$7
    112 	lw	$13,4($4)
    113 	subu	$6,1
    114 	addu	$13,$2
    115 	sltu	$2,$13,$2
    116 	mflo	$1
    117 	mfhi	$12
    118 	addu	$13,$1
    119 	addu	$2,$12
    120 	sltu	$1,$13,$1
    121 	sw	$13,4($4)
    122 	addu	$2,$1
    123 	beqz	$6,.L_bn_mul_add_words_return
    124 
        # Third leftover word: no count check, it is necessarily the last.
    125 	lw	$12,2*4($5)
    126 	multu	$12,$7
    127 	lw	$13,2*4($4)
    128 	addu	$13,$2
    129 	sltu	$2,$13,$2
    130 	mflo	$1
    131 	mfhi	$12
    132 	addu	$13,$1
    133 	addu	$2,$12
    134 	sltu	$1,$13,$1
    135 	sw	$13,2*4($4)
    136 	addu	$2,$1
    137 
    138 .L_bn_mul_add_words_return:
    139 	.set	noreorder
    140 	jr	$31
    141 	move	$4,$2	# delay slot: mirror the carry into $4
    142 .end	bn_mul_add_words_internal
    143 
    144 .align	5
        # bn_mul_words -- exported entry point.
        # In:  $4 = destination word array, $5 = source word array,
        #      $6 = word count, $7 = multiplier word (roles as used by
        #      bn_mul_words_internal).
        # Out: $2 = carry word, also copied to $4; 0 when the count is not
        #      positive (signed test).
    145 .globl	bn_mul_words
    146 .ent	bn_mul_words
    147 bn_mul_words:
    148 	.set	noreorder
    149 	bgtz	$6,bn_mul_words_internal	# count > 0: do the work
    150 	move	$2,$0	# delay slot: carry starts at 0
    151 	jr	$31	# count <= 0: return immediately
    152 	move	$4,$2	# delay slot: mirror the result into $4
    153 .end	bn_mul_words
    154 
    155 .align	5
        # bn_mul_words_internal -- for each of the $6 words computes
        #     {carry, r[i]} = a[i] * w + carry
        # with r = $4, a = $5, w = $7 and the running carry held in $2.
        # Unlike bn_mul_add_words_internal the destination is only written,
        # never read.  Reached from bn_mul_words with $6 > 0 and $2 = 0.
        # Out: $2 = final carry, also copied to $4.
        # Modifies: $1, $3, $4, $5, $6, $8-$15, HI/LO.
    156 .ent	bn_mul_words_internal
    157 bn_mul_words_internal:
    158 	.set	reorder
    159 	li	$3,-4	# mask that clears the two low bits
    160 	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
    161 	beqz	$8,.L_bn_mul_words_tail	# fewer than 4 words: tail only
    162 
    163 .L_bn_mul_words_loop:
        # Four words per pass; word 0 is annotated, words 1..3 repeat it.
    164 	lw	$12,0($5)
    165 	multu	$12,$7	# start a[0] * w
    166 	lw	$14,4($5)
    167 	lw	$8,2*4($5)
    168 	lw	$10,3*4($5)
    169 	mflo	$1	# low word of the product
    170 	mfhi	$12	# high word of the product
    171 	addu	$2,$1	# carry + low word = result word
    172 	sltu	$13,$2,$1	# carry out of that addition
    173 	 multu	$14,$7
    174 	sw	$2,0($4)
    175 	addu	$2,$13,$12	# next carry = high word + carry out
    176 
        # Count and pointers advance here; later stores use negative offsets.
    177 	subu	$6,4
    178 	addu $4,4*4
    179 	addu $5,4*4
    180 	mflo	$1
    181 	mfhi	$14
    182 	addu	$2,$1
    183 	sltu	$15,$2,$1
    184 	 multu	$8,$7
    185 	sw	$2,-3*4($4)
    186 	addu	$2,$15,$14
    187 
    188 	mflo	$1
    189 	mfhi	$8
    190 	addu	$2,$1
    191 	sltu	$9,$2,$1
    192 	 multu	$10,$7
    193 	sw	$2,-2*4($4)
    194 	addu	$2,$9,$8
    195 
    196 	and	$8,$6,$3	# non-zero while a full group of four remains
    197 	mflo	$1
    198 	mfhi	$10
    199 	addu	$2,$1
    200 	sltu	$11,$2,$1
    201 	sw	$2,-4($4)
    202 	.set	noreorder
    203 	bgtz	$8,.L_bn_mul_words_loop
    204 	addu	$2,$11,$10	# delay slot: carry for the next word
    205 
    206 	beqz	$6,.L_bn_mul_words_return	# count was a multiple of 4
    207 	nop
    208 
    209 .L_bn_mul_words_tail:
        # One to three leftover words, one at a time.
    210 	.set	reorder
    211 	lw	$12,0($5)
    212 	multu	$12,$7
    213 	subu	$6,1
    214 	mflo	$1
    215 	mfhi	$12
    216 	addu	$2,$1
    217 	sltu	$13,$2,$1
    218 	sw	$2,0($4)
    219 	addu	$2,$13,$12
    220 	beqz	$6,.L_bn_mul_words_return
    221 
    222 	lw	$12,4($5)
    223 	multu	$12,$7
    224 	subu	$6,1
    225 	mflo	$1
    226 	mfhi	$12
    227 	addu	$2,$1
    228 	sltu	$13,$2,$1
    229 	sw	$2,4($4)
    230 	addu	$2,$13,$12
    231 	beqz	$6,.L_bn_mul_words_return
    232 
        # Third leftover word: no count check, it is necessarily the last.
    233 	lw	$12,2*4($5)
    234 	multu	$12,$7
    235 	mflo	$1
    236 	mfhi	$12
    237 	addu	$2,$1
    238 	sltu	$13,$2,$1
    239 	sw	$2,2*4($4)
    240 	addu	$2,$13,$12
    241 
    242 .L_bn_mul_words_return:
    243 	.set	noreorder
    244 	jr	$31
    245 	move	$4,$2	# delay slot: mirror the carry into $4
    246 .end	bn_mul_words_internal
    247 
    248 .align	5
        # bn_sqr_words -- exported entry point.
        # In:  $4 = destination word array, $5 = source word array,
        #      $6 = number of source words (roles as used by
        #      bn_sqr_words_internal).
        # Out: $2 = 0, also copied to $4.  Nothing is read or written when
        #      the count is not positive (signed test).
    249 .globl	bn_sqr_words
    250 .ent	bn_sqr_words
    251 bn_sqr_words:
    252 	.set	noreorder
    253 	bgtz	$6,bn_sqr_words_internal	# count > 0: do the work
    254 	move	$2,$0	# delay slot: $2 = 0 on both paths
    255 	jr	$31	# count <= 0: return immediately
    256 	move	$4,$2	# delay slot: mirror the result into $4
    257 .end	bn_sqr_words
    258 
    259 .align	5
        # bn_sqr_words_internal -- squares each of the $6 source words
        # independently, storing every two-word square as a low/high pair:
        #     r[2*i] = low word of a[i]^2,  r[2*i+1] = high word of a[i]^2
        # with r = $4 and a = $5.  No carry propagates between squares.
        # Reached from bn_sqr_words with $6 > 0 and $2 = 0; $2 is not
        # modified here, so the value returned in $2/$4 is that 0.
        # Modifies: $3, $4, $5, $6, $8-$15, HI/LO.
    260 .ent	bn_sqr_words_internal
    261 bn_sqr_words_internal:
    262 	.set	reorder
    263 	li	$3,-4	# mask that clears the two low bits
    264 	and	$8,$6,$3	# $8 = count rounded down to a multiple of 4
    265 	beqz	$8,.L_bn_sqr_words_tail	# fewer than 4 words: tail only
    266 
    267 .L_bn_sqr_words_loop:
        # Four source words (eight destination words) per pass.
    268 	lw	$12,0($5)
    269 	multu	$12,$12
    270 	lw	$14,4($5)
    271 	lw	$8,2*4($5)
    272 	lw	$10,3*4($5)
    273 	mflo	$13
    274 	mfhi	$12
    275 	sw	$13,0($4)
    276 	sw	$12,4($4)
    277 
        # The destination pointer advances by eight words and the source
        # pointer by four, so the remaining stores use negative offsets.
    278 	multu	$14,$14
    279 	subu	$6,4
    280 	addu $4,8*4
    281 	addu $5,4*4
    282 	mflo	$15
    283 	mfhi	$14
    284 	sw	$15,-6*4($4)
    285 	sw	$14,-5*4($4)
    286 
    287 	multu	$8,$8
    288 	mflo	$9
    289 	mfhi	$8
    290 	sw	$9,-4*4($4)
    291 	sw	$8,-3*4($4)
    292 
    293 
    294 	multu	$10,$10
    295 	and	$8,$6,$3	# non-zero while a full group of four remains
    296 	mflo	$11
    297 	mfhi	$10
    298 	sw	$11,-2*4($4)
    299 
    300 	.set	noreorder
    301 	bgtz	$8,.L_bn_sqr_words_loop
    302 	sw	$10,-4($4)	# delay slot: store the last high word
    303 
    304 	beqz	$6,.L_bn_sqr_words_return	# count was a multiple of 4
    305 	nop
    306 
    307 .L_bn_sqr_words_tail:
        # One to three leftover source words, one at a time.
    308 	.set	reorder
    309 	lw	$12,0($5)
    310 	multu	$12,$12
    311 	subu	$6,1
    312 	mflo	$13
    313 	mfhi	$12
    314 	sw	$13,0($4)
    315 	sw	$12,4($4)
    316 	beqz	$6,.L_bn_sqr_words_return
    317 
    318 	lw	$12,4($5)
    319 	multu	$12,$12
    320 	subu	$6,1
    321 	mflo	$13
    322 	mfhi	$12
    323 	sw	$13,2*4($4)
    324 	sw	$12,3*4($4)
    325 	beqz	$6,.L_bn_sqr_words_return
    326 
        # Third leftover word: no count check, it is necessarily the last.
    327 	lw	$12,2*4($5)
    328 	multu	$12,$12
    329 	mflo	$13
    330 	mfhi	$12
    331 	sw	$13,4*4($4)
    332 	sw	$12,5*4($4)
    333 
    334 .L_bn_sqr_words_return:
    335 	.set	noreorder
    336 	jr	$31
    337 	move	$4,$2	# delay slot: mirror $2 into $4
    338 
    339 .end	bn_sqr_words_internal
    340 
    341 .align	5
        # bn_add_words -- exported entry point.
        # In:  $4 = destination word array, $5 and $6 = the two source word
        #      arrays, $7 = word count (roles as used by
        #      bn_add_words_internal).
        # Out: $2 = carry out of the addition, also copied to $4; 0 when the
        #      count is not positive (signed test).
    342 .globl	bn_add_words
    343 .ent	bn_add_words
    344 bn_add_words:
    345 	.set	noreorder
    346 	bgtz	$7,bn_add_words_internal	# count > 0: do the work
    347 	move	$2,$0	# delay slot: carry starts at 0
    348 	jr	$31	# count <= 0: return immediately
    349 	move	$4,$2	# delay slot: mirror the result into $4
    350 .end	bn_add_words
    351 
    352 .align	5
        # bn_add_words_internal -- for each of the $7 words computes
        #     {carry, r[i]} = a[i] + b[i] + carry
        # with r = $4, a = $5, b = $6 and the running carry held in $2.
        # Reached from bn_add_words with $7 > 0 and $2 = 0.
        # Out: $2 = final carry, also copied to $4.
        # Modifies: $1, $3, $4, $5, $6, $7, $8-$15, $24, $25.
    353 .ent	bn_add_words_internal
    354 bn_add_words_internal:
    355 	.set	reorder
    356 	li	$3,-4	# mask that clears the two low bits
    357 	and	$1,$7,$3	# $1 = count rounded down to a multiple of 4
    358 	beqz	$1,.L_bn_add_words_tail	# fewer than 4 words: tail only
    359 
    360 .L_bn_add_words_loop:
        # Four words per pass.  All eight operands are loaded up front while
        # the count and the three pointers are advanced, so the b[] loads
        # after the pointer update and all stores use negative offsets.
    361 	lw	$12,0($5)
    362 	lw	$8,0($6)
    363 	subu	$7,4
    364 	lw	$13,4($5)
    365 	and	$1,$7,$3	# non-zero while a full group of four remains
    366 	lw	$14,2*4($5)
    367 	addu $6,4*4
    368 	lw	$15,3*4($5)
    369 	addu $4,4*4
    370 	lw	$9,-3*4($6)
    371 	addu $5,4*4
    372 	lw	$10,-2*4($6)
    373 	lw	$11,-4($6)
        # Word 0; words 1..3 repeat the same five-step pattern.
    374 	addu	$8,$12	# t = b[0] + a[0]
    375 	sltu	$24,$8,$12	# carry out of a + b
    376 	addu	$12,$8,$2	# add the incoming carry
    377 	sltu	$2,$12,$8	# carry out of that addition
    378 	sw	$12,-4*4($4)
    379 	addu	$2,$24	# combined carry into the next word
    380 
    381 	addu	$9,$13
    382 	sltu	$25,$9,$13
    383 	addu	$13,$9,$2
    384 	sltu	$2,$13,$9
    385 	sw	$13,-3*4($4)
    386 	addu	$2,$25
    387 
    388 	addu	$10,$14
    389 	sltu	$24,$10,$14
    390 	addu	$14,$10,$2
    391 	sltu	$2,$14,$10
    392 	sw	$14,-2*4($4)
    393 	addu	$2,$24
    394 
    395 	addu	$11,$15
    396 	sltu	$25,$11,$15
    397 	addu	$15,$11,$2
    398 	sltu	$2,$15,$11
    399 	sw	$15,-4($4)
    400 
    401 	.set	noreorder
    402 	bgtz	$1,.L_bn_add_words_loop
    403 	addu	$2,$25	# delay slot: finish the carry for word 3
    404 
    405 	beqz	$7,.L_bn_add_words_return	# count was a multiple of 4
    406 	nop
    407 
    408 .L_bn_add_words_tail:
        # One to three leftover words, one at a time.
    409 	.set	reorder
    410 	lw	$12,0($5)
    411 	lw	$8,0($6)
    412 	addu	$8,$12
    413 	subu	$7,1
    414 	sltu	$24,$8,$12
    415 	addu	$12,$8,$2
    416 	sltu	$2,$12,$8
    417 	sw	$12,0($4)
    418 	addu	$2,$24
    419 	beqz	$7,.L_bn_add_words_return
    420 
    421 	lw	$13,4($5)
    422 	lw	$9,4($6)
    423 	addu	$9,$13
    424 	subu	$7,1
    425 	sltu	$25,$9,$13
    426 	addu	$13,$9,$2
    427 	sltu	$2,$13,$9
    428 	sw	$13,4($4)
    429 	addu	$2,$25
    430 	beqz	$7,.L_bn_add_words_return
    431 
        # Third leftover word: no count check, it is necessarily the last.
    432 	lw	$14,2*4($5)
    433 	lw	$10,2*4($6)
    434 	addu	$10,$14
    435 	sltu	$24,$10,$14
    436 	addu	$14,$10,$2
    437 	sltu	$2,$14,$10
    438 	sw	$14,2*4($4)
    439 	addu	$2,$24
    440 
    441 .L_bn_add_words_return:
    442 	.set	noreorder
    443 	jr	$31
    444 	move	$4,$2	# delay slot: mirror the carry into $4
    445 
    446 .end	bn_add_words_internal
    447 
    448 .align	5
        # bn_sub_words -- exported entry point.
        # In:  $4 = destination word array, $5 = minuend word array,
        #      $6 = subtrahend word array, $7 = word count (roles as used by
        #      bn_sub_words_internal).
        # Out: $2 = borrow out of the subtraction, also copied to $4; 0 when
        #      the count is not positive (signed test).
    449 .globl	bn_sub_words
    450 .ent	bn_sub_words
    451 bn_sub_words:
    452 	.set	noreorder
    453 	bgtz	$7,bn_sub_words_internal	# count > 0: do the work
    454 	move	$2,$0	# delay slot: borrow starts at 0
    455 	jr	$31	# count <= 0: return immediately
    456 	move	$4,$0	# delay slot: $4 = 0 directly; same value as $2 here
    457 .end	bn_sub_words
    458 
    459 .align	5
        # bn_sub_words_internal -- for each of the $7 words computes
        #     r[i] = a[i] - b[i] - borrow
        # with r = $4, a = $5, b = $6 and the running borrow held in $2.
        # Reached from bn_sub_words with $7 > 0 and $2 = 0.
        # Out: $2 = final borrow, also copied to $4.
        # Modifies: $1, $3, $4, $5, $6, $7, $8-$15, $24, $25.
    460 .ent	bn_sub_words_internal
    461 bn_sub_words_internal:
    462 	.set	reorder
    463 	li	$3,-4	# mask that clears the two low bits
    464 	and	$1,$7,$3	# $1 = count rounded down to a multiple of 4
    465 	beqz	$1,.L_bn_sub_words_tail	# fewer than 4 words: tail only
    466 
    467 .L_bn_sub_words_loop:
        # Four words per pass.  All eight operands are loaded up front while
        # the count and the three pointers are advanced, so the b[] loads
        # after the pointer update and all stores use negative offsets.
    468 	lw	$12,0($5)
    469 	lw	$8,0($6)
    470 	subu	$7,4
    471 	lw	$13,4($5)
    472 	and	$1,$7,$3	# non-zero while a full group of four remains
    473 	lw	$14,2*4($5)
    474 	addu $6,4*4
    475 	lw	$15,3*4($5)
    476 	addu $4,4*4
    477 	lw	$9,-3*4($6)
    478 	addu $5,4*4
    479 	lw	$10,-2*4($6)
    480 	lw	$11,-4($6)
        # Word 0; words 1..3 repeat the same pattern.
    481 	sltu	$24,$12,$8	# borrow out of a[0] - b[0]
    482 	subu	$8,$12,$8	# t = a[0] - b[0]
    483 	subu	$12,$8,$2	# subtract the incoming borrow
    484 	sgtu	$2,$12,$8	# borrow out of that subtraction
    485 	sw	$12,-4*4($4)
    486 	addu	$2,$24	# combined borrow into the next word
    487 
    488 	sltu	$25,$13,$9
    489 	subu	$9,$13,$9
    490 	subu	$13,$9,$2
    491 	sgtu	$2,$13,$9
    492 	sw	$13,-3*4($4)
    493 	addu	$2,$25
    494 
    495 
    496 	sltu	$24,$14,$10
    497 	subu	$10,$14,$10
    498 	subu	$14,$10,$2
    499 	sgtu	$2,$14,$10
    500 	sw	$14,-2*4($4)
    501 	addu	$2,$24
    502 
    503 	sltu	$25,$15,$11
    504 	subu	$11,$15,$11
    505 	subu	$15,$11,$2
    506 	sgtu	$2,$15,$11
    507 	sw	$15,-4($4)
    508 
    509 	.set	noreorder
    510 	bgtz	$1,.L_bn_sub_words_loop
    511 	addu	$2,$25	# delay slot: finish the borrow for word 3
    512 
    513 	beqz	$7,.L_bn_sub_words_return	# count was a multiple of 4
    514 	nop
    515 
    516 .L_bn_sub_words_tail:
        # One to three leftover words, one at a time.
    517 	.set	reorder
    518 	lw	$12,0($5)
    519 	lw	$8,0($6)
    520 	subu	$7,1
    521 	sltu	$24,$12,$8
    522 	subu	$8,$12,$8
    523 	subu	$12,$8,$2
    524 	sgtu	$2,$12,$8
    525 	sw	$12,0($4)
    526 	addu	$2,$24
    527 	beqz	$7,.L_bn_sub_words_return
    528 
    529 	lw	$13,4($5)
    530 	subu	$7,1
    531 	lw	$9,4($6)
    532 	sltu	$25,$13,$9
    533 	subu	$9,$13,$9
    534 	subu	$13,$9,$2
    535 	sgtu	$2,$13,$9
    536 	sw	$13,4($4)
    537 	addu	$2,$25
    538 	beqz	$7,.L_bn_sub_words_return
    539 
        # Third leftover word: no count check, it is necessarily the last.
    540 	lw	$14,2*4($5)
    541 	lw	$10,2*4($6)
    542 	sltu	$24,$14,$10
    543 	subu	$10,$14,$10
    544 	subu	$14,$10,$2
    545 	sgtu	$2,$14,$10
    546 	sw	$14,2*4($4)
    547 	addu	$2,$24
    548 
    549 .L_bn_sub_words_return:
    550 	.set	noreorder
    551 	jr	$31
    552 	move	$4,$2	# delay slot: mirror the borrow into $4
    553 .end	bn_sub_words_internal
    554 
    555 .align 5
        # bn_div_3_words -- exported entry point.
        # In:  $4 = pointer into a word array (the words at offsets 0 and -4
        #      are read here, the word at -8 in bn_div_3_words_internal),
        #      $5 = word later multiplied by the quotient estimate,
        #      $6 = divisor word handed to bn_div_words_internal.
        # Out: $2 = -1 (also in $4) when the word at 0($4) equals $6;
        #      otherwise control passes to bn_div_3_words_internal with
        #      $4:$5 = the words at 0 and -4, $7 = the original pointer and
        #      $10 = the original $5.
    556 .globl	bn_div_3_words
    557 .ent	bn_div_3_words
    558 bn_div_3_words:
    559 	.set	noreorder
    560 	move	$7,$4		# we know that bn_div_words does not
    561 				# touch $7, $10, $11 and preserves $6
    562 				# so that we can save two arguments
    563 				# and return address in registers
    564 				# instead of stack:-)
    565 
    566 	lw	$4,($7)	# top word of the dividend
    567 	move	$10,$5	# keep the original $5 for the internal routine
    568 	bne	$4,$6,bn_div_3_words_internal
    569 	lw	$5,-4($7)	# delay slot: second dividend word, loaded on both paths
    570 	li	$2,-1	# top word equals the divisor: answer is all ones
    571 	jr	$31
    572 	move	$4,$2	# delay slot: mirror the result into $4
    573 .end	bn_div_3_words
    574 
    575 .align	5
        # bn_div_3_words_internal -- continuation of bn_div_3_words.
        # On entry: $4:$5 = two dividend words, $6 = divisor word,
        #           $7 = pointer from which the third word is read at -8,
        #           $10 = word multiplied by the quotient estimate.
        # Obtains a quotient estimate from bn_div_words_internal, then
        # decrements it while the two-word product ($10 * estimate) is too
        # large compared with {remainder, word at -8($7)}.
        # Out: $2 = corrected quotient word, also copied to $4.
        # Modifies: $1, $5, $8, $11-$15, $24, $25, HI/LO, plus everything
        #           bn_div_words_internal modifies.
    576 .ent	bn_div_3_words_internal
    577 bn_div_3_words_internal:
    578 	.set	reorder
    579 	move	$11,$31	# keep the return address in a register, not on the stack
    580 	bal	bn_div_words_internal	# returns quotient in $2, remainder in $5
    581 	move	$31,$11
    582 	multu	$10,$2	# saved word * quotient estimate
    583 	lw	$14,-2*4($7)	# third dividend word
    584 	move	$8,$0
    585 	mfhi	$13	# product, high word
    586 	mflo	$12	# product, low word
    587 	sltu	$24,$13,$5	# product high < remainder: estimate already fits
    588 .L_bn_div_3_words_inner_loop:
    589 	bnez	$24,.L_bn_div_3_words_inner_loop_done
    590 	sgeu	$1,$14,$12	# third word >= product low ...
    591 	seq	$25,$13,$5	# ... and product high == remainder
    592 	and	$1,$25	# both true: estimate is accepted below
    593 	sltu	$15,$12,$10	# borrow for the two-word subtraction
    594 	addu	$5,$6	# remainder += divisor
    595 	subu	$13,$15
    596 	subu	$12,$10	# product -= saved word
    597 	sltu	$24,$13,$5
    598 	sltu	$8,$5,$6	# remainder wrapped around
    599 	or	$24,$8	# either condition ends the loop at the next check
    600 	.set	noreorder
    601 	beqz	$1,.L_bn_div_3_words_inner_loop
    602 	subu	$2,1	# delay slot: runs on both paths
    603 	addu	$2,1	# fall-through only: undo the decrement above
    604 	.set	reorder
    605 .L_bn_div_3_words_inner_loop_done:
    606 	.set	noreorder
    607 	jr	$31
    608 	move	$4,$2	# delay slot: mirror the result into $4
    609 .end	bn_div_3_words_internal
    610 
    611 .align	5
        # bn_div_words -- exported entry point.
        # In:  $4:$5 = two-word dividend (high:low), $6 = divisor word
        #      (roles as used by bn_div_words_internal).
        # Out: $2 = quotient word, also copied to $4.  A zero divisor
        #      returns -1 without trapping.
    612 .globl	bn_div_words
    613 .ent	bn_div_words
    614 bn_div_words:
    615 	.set	noreorder
    616 	bnez	$6,bn_div_words_internal	# non-zero divisor: do the work
    617 	li	$2,-1		# I would rather signal div-by-zero
    618 				# which can be done with 'break 7'
        # The li above sits in the branch delay slot, so it executes on both
        # paths; bn_div_words_internal overwrites $2 before returning.
    619 	jr	$31
    620 	move	$4,$2	# delay slot: mirror the result into $4
    621 .end	bn_div_words
    622 
    623 .align	5
        # bn_div_words_internal -- divides the two-word value $4:$5
        # (high:low) by the word in $6.  Reached by branch from bn_div_words
        # (after its zero-divisor check) and by bal from
        # bn_div_3_words_internal.
        #
        # Method, as coded below:
        #   1. Shift the divisor left until its top bit is set, counting the
        #      shifts in $25, and shift the dividend by the same amount.
        #      If that would push set bits out of $4, 'break 6' is raised.
        #   2. Produce the quotient as two half-word digits.  Each digit is
        #      estimated with divu against the divisor's upper half (or taken
        #      as the all-ones half word when the upper halves are equal) and
        #      then decremented while digit * divisor exceeds the current
        #      partial dividend.
        #   3. Shift the remainder and the divisor back down by $25.
        #
        # Out: $2 = quotient (also in $4), $3 = remainder (also in $5),
        #      $6 = original divisor.
        # Modifies: $1, $8, $9, $12-$15, $24, $25, HI/LO.
        #           $7, $10 and $11 are never touched.
    624 .ent	bn_div_words_internal
    625 bn_div_words_internal:
        # The normalization loop below has hand-filled delay slots and a
        # '.-4' relative branch, so it must be assembled without reordering.
        # State that here instead of relying on the mode left behind by
        # whatever precedes this routine in the file.
	.set	noreorder
    626 	move	$3,$0
    627 	bltz	$6,.L_bn_div_words_body	# top bit already set: no shift needed
    628 	move	$25,$3	# delay slot: shift count = 0
    629 	sll	$6,1
    630 	bgtz	$6,.-4	# back to the sll until the top bit is set
    631 	addu	$25,1	# delay slot: count every shift
    632 
    633 	.set	reorder
    634 	negu	$13,$25	# complementary shift count
    635 	li	$14,-1
    636 	sll	$14,$13	# mask of the $4 bits that would be shifted out
    637 	and	$14,$4
    638 	srl	$1,$5,$13	# bits of $5 that move up into $4
    639 	.set	noreorder
    640 	beqz	$14,.+12	# nothing lost: skip the nop and the break
    641 	nop
    642 	break	6		# signal overflow
    643 	.set	reorder
    644 	sll	$4,$25
    645 	sll	$5,$25
    646 	or	$4,$1
    647 .L_bn_div_words_body:
    648 	srl	$3,$6,4*4	# bits
    649 	sgeu	$1,$4,$6
    650 	.set	noreorder
    651 	beqz	$1,.+12	# high word < divisor: skip the nop and the subu
    652 	nop
    653 	subu	$4,$6
    654 	.set	reorder
    655 
        # First (upper) quotient digit.
    656 	li	$8,-1
    657 	srl	$9,$4,4*4	# bits
    658 	srl	$8,4*4	# q=0xffffffff
    659 	beq	$3,$9,.L_bn_div_words_skip_div1
    660 	divu	$0,$4,$3	# $0 destination: quotient is read from LO below
    661 	mflo	$8
    662 .L_bn_div_words_skip_div1:
    663 	multu	$6,$8	# divisor * digit estimate
    664 	sll	$15,$4,4*4	# bits
    665 	srl	$1,$5,4*4	# bits
    666 	or	$15,$1	# low word of the partial dividend
    667 	mflo	$12
    668 	mfhi	$13
    669 .L_bn_div_words_inner_loop1:
        # While the product $13:$12 exceeds the partial dividend $9:$15,
        # subtract the divisor from it and decrement the digit.
    670 	sltu	$14,$15,$12
    671 	seq	$24,$9,$13
    672 	sltu	$1,$9,$13
    673 	and	$14,$24
    674 	sltu	$2,$12,$6	# borrow for the two-word subtraction
    675 	or	$1,$14
    676 	.set	noreorder
    677 	beqz	$1,.L_bn_div_words_inner_loop1_done
    678 	subu	$13,$2	# delay slot: runs on both paths; $13 is dead on exit
    679 	subu	$12,$6
    680 	b	.L_bn_div_words_inner_loop1
    681 	subu	$8,1	# delay slot: digit--
    682 	.set	reorder
    683 .L_bn_div_words_inner_loop1_done:
    684 
    685 	sll	$5,4*4	# bits
    686 	subu	$4,$15,$12	# partial remainder
    687 	sll	$2,$8,4*4	# bits
    688 
        # Second (lower) quotient digit; same steps, with $3 reused as the
        # borrow temporary once the divisor's upper half is no longer needed.
    689 	li	$8,-1
    690 	srl	$9,$4,4*4	# bits
    691 	srl	$8,4*4	# q=0xffffffff
    692 	beq	$3,$9,.L_bn_div_words_skip_div2
    693 	divu	$0,$4,$3
    694 	mflo	$8
    695 .L_bn_div_words_skip_div2:
    696 	multu	$6,$8
    697 	sll	$15,$4,4*4	# bits
    698 	srl	$1,$5,4*4	# bits
    699 	or	$15,$1
    700 	mflo	$12
    701 	mfhi	$13
    702 .L_bn_div_words_inner_loop2:
    703 	sltu	$14,$15,$12
    704 	seq	$24,$9,$13
    705 	sltu	$1,$9,$13
    706 	and	$14,$24
    707 	sltu	$3,$12,$6
    708 	or	$1,$14
    709 	.set	noreorder
    710 	beqz	$1,.L_bn_div_words_inner_loop2_done
    711 	subu	$13,$3
    712 	subu	$12,$6
    713 	b	.L_bn_div_words_inner_loop2
    714 	subu	$8,1
    715 	.set	reorder
    716 .L_bn_div_words_inner_loop2_done:
    717 
    718 	subu	$4,$15,$12
    719 	or	$2,$8	# combine the two digits
    720 	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
    721 	srl	$6,$25		# restore $6
    722 
    723 	.set	noreorder
    724 	move	$5,$3
    725 	jr	$31
    726 	move	$4,$2	# delay slot: mirror the quotient into $4
    727 .end	bn_div_words_internal
    728 
    729 .align	5
        # bn_mul_comba8 -- fully unrolled 8-word by 8-word multiplication.
        # In:  $4 = r (16 result words are stored), $5 = a (8 words),
        #      $6 = b (8 words).
        # Out: r[0..15]; no return value is set.
        #
        # The result is produced column by column.  Three registers ($2, $3
        # and $7) rotate through the roles c1/c2/c3 named in the
        # mul_add_c(...) comments: each product's low word is added to the
        # first, its high word plus carry to the second, and any carry out of
        # that to the third.  The multiply for the next term is always issued
        # before the previous product has been fully accumulated.
        #
        # Register use:
        #   a[0..7] = $12, $13, $14, $15, $16, $18, $20, $5
        #   b[0..7] = $8,  $9,  $10, $11, $17, $19, $21, $6
        #   $24/$25 = low/high word of the current product, $1 = carry temp
        # $5 and $6 are overwritten with a[7] and b[7] once the other words
        # have been loaded.  $16-$21 are saved in a 24-byte stack frame and
        # restored before returning.
    730 .globl	bn_mul_comba8
    731 .ent	bn_mul_comba8
    732 bn_mul_comba8:
    733 	.set	noreorder
    734 	.frame	$29,6*4,$31
    735 	.mask	0x003f0000,-4
    736 	subu $29,6*4
    737 	sw	$21,5*4($29)
    738 	sw	$20,4*4($29)
    739 	sw	$19,3*4($29)
    740 	sw	$18,2*4($29)
    741 	sw	$17,1*4($29)
    742 	sw	$16,0*4($29)
    743 
    744 	.set	reorder
    745 	lw	$12,0($5)	# If compiled with -mips3 option on
    746 				# R5000 box assembler barks on this
    747 				# line with "should not have mult/div
    748 				# as last instruction in bb (R10K
    749 				# bug)" warning. If anybody out there
    750 				# has a clue about how to circumvent
    751 				# this do send me a note.
    752 				#		<appro@fy.chalmers.se>
    753 
    754 	lw	$8,0($6)
    755 	lw	$13,4($5)
    756 	lw	$14,2*4($5)
    757 	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
    758 	lw	$15,3*4($5)
    759 	lw	$9,4($6)
    760 	lw	$10,2*4($6)
    761 	lw	$11,3*4($6)
    762 	mflo	$2
    763 	mfhi	$3
    764 
    765 	lw	$16,4*4($5)
    766 	lw	$18,5*4($5)
    767 	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
    768 	lw	$20,6*4($5)
    769 	lw	$5,7*4($5)
    770 	lw	$17,4*4($6)
    771 	lw	$19,5*4($6)
    772 	mflo	$24
    773 	mfhi	$25
    774 	addu	$3,$24
    775 	sltu	$1,$3,$24
    776 	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
    777 	addu	$7,$25,$1
    778 	lw	$21,6*4($6)
    779 	lw	$6,7*4($6)
    780 	sw	$2,0($4)	# r[0]=c1;
    781 	mflo	$24
    782 	mfhi	$25
    783 	addu	$3,$24
    784 	sltu	$1,$3,$24
    785 	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
    786 	addu	$25,$1
    787 	addu	$7,$25
    788 	sltu	$2,$7,$25
    789 	sw	$3,4($4)	# r[1]=c2;
    790 
    791 	mflo	$24
    792 	mfhi	$25
    793 	addu	$7,$24
    794 	sltu	$1,$7,$24
    795 	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
    796 	addu	$25,$1
    797 	addu	$2,$25
    798 	mflo	$24
    799 	mfhi	$25
    800 	addu	$7,$24
    801 	sltu	$1,$7,$24
    802 	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
    803 	addu	$25,$1
    804 	addu	$2,$25
    805 	sltu	$3,$2,$25
    806 	mflo	$24
    807 	mfhi	$25
    808 	addu	$7,$24
    809 	sltu	$1,$7,$24
    810 	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
    811 	addu	$25,$1
    812 	addu	$2,$25
    813 	sltu	$1,$2,$25
    814 	addu	$3,$1
    815 	sw	$7,2*4($4)	# r[2]=c3;
    816 
    817 	mflo	$24
    818 	mfhi	$25
    819 	addu	$2,$24
    820 	sltu	$1,$2,$24
    821 	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
    822 	addu	$25,$1
    823 	addu	$3,$25
    824 	sltu	$7,$3,$25
    825 	mflo	$24
    826 	mfhi	$25
    827 	addu	$2,$24
    828 	sltu	$1,$2,$24
    829 	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
    830 	addu	$25,$1
    831 	addu	$3,$25
    832 	sltu	$1,$3,$25
    833 	addu	$7,$1
    834 	mflo	$24
    835 	mfhi	$25
    836 	addu	$2,$24
    837 	sltu	$1,$2,$24
    838 	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
    839 	addu	$25,$1
    840 	addu	$3,$25
    841 	sltu	$1,$3,$25
    842 	addu	$7,$1
    843 	mflo	$24
    844 	mfhi	$25
    845 	addu	$2,$24
    846 	sltu	$1,$2,$24
    847 	 multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
    848 	addu	$25,$1
    849 	addu	$3,$25
    850 	sltu	$1,$3,$25
    851 	addu	$7,$1
    852 	sw	$2,3*4($4)	# r[3]=c1;
    853 
    854 	mflo	$24
    855 	mfhi	$25
    856 	addu	$3,$24
    857 	sltu	$1,$3,$24
    858 	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
    859 	addu	$25,$1
    860 	addu	$7,$25
    861 	sltu	$2,$7,$25
    862 	mflo	$24
    863 	mfhi	$25
    864 	addu	$3,$24
    865 	sltu	$1,$3,$24
    866 	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
    867 	addu	$25,$1
    868 	addu	$7,$25
    869 	sltu	$1,$7,$25
    870 	addu	$2,$1
    871 	mflo	$24
    872 	mfhi	$25
    873 	addu	$3,$24
    874 	sltu	$1,$3,$24
    875 	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
    876 	addu	$25,$1
    877 	addu	$7,$25
    878 	sltu	$1,$7,$25
    879 	addu	$2,$1
    880 	mflo	$24
    881 	mfhi	$25
    882 	addu	$3,$24
    883 	sltu	$1,$3,$24
    884 	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
    885 	addu	$25,$1
    886 	addu	$7,$25
    887 	sltu	$1,$7,$25
    888 	addu	$2,$1
    889 	mflo	$24
    890 	mfhi	$25
    891 	addu	$3,$24
    892 	sltu	$1,$3,$24
    893 	 multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
    894 	addu	$25,$1
    895 	addu	$7,$25
    896 	sltu	$1,$7,$25
    897 	addu	$2,$1
    898 	sw	$3,4*4($4)	# r[4]=c2;
    899 
    900 	mflo	$24
    901 	mfhi	$25
    902 	addu	$7,$24
    903 	sltu	$1,$7,$24
    904 	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
    905 	addu	$25,$1
    906 	addu	$2,$25
    907 	sltu	$3,$2,$25
    908 	mflo	$24
    909 	mfhi	$25
    910 	addu	$7,$24
    911 	sltu	$1,$7,$24
    912 	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
    913 	addu	$25,$1
    914 	addu	$2,$25
    915 	sltu	$1,$2,$25
    916 	addu	$3,$1
    917 	mflo	$24
    918 	mfhi	$25
    919 	addu	$7,$24
    920 	sltu	$1,$7,$24
    921 	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
    922 	addu	$25,$1
    923 	addu	$2,$25
    924 	sltu	$1,$2,$25
    925 	addu	$3,$1
    926 	mflo	$24
    927 	mfhi	$25
    928 	addu	$7,$24
    929 	sltu	$1,$7,$24
    930 	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
    931 	addu	$25,$1
    932 	addu	$2,$25
    933 	sltu	$1,$2,$25
    934 	addu	$3,$1
    935 	mflo	$24
    936 	mfhi	$25
    937 	addu	$7,$24
    938 	sltu	$1,$7,$24
    939 	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
    940 	addu	$25,$1
    941 	addu	$2,$25
    942 	sltu	$1,$2,$25
    943 	addu	$3,$1
    944 	mflo	$24
    945 	mfhi	$25
    946 	addu	$7,$24
    947 	sltu	$1,$7,$24
    948 	 multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
    949 	addu	$25,$1
    950 	addu	$2,$25
    951 	sltu	$1,$2,$25
    952 	addu	$3,$1
    953 	sw	$7,5*4($4)	# r[5]=c3;
    954 
    955 	mflo	$24
    956 	mfhi	$25
    957 	addu	$2,$24
    958 	sltu	$1,$2,$24
    959 	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
    960 	addu	$25,$1
    961 	addu	$3,$25
    962 	sltu	$7,$3,$25
    963 	mflo	$24
    964 	mfhi	$25
    965 	addu	$2,$24
    966 	sltu	$1,$2,$24
    967 	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
    968 	addu	$25,$1
    969 	addu	$3,$25
    970 	sltu	$1,$3,$25
    971 	addu	$7,$1
    972 	mflo	$24
    973 	mfhi	$25
    974 	addu	$2,$24
    975 	sltu	$1,$2,$24
    976 	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
    977 	addu	$25,$1
    978 	addu	$3,$25
    979 	sltu	$1,$3,$25
    980 	addu	$7,$1
    981 	mflo	$24
    982 	mfhi	$25
    983 	addu	$2,$24
    984 	sltu	$1,$2,$24
    985 	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
    986 	addu	$25,$1
    987 	addu	$3,$25
    988 	sltu	$1,$3,$25
    989 	addu	$7,$1
    990 	mflo	$24
    991 	mfhi	$25
    992 	addu	$2,$24
    993 	sltu	$1,$2,$24
    994 	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
    995 	addu	$25,$1
    996 	addu	$3,$25
    997 	sltu	$1,$3,$25
    998 	addu	$7,$1
    999 	mflo	$24
   1000 	mfhi	$25
   1001 	addu	$2,$24
   1002 	sltu	$1,$2,$24
   1003 	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
   1004 	addu	$25,$1
   1005 	addu	$3,$25
   1006 	sltu	$1,$3,$25
   1007 	addu	$7,$1
   1008 	mflo	$24
   1009 	mfhi	$25
   1010 	addu	$2,$24
   1011 	sltu	$1,$2,$24
   1012 	 multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
   1013 	addu	$25,$1
   1014 	addu	$3,$25
   1015 	sltu	$1,$3,$25
   1016 	addu	$7,$1
   1017 	sw	$2,6*4($4)	# r[6]=c1;
   1018 
   1019 	mflo	$24
   1020 	mfhi	$25
   1021 	addu	$3,$24
   1022 	sltu	$1,$3,$24
   1023 	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
   1024 	addu	$25,$1
   1025 	addu	$7,$25
   1026 	sltu	$2,$7,$25
   1027 	mflo	$24
   1028 	mfhi	$25
   1029 	addu	$3,$24
   1030 	sltu	$1,$3,$24
   1031 	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
   1032 	addu	$25,$1
   1033 	addu	$7,$25
   1034 	sltu	$1,$7,$25
   1035 	addu	$2,$1
   1036 	mflo	$24
   1037 	mfhi	$25
   1038 	addu	$3,$24
   1039 	sltu	$1,$3,$24
   1040 	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
   1041 	addu	$25,$1
   1042 	addu	$7,$25
   1043 	sltu	$1,$7,$25
   1044 	addu	$2,$1
   1045 	mflo	$24
   1046 	mfhi	$25
   1047 	addu	$3,$24
   1048 	sltu	$1,$3,$24
   1049 	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
   1050 	addu	$25,$1
   1051 	addu	$7,$25
   1052 	sltu	$1,$7,$25
   1053 	addu	$2,$1
   1054 	mflo	$24
   1055 	mfhi	$25
   1056 	addu	$3,$24
   1057 	sltu	$1,$3,$24
   1058 	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
   1059 	addu	$25,$1
   1060 	addu	$7,$25
   1061 	sltu	$1,$7,$25
   1062 	addu	$2,$1
   1063 	mflo	$24
   1064 	mfhi	$25
   1065 	addu	$3,$24
   1066 	sltu	$1,$3,$24
   1067 	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
   1068 	addu	$25,$1
   1069 	addu	$7,$25
   1070 	sltu	$1,$7,$25
   1071 	addu	$2,$1
   1072 	mflo	$24
   1073 	mfhi	$25
   1074 	addu	$3,$24
   1075 	sltu	$1,$3,$24
   1076 	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
   1077 	addu	$25,$1
   1078 	addu	$7,$25
   1079 	sltu	$1,$7,$25
   1080 	addu	$2,$1
   1081 	mflo	$24
   1082 	mfhi	$25
   1083 	addu	$3,$24
   1084 	sltu	$1,$3,$24
   1085 	 multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
   1086 	addu	$25,$1
   1087 	addu	$7,$25
   1088 	sltu	$1,$7,$25
   1089 	addu	$2,$1
   1090 	sw	$3,7*4($4)	# r[7]=c2;
   1091 
   1092 	mflo	$24
   1093 	mfhi	$25
   1094 	addu	$7,$24
   1095 	sltu	$1,$7,$24
   1096 	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
   1097 	addu	$25,$1
   1098 	addu	$2,$25
   1099 	sltu	$3,$2,$25
   1100 	mflo	$24
   1101 	mfhi	$25
   1102 	addu	$7,$24
   1103 	sltu	$1,$7,$24
   1104 	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
   1105 	addu	$25,$1
   1106 	addu	$2,$25
   1107 	sltu	$1,$2,$25
   1108 	addu	$3,$1
   1109 	mflo	$24
   1110 	mfhi	$25
   1111 	addu	$7,$24
   1112 	sltu	$1,$7,$24
   1113 	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
   1114 	addu	$25,$1
   1115 	addu	$2,$25
   1116 	sltu	$1,$2,$25
   1117 	addu	$3,$1
   1118 	mflo	$24
   1119 	mfhi	$25
   1120 	addu	$7,$24
   1121 	sltu	$1,$7,$24
   1122 	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
   1123 	addu	$25,$1
   1124 	addu	$2,$25
   1125 	sltu	$1,$2,$25
   1126 	addu	$3,$1
   1127 	mflo	$24
   1128 	mfhi	$25
   1129 	addu	$7,$24
   1130 	sltu	$1,$7,$24
   1131 	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
   1132 	addu	$25,$1
   1133 	addu	$2,$25
   1134 	sltu	$1,$2,$25
   1135 	addu	$3,$1
   1136 	mflo	$24
   1137 	mfhi	$25
   1138 	addu	$7,$24
   1139 	sltu	$1,$7,$24
   1140 	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
   1141 	addu	$25,$1
   1142 	addu	$2,$25
   1143 	sltu	$1,$2,$25
   1144 	addu	$3,$1
   1145 	mflo	$24
   1146 	mfhi	$25
   1147 	addu	$7,$24
   1148 	sltu	$1,$7,$24
   1149 	 multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
   1150 	addu	$25,$1
   1151 	addu	$2,$25
   1152 	sltu	$1,$2,$25
   1153 	addu	$3,$1
   1154 	sw	$7,8*4($4)	# r[8]=c3;
   1155 
   1156 	mflo	$24
   1157 	mfhi	$25
   1158 	addu	$2,$24
   1159 	sltu	$1,$2,$24
   1160 	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
   1161 	addu	$25,$1
   1162 	addu	$3,$25
   1163 	sltu	$7,$3,$25
   1164 	mflo	$24
   1165 	mfhi	$25
   1166 	addu	$2,$24
   1167 	sltu	$1,$2,$24
   1168 	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
   1169 	addu	$25,$1
   1170 	addu	$3,$25
   1171 	sltu	$1,$3,$25
   1172 	addu	$7,$1
   1173 	mflo	$24
   1174 	mfhi	$25
   1175 	addu	$2,$24
   1176 	sltu	$1,$2,$24
   1177 	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
   1178 	addu	$25,$1
   1179 	addu	$3,$25
   1180 	sltu	$1,$3,$25
   1181 	addu	$7,$1
   1182 	mflo	$24
   1183 	mfhi	$25
   1184 	addu	$2,$24
   1185 	sltu	$1,$2,$24
   1186 	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
   1187 	addu	$25,$1
   1188 	addu	$3,$25
   1189 	sltu	$1,$3,$25
   1190 	addu	$7,$1
   1191 	mflo	$24
   1192 	mfhi	$25
   1193 	addu	$2,$24
   1194 	sltu	$1,$2,$24
   1195 	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
   1196 	addu	$25,$1
   1197 	addu	$3,$25
   1198 	sltu	$1,$3,$25
   1199 	addu	$7,$1
   1200 	mflo	$24
   1201 	mfhi	$25
   1202 	addu	$2,$24
   1203 	sltu	$1,$2,$24
   1204 	 multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
   1205 	addu	$25,$1
   1206 	addu	$3,$25
   1207 	sltu	$1,$3,$25
   1208 	addu	$7,$1
   1209 	sw	$2,9*4($4)	# r[9]=c1;
   1210 
   1211 	mflo	$24
   1212 	mfhi	$25
   1213 	addu	$3,$24
   1214 	sltu	$1,$3,$24
   1215 	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
   1216 	addu	$25,$1
   1217 	addu	$7,$25
   1218 	sltu	$2,$7,$25
   1219 	mflo	$24
   1220 	mfhi	$25
   1221 	addu	$3,$24
   1222 	sltu	$1,$3,$24
   1223 	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
   1224 	addu	$25,$1
   1225 	addu	$7,$25
   1226 	sltu	$1,$7,$25
   1227 	addu	$2,$1
   1228 	mflo	$24
   1229 	mfhi	$25
   1230 	addu	$3,$24
   1231 	sltu	$1,$3,$24
   1232 	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
   1233 	addu	$25,$1
   1234 	addu	$7,$25
   1235 	sltu	$1,$7,$25
   1236 	addu	$2,$1
   1237 	mflo	$24
   1238 	mfhi	$25
   1239 	addu	$3,$24
   1240 	sltu	$1,$3,$24
   1241 	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
   1242 	addu	$25,$1
   1243 	addu	$7,$25
   1244 	sltu	$1,$7,$25
   1245 	addu	$2,$1
   1246 	mflo	$24
   1247 	mfhi	$25
   1248 	addu	$3,$24
   1249 	sltu	$1,$3,$24
   1250 	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
   1251 	addu	$25,$1
   1252 	addu	$7,$25
   1253 	sltu	$1,$7,$25
   1254 	addu	$2,$1
   1255 	sw	$3,10*4($4)	# r[10]=c2;
   1256 
   1257 	mflo	$24
   1258 	mfhi	$25
   1259 	addu	$7,$24
   1260 	sltu	$1,$7,$24
   1261 	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
   1262 	addu	$25,$1
   1263 	addu	$2,$25
   1264 	sltu	$3,$2,$25
   1265 	mflo	$24
   1266 	mfhi	$25
   1267 	addu	$7,$24
   1268 	sltu	$1,$7,$24
   1269 	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
   1270 	addu	$25,$1
   1271 	addu	$2,$25
   1272 	sltu	$1,$2,$25
   1273 	addu	$3,$1
   1274 	mflo	$24
   1275 	mfhi	$25
   1276 	addu	$7,$24
   1277 	sltu	$1,$7,$24
   1278 	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
   1279 	addu	$25,$1
   1280 	addu	$2,$25
   1281 	sltu	$1,$2,$25
   1282 	addu	$3,$1
   1283 	mflo	$24
   1284 	mfhi	$25
   1285 	addu	$7,$24
   1286 	sltu	$1,$7,$24
   1287 	 multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
   1288 	addu	$25,$1
   1289 	addu	$2,$25
   1290 	sltu	$1,$2,$25
   1291 	addu	$3,$1
   1292 	sw	$7,11*4($4)	# r[11]=c3;
   1293 
   1294 	mflo	$24
   1295 	mfhi	$25
   1296 	addu	$2,$24
   1297 	sltu	$1,$2,$24
   1298 	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
   1299 	addu	$25,$1
   1300 	addu	$3,$25
   1301 	sltu	$7,$3,$25
   1302 	mflo	$24
   1303 	mfhi	$25
   1304 	addu	$2,$24
   1305 	sltu	$1,$2,$24
   1306 	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
   1307 	addu	$25,$1
   1308 	addu	$3,$25
   1309 	sltu	$1,$3,$25
   1310 	addu	$7,$1
   1311 	mflo	$24
   1312 	mfhi	$25
   1313 	addu	$2,$24
   1314 	sltu	$1,$2,$24
   1315 	 multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
   1316 	addu	$25,$1
   1317 	addu	$3,$25
   1318 	sltu	$1,$3,$25
   1319 	addu	$7,$1
   1320 	sw	$2,12*4($4)	# r[12]=c1;
   1321 
   1322 	mflo	$24
   1323 	mfhi	$25
   1324 	addu	$3,$24
   1325 	sltu	$1,$3,$24
   1326 	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
   1327 	addu	$25,$1
   1328 	addu	$7,$25
   1329 	sltu	$2,$7,$25
   1330 	mflo	$24
   1331 	mfhi	$25
   1332 	addu	$3,$24
   1333 	sltu	$1,$3,$24
   1334 	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
   1335 	addu	$25,$1
   1336 	addu	$7,$25
   1337 	sltu	$1,$7,$25
   1338 	addu	$2,$1
   1339 	sw	$3,13*4($4)	# r[13]=c2;
   1340 
   1341 	mflo	$24
   1342 	mfhi	$25
   1343 	addu	$7,$24
   1344 	sltu	$1,$7,$24
   1345 	addu	$25,$1
   1346 	addu	$2,$25
   1347 	sw	$7,14*4($4)	# r[14]=c3;
   1348 	sw	$2,15*4($4)	# r[15]=c1;
   1349 
        # Restore the saved registers; the stack pointer is released in the
        # delay slot of the return jump.
   1350 	.set	noreorder
   1351 	lw	$21,5*4($29)
   1352 	lw	$20,4*4($29)
   1353 	lw	$19,3*4($29)
   1354 	lw	$18,2*4($29)
   1355 	lw	$17,1*4($29)
   1356 	lw	$16,0*4($29)
   1357 	jr	$31
   1358 	addu $29,6*4
   1359 .end	bn_mul_comba8
   1360 
   1361 .align	5
        # bn_mul_comba4 -- multiply two 4-word operands, storing an 8-word result.
        #
        # Registers on entry (as used by the code below):
        #   $4 = r : destination, words r[0]..r[7] are stored
        #   $5 = a : source, words a[0]..a[3] are loaded into $12..$15
        #   $6 = b : source, words b[0]..b[3] are loaded into $8..$11
        # Words are 4 bytes (lw/sw with offsets in multiples of 4).
        #
        # Working registers:
        #   $2/$3/$7 = c1/c2/c3, a three-word accumulator whose roles rotate
        #              from one result column to the next
        #   $24/$25  = low/high word of the most recent product (mflo/mfhi)
        #   $1       = carry out of the preceding addu, computed with sltu
        #              (the file is assembled with .set noat)
        #
        # The result is built column by column: column k sums the products
        # a[i]*b[j] with i+j == k.  The comment on each multu names the
        # accumulation step whose product it starts; that product is only
        # consumed after the following mflo/mfhi pair.  A multu written with
        # one extra leading space starts the first product of the next column.
        #
        # Clobbers $1-$3, $7-$15, $24, $25 and HI/LO.  No stack is used.
   1362 .globl	bn_mul_comba4
   1363 .ent	bn_mul_comba4
   1364 bn_mul_comba4:
   1365 	.set	reorder
   1366 	lw	$12,0($5)
   1367 	lw	$8,0($6)
   1368 	lw	$13,4($5)
   1369 	lw	$14,2*4($5)
   1370 	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
   1371 	lw	$15,3*4($5)
   1372 	lw	$9,4($6)
   1373 	lw	$10,2*4($6)
   1374 	lw	$11,3*4($6)
   1375 	mflo	$2
   1376 	mfhi	$3
   1377 	sw	$2,0($4)		# r[0]=c1;
   1378 
        	# column 1: a[0]*b[1] + a[1]*b[0]
   1379 	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
   1380 	mflo	$24
   1381 	mfhi	$25
   1382 	addu	$3,$24
   1383 	sltu	$1,$3,$24
   1384 	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
   1385 	addu	$7,$25,$1
   1386 	mflo	$24
   1387 	mfhi	$25
   1388 	addu	$3,$24
   1389 	sltu	$1,$3,$24
   1390 	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
   1391 	addu	$25,$1
   1392 	addu	$7,$25
   1393 	sltu	$2,$7,$25
   1394 	sw	$3,4($4)		# r[1]=c2;
   1395 
        	# column 2: a[2]*b[0] + a[1]*b[1] + a[0]*b[2]
   1396 	mflo	$24
   1397 	mfhi	$25
   1398 	addu	$7,$24
   1399 	sltu	$1,$7,$24
   1400 	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
   1401 	addu	$25,$1
   1402 	addu	$2,$25
        	# No sltu follows the add above: no carry out of c1 is recorded for
        	# this step, and c2 is first written by the sltu of the next step.
        	# NOTE(review): presumably c1 cannot overflow here -- confirm.
   1403 	mflo	$24
   1404 	mfhi	$25
   1405 	addu	$7,$24
   1406 	sltu	$1,$7,$24
   1407 	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
   1408 	addu	$25,$1
   1409 	addu	$2,$25
   1410 	sltu	$3,$2,$25
   1411 	mflo	$24
   1412 	mfhi	$25
   1413 	addu	$7,$24
   1414 	sltu	$1,$7,$24
   1415 	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
   1416 	addu	$25,$1
   1417 	addu	$2,$25
   1418 	sltu	$1,$2,$25
   1419 	addu	$3,$1
   1420 	sw	$7,2*4($4)		# r[2]=c3;
   1421 
        	# column 3: a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
   1422 	mflo	$24
   1423 	mfhi	$25
   1424 	addu	$2,$24
   1425 	sltu	$1,$2,$24
   1426 	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
   1427 	addu	$25,$1
   1428 	addu	$3,$25
   1429 	sltu	$7,$3,$25
   1430 	mflo	$24
   1431 	mfhi	$25
   1432 	addu	$2,$24
   1433 	sltu	$1,$2,$24
   1434 	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
   1435 	addu	$25,$1
   1436 	addu	$3,$25
   1437 	sltu	$1,$3,$25
   1438 	addu	$7,$1
   1439 	mflo	$24
   1440 	mfhi	$25
   1441 	addu	$2,$24
   1442 	sltu	$1,$2,$24
   1443 	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
   1444 	addu	$25,$1
   1445 	addu	$3,$25
   1446 	sltu	$1,$3,$25
   1447 	addu	$7,$1
   1448 	mflo	$24
   1449 	mfhi	$25
   1450 	addu	$2,$24
   1451 	sltu	$1,$2,$24
   1452 	 multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
   1453 	addu	$25,$1
   1454 	addu	$3,$25
   1455 	sltu	$1,$3,$25
   1456 	addu	$7,$1
   1457 	sw	$2,3*4($4)		# r[3]=c1;
   1458 
        	# column 4: a[3]*b[1] + a[2]*b[2] + a[1]*b[3]
   1459 	mflo	$24
   1460 	mfhi	$25
   1461 	addu	$3,$24
   1462 	sltu	$1,$3,$24
   1463 	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
   1464 	addu	$25,$1
   1465 	addu	$7,$25
   1466 	sltu	$2,$7,$25
   1467 	mflo	$24
   1468 	mfhi	$25
   1469 	addu	$3,$24
   1470 	sltu	$1,$3,$24
   1471 	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
   1472 	addu	$25,$1
   1473 	addu	$7,$25
   1474 	sltu	$1,$7,$25
   1475 	addu	$2,$1
   1476 	mflo	$24
   1477 	mfhi	$25
   1478 	addu	$3,$24
   1479 	sltu	$1,$3,$24
   1480 	 multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
   1481 	addu	$25,$1
   1482 	addu	$7,$25
   1483 	sltu	$1,$7,$25
   1484 	addu	$2,$1
   1485 	sw	$3,4*4($4)		# r[4]=c2;
   1486 
        	# column 5: a[2]*b[3] + a[3]*b[2]
   1487 	mflo	$24
   1488 	mfhi	$25
   1489 	addu	$7,$24
   1490 	sltu	$1,$7,$24
   1491 	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
   1492 	addu	$25,$1
   1493 	addu	$2,$25
   1494 	sltu	$3,$2,$25
   1495 	mflo	$24
   1496 	mfhi	$25
   1497 	addu	$7,$24
   1498 	sltu	$1,$7,$24
   1499 	 multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
   1500 	addu	$25,$1
   1501 	addu	$2,$25
   1502 	sltu	$1,$2,$25
   1503 	addu	$3,$1
   1504 	sw	$7,5*4($4)		# r[5]=c3;
   1505 
        	# column 6: a[3]*b[3]; its high word plus carry becomes r[7]
   1506 	mflo	$24
   1507 	mfhi	$25
   1508 	addu	$2,$24
   1509 	sltu	$1,$2,$24
   1510 	addu	$25,$1
   1511 	addu	$3,$25
   1512 	sw	$2,6*4($4)		# r[6]=c1;
   1513 	sw	$3,7*4($4)		# r[7]=c2;
   1514 
        	# Return to the address in $31; the nop fills the jr delay slot.
   1515 	.set	noreorder
   1516 	jr	$31
   1517 	nop
   1518 .end	bn_mul_comba4
   1519 
   1520 .align	5
        # bn_sqr_comba8 -- square an 8-word operand, storing a 16-word result.
        #
        # Registers on entry (as used by the code below):
        #   $4 = r : destination, words r[0]..r[15] are stored
        #   $5 = a : source, a[0]..a[3] are loaded into $12..$15 and
        #            a[4]..a[7] into $8..$11
        # Words are 4 bytes (lw/sw with offsets in multiples of 4).
        # $6 is never read as an input here; it is overwritten as scratch.
        #
        # Working registers:
        #   $2/$3/$7 = c1/c2/c3, a three-word accumulator whose roles rotate
        #              from one result column to the next
        #   $24/$25  = low/high word of the most recent product (mflo/mfhi)
        #   $1       = carry scratch (the file is assembled with .set noat)
        #   $6       = bit 31 of the low product word, carried into the
        #              doubled high word
        #
        # Column k sums the products a[i]*a[j] with i+j == k.  The per-line
        # comments write the second factor as b[j]; both factors come from a.
        #   mul_add_c  : add the product once (the a[i]*a[i] terms).
        #   mul_add_c2 : add the product twice.  The 64-bit product is
        #                shifted left by one: "slt x,$25,$0" captures bit 31
        #                of the high word (the bit shifted out) into the top
        #                accumulator word, "slt $6,$24,$0" moves bit 31 of
        #                the low word into the doubled high word.
        # The comment on each multu names the step whose product it starts;
        # that product is consumed after the following mflo/mfhi pair.  A
        # multu written with one extra leading space starts the first product
        # of the next column.
        #
        # Clobbers $1-$3, $6-$15, $24, $25 and HI/LO.  No stack is used.
   1521 .globl	bn_sqr_comba8
   1522 .ent	bn_sqr_comba8
   1523 bn_sqr_comba8:
   1524 	.set	reorder
   1525 	lw	$12,0($5)
   1526 	lw	$13,4($5)
   1527 	lw	$14,2*4($5)
   1528 	lw	$15,3*4($5)
   1529 
   1530 	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
   1531 	lw	$8,4*4($5)
   1532 	lw	$9,5*4($5)
   1533 	lw	$10,6*4($5)
   1534 	lw	$11,7*4($5)
   1535 	mflo	$2
   1536 	mfhi	$3
   1537 	sw	$2,0($4)		# r[0]=c1;
   1538 
        	# column 1: 2*a[0]*a[1]
   1539 	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
   1540 	mflo	$24
   1541 	mfhi	$25
   1542 	slt	$2,$25,$0
   1543 	sll	$25,1
   1544 	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
   1545 	slt	$6,$24,$0
   1546 	addu	$25,$6
   1547 	sll	$24,1
   1548 	addu	$3,$24
   1549 	sltu	$1,$3,$24
   1550 	addu	$7,$25,$1
   1551 	sw	$3,4($4)		# r[1]=c2;
   1552 
        	# column 2: 2*a[2]*a[0] + a[1]*a[1]
   1553 	mflo	$24
   1554 	mfhi	$25
   1555 	slt	$3,$25,$0
   1556 	sll	$25,1
   1557 	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
   1558 	slt	$6,$24,$0
   1559 	addu	$25,$6
   1560 	sll	$24,1
   1561 	addu	$7,$24
   1562 	sltu	$1,$7,$24
   1563 	addu	$25,$1
   1564 	addu	$2,$25
   1565 	sltu	$1,$2,$25
   1566 	addu	$3,$1
   1567 	mflo	$24
   1568 	mfhi	$25
   1569 	addu	$7,$24
   1570 	sltu	$1,$7,$24
   1571 	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
   1572 	addu	$25,$1
   1573 	addu	$2,$25
   1574 	sltu	$1,$2,$25
   1575 	addu	$3,$1
   1576 	sw	$7,2*4($4)		# r[2]=c3;
   1577 
        	# column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
   1578 	mflo	$24
   1579 	mfhi	$25
   1580 	slt	$7,$25,$0
   1581 	sll	$25,1
   1582 	multu	$13,$14		# mul_add_c2(a[1],b[2],c1,c2,c3);
   1583 	slt	$6,$24,$0
   1584 	addu	$25,$6
   1585 	sll	$24,1
   1586 	addu	$2,$24
   1587 	sltu	$1,$2,$24
   1588 	addu	$25,$1
   1589 	addu	$3,$25
   1590 	sltu	$1,$3,$25
   1591 	addu	$7,$1
   1592 	mflo	$24
   1593 	mfhi	$25
   1594 	slt	$1,$25,$0
   1595 	addu	$7,$1
   1596 	 multu	$8,$12		# mul_add_c2(a[4],b[0],c2,c3,c1);
   1597 	sll	$25,1
   1598 	slt	$6,$24,$0
   1599 	addu	$25,$6
   1600 	sll	$24,1
   1601 	addu	$2,$24
   1602 	sltu	$1,$2,$24
   1603 	addu	$25,$1
   1604 	addu	$3,$25
   1605 	sltu	$1,$3,$25
   1606 	addu	$7,$1
   1607 	sw	$2,3*4($4)		# r[3]=c1;
   1608 
        	# column 4: 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2]
   1609 	mflo	$24
   1610 	mfhi	$25
   1611 	slt	$2,$25,$0
   1612 	sll	$25,1
   1613 	multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
   1614 	slt	$6,$24,$0
   1615 	addu	$25,$6
   1616 	sll	$24,1
   1617 	addu	$3,$24
   1618 	sltu	$1,$3,$24
   1619 	addu	$25,$1
   1620 	addu	$7,$25
   1621 	sltu	$1,$7,$25
   1622 	addu	$2,$1
   1623 	mflo	$24
   1624 	mfhi	$25
   1625 	slt	$1,$25,$0
   1626 	addu	$2,$1
   1627 	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
   1628 	sll	$25,1
   1629 	slt	$6,$24,$0
   1630 	addu	$25,$6
   1631 	sll	$24,1
   1632 	addu	$3,$24
   1633 	sltu	$1,$3,$24
   1634 	addu	$25,$1
   1635 	addu	$7,$25
   1636 	sltu	$1,$7,$25
   1637 	addu	$2,$1
   1638 	mflo	$24
   1639 	mfhi	$25
   1640 	addu	$3,$24
   1641 	sltu	$1,$3,$24
   1642 	 multu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
   1643 	addu	$25,$1
   1644 	addu	$7,$25
   1645 	sltu	$1,$7,$25
   1646 	addu	$2,$1
   1647 	sw	$3,4*4($4)		# r[4]=c2;
   1648 
        	# column 5: 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3]
   1649 	mflo	$24
   1650 	mfhi	$25
   1651 	slt	$3,$25,$0
   1652 	sll	$25,1
   1653 	multu	$13,$8		# mul_add_c2(a[1],b[4],c3,c1,c2);
   1654 	slt	$6,$24,$0
   1655 	addu	$25,$6
   1656 	sll	$24,1
   1657 	addu	$7,$24
   1658 	sltu	$1,$7,$24
   1659 	addu	$25,$1
   1660 	addu	$2,$25
   1661 	sltu	$1,$2,$25
   1662 	addu	$3,$1
   1663 	mflo	$24
   1664 	mfhi	$25
   1665 	slt	$1,$25,$0
   1666 	addu	$3,$1
   1667 	multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
   1668 	sll	$25,1
   1669 	slt	$6,$24,$0
   1670 	addu	$25,$6
   1671 	sll	$24,1
   1672 	addu	$7,$24
   1673 	sltu	$1,$7,$24
   1674 	addu	$25,$1
   1675 	addu	$2,$25
   1676 	sltu	$1,$2,$25
   1677 	addu	$3,$1
   1678 	mflo	$24
   1679 	mfhi	$25
   1680 	slt	$1,$25,$0
        	# NOTE(review): this is the only step where multu follows mfhi after
        	# a single instruction; every other mul_add_c2 step in this routine
        	# places two instructions between them.  Confirm the ordering is
        	# intended (the region is assembled under .set reorder).
   1681 	 multu	$10,$12		# mul_add_c2(a[6],b[0],c1,c2,c3);
   1682 	addu	$3,$1
   1683 	sll	$25,1
   1684 	slt	$6,$24,$0
   1685 	addu	$25,$6
   1686 	sll	$24,1
   1687 	addu	$7,$24
   1688 	sltu	$1,$7,$24
   1689 	addu	$25,$1
   1690 	addu	$2,$25
   1691 	sltu	$1,$2,$25
   1692 	addu	$3,$1
   1693 	sw	$7,5*4($4)		# r[5]=c3;
   1694 
        	# column 6: 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3]
   1695 	mflo	$24
   1696 	mfhi	$25
   1697 	slt	$7,$25,$0
   1698 	sll	$25,1
   1699 	multu	$9,$13		# mul_add_c2(a[5],b[1],c1,c2,c3);
   1700 	slt	$6,$24,$0
   1701 	addu	$25,$6
   1702 	sll	$24,1
   1703 	addu	$2,$24
   1704 	sltu	$1,$2,$24
   1705 	addu	$25,$1
   1706 	addu	$3,$25
   1707 	sltu	$1,$3,$25
   1708 	addu	$7,$1
   1709 	mflo	$24
   1710 	mfhi	$25
   1711 	slt	$1,$25,$0
   1712 	addu	$7,$1
   1713 	multu	$8,$14		# mul_add_c2(a[4],b[2],c1,c2,c3);
   1714 	sll	$25,1
   1715 	slt	$6,$24,$0
   1716 	addu	$25,$6
   1717 	sll	$24,1
   1718 	addu	$2,$24
   1719 	sltu	$1,$2,$24
   1720 	addu	$25,$1
   1721 	addu	$3,$25
   1722 	sltu	$1,$3,$25
   1723 	addu	$7,$1
   1724 	mflo	$24
   1725 	mfhi	$25
   1726 	slt	$1,$25,$0
   1727 	addu	$7,$1
   1728 	multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
   1729 	sll	$25,1
   1730 	slt	$6,$24,$0
   1731 	addu	$25,$6
   1732 	sll	$24,1
   1733 	addu	$2,$24
   1734 	sltu	$1,$2,$24
   1735 	addu	$25,$1
   1736 	addu	$3,$25
   1737 	sltu	$1,$3,$25
   1738 	addu	$7,$1
   1739 	mflo	$24
   1740 	mfhi	$25
   1741 	addu	$2,$24
   1742 	sltu	$1,$2,$24
   1743 	 multu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
   1744 	addu	$25,$1
   1745 	addu	$3,$25
   1746 	sltu	$1,$3,$25
   1747 	addu	$7,$1
   1748 	sw	$2,6*4($4)		# r[6]=c1;
   1749 
        	# column 7: 2*a[0]*a[7] + 2*a[1]*a[6] + 2*a[2]*a[5] + 2*a[3]*a[4]
   1750 	mflo	$24
   1751 	mfhi	$25
   1752 	slt	$2,$25,$0
   1753 	sll	$25,1
   1754 	multu	$13,$10		# mul_add_c2(a[1],b[6],c2,c3,c1);
   1755 	slt	$6,$24,$0
   1756 	addu	$25,$6
   1757 	sll	$24,1
   1758 	addu	$3,$24
   1759 	sltu	$1,$3,$24
   1760 	addu	$25,$1
   1761 	addu	$7,$25
   1762 	sltu	$1,$7,$25
   1763 	addu	$2,$1
   1764 	mflo	$24
   1765 	mfhi	$25
   1766 	slt	$1,$25,$0
   1767 	addu	$2,$1
   1768 	multu	$14,$9		# mul_add_c2(a[2],b[5],c2,c3,c1);
   1769 	sll	$25,1
   1770 	slt	$6,$24,$0
   1771 	addu	$25,$6
   1772 	sll	$24,1
   1773 	addu	$3,$24
   1774 	sltu	$1,$3,$24
   1775 	addu	$25,$1
   1776 	addu	$7,$25
   1777 	sltu	$1,$7,$25
   1778 	addu	$2,$1
   1779 	mflo	$24
   1780 	mfhi	$25
   1781 	slt	$1,$25,$0
   1782 	addu	$2,$1
   1783 	multu	$15,$8		# mul_add_c2(a[3],b[4],c2,c3,c1);
   1784 	sll	$25,1
   1785 	slt	$6,$24,$0
   1786 	addu	$25,$6
   1787 	sll	$24,1
   1788 	addu	$3,$24
   1789 	sltu	$1,$3,$24
   1790 	addu	$25,$1
   1791 	addu	$7,$25
   1792 	sltu	$1,$7,$25
   1793 	addu	$2,$1
   1794 	mflo	$24
   1795 	mfhi	$25
   1796 	slt	$1,$25,$0
   1797 	addu	$2,$1
   1798 	 multu	$11,$13		# mul_add_c2(a[7],b[1],c3,c1,c2);
   1799 	sll	$25,1
   1800 	slt	$6,$24,$0
   1801 	addu	$25,$6
   1802 	sll	$24,1
   1803 	addu	$3,$24
   1804 	sltu	$1,$3,$24
   1805 	addu	$25,$1
   1806 	addu	$7,$25
   1807 	sltu	$1,$7,$25
   1808 	addu	$2,$1
   1809 	sw	$3,7*4($4)		# r[7]=c2;
   1810 
        	# column 8: 2*a[7]*a[1] + 2*a[6]*a[2] + 2*a[5]*a[3] + a[4]*a[4]
   1811 	mflo	$24
   1812 	mfhi	$25
   1813 	slt	$3,$25,$0
   1814 	sll	$25,1
   1815 	multu	$10,$14		# mul_add_c2(a[6],b[2],c3,c1,c2);
   1816 	slt	$6,$24,$0
   1817 	addu	$25,$6
   1818 	sll	$24,1
   1819 	addu	$7,$24
   1820 	sltu	$1,$7,$24
   1821 	addu	$25,$1
   1822 	addu	$2,$25
   1823 	sltu	$1,$2,$25
   1824 	addu	$3,$1
   1825 	mflo	$24
   1826 	mfhi	$25
   1827 	slt	$1,$25,$0
   1828 	addu	$3,$1
   1829 	multu	$9,$15		# mul_add_c2(a[5],b[3],c3,c1,c2);
   1830 	sll	$25,1
   1831 	slt	$6,$24,$0
   1832 	addu	$25,$6
   1833 	sll	$24,1
   1834 	addu	$7,$24
   1835 	sltu	$1,$7,$24
   1836 	addu	$25,$1
   1837 	addu	$2,$25
   1838 	sltu	$1,$2,$25
   1839 	addu	$3,$1
   1840 	mflo	$24
   1841 	mfhi	$25
   1842 	slt	$1,$25,$0
   1843 	addu	$3,$1
   1844 	multu	$8,$8		# mul_add_c(a[4],b[4],c3,c1,c2);
   1845 	sll	$25,1
   1846 	slt	$6,$24,$0
   1847 	addu	$25,$6
   1848 	sll	$24,1
   1849 	addu	$7,$24
   1850 	sltu	$1,$7,$24
   1851 	addu	$25,$1
   1852 	addu	$2,$25
   1853 	sltu	$1,$2,$25
   1854 	addu	$3,$1
   1855 	mflo	$24
   1856 	mfhi	$25
   1857 	addu	$7,$24
   1858 	sltu	$1,$7,$24
   1859 	 multu	$14,$11		# mul_add_c2(a[2],b[7],c1,c2,c3);
   1860 	addu	$25,$1
   1861 	addu	$2,$25
   1862 	sltu	$1,$2,$25
   1863 	addu	$3,$1
   1864 	sw	$7,8*4($4)		# r[8]=c3;
   1865 
        	# column 9: 2*a[2]*a[7] + 2*a[3]*a[6] + 2*a[4]*a[5]
   1866 	mflo	$24
   1867 	mfhi	$25
   1868 	slt	$7,$25,$0
   1869 	sll	$25,1
   1870 	multu	$15,$10		# mul_add_c2(a[3],b[6],c1,c2,c3);
   1871 	slt	$6,$24,$0
   1872 	addu	$25,$6
   1873 	sll	$24,1
   1874 	addu	$2,$24
   1875 	sltu	$1,$2,$24
   1876 	addu	$25,$1
   1877 	addu	$3,$25
   1878 	sltu	$1,$3,$25
   1879 	addu	$7,$1
   1880 	mflo	$24
   1881 	mfhi	$25
   1882 	slt	$1,$25,$0
   1883 	addu	$7,$1
   1884 	multu	$8,$9		# mul_add_c2(a[4],b[5],c1,c2,c3);
   1885 	sll	$25,1
   1886 	slt	$6,$24,$0
   1887 	addu	$25,$6
   1888 	sll	$24,1
   1889 	addu	$2,$24
   1890 	sltu	$1,$2,$24
   1891 	addu	$25,$1
   1892 	addu	$3,$25
   1893 	sltu	$1,$3,$25
   1894 	addu	$7,$1
   1895 	mflo	$24
   1896 	mfhi	$25
   1897 	slt	$1,$25,$0
   1898 	addu	$7,$1
   1899 	 multu	$11,$15		# mul_add_c2(a[7],b[3],c2,c3,c1);
   1900 	sll	$25,1
   1901 	slt	$6,$24,$0
   1902 	addu	$25,$6
   1903 	sll	$24,1
   1904 	addu	$2,$24
   1905 	sltu	$1,$2,$24
   1906 	addu	$25,$1
   1907 	addu	$3,$25
   1908 	sltu	$1,$3,$25
   1909 	addu	$7,$1
   1910 	sw	$2,9*4($4)		# r[9]=c1;
   1911 
        	# column 10: 2*a[7]*a[3] + 2*a[6]*a[4] + a[5]*a[5]
   1912 	mflo	$24
   1913 	mfhi	$25
   1914 	slt	$2,$25,$0
   1915 	sll	$25,1
   1916 	multu	$10,$8		# mul_add_c2(a[6],b[4],c2,c3,c1);
   1917 	slt	$6,$24,$0
   1918 	addu	$25,$6
   1919 	sll	$24,1
   1920 	addu	$3,$24
   1921 	sltu	$1,$3,$24
   1922 	addu	$25,$1
   1923 	addu	$7,$25
   1924 	sltu	$1,$7,$25
   1925 	addu	$2,$1
   1926 	mflo	$24
   1927 	mfhi	$25
   1928 	slt	$1,$25,$0
   1929 	addu	$2,$1
   1930 	multu	$9,$9		# mul_add_c(a[5],b[5],c2,c3,c1);
   1931 	sll	$25,1
   1932 	slt	$6,$24,$0
   1933 	addu	$25,$6
   1934 	sll	$24,1
   1935 	addu	$3,$24
   1936 	sltu	$1,$3,$24
   1937 	addu	$25,$1
   1938 	addu	$7,$25
   1939 	sltu	$1,$7,$25
   1940 	addu	$2,$1
   1941 	mflo	$24
   1942 	mfhi	$25
   1943 	addu	$3,$24
   1944 	sltu	$1,$3,$24
   1945 	 multu	$8,$11		# mul_add_c2(a[4],b[7],c3,c1,c2);
   1946 	addu	$25,$1
   1947 	addu	$7,$25
   1948 	sltu	$1,$7,$25
   1949 	addu	$2,$1
   1950 	sw	$3,10*4($4)		# r[10]=c2;
   1951 
        	# column 11: 2*a[4]*a[7] + 2*a[5]*a[6]
   1952 	mflo	$24
   1953 	mfhi	$25
   1954 	slt	$3,$25,$0
   1955 	sll	$25,1
   1956 	multu	$9,$10		# mul_add_c2(a[5],b[6],c3,c1,c2);
   1957 	slt	$6,$24,$0
   1958 	addu	$25,$6
   1959 	sll	$24,1
   1960 	addu	$7,$24
   1961 	sltu	$1,$7,$24
   1962 	addu	$25,$1
   1963 	addu	$2,$25
   1964 	sltu	$1,$2,$25
   1965 	addu	$3,$1
   1966 	mflo	$24
   1967 	mfhi	$25
   1968 	slt	$1,$25,$0
   1969 	addu	$3,$1
   1970 	 multu	$11,$9		# mul_add_c2(a[7],b[5],c1,c2,c3);
   1971 	sll	$25,1
   1972 	slt	$6,$24,$0
   1973 	addu	$25,$6
   1974 	sll	$24,1
   1975 	addu	$7,$24
   1976 	sltu	$1,$7,$24
   1977 	addu	$25,$1
   1978 	addu	$2,$25
   1979 	sltu	$1,$2,$25
   1980 	addu	$3,$1
   1981 	sw	$7,11*4($4)		# r[11]=c3;
   1982 
        	# column 12: 2*a[7]*a[5] + a[6]*a[6]
   1983 	mflo	$24
   1984 	mfhi	$25
   1985 	slt	$7,$25,$0
   1986 	sll	$25,1
   1987 	multu	$10,$10		# mul_add_c(a[6],b[6],c1,c2,c3);
   1988 	slt	$6,$24,$0
   1989 	addu	$25,$6
   1990 	sll	$24,1
   1991 	addu	$2,$24
   1992 	sltu	$1,$2,$24
   1993 	addu	$25,$1
   1994 	addu	$3,$25
   1995 	sltu	$1,$3,$25
   1996 	addu	$7,$1
   1997 	mflo	$24
   1998 	mfhi	$25
   1999 	addu	$2,$24
   2000 	sltu	$1,$2,$24
   2001 	 multu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
   2002 	addu	$25,$1
   2003 	addu	$3,$25
   2004 	sltu	$1,$3,$25
   2005 	addu	$7,$1
   2006 	sw	$2,12*4($4)		# r[12]=c1;
   2007 
        	# column 13: 2*a[6]*a[7]
   2008 	mflo	$24
   2009 	mfhi	$25
   2010 	slt	$2,$25,$0
   2011 	sll	$25,1
   2012 	 multu	$11,$11		# mul_add_c(a[7],b[7],c3,c1,c2);
   2013 	slt	$6,$24,$0
   2014 	addu	$25,$6
   2015 	sll	$24,1
   2016 	addu	$3,$24
   2017 	sltu	$1,$3,$24
   2018 	addu	$25,$1
   2019 	addu	$7,$25
   2020 	sltu	$1,$7,$25
   2021 	addu	$2,$1
   2022 	sw	$3,13*4($4)		# r[13]=c2;
   2023 
        	# column 14: a[7]*a[7]; its high word plus carry becomes r[15]
   2024 	mflo	$24
   2025 	mfhi	$25
   2026 	addu	$7,$24
   2027 	sltu	$1,$7,$24
   2028 	addu	$25,$1
   2029 	addu	$2,$25
   2030 	sw	$7,14*4($4)		# r[14]=c3;
   2031 	sw	$2,15*4($4)		# r[15]=c1;
   2032 
        	# Return to the address in $31; the nop fills the jr delay slot.
   2033 	.set	noreorder
   2034 	jr	$31
   2035 	nop
   2036 .end	bn_sqr_comba8
   2037 
   2038 .align	5
        # bn_sqr_comba4 -- square a 4-word operand, storing an 8-word result.
        #
        # Registers on entry (as used by the code below):
        #   $4 = r : destination, words r[0]..r[7] are stored
        #   $5 = a : source, words a[0]..a[3] are loaded into $12..$15
        # Words are 4 bytes (lw/sw with offsets in multiples of 4).
        # $6 is never read as an input here; it is overwritten as scratch.
        #
        # Working registers:
        #   $2/$3/$7 = c1/c2/c3, a three-word accumulator whose roles rotate
        #              from one result column to the next
        #   $24/$25  = low/high word of the most recent product (mflo/mfhi)
        #   $1       = carry scratch (the file is assembled with .set noat)
        #   $6       = bit 31 of the low product word, carried into the
        #              doubled high word
        #
        # Column k sums the products a[i]*a[j] with i+j == k.  The per-line
        # comments write the second factor as b[j]; both factors come from a.
        # mul_add_c adds a product once; mul_add_c2 adds it twice by shifting
        # the 64-bit product left one bit, with the slt-against-$0
        # instructions capturing the bit 31 values that the shifts push out.
        # A multu written with one extra leading space starts the first
        # product of the next column.
        #
        # Clobbers $1-$3, $6, $7, $12-$15, $24, $25 and HI/LO.  No stack used.
   2039 .globl	bn_sqr_comba4
   2040 .ent	bn_sqr_comba4
   2041 bn_sqr_comba4:
   2042 	.set	reorder
   2043 	lw	$12,0($5)
   2044 	lw	$13,4($5)
   2045 	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
   2046 	lw	$14,2*4($5)
   2047 	lw	$15,3*4($5)
   2048 	mflo	$2
   2049 	mfhi	$3
   2050 	sw	$2,0($4)		# r[0]=c1;
   2051 
        	# column 1: 2*a[0]*a[1]
   2052 	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
   2053 	mflo	$24
   2054 	mfhi	$25
   2055 	slt	$2,$25,$0
   2056 	sll	$25,1
   2057 	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
   2058 	slt	$6,$24,$0
   2059 	addu	$25,$6
   2060 	sll	$24,1
   2061 	addu	$3,$24
   2062 	sltu	$1,$3,$24
   2063 	addu	$7,$25,$1
   2064 	sw	$3,4($4)		# r[1]=c2;
   2065 
        	# column 2: 2*a[2]*a[0] + a[1]*a[1]
   2066 	mflo	$24
   2067 	mfhi	$25
   2068 	slt	$3,$25,$0
   2069 	sll	$25,1
   2070 	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
   2071 	slt	$6,$24,$0
   2072 	addu	$25,$6
   2073 	sll	$24,1
   2074 	addu	$7,$24
   2075 	sltu	$1,$7,$24
   2076 	addu	$25,$1
   2077 	addu	$2,$25
   2078 	sltu	$1,$2,$25
   2079 	addu	$3,$1
   2080 	mflo	$24
   2081 	mfhi	$25
   2082 	addu	$7,$24
   2083 	sltu	$1,$7,$24
   2084 	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
   2085 	addu	$25,$1
   2086 	addu	$2,$25
   2087 	sltu	$1,$2,$25
   2088 	addu	$3,$1
   2089 	sw	$7,2*4($4)		# r[2]=c3;
   2090 
        	# column 3: 2*a[0]*a[3] + 2*a[1]*a[2]
   2091 	mflo	$24
   2092 	mfhi	$25
   2093 	slt	$7,$25,$0
   2094 	sll	$25,1
   2095 	multu	$13,$14		# mul_add_c2(a[1],b[2],c1,c2,c3);
   2096 	slt	$6,$24,$0
   2097 	addu	$25,$6
   2098 	sll	$24,1
   2099 	addu	$2,$24
   2100 	sltu	$1,$2,$24
   2101 	addu	$25,$1
   2102 	addu	$3,$25
   2103 	sltu	$1,$3,$25
   2104 	addu	$7,$1
   2105 	mflo	$24
   2106 	mfhi	$25
   2107 	slt	$1,$25,$0
   2108 	addu	$7,$1
   2109 	 multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
   2110 	sll	$25,1
   2111 	slt	$6,$24,$0
   2112 	addu	$25,$6
   2113 	sll	$24,1
   2114 	addu	$2,$24
   2115 	sltu	$1,$2,$24
   2116 	addu	$25,$1
   2117 	addu	$3,$25
   2118 	sltu	$1,$3,$25
   2119 	addu	$7,$1
   2120 	sw	$2,3*4($4)		# r[3]=c1;
   2121 
        	# column 4: 2*a[3]*a[1] + a[2]*a[2]
   2122 	mflo	$24
   2123 	mfhi	$25
   2124 	slt	$2,$25,$0
   2125 	sll	$25,1
   2126 	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
   2127 	slt	$6,$24,$0
   2128 	addu	$25,$6
   2129 	sll	$24,1
   2130 	addu	$3,$24
   2131 	sltu	$1,$3,$24
   2132 	addu	$25,$1
   2133 	addu	$7,$25
   2134 	sltu	$1,$7,$25
   2135 	addu	$2,$1
   2136 	mflo	$24
   2137 	mfhi	$25
   2138 	addu	$3,$24
   2139 	sltu	$1,$3,$24
   2140 	 multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
   2141 	addu	$25,$1
   2142 	addu	$7,$25
   2143 	sltu	$1,$7,$25
   2144 	addu	$2,$1
   2145 	sw	$3,4*4($4)		# r[4]=c2;
   2146 
        	# column 5: 2*a[2]*a[3]
   2147 	mflo	$24
   2148 	mfhi	$25
   2149 	slt	$3,$25,$0
   2150 	sll	$25,1
   2151 	 multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
   2152 	slt	$6,$24,$0
   2153 	addu	$25,$6
   2154 	sll	$24,1
   2155 	addu	$7,$24
   2156 	sltu	$1,$7,$24
   2157 	addu	$25,$1
   2158 	addu	$2,$25
   2159 	sltu	$1,$2,$25
   2160 	addu	$3,$1
   2161 	sw	$7,5*4($4)		# r[5]=c3;
   2162 
        	# column 6: a[3]*a[3]; its high word plus carry becomes r[7]
   2163 	mflo	$24
   2164 	mfhi	$25
   2165 	addu	$2,$24
   2166 	sltu	$1,$2,$24
   2167 	addu	$25,$1
   2168 	addu	$3,$25
   2169 	sw	$2,6*4($4)		# r[6]=c1;
   2170 	sw	$3,7*4($4)		# r[7]=c2;
   2171 
        	# Return to the address in $31; the nop fills the jr delay slot.
   2172 	.set	noreorder
   2173 	jr	$31
   2174 	nop
   2175 .end	bn_sqr_comba4
   2176