1 .set mips2 2 .rdata 3 .asciiz "mips3.s, Version 1.2" 4 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>" 5 6 .text 7 .set noat 8 9 .align 5 10 .globl bn_mul_add_words 11 .ent bn_mul_add_words 12 bn_mul_add_words: 13 .set noreorder 14 bgtz $6,bn_mul_add_words_internal 15 move $2,$0 16 jr $31 17 move $4,$2 18 .end bn_mul_add_words 19 20 .align 5 21 .ent bn_mul_add_words_internal 22 bn_mul_add_words_internal: 23 .set reorder 24 li $3,-4 25 and $8,$6,$3 26 lw $12,0($5) 27 beqz $8,.L_bn_mul_add_words_tail 28 29 .L_bn_mul_add_words_loop: 30 multu $12,$7 31 lw $13,0($4) 32 lw $14,4($5) 33 lw $15,4($4) 34 lw $8,2*4($5) 35 lw $9,2*4($4) 36 addu $13,$2 37 sltu $2,$13,$2 # All manuals say it "compares 32-bit 38 # values", but it seems to work fine 39 # even on 64-bit registers. 40 mflo $1 41 mfhi $12 42 addu $13,$1 43 addu $2,$12 44 multu $14,$7 45 sltu $1,$13,$1 46 sw $13,0($4) 47 addu $2,$1 48 49 lw $10,3*4($5) 50 lw $11,3*4($4) 51 addu $15,$2 52 sltu $2,$15,$2 53 mflo $1 54 mfhi $14 55 addu $15,$1 56 addu $2,$14 57 multu $8,$7 58 sltu $1,$15,$1 59 sw $15,4($4) 60 addu $2,$1 61 62 subu $6,4 63 addu $4,4*4 64 addu $5,4*4 65 addu $9,$2 66 sltu $2,$9,$2 67 mflo $1 68 mfhi $8 69 addu $9,$1 70 addu $2,$8 71 multu $10,$7 72 sltu $1,$9,$1 73 sw $9,-2*4($4) 74 addu $2,$1 75 76 77 and $8,$6,$3 78 addu $11,$2 79 sltu $2,$11,$2 80 mflo $1 81 mfhi $10 82 addu $11,$1 83 addu $2,$10 84 sltu $1,$11,$1 85 sw $11,-4($4) 86 addu $2,$1 87 .set noreorder 88 bgtzl $8,.L_bn_mul_add_words_loop 89 lw $12,0($5) 90 91 beqz $6,.L_bn_mul_add_words_return 92 nop 93 94 .L_bn_mul_add_words_tail: 95 .set reorder 96 lw $12,0($5) 97 multu $12,$7 98 lw $13,0($4) 99 subu $6,1 100 addu $13,$2 101 sltu $2,$13,$2 102 mflo $1 103 mfhi $12 104 addu $13,$1 105 addu $2,$12 106 sltu $1,$13,$1 107 sw $13,0($4) 108 addu $2,$1 109 beqz $6,.L_bn_mul_add_words_return 110 111 lw $12,4($5) 112 multu $12,$7 113 lw $13,4($4) 114 subu $6,1 115 addu $13,$2 116 sltu $2,$13,$2 117 mflo $1 118 mfhi $12 119 addu $13,$1 120 addu $2,$12 121 sltu $1,$13,$1 122 sw $13,4($4) 123 addu $2,$1 124 beqz $6,.L_bn_mul_add_words_return 125 126 lw $12,2*4($5) 127 multu $12,$7 128 lw $13,2*4($4) 129 addu $13,$2 130 sltu $2,$13,$2 131 mflo $1 132 mfhi $12 133 addu $13,$1 134 addu $2,$12 135 sltu $1,$13,$1 136 sw $13,2*4($4) 137 addu $2,$1 138 139 .L_bn_mul_add_words_return: 140 .set noreorder 141 jr $31 142 move $4,$2 143 .end bn_mul_add_words_internal 144 145 .align 5 146 .globl bn_mul_words 147 .ent bn_mul_words 148 bn_mul_words: 149 .set noreorder 150 bgtz $6,bn_mul_words_internal 151 move $2,$0 152 jr $31 153 move $4,$2 154 .end bn_mul_words 155 156 .align 5 157 .ent bn_mul_words_internal 158 bn_mul_words_internal: 159 .set reorder 160 li $3,-4 161 and $8,$6,$3 162 lw $12,0($5) 163 beqz $8,.L_bn_mul_words_tail 164 165 .L_bn_mul_words_loop: 166 multu $12,$7 167 lw $14,4($5) 168 lw $8,2*4($5) 169 lw $10,3*4($5) 170 mflo $1 171 mfhi $12 172 addu $2,$1 173 sltu $13,$2,$1 174 multu $14,$7 175 sw $2,0($4) 176 addu $2,$13,$12 177 178 subu $6,4 179 addu $4,4*4 180 addu $5,4*4 181 mflo $1 182 mfhi $14 183 addu $2,$1 184 sltu $15,$2,$1 185 multu $8,$7 186 sw $2,-3*4($4) 187 addu $2,$15,$14 188 189 mflo $1 190 mfhi $8 191 addu $2,$1 192 sltu $9,$2,$1 193 multu $10,$7 194 sw $2,-2*4($4) 195 addu $2,$9,$8 196 197 and $8,$6,$3 198 mflo $1 199 mfhi $10 200 addu $2,$1 201 sltu $11,$2,$1 202 sw $2,-4($4) 203 addu $2,$11,$10 204 .set noreorder 205 bgtzl $8,.L_bn_mul_words_loop 206 lw $12,0($5) 207 208 beqz $6,.L_bn_mul_words_return 209 nop 210 211 .L_bn_mul_words_tail: 212 .set reorder 213 lw $12,0($5) 214 multu $12,$7 215 subu $6,1 216 mflo $1 217 mfhi $12 218 addu $2,$1 219 sltu $13,$2,$1 220 sw $2,0($4) 221 addu $2,$13,$12 222 beqz $6,.L_bn_mul_words_return 223 224 lw $12,4($5) 225 multu $12,$7 226 subu $6,1 227 mflo $1 228 mfhi $12 229 addu $2,$1 230 sltu $13,$2,$1 231 sw $2,4($4) 232 addu $2,$13,$12 233 beqz $6,.L_bn_mul_words_return 234 235 lw $12,2*4($5) 236 multu $12,$7 237 mflo $1 238 mfhi $12 239 addu $2,$1 240 sltu $13,$2,$1 241 sw $2,2*4($4) 242 addu $2,$13,$12 243 244 .L_bn_mul_words_return: 245 .set noreorder 246 jr $31 247 move $4,$2 248 .end bn_mul_words_internal 249 250 .align 5 251 .globl bn_sqr_words 252 .ent bn_sqr_words 253 bn_sqr_words: 254 .set noreorder 255 bgtz $6,bn_sqr_words_internal 256 move $2,$0 257 jr $31 258 move $4,$2 259 .end bn_sqr_words 260 261 .align 5 262 .ent bn_sqr_words_internal 263 bn_sqr_words_internal: 264 .set reorder 265 li $3,-4 266 and $8,$6,$3 267 lw $12,0($5) 268 beqz $8,.L_bn_sqr_words_tail 269 270 .L_bn_sqr_words_loop: 271 multu $12,$12 272 lw $14,4($5) 273 lw $8,2*4($5) 274 lw $10,3*4($5) 275 mflo $13 276 mfhi $12 277 sw $13,0($4) 278 sw $12,4($4) 279 280 multu $14,$14 281 subu $6,4 282 addu $4,8*4 283 addu $5,4*4 284 mflo $15 285 mfhi $14 286 sw $15,-6*4($4) 287 sw $14,-5*4($4) 288 289 multu $8,$8 290 mflo $9 291 mfhi $8 292 sw $9,-4*4($4) 293 sw $8,-3*4($4) 294 295 296 multu $10,$10 297 and $8,$6,$3 298 mflo $11 299 mfhi $10 300 sw $11,-2*4($4) 301 sw $10,-4($4) 302 303 .set noreorder 304 bgtzl $8,.L_bn_sqr_words_loop 305 lw $12,0($5) 306 307 beqz $6,.L_bn_sqr_words_return 308 nop 309 310 .L_bn_sqr_words_tail: 311 .set reorder 312 lw $12,0($5) 313 multu $12,$12 314 subu $6,1 315 mflo $13 316 mfhi $12 317 sw $13,0($4) 318 sw $12,4($4) 319 beqz $6,.L_bn_sqr_words_return 320 321 lw $12,4($5) 322 multu $12,$12 323 subu $6,1 324 mflo $13 325 mfhi $12 326 sw $13,2*4($4) 327 sw $12,3*4($4) 328 beqz $6,.L_bn_sqr_words_return 329 330 lw $12,2*4($5) 331 multu $12,$12 332 mflo $13 333 mfhi $12 334 sw $13,4*4($4) 335 sw $12,5*4($4) 336 337 .L_bn_sqr_words_return: 338 .set noreorder 339 jr $31 340 move $4,$2 341 342 .end bn_sqr_words_internal 343 344 .align 5 345 .globl bn_add_words 346 .ent bn_add_words 347 bn_add_words: 348 .set noreorder 349 bgtz $7,bn_add_words_internal 350 move $2,$0 351 jr $31 352 move $4,$2 353 .end bn_add_words 354 355 .align 5 356 .ent bn_add_words_internal 357 bn_add_words_internal: 358 .set reorder 359 li $3,-4 360 and $1,$7,$3 361 lw $12,0($5) 362 beqz $1,.L_bn_add_words_tail 363 364 .L_bn_add_words_loop: 365 lw $8,0($6) 366 subu $7,4 367 lw $13,4($5) 368 and $1,$7,$3 369 lw $14,2*4($5) 370 addu $6,4*4 371 lw $15,3*4($5) 372 addu $4,4*4 373 lw $9,-3*4($6) 374 addu $5,4*4 375 lw $10,-2*4($6) 376 lw $11,-4($6) 377 addu $8,$12 378 sltu $24,$8,$12 379 addu $12,$8,$2 380 sltu $2,$12,$8 381 sw $12,-4*4($4) 382 addu $2,$24 383 384 addu $9,$13 385 sltu $25,$9,$13 386 addu $13,$9,$2 387 sltu $2,$13,$9 388 sw $13,-3*4($4) 389 addu $2,$25 390 391 addu $10,$14 392 sltu $24,$10,$14 393 addu $14,$10,$2 394 sltu $2,$14,$10 395 sw $14,-2*4($4) 396 addu $2,$24 397 398 addu $11,$15 399 sltu $25,$11,$15 400 addu $15,$11,$2 401 sltu $2,$15,$11 402 sw $15,-4($4) 403 addu $2,$25 404 405 .set noreorder 406 bgtzl $1,.L_bn_add_words_loop 407 lw $12,0($5) 408 409 beqz $7,.L_bn_add_words_return 410 nop 411 412 .L_bn_add_words_tail: 413 .set reorder 414 lw $12,0($5) 415 lw $8,0($6) 416 addu $8,$12 417 subu $7,1 418 sltu $24,$8,$12 419 addu $12,$8,$2 420 sltu $2,$12,$8 421 sw $12,0($4) 422 addu $2,$24 423 beqz $7,.L_bn_add_words_return 424 425 lw $13,4($5) 426 lw $9,4($6) 427 addu $9,$13 428 subu $7,1 429 sltu $25,$9,$13 430 addu $13,$9,$2 431 sltu $2,$13,$9 432 sw $13,4($4) 433 addu $2,$25 434 beqz $7,.L_bn_add_words_return 435 436 lw $14,2*4($5) 437 lw $10,2*4($6) 438 addu $10,$14 439 sltu $24,$10,$14 440 addu $14,$10,$2 441 sltu $2,$14,$10 442 sw $14,2*4($4) 443 addu $2,$24 444 445 .L_bn_add_words_return: 446 .set noreorder 447 jr $31 448 move $4,$2 449 450 .end bn_add_words_internal 451 452 .align 5 453 .globl bn_sub_words 454 .ent bn_sub_words 455 bn_sub_words: 456 .set noreorder 457 bgtz $7,bn_sub_words_internal 458 move $2,$0 459 jr $31 460 move $4,$0 461 .end bn_sub_words 462 463 .align 5 464 .ent bn_sub_words_internal 465 bn_sub_words_internal: 466 .set reorder 467 li $3,-4 468 and $1,$7,$3 469 lw $12,0($5) 470 beqz $1,.L_bn_sub_words_tail 471 472 .L_bn_sub_words_loop: 473 lw $8,0($6) 474 subu $7,4 475 lw $13,4($5) 476 and $1,$7,$3 477 lw $14,2*4($5) 478 addu $6,4*4 479 lw $15,3*4($5) 480 addu $4,4*4 481 lw $9,-3*4($6) 482 addu $5,4*4 483 lw $10,-2*4($6) 484 lw $11,-4($6) 485 sltu $24,$12,$8 486 subu $8,$12,$8 487 subu $12,$8,$2 488 sgtu $2,$12,$8 489 sw $12,-4*4($4) 490 addu $2,$24 491 492 sltu $25,$13,$9 493 subu $9,$13,$9 494 subu $13,$9,$2 495 sgtu $2,$13,$9 496 sw $13,-3*4($4) 497 addu $2,$25 498 499 500 sltu $24,$14,$10 501 subu $10,$14,$10 502 subu $14,$10,$2 503 sgtu $2,$14,$10 504 sw $14,-2*4($4) 505 addu $2,$24 506 507 sltu $25,$15,$11 508 subu $11,$15,$11 509 subu $15,$11,$2 510 sgtu $2,$15,$11 511 sw $15,-4($4) 512 addu $2,$25 513 514 .set noreorder 515 bgtzl $1,.L_bn_sub_words_loop 516 lw $12,0($5) 517 518 beqz $7,.L_bn_sub_words_return 519 nop 520 521 .L_bn_sub_words_tail: 522 .set reorder 523 lw $12,0($5) 524 lw $8,0($6) 525 subu $7,1 526 sltu $24,$12,$8 527 subu $8,$12,$8 528 subu $12,$8,$2 529 sgtu $2,$12,$8 530 sw $12,0($4) 531 addu $2,$24 532 beqz $7,.L_bn_sub_words_return 533 534 lw $13,4($5) 535 subu $7,1 536 lw $9,4($6) 537 sltu $25,$13,$9 538 subu $9,$13,$9 539 subu $13,$9,$2 540 sgtu $2,$13,$9 541 sw $13,4($4) 542 addu $2,$25 543 beqz $7,.L_bn_sub_words_return 544 545 lw $14,2*4($5) 546 lw $10,2*4($6) 547 sltu $24,$14,$10 548 subu $10,$14,$10 549 subu $14,$10,$2 550 sgtu $2,$14,$10 551 sw $14,2*4($4) 552 addu $2,$24 553 554 .L_bn_sub_words_return: 555 .set noreorder 556 jr $31 557 move $4,$2 558 .end bn_sub_words_internal 559 560 .align 5 561 .globl bn_div_3_words 562 .ent bn_div_3_words 563 bn_div_3_words: 564 .set noreorder 565 move $7,$4 # we know that bn_div_words does not 566 # touch $7, $10, $11 and preserves $6 567 # so that we can save two arguments 568 # and return address in registers 569 # instead of stack:-) 570 571 lw $4,($7) 572 move $10,$5 573 bne $4,$6,bn_div_3_words_internal 574 lw $5,-4($7) 575 li $2,-1 576 jr $31 577 move $4,$2 578 .end bn_div_3_words 579 580 .align 5 581 .ent bn_div_3_words_internal 582 bn_div_3_words_internal: 583 .set reorder 584 move $11,$31 585 bal bn_div_words_internal 586 move $31,$11 587 multu $10,$2 588 lw $14,-2*4($7) 589 move $8,$0 590 mfhi $13 591 mflo $12 592 sltu $24,$13,$5 593 .L_bn_div_3_words_inner_loop: 594 bnez $24,.L_bn_div_3_words_inner_loop_done 595 sgeu $1,$14,$12 596 seq $25,$13,$5 597 and $1,$25 598 sltu $15,$12,$10 599 addu $5,$6 600 subu $13,$15 601 subu $12,$10 602 sltu $24,$13,$5 603 sltu $8,$5,$6 604 or $24,$8 605 .set noreorder 606 beqzl $1,.L_bn_div_3_words_inner_loop 607 subu $2,1 608 .set reorder 609 .L_bn_div_3_words_inner_loop_done: 610 .set noreorder 611 jr $31 612 move $4,$2 613 .end bn_div_3_words_internal 614 615 .align 5 616 .globl bn_div_words 617 .ent bn_div_words 618 bn_div_words: 619 .set noreorder 620 bnez $6,bn_div_words_internal 621 li $2,-1 # I would rather signal div-by-zero 622 # which can be done with 'break 7' 623 jr $31 624 move $4,$2 625 .end bn_div_words 626 627 .align 5 628 .ent bn_div_words_internal 629 bn_div_words_internal: 630 move $3,$0 631 bltz $6,.L_bn_div_words_body 632 move $25,$3 633 sll $6,1 634 bgtz $6,.-4 635 addu $25,1 636 637 .set reorder 638 negu $13,$25 639 li $14,-1 640 sll $14,$13 641 and $14,$4 642 srl $1,$5,$13 643 .set noreorder 644 bnezl $14,.+8 645 break 6 # signal overflow 646 .set reorder 647 sll $4,$25 648 sll $5,$25 649 or $4,$1 650 .L_bn_div_words_body: 651 srl $3,$6,4*4 # bits 652 sgeu $1,$4,$6 653 .set noreorder 654 bnezl $1,.+8 655 subu $4,$6 656 .set reorder 657 658 li $8,-1 659 srl $9,$4,4*4 # bits 660 srl $8,4*4 # q=0xffffffff 661 beq $3,$9,.L_bn_div_words_skip_div1 662 divu $0,$4,$3 663 mflo $8 664 .L_bn_div_words_skip_div1: 665 multu $6,$8 666 sll $15,$4,4*4 # bits 667 srl $1,$5,4*4 # bits 668 or $15,$1 669 mflo $12 670 mfhi $13 671 .L_bn_div_words_inner_loop1: 672 sltu $14,$15,$12 673 seq $24,$9,$13 674 sltu $1,$9,$13 675 and $14,$24 676 sltu $2,$12,$6 677 or $1,$14 678 .set noreorder 679 beqz $1,.L_bn_div_words_inner_loop1_done 680 subu $13,$2 681 subu $12,$6 682 b .L_bn_div_words_inner_loop1 683 subu $8,1 684 .set reorder 685 .L_bn_div_words_inner_loop1_done: 686 687 sll $5,4*4 # bits 688 subu $4,$15,$12 689 sll $2,$8,4*4 # bits 690 691 li $8,-1 692 srl $9,$4,4*4 # bits 693 srl $8,4*4 # q=0xffffffff 694 beq $3,$9,.L_bn_div_words_skip_div2 695 divu $0,$4,$3 696 mflo $8 697 .L_bn_div_words_skip_div2: 698 multu $6,$8 699 sll $15,$4,4*4 # bits 700 srl $1,$5,4*4 # bits 701 or $15,$1 702 mflo $12 703 mfhi $13 704 .L_bn_div_words_inner_loop2: 705 sltu $14,$15,$12 706 seq $24,$9,$13 707 sltu $1,$9,$13 708 and $14,$24 709 sltu $3,$12,$6 710 or $1,$14 711 .set noreorder 712 beqz $1,.L_bn_div_words_inner_loop2_done 713 subu $13,$3 714 subu $12,$6 715 b .L_bn_div_words_inner_loop2 716 subu $8,1 717 .set reorder 718 .L_bn_div_words_inner_loop2_done: 719 720 subu $4,$15,$12 721 or $2,$8 722 srl $3,$4,$25 # $3 contains remainder if anybody wants it 723 srl $6,$25 # restore $6 724 725 .set noreorder 726 move $5,$3 727 jr $31 728 move $4,$2 729 .end bn_div_words_internal 730 731 .align 5 732 .globl bn_mul_comba8 733 .ent bn_mul_comba8 734 bn_mul_comba8: 735 .set noreorder 736 .frame $29,6*4,$31 737 .mask 0x003f0000,-4 738 subu $29,6*4 739 sw $21,5*4($29) 740 sw $20,4*4($29) 741 sw $19,3*4($29) 742 sw $18,2*4($29) 743 sw $17,1*4($29) 744 sw $16,0*4($29) 745 746 .set reorder 747 lw $12,0($5) # If compiled with -mips3 option on 748 # R5000 box assembler barks on this 749 # 1ine with "should not have mult/div 750 # as last instruction in bb (R10K 751 # bug)" warning. If anybody out there 752 # has a clue about how to circumvent 753 # this do send me a note. 754 # <appro@fy.chalmers.se> 755 756 lw $8,0($6) 757 lw $13,4($5) 758 lw $14,2*4($5) 759 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); 760 lw $15,3*4($5) 761 lw $9,4($6) 762 lw $10,2*4($6) 763 lw $11,3*4($6) 764 mflo $2 765 mfhi $3 766 767 lw $16,4*4($5) 768 lw $18,5*4($5) 769 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); 770 lw $20,6*4($5) 771 lw $5,7*4($5) 772 lw $17,4*4($6) 773 lw $19,5*4($6) 774 mflo $24 775 mfhi $25 776 addu $3,$24 777 sltu $1,$3,$24 778 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); 779 addu $7,$25,$1 780 lw $21,6*4($6) 781 lw $6,7*4($6) 782 sw $2,0($4) # r[0]=c1; 783 mflo $24 784 mfhi $25 785 addu $3,$24 786 sltu $1,$3,$24 787 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); 788 addu $25,$1 789 addu $7,$25 790 sltu $2,$7,$25 791 sw $3,4($4) # r[1]=c2; 792 793 mflo $24 794 mfhi $25 795 addu $7,$24 796 sltu $1,$7,$24 797 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); 798 addu $25,$1 799 addu $2,$25 800 mflo $24 801 mfhi $25 802 addu $7,$24 803 sltu $1,$7,$24 804 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); 805 addu $25,$1 806 addu $2,$25 807 sltu $3,$2,$25 808 mflo $24 809 mfhi $25 810 addu $7,$24 811 sltu $1,$7,$24 812 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); 813 addu $25,$1 814 addu $2,$25 815 sltu $1,$2,$25 816 addu $3,$1 817 sw $7,2*4($4) # r[2]=c3; 818 819 mflo $24 820 mfhi $25 821 addu $2,$24 822 sltu $1,$2,$24 823 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); 824 addu $25,$1 825 addu $3,$25 826 sltu $7,$3,$25 827 mflo $24 828 mfhi $25 829 addu $2,$24 830 sltu $1,$2,$24 831 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); 832 addu $25,$1 833 addu $3,$25 834 sltu $1,$3,$25 835 addu $7,$1 836 mflo $24 837 mfhi $25 838 addu $2,$24 839 sltu $1,$2,$24 840 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); 841 addu $25,$1 842 addu $3,$25 843 sltu $1,$3,$25 844 addu $7,$1 845 mflo $24 846 mfhi $25 847 addu $2,$24 848 sltu $1,$2,$24 849 multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); 850 addu $25,$1 851 addu $3,$25 852 sltu $1,$3,$25 853 addu $7,$1 854 sw $2,3*4($4) # r[3]=c1; 855 856 mflo $24 857 mfhi $25 858 addu $3,$24 859 sltu $1,$3,$24 860 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); 861 addu $25,$1 862 addu $7,$25 863 sltu $2,$7,$25 864 mflo $24 865 mfhi $25 866 addu $3,$24 867 sltu $1,$3,$24 868 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); 869 addu $25,$1 870 addu $7,$25 871 sltu $1,$7,$25 872 addu $2,$1 873 mflo $24 874 mfhi $25 875 addu $3,$24 876 sltu $1,$3,$24 877 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); 878 addu $25,$1 879 addu $7,$25 880 sltu $1,$7,$25 881 addu $2,$1 882 mflo $24 883 mfhi $25 884 addu $3,$24 885 sltu $1,$3,$24 886 multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); 887 addu $25,$1 888 addu $7,$25 889 sltu $1,$7,$25 890 addu $2,$1 891 mflo $24 892 mfhi $25 893 addu $3,$24 894 sltu $1,$3,$24 895 multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); 896 addu $25,$1 897 addu $7,$25 898 sltu $1,$7,$25 899 addu $2,$1 900 sw $3,4*4($4) # r[4]=c2; 901 902 mflo $24 903 mfhi $25 904 addu $7,$24 905 sltu $1,$7,$24 906 multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); 907 addu $25,$1 908 addu $2,$25 909 sltu $3,$2,$25 910 mflo $24 911 mfhi $25 912 addu $7,$24 913 sltu $1,$7,$24 914 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); 915 addu $25,$1 916 addu $2,$25 917 sltu $1,$2,$25 918 addu $3,$1 919 mflo $24 920 mfhi $25 921 addu $7,$24 922 sltu $1,$7,$24 923 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); 924 addu $25,$1 925 addu $2,$25 926 sltu $1,$2,$25 927 addu $3,$1 928 mflo $24 929 mfhi $25 930 addu $7,$24 931 sltu $1,$7,$24 932 multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); 933 addu $25,$1 934 addu $2,$25 935 sltu $1,$2,$25 936 addu $3,$1 937 mflo $24 938 mfhi $25 939 addu $7,$24 940 sltu $1,$7,$24 941 multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); 942 addu $25,$1 943 addu $2,$25 944 sltu $1,$2,$25 945 addu $3,$1 946 mflo $24 947 mfhi $25 948 addu $7,$24 949 sltu $1,$7,$24 950 multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); 951 addu $25,$1 952 addu $2,$25 953 sltu $1,$2,$25 954 addu $3,$1 955 sw $7,5*4($4) # r[5]=c3; 956 957 mflo $24 958 mfhi $25 959 addu $2,$24 960 sltu $1,$2,$24 961 multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); 962 addu $25,$1 963 addu $3,$25 964 sltu $7,$3,$25 965 mflo $24 966 mfhi $25 967 addu $2,$24 968 sltu $1,$2,$24 969 multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); 970 addu $25,$1 971 addu $3,$25 972 sltu $1,$3,$25 973 addu $7,$1 974 mflo $24 975 mfhi $25 976 addu $2,$24 977 sltu $1,$2,$24 978 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); 979 addu $25,$1 980 addu $3,$25 981 sltu $1,$3,$25 982 addu $7,$1 983 mflo $24 984 mfhi $25 985 addu $2,$24 986 sltu $1,$2,$24 987 multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); 988 addu $25,$1 989 addu $3,$25 990 sltu $1,$3,$25 991 addu $7,$1 992 mflo $24 993 mfhi $25 994 addu $2,$24 995 sltu $1,$2,$24 996 multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); 997 addu $25,$1 998 addu $3,$25 999 sltu $1,$3,$25 1000 addu $7,$1 1001 mflo $24 1002 mfhi $25 1003 addu $2,$24 1004 sltu $1,$2,$24 1005 multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); 1006 addu $25,$1 1007 addu $3,$25 1008 sltu $1,$3,$25 1009 addu $7,$1 1010 mflo $24 1011 mfhi $25 1012 addu $2,$24 1013 sltu $1,$2,$24 1014 multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); 1015 addu $25,$1 1016 addu $3,$25 1017 sltu $1,$3,$25 1018 addu $7,$1 1019 sw $2,6*4($4) # r[6]=c1; 1020 1021 mflo $24 1022 mfhi $25 1023 addu $3,$24 1024 sltu $1,$3,$24 1025 multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); 1026 addu $25,$1 1027 addu $7,$25 1028 sltu $2,$7,$25 1029 mflo $24 1030 mfhi $25 1031 addu $3,$24 1032 sltu $1,$3,$24 1033 multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); 1034 addu $25,$1 1035 addu $7,$25 1036 sltu $1,$7,$25 1037 addu $2,$1 1038 mflo $24 1039 mfhi $25 1040 addu $3,$24 1041 sltu $1,$3,$24 1042 multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); 1043 addu $25,$1 1044 addu $7,$25 1045 sltu $1,$7,$25 1046 addu $2,$1 1047 mflo $24 1048 mfhi $25 1049 addu $3,$24 1050 sltu $1,$3,$24 1051 multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); 1052 addu $25,$1 1053 addu $7,$25 1054 sltu $1,$7,$25 1055 addu $2,$1 1056 mflo $24 1057 mfhi $25 1058 addu $3,$24 1059 sltu $1,$3,$24 1060 multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); 1061 addu $25,$1 1062 addu $7,$25 1063 sltu $1,$7,$25 1064 addu $2,$1 1065 mflo $24 1066 mfhi $25 1067 addu $3,$24 1068 sltu $1,$3,$24 1069 multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); 1070 addu $25,$1 1071 addu $7,$25 1072 sltu $1,$7,$25 1073 addu $2,$1 1074 mflo $24 1075 mfhi $25 1076 addu $3,$24 1077 sltu $1,$3,$24 1078 multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); 1079 addu $25,$1 1080 addu $7,$25 1081 sltu $1,$7,$25 1082 addu $2,$1 1083 mflo $24 1084 mfhi $25 1085 addu $3,$24 1086 sltu $1,$3,$24 1087 multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); 1088 addu $25,$1 1089 addu $7,$25 1090 sltu $1,$7,$25 1091 addu $2,$1 1092 sw $3,7*4($4) # r[7]=c2; 1093 1094 mflo $24 1095 mfhi $25 1096 addu $7,$24 1097 sltu $1,$7,$24 1098 multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); 1099 addu $25,$1 1100 addu $2,$25 1101 sltu $3,$2,$25 1102 mflo $24 1103 mfhi $25 1104 addu $7,$24 1105 sltu $1,$7,$24 1106 multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); 1107 addu $25,$1 1108 addu $2,$25 1109 sltu $1,$2,$25 1110 addu $3,$1 1111 mflo $24 1112 mfhi $25 1113 addu $7,$24 1114 sltu $1,$7,$24 1115 multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); 1116 addu $25,$1 1117 addu $2,$25 1118 sltu $1,$2,$25 1119 addu $3,$1 1120 mflo $24 1121 mfhi $25 1122 addu $7,$24 1123 sltu $1,$7,$24 1124 multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); 1125 addu $25,$1 1126 addu $2,$25 1127 sltu $1,$2,$25 1128 addu $3,$1 1129 mflo $24 1130 mfhi $25 1131 addu $7,$24 1132 sltu $1,$7,$24 1133 multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); 1134 addu $25,$1 1135 addu $2,$25 1136 sltu $1,$2,$25 1137 addu $3,$1 1138 mflo $24 1139 mfhi $25 1140 addu $7,$24 1141 sltu $1,$7,$24 1142 multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); 1143 addu $25,$1 1144 addu $2,$25 1145 sltu $1,$2,$25 1146 addu $3,$1 1147 mflo $24 1148 mfhi $25 1149 addu $7,$24 1150 sltu $1,$7,$24 1151 multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); 1152 addu $25,$1 1153 addu $2,$25 1154 sltu $1,$2,$25 1155 addu $3,$1 1156 sw $7,8*4($4) # r[8]=c3; 1157 1158 mflo $24 1159 mfhi $25 1160 addu $2,$24 1161 sltu $1,$2,$24 1162 multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); 1163 addu $25,$1 1164 addu $3,$25 1165 sltu $7,$3,$25 1166 mflo $24 1167 mfhi $25 1168 addu $2,$24 1169 sltu $1,$2,$24 1170 multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); 1171 addu $25,$1 1172 addu $3,$25 1173 sltu $1,$3,$25 1174 addu $7,$1 1175 mflo $24 1176 mfhi $25 1177 addu $2,$24 1178 sltu $1,$2,$24 1179 multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); 1180 addu $25,$1 1181 addu $3,$25 1182 sltu $1,$3,$25 1183 addu $7,$1 1184 mflo $24 1185 mfhi $25 1186 addu $2,$24 1187 sltu $1,$2,$24 1188 multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); 1189 addu $25,$1 1190 addu $3,$25 1191 sltu $1,$3,$25 1192 addu $7,$1 1193 mflo $24 1194 mfhi $25 1195 addu $2,$24 1196 sltu $1,$2,$24 1197 multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); 1198 addu $25,$1 1199 addu $3,$25 1200 sltu $1,$3,$25 1201 addu $7,$1 1202 mflo $24 1203 mfhi $25 1204 addu $2,$24 1205 sltu $1,$2,$24 1206 multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); 1207 addu $25,$1 1208 addu $3,$25 1209 sltu $1,$3,$25 1210 addu $7,$1 1211 sw $2,9*4($4) # r[9]=c1; 1212 1213 mflo $24 1214 mfhi $25 1215 addu $3,$24 1216 sltu $1,$3,$24 1217 multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); 1218 addu $25,$1 1219 addu $7,$25 1220 sltu $2,$7,$25 1221 mflo $24 1222 mfhi $25 1223 addu $3,$24 1224 sltu $1,$3,$24 1225 multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); 1226 addu $25,$1 1227 addu $7,$25 1228 sltu $1,$7,$25 1229 addu $2,$1 1230 mflo $24 1231 mfhi $25 1232 addu $3,$24 1233 sltu $1,$3,$24 1234 multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); 1235 addu $25,$1 1236 addu $7,$25 1237 sltu $1,$7,$25 1238 addu $2,$1 1239 mflo $24 1240 mfhi $25 1241 addu $3,$24 1242 sltu $1,$3,$24 1243 multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); 1244 addu $25,$1 1245 addu $7,$25 1246 sltu $1,$7,$25 1247 addu $2,$1 1248 mflo $24 1249 mfhi $25 1250 addu $3,$24 1251 sltu $1,$3,$24 1252 multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); 1253 addu $25,$1 1254 addu $7,$25 1255 sltu $1,$7,$25 1256 addu $2,$1 1257 sw $3,10*4($4) # r[10]=c2; 1258 1259 mflo $24 1260 mfhi $25 1261 addu $7,$24 1262 sltu $1,$7,$24 1263 multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); 1264 addu $25,$1 1265 addu $2,$25 1266 sltu $3,$2,$25 1267 mflo $24 1268 mfhi $25 1269 addu $7,$24 1270 sltu $1,$7,$24 1271 multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); 1272 addu $25,$1 1273 addu $2,$25 1274 sltu $1,$2,$25 1275 addu $3,$1 1276 mflo $24 1277 mfhi $25 1278 addu $7,$24 1279 sltu $1,$7,$24 1280 multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); 1281 addu $25,$1 1282 addu $2,$25 1283 sltu $1,$2,$25 1284 addu $3,$1 1285 mflo $24 1286 mfhi $25 1287 addu $7,$24 1288 sltu $1,$7,$24 1289 multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); 1290 addu $25,$1 1291 addu $2,$25 1292 sltu $1,$2,$25 1293 addu $3,$1 1294 sw $7,11*4($4) # r[11]=c3; 1295 1296 mflo $24 1297 mfhi $25 1298 addu $2,$24 1299 sltu $1,$2,$24 1300 multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); 1301 addu $25,$1 1302 addu $3,$25 1303 sltu $7,$3,$25 1304 mflo $24 1305 mfhi $25 1306 addu $2,$24 1307 sltu $1,$2,$24 1308 multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); 1309 addu $25,$1 1310 addu $3,$25 1311 sltu $1,$3,$25 1312 addu $7,$1 1313 mflo $24 1314 mfhi $25 1315 addu $2,$24 1316 sltu $1,$2,$24 1317 multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); 1318 addu $25,$1 1319 addu $3,$25 1320 sltu $1,$3,$25 1321 addu $7,$1 1322 sw $2,12*4($4) # r[12]=c1; 1323 1324 mflo $24 1325 mfhi $25 1326 addu $3,$24 1327 sltu $1,$3,$24 1328 multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); 1329 addu $25,$1 1330 addu $7,$25 1331 sltu $2,$7,$25 1332 mflo $24 1333 mfhi $25 1334 addu $3,$24 1335 sltu $1,$3,$24 1336 multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); 1337 addu $25,$1 1338 addu $7,$25 1339 sltu $1,$7,$25 1340 addu $2,$1 1341 sw $3,13*4($4) # r[13]=c2; 1342 1343 mflo $24 1344 mfhi $25 1345 addu $7,$24 1346 sltu $1,$7,$24 1347 addu $25,$1 1348 addu $2,$25 1349 sw $7,14*4($4) # r[14]=c3; 1350 sw $2,15*4($4) # r[15]=c1; 1351 1352 .set noreorder 1353 lw $21,5*4($29) 1354 lw $20,4*4($29) 1355 lw $19,3*4($29) 1356 lw $18,2*4($29) 1357 lw $17,1*4($29) 1358 lw $16,0*4($29) 1359 jr $31 1360 addu $29,6*4 1361 .end bn_mul_comba8 1362 1363 .align 5 1364 .globl bn_mul_comba4 1365 .ent bn_mul_comba4 1366 bn_mul_comba4: 1367 .set reorder 1368 lw $12,0($5) 1369 lw $8,0($6) 1370 lw $13,4($5) 1371 lw $14,2*4($5) 1372 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); 1373 lw $15,3*4($5) 1374 lw $9,4($6) 1375 lw $10,2*4($6) 1376 lw $11,3*4($6) 1377 mflo $2 1378 mfhi $3 1379 sw $2,0($4) 1380 1381 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); 1382 mflo $24 1383 mfhi $25 1384 addu $3,$24 1385 sltu $1,$3,$24 1386 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); 1387 addu $7,$25,$1 1388 mflo $24 1389 mfhi $25 1390 addu $3,$24 1391 sltu $1,$3,$24 1392 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); 1393 addu $25,$1 1394 addu $7,$25 1395 sltu $2,$7,$25 1396 sw $3,4($4) 1397 1398 mflo $24 1399 mfhi $25 1400 addu $7,$24 1401 sltu $1,$7,$24 1402 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); 1403 addu $25,$1 1404 addu $2,$25 1405 mflo $24 1406 mfhi $25 1407 addu $7,$24 1408 sltu $1,$7,$24 1409 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); 1410 addu $25,$1 1411 addu $2,$25 1412 sltu $3,$2,$25 1413 mflo $24 1414 mfhi $25 1415 addu $7,$24 1416 sltu $1,$7,$24 1417 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); 1418 addu $25,$1 1419 addu $2,$25 1420 sltu $1,$2,$25 1421 addu $3,$1 1422 sw $7,2*4($4) 1423 1424 mflo $24 1425 mfhi $25 1426 addu $2,$24 1427 sltu $1,$2,$24 1428 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); 1429 addu $25,$1 1430 addu $3,$25 1431 sltu $7,$3,$25 1432 mflo $24 1433 mfhi $25 1434 addu $2,$24 1435 sltu $1,$2,$24 1436 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); 1437 addu $25,$1 1438 addu $3,$25 1439 sltu $1,$3,$25 1440 addu $7,$1 1441 mflo $24 1442 mfhi $25 1443 addu $2,$24 1444 sltu $1,$2,$24 1445 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); 1446 addu $25,$1 1447 addu $3,$25 1448 sltu $1,$3,$25 1449 addu $7,$1 1450 mflo $24 1451 mfhi $25 1452 addu $2,$24 1453 sltu $1,$2,$24 1454 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); 1455 addu $25,$1 1456 addu $3,$25 1457 sltu $1,$3,$25 1458 addu $7,$1 1459 sw $2,3*4($4) 1460 1461 mflo $24 1462 mfhi $25 1463 addu $3,$24 1464 sltu $1,$3,$24 1465 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); 1466 addu $25,$1 1467 addu $7,$25 1468 sltu $2,$7,$25 1469 mflo $24 1470 mfhi $25 1471 addu $3,$24 1472 sltu $1,$3,$24 1473 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); 1474 addu $25,$1 1475 addu $7,$25 1476 sltu $1,$7,$25 1477 addu $2,$1 1478 mflo $24 1479 mfhi $25 1480 addu $3,$24 1481 sltu $1,$3,$24 1482 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); 1483 addu $25,$1 1484 addu $7,$25 1485 sltu $1,$7,$25 1486 addu $2,$1 1487 sw $3,4*4($4) 1488 1489 mflo $24 1490 mfhi $25 1491 addu $7,$24 1492 sltu $1,$7,$24 1493 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); 1494 addu $25,$1 1495 addu $2,$25 1496 sltu $3,$2,$25 1497 mflo $24 1498 mfhi $25 1499 addu $7,$24 1500 sltu $1,$7,$24 1501 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); 1502 addu $25,$1 1503 addu $2,$25 1504 sltu $1,$2,$25 1505 addu $3,$1 1506 sw $7,5*4($4) 1507 1508 mflo $24 1509 mfhi $25 1510 addu $2,$24 1511 sltu $1,$2,$24 1512 addu $25,$1 1513 addu $3,$25 1514 sw $2,6*4($4) 1515 sw $3,7*4($4) 1516 1517 .set noreorder 1518 jr $31 1519 nop 1520 .end bn_mul_comba4 1521 1522 .align 5 1523 .globl bn_sqr_comba8 1524 .ent bn_sqr_comba8 1525 bn_sqr_comba8: 1526 .set reorder 1527 lw $12,0($5) 1528 lw $13,4($5) 1529 lw $14,2*4($5) 1530 lw $15,3*4($5) 1531 1532 multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); 1533 lw $8,4*4($5) 1534 lw $9,5*4($5) 1535 lw $10,6*4($5) 1536 lw $11,7*4($5) 1537 mflo $2 1538 mfhi $3 1539 sw $2,0($4) 1540 1541 multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); 1542 mflo $24 1543 mfhi $25 1544 slt $2,$25,$0 1545 sll $25,1 1546 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); 1547 slt $6,$24,$0 1548 addu $25,$6 1549 sll $24,1 1550 addu $3,$24 1551 sltu $1,$3,$24 1552 addu $7,$25,$1 1553 sw $3,4($4) 1554 1555 mflo $24 1556 mfhi $25 1557 slt $3,$25,$0 1558 sll $25,1 1559 multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); 1560 slt $6,$24,$0 1561 addu $25,$6 1562 sll $24,1 1563 addu $7,$24 1564 sltu $1,$7,$24 1565 addu $25,$1 1566 addu $2,$25 1567 sltu $1,$2,$25 1568 addu $3,$1 1569 mflo $24 1570 mfhi $25 1571 addu $7,$24 1572 sltu $1,$7,$24 1573 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); 1574 addu $25,$1 1575 addu $2,$25 1576 sltu $1,$2,$25 1577 addu $3,$1 1578 sw $7,2*4($4) 1579 1580 mflo $24 1581 mfhi $25 1582 slt $7,$25,$0 1583 sll $25,1 1584 multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); 1585 slt $6,$24,$0 1586 addu $25,$6 1587 sll $24,1 1588 addu $2,$24 1589 sltu $1,$2,$24 1590 addu $25,$1 1591 addu $3,$25 1592 sltu $1,$3,$25 1593 addu $7,$1 1594 mflo $24 1595 mfhi $25 1596 slt $1,$25,$0 1597 addu $7,$1 1598 multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); 1599 sll $25,1 1600 slt $6,$24,$0 1601 addu $25,$6 1602 sll $24,1 1603 addu $2,$24 1604 sltu $1,$2,$24 1605 addu $25,$1 1606 addu $3,$25 1607 sltu $1,$3,$25 1608 addu $7,$1 1609 sw $2,3*4($4) 1610 1611 mflo $24 1612 mfhi $25 1613 slt $2,$25,$0 1614 sll $25,1 1615 multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); 1616 slt $6,$24,$0 1617 addu $25,$6 1618 sll $24,1 1619 addu $3,$24 1620 sltu $1,$3,$24 1621 addu $25,$1 1622 addu $7,$25 1623 sltu $1,$7,$25 1624 addu $2,$1 1625 mflo $24 1626 mfhi $25 1627 slt $1,$25,$0 1628 addu $2,$1 1629 multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); 1630 sll $25,1 1631 slt $6,$24,$0 1632 addu $25,$6 1633 sll $24,1 1634 addu $3,$24 1635 sltu $1,$3,$24 1636 addu $25,$1 1637 addu $7,$25 1638 sltu $1,$7,$25 1639 addu $2,$1 1640 mflo $24 1641 mfhi $25 1642 addu $3,$24 1643 sltu $1,$3,$24 1644 multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); 1645 addu $25,$1 1646 addu $7,$25 1647 sltu $1,$7,$25 1648 addu $2,$1 1649 sw $3,4*4($4) 1650 1651 mflo $24 1652 mfhi $25 1653 slt $3,$25,$0 1654 sll $25,1 1655 multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); 1656 slt $6,$24,$0 1657 addu $25,$6 1658 sll $24,1 1659 addu $7,$24 1660 sltu $1,$7,$24 1661 addu $25,$1 1662 addu $2,$25 1663 sltu $1,$2,$25 1664 addu $3,$1 1665 mflo $24 1666 mfhi $25 1667 slt $1,$25,$0 1668 addu $3,$1 1669 multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); 1670 sll $25,1 1671 slt $6,$24,$0 1672 addu $25,$6 1673 sll $24,1 1674 addu $7,$24 1675 sltu $1,$7,$24 1676 addu $25,$1 1677 addu $2,$25 1678 sltu $1,$2,$25 1679 addu $3,$1 1680 mflo $24 1681 mfhi $25 1682 slt $1,$25,$0 1683 multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); 1684 addu $3,$1 1685 sll $25,1 1686 slt $6,$24,$0 1687 addu $25,$6 1688 sll $24,1 1689 addu $7,$24 1690 sltu $1,$7,$24 1691 addu $25,$1 1692 addu $2,$25 1693 sltu $1,$2,$25 1694 addu $3,$1 1695 sw $7,5*4($4) 1696 1697 mflo $24 1698 mfhi $25 1699 slt $7,$25,$0 1700 sll $25,1 1701 multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); 1702 slt $6,$24,$0 1703 addu $25,$6 1704 sll $24,1 1705 addu $2,$24 1706 sltu $1,$2,$24 1707 addu $25,$1 1708 addu $3,$25 1709 sltu $1,$3,$25 1710 addu $7,$1 1711 mflo $24 1712 mfhi $25 1713 slt $1,$25,$0 1714 addu $7,$1 1715 multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); 1716 sll $25,1 1717 slt $6,$24,$0 1718 addu $25,$6 1719 sll $24,1 1720 addu $2,$24 1721 sltu $1,$2,$24 1722 addu $25,$1 1723 addu $3,$25 1724 sltu $1,$3,$25 1725 addu $7,$1 1726 mflo $24 1727 mfhi $25 1728 slt $1,$25,$0 1729 addu $7,$1 1730 multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); 1731 sll $25,1 1732 slt $6,$24,$0 1733 addu $25,$6 1734 sll $24,1 1735 addu $2,$24 1736 sltu $1,$2,$24 1737 addu $25,$1 1738 addu $3,$25 1739 sltu $1,$3,$25 1740 addu $7,$1 1741 mflo $24 1742 mfhi $25 1743 addu $2,$24 1744 sltu $1,$2,$24 1745 multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); 1746 addu $25,$1 1747 addu $3,$25 1748 sltu $1,$3,$25 1749 addu $7,$1 1750 sw $2,6*4($4) 1751 1752 mflo $24 1753 mfhi $25 1754 slt $2,$25,$0 1755 sll $25,1 1756 multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); 1757 slt $6,$24,$0 1758 addu $25,$6 1759 sll $24,1 1760 addu $3,$24 1761 sltu $1,$3,$24 1762 addu $25,$1 1763 addu $7,$25 1764 sltu $1,$7,$25 1765 addu $2,$1 1766 mflo $24 1767 mfhi $25 1768 slt $1,$25,$0 1769 addu $2,$1 1770 multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); 1771 sll $25,1 1772 slt $6,$24,$0 1773 addu $25,$6 1774 sll $24,1 1775 addu $3,$24 1776 sltu $1,$3,$24 1777 addu $25,$1 1778 addu $7,$25 1779 sltu $1,$7,$25 1780 addu $2,$1 1781 mflo $24 1782 mfhi $25 1783 slt $1,$25,$0 1784 addu $2,$1 1785 multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); 1786 sll $25,1 1787 slt $6,$24,$0 1788 addu $25,$6 1789 sll $24,1 1790 addu $3,$24 1791 sltu $1,$3,$24 1792 addu $25,$1 1793 addu $7,$25 1794 sltu $1,$7,$25 1795 addu $2,$1 1796 mflo $24 1797 mfhi $25 1798 slt $1,$25,$0 1799 addu $2,$1 1800 multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); 1801 sll $25,1 1802 slt $6,$24,$0 1803 addu $25,$6 1804 sll $24,1 1805 addu $3,$24 1806 sltu $1,$3,$24 1807 addu $25,$1 1808 addu $7,$25 1809 sltu $1,$7,$25 1810 addu $2,$1 1811 sw $3,7*4($4) 1812 1813 mflo $24 1814 mfhi $25 1815 slt $3,$25,$0 1816 sll $25,1 1817 multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); 1818 slt $6,$24,$0 1819 addu $25,$6 1820 sll $24,1 1821 addu $7,$24 1822 sltu $1,$7,$24 1823 addu $25,$1 1824 addu $2,$25 1825 sltu $1,$2,$25 1826 addu $3,$1 1827 mflo $24 1828 mfhi $25 1829 slt $1,$25,$0 1830 addu $3,$1 1831 multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); 1832 sll $25,1 1833 slt $6,$24,$0 1834 addu $25,$6 1835 sll $24,1 1836 addu $7,$24 1837 sltu $1,$7,$24 1838 addu $25,$1 1839 addu $2,$25 1840 sltu $1,$2,$25 1841 addu $3,$1 1842 mflo $24 1843 mfhi $25 1844 slt $1,$25,$0 1845 addu $3,$1 1846 multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); 1847 sll $25,1 1848 slt $6,$24,$0 1849 addu $25,$6 1850 sll $24,1 1851 addu $7,$24 1852 sltu $1,$7,$24 1853 addu $25,$1 1854 addu $2,$25 1855 sltu $1,$2,$25 1856 addu $3,$1 1857 mflo $24 1858 mfhi $25 1859 addu $7,$24 1860 sltu $1,$7,$24 1861 multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); 1862 addu $25,$1 1863 addu $2,$25 1864 sltu $1,$2,$25 1865 addu $3,$1 1866 sw $7,8*4($4) 1867 1868 mflo $24 1869 mfhi $25 1870 slt $7,$25,$0 1871 sll $25,1 1872 multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); 1873 slt $6,$24,$0 1874 addu $25,$6 1875 sll $24,1 1876 addu $2,$24 1877 sltu $1,$2,$24 1878 addu $25,$1 1879 addu $3,$25 1880 sltu $1,$3,$25 1881 addu $7,$1 1882 mflo $24 1883 mfhi $25 1884 slt $1,$25,$0 1885 addu $7,$1 1886 multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); 1887 sll $25,1 1888 slt $6,$24,$0 1889 addu $25,$6 1890 sll $24,1 1891 addu $2,$24 1892 sltu $1,$2,$24 1893 addu $25,$1 1894 addu $3,$25 1895 sltu $1,$3,$25 1896 addu $7,$1 1897 mflo $24 1898 mfhi $25 1899 slt $1,$25,$0 1900 addu $7,$1 1901 multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); 1902 sll $25,1 1903 slt $6,$24,$0 1904 addu $25,$6 1905 sll $24,1 1906 addu $2,$24 1907 sltu $1,$2,$24 1908 addu $25,$1 1909 addu $3,$25 1910 sltu $1,$3,$25 1911 addu $7,$1 1912 sw $2,9*4($4) 1913 1914 mflo $24 1915 mfhi $25 1916 slt $2,$25,$0 1917 sll $25,1 1918 multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); 1919 slt $6,$24,$0 1920 addu $25,$6 1921 sll $24,1 1922 addu $3,$24 1923 sltu $1,$3,$24 1924 addu $25,$1 1925 addu $7,$25 1926 sltu $1,$7,$25 1927 addu $2,$1 1928 mflo $24 1929 mfhi $25 1930 slt $1,$25,$0 1931 addu $2,$1 1932 multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); 1933 sll $25,1 1934 slt $6,$24,$0 1935 addu $25,$6 1936 sll $24,1 1937 addu $3,$24 1938 sltu $1,$3,$24 1939 addu $25,$1 1940 addu $7,$25 1941 sltu $1,$7,$25 1942 addu $2,$1 1943 mflo $24 1944 mfhi $25 1945 addu $3,$24 1946 sltu $1,$3,$24 1947 multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); 1948 addu $25,$1 1949 addu $7,$25 1950 sltu $1,$7,$25 1951 addu $2,$1 1952 sw $3,10*4($4) 1953 1954 mflo $24 1955 mfhi $25 1956 slt $3,$25,$0 1957 sll $25,1 1958 multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); 1959 slt $6,$24,$0 1960 addu $25,$6 1961 sll $24,1 1962 addu $7,$24 1963 sltu $1,$7,$24 1964 addu $25,$1 1965 addu $2,$25 1966 sltu $1,$2,$25 1967 addu $3,$1 1968 mflo $24 1969 mfhi $25 1970 slt $1,$25,$0 1971 addu $3,$1 1972 multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); 1973 sll $25,1 1974 slt $6,$24,$0 1975 addu $25,$6 1976 sll $24,1 1977 addu $7,$24 1978 sltu $1,$7,$24 1979 addu $25,$1 1980 addu $2,$25 1981 sltu $1,$2,$25 1982 addu $3,$1 1983 sw $7,11*4($4) 1984 1985 mflo $24 1986 mfhi $25 1987 slt $7,$25,$0 1988 sll $25,1 1989 multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); 1990 slt $6,$24,$0 1991 addu $25,$6 1992 sll $24,1 1993 addu $2,$24 1994 sltu $1,$2,$24 1995 addu $25,$1 1996 addu $3,$25 1997 sltu $1,$3,$25 1998 addu $7,$1 1999 mflo $24 2000 mfhi $25 2001 addu $2,$24 2002 sltu $1,$2,$24 2003 multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); 2004 addu $25,$1 2005 addu $3,$25 2006 sltu $1,$3,$25 2007 addu $7,$1 2008 sw $2,12*4($4) 2009 2010 mflo $24 2011 mfhi $25 2012 slt $2,$25,$0 2013 sll $25,1 2014 multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); 2015 slt $6,$24,$0 2016 addu $25,$6 2017 sll $24,1 2018 addu $3,$24 2019 sltu $1,$3,$24 2020 addu $25,$1 2021 addu $7,$25 2022 sltu $1,$7,$25 2023 addu $2,$1 2024 sw $3,13*4($4) 2025 2026 mflo $24 2027 mfhi $25 2028 addu $7,$24 2029 sltu $1,$7,$24 2030 addu $25,$1 2031 addu $2,$25 2032 sw $7,14*4($4) 2033 sw $2,15*4($4) 2034 2035 .set noreorder 2036 jr $31 2037 nop 2038 .end bn_sqr_comba8 2039 2040 .align 5 2041 .globl bn_sqr_comba4 2042 .ent bn_sqr_comba4 2043 bn_sqr_comba4: 2044 .set reorder 2045 lw $12,0($5) 2046 lw $13,4($5) 2047 multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); 2048 lw $14,2*4($5) 2049 lw $15,3*4($5) 2050 mflo $2 2051 mfhi $3 2052 sw $2,0($4) 2053 2054 multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); 2055 mflo $24 2056 mfhi $25 2057 slt $2,$25,$0 2058 sll $25,1 2059 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); 2060 slt $6,$24,$0 2061 addu $25,$6 2062 sll $24,1 2063 addu $3,$24 2064 sltu $1,$3,$24 2065 addu $7,$25,$1 2066 sw $3,4($4) 2067 2068 mflo $24 2069 mfhi $25 2070 slt $3,$25,$0 2071 sll $25,1 2072 multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); 2073 slt $6,$24,$0 2074 addu $25,$6 2075 sll $24,1 2076 addu $7,$24 2077 sltu $1,$7,$24 2078 addu $25,$1 2079 addu $2,$25 2080 sltu $1,$2,$25 2081 addu $3,$1 2082 mflo $24 2083 mfhi $25 2084 addu $7,$24 2085 sltu $1,$7,$24 2086 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); 2087 addu $25,$1 2088 addu $2,$25 2089 sltu $1,$2,$25 2090 addu $3,$1 2091 sw $7,2*4($4) 2092 2093 mflo $24 2094 mfhi $25 2095 slt $7,$25,$0 2096 sll $25,1 2097 multu $13,$14 # mul_add_c(a2[1],b[2],c1,c2,c3); 2098 slt $6,$24,$0 2099 addu $25,$6 2100 sll $24,1 2101 addu $2,$24 2102 sltu $1,$2,$24 2103 addu $25,$1 2104 addu $3,$25 2105 sltu $1,$3,$25 2106 addu $7,$1 2107 mflo $24 2108 mfhi $25 2109 slt $1,$25,$0 2110 addu $7,$1 2111 multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); 2112 sll $25,1 2113 slt $6,$24,$0 2114 addu $25,$6 2115 sll $24,1 2116 addu $2,$24 2117 sltu $1,$2,$24 2118 addu $25,$1 2119 addu $3,$25 2120 sltu $1,$3,$25 2121 addu $7,$1 2122 sw $2,3*4($4) 2123 2124 mflo $24 2125 mfhi $25 2126 slt $2,$25,$0 2127 sll $25,1 2128 multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); 2129 slt $6,$24,$0 2130 addu $25,$6 2131 sll $24,1 2132 addu $3,$24 2133 sltu $1,$3,$24 2134 addu $25,$1 2135 addu $7,$25 2136 sltu $1,$7,$25 2137 addu $2,$1 2138 mflo $24 2139 mfhi $25 2140 addu $3,$24 2141 sltu $1,$3,$24 2142 multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); 2143 addu $25,$1 2144 addu $7,$25 2145 sltu $1,$7,$25 2146 addu $2,$1 2147 sw $3,4*4($4) 2148 2149 mflo $24 2150 mfhi $25 2151 slt $3,$25,$0 2152 sll $25,1 2153 multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); 2154 slt $6,$24,$0 2155 addu $25,$6 2156 sll $24,1 2157 addu $7,$24 2158 sltu $1,$7,$24 2159 addu $25,$1 2160 addu $2,$25 2161 sltu $1,$2,$25 2162 addu $3,$1 2163 sw $7,5*4($4) 2164 2165 mflo $24 2166 mfhi $25 2167 addu $2,$24 2168 sltu $1,$2,$24 2169 addu $25,$1 2170 addu $3,$25 2171 sw $2,6*4($4) 2172 sw $3,7*4($4) 2173 2174 .set noreorder 2175 jr $31 2176 nop 2177 .end bn_sqr_comba4 2178