# 32-bit MIPS big-number primitives.  A "word" below is 4 bytes: every
# memory access is an lw/sw with a 4-byte stride.
#
# Conventions used throughout (all visible in the code itself):
#  - Registers are written numerically.  $4..$7 hold the incoming
#    arguments, $2 the result, $31 the return address, $29 the stack
#    pointer.
#  - `.set noat' is in effect, so $1 is used explicitly as a scratch
#    register.
#  - Inside `.set noreorder' regions the instruction following a branch
#    or jump is its delay slot and executes whether or not the branch
#    is taken.
#  - Routines copy the result from $2 into $4 in the delay slot of the
#    final `jr $31' (the reason is not visible in this file).
.set	mips2
.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"

.text
.set	noat

# bn_mul_add_words
#   In:   $4 = pointer to words that are read, updated and written back
#         $5 = pointer to words that are only read
#         $6 = word count (signed)
#         $7 = multiplier word
#   Out:  $2 = final carry word (0 when count <= 0), mirrored in $4
#   Per word i:  {carry, [$4][i]} = [$5][i] * $7 + [$4][i] + carry
# This stub only screens the count and zeroes the carry.
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0		# delay slot: carry starts at 0 on both paths
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_add_words

# Worker for bn_mul_add_words; entered with count > 0 and $2 = 0.
# The main loop handles four words per pass while (count & -4) != 0,
# the tail handles the remaining one to three words.
# Scratch: $1, $3, $8-$15.
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4		# mask that clears the low two bits of the count
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail	# fewer than four words

.L_bn_mul_add_words_loop:
	lw	$12,0($5)
	multu	$12,$7
	lw	$13,0($4)
	lw	$14,4($5)
	lw	$15,4($4)
	lw	$8,2*4($5)
	lw	$9,2*4($4)
	addu	$13,$2		# add incoming carry to the old result word
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	$1
	mfhi	$12
	addu	$13,$1		# add low half of the product
	addu	$2,$12		# new carry = high half + both carry-outs
	multu	$14,$7		# start the next product right away
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1

	lw	$10,3*4($5)
	lw	$11,3*4($4)
	addu	$15,$2
	sltu	$2,$15,$2
	mflo	$1
	mfhi	$14
	addu	$15,$1
	addu	$2,$14
	multu	$8,$7
	sltu	$1,$15,$1
	sw	$15,4($4)
	addu	$2,$1

	subu	$6,4		# four words consumed; pointers advance now,
	addu	$4,4*4		# so the remaining stores use negative offsets
	addu	$5,4*4
	addu	$9,$2
	sltu	$2,$9,$2
	mflo	$1
	mfhi	$8
	addu	$9,$1
	addu	$2,$8
	multu	$10,$7
	sltu	$1,$9,$1
	sw	$9,-2*4($4)
	addu	$2,$1


	and	$8,$6,$3	# at least four words left?
	addu	$11,$2
	sltu	$2,$11,$2
	mflo	$1
	mfhi	$10
	addu	$11,$1
	addu	$2,$10
	sltu	$1,$11,$1
	sw	$11,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	addu	$2,$1		# delay slot: finish the carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	$12,$7
	lw	$13,0($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,4($5)
	multu	$12,$7
	lw	$13,4($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,4($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,2*4($5)	# third and last possible tail word
	multu	$12,$7
	lw	$13,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,2*4($4)
	addu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_add_words_internal

# bn_mul_words
#   In:   $4 = pointer to result words (written only)
#         $5 = pointer to source words
#         $6 = word count (signed)
#         $7 = multiplier word
#   Out:  $2 = final carry word (0 when count <= 0), mirrored in $4
#   Per word i:  {carry, [$4][i]} = [$5][i] * $7 + carry
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0		# delay slot: carry starts at 0
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_words

# Worker for bn_mul_words; same four-words-per-pass loop plus
# one-to-three word tail as bn_mul_add_words_internal.
# Scratch: $1, $3, $8-$15.
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	lw	$12,0($5)
	multu	$12,$7
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	$1
	mfhi	$12
	addu	$2,$1		# low half + incoming carry
	sltu	$13,$2,$1	# carry-out of that addition
	multu	$14,$7
	sw	$2,0($4)
	addu	$2,$13,$12	# next carry = carry-out + high half

	subu	$6,4
	addu	$4,4*4
	addu	$5,4*4
	mflo	$1
	mfhi	$14
	addu	$2,$1
	sltu	$15,$2,$1
	multu	$8,$7
	sw	$2,-3*4($4)
	addu	$2,$15,$14

	mflo	$1
	mfhi	$8
	addu	$2,$1
	sltu	$9,$2,$1
	multu	$10,$7
	sw	$2,-2*4($4)
	addu	$2,$9,$8

	and	$8,$6,$3	# at least four words left?
	mflo	$1
	mfhi	$10
	addu	$2,$1
	sltu	$11,$2,$1
	sw	$2,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	addu	$2,$11,$10	# delay slot: finish the carry

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	$12,$7
	subu	$6,1
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,0($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,4($5)
	multu	$12,$7
	subu	$6,1
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,4($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,2*4($5)
	multu	$12,$7
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,2*4($4)
	addu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_words_internal

# bn_sqr_words
#   In:   $4 = pointer to result (two words are written per input word)
#         $5 = pointer to source words
#         $6 = word count (signed)
#   Per word i:  [$4][2i] = low half, [$4][2i+1] = high half of [$5][i]^2
#   $2 is zeroed here and never changed by the worker, so 0 is what
#   ends up in $2/$4 on return.
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0		# delay slot
	jr	$31
	move	$4,$2		# delay slot
.end	bn_sqr_words

# Worker for bn_sqr_words: four squares per pass, then a tail of one to
# three.  Scratch: $3, $8-$15.
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	lw	$12,0($5)
	multu	$12,$12
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	$13
	mfhi	$12
	sw	$13,0($4)
	sw	$12,4($4)

	multu	$14,$14
	subu	$6,4
	addu	$4,8*4		# result advances eight words per four inputs
	addu	$5,4*4
	mflo	$15
	mfhi	$14
	sw	$15,-6*4($4)
	sw	$14,-5*4($4)

	multu	$8,$8
	mflo	$9
	mfhi	$8
	sw	$9,-4*4($4)
	sw	$8,-3*4($4)


	multu	$10,$10
	and	$8,$6,$3	# at least four words left?
	mflo	$11
	mfhi	$10
	sw	$11,-2*4($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sw	$10,-4($4)	# delay slot: last store of this pass

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	$12,$12
	subu	$6,1
	mflo	$13
	mfhi	$12
	sw	$13,0($4)
	sw	$12,4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,4($5)
	multu	$12,$12
	subu	$6,1
	mflo	$13
	mfhi	$12
	sw	$13,2*4($4)
	sw	$12,3*4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,2*4($5)
	multu	$12,$12
	mflo	$13
	mfhi	$12
	sw	$13,4*4($4)
	sw	$12,5*4($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot

.end	bn_sqr_words_internal

# bn_add_words
#   In:   $4 = pointer to result words
#         $5, $6 = pointers to the two operands
#         $7 = word count (signed)
#   Out:  $2 = carry out of the last word (0 when count <= 0),
#         mirrored in $4
#   Per word i:  {carry, [$4][i]} = [$5][i] + [$6][i] + carry
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0		# delay slot: carry starts at 0
	jr	$31
	move	$4,$2		# delay slot
.end	bn_add_words

# Worker for bn_add_words: four words per pass, then a tail of one to
# three.  Each word takes two additions; $24/$25 hold the carry of
# operand + operand, $2 the carry of adding the incoming carry, and the
# two are summed into the outgoing carry.
# Scratch: $1, $3, $8-$15, $24, $25.
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3	# loop condition, tested at the bottom
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	addu	$8,$12
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	addu	$9,$13
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25

	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	addu	$11,$15
	sltu	$25,$11,$15
	addu	$15,$11,$2
	sltu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	addu	$2,$25		# delay slot: finish the carry

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	addu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_add_words_return

	lw	$13,4($5)
	lw	$9,4($6)
	addu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_add_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot

.end	bn_add_words_internal

# bn_sub_words
#   In:   $4 = pointer to result words
#         $5 = pointer to the minuend, $6 = pointer to the subtrahend
#         $7 = word count (signed)
#   Out:  $2 = borrow out of the last word (0 when count <= 0),
#         mirrored in $4
#   Per word i:  [$4][i] = [$5][i] - [$6][i] - borrow
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0		# delay slot: borrow starts at 0
	jr	$31
	move	$4,$0		# delay slot; same value as $2 here
.end	bn_sub_words

# Worker for bn_sub_words: four words per pass, then a tail of one to
# three.  $24/$25 hold the borrow of operand - operand (minuend <
# subtrahend); $2 holds the borrow of subtracting the incoming borrow
# (result > intermediate); the two are summed into the outgoing borrow.
# Scratch: $1, $3, $8-$15, $24, $25.
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3	# loop condition, tested at the bottom
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25


	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	sltu	$25,$15,$11
	subu	$11,$15,$11
	subu	$15,$11,$2
	sgtu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	addu	$2,$25		# delay slot: finish the borrow

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	lw	$13,4($5)
	subu	$7,1
	lw	$9,4($6)
	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_sub_words_internal

# bn_div_3_words
#   In:   $4 = pointer m; the words at 0($4), -4($4) and -2*4($4) are read
#         $5 = word kept in $10 and multiplied by the quotient estimate
#         $6 = divisor word handed to bn_div_words_internal
#   Out:  $2 = quotient estimate, mirrored in $4
#   Returns -1 (all ones) at once when the word at 0($4) equals $6.
#   Otherwise the worker divides {0($4), -4($4)} by $6 and then
#   corrects the estimate downwards.
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	lw	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	lw	$5,-4($7)	# delay slot: low dividend word
	li	$2,-1
	jr	$31
	move	$4,$2		# delay slot
.end	bn_div_3_words

# Worker for bn_div_3_words.  After the call $2 holds the quotient and
# $5 the remainder (bn_div_words_internal copies it there on exit).
# $13:$12 = $10 * quotient is then compared against {remainder, word at
# -2*4($7)}.  While the product is larger, the quotient is decremented,
# $6 is added to the remainder and $10 is subtracted from the product.
.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31		# keep the return address across the call
	bal	bn_div_words_internal
	move	$31,$11
	multu	$10,$2
	lw	$14,-2*4($7)
	move	$8,$0
	mfhi	$13
	mflo	$12
	sltu	$24,$13,$5	# high half of product < remainder: done
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25		# $1 = high halves equal and low half fits
	sltu	$15,$12,$10	# borrow out of the low half
	addu	$5,$6
	subu	$13,$15
	subu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6	# remainder overflowed a word: also done
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	subu	$2,1		# delay slot: tentative decrement
	addu	$2,1		# $1 was set: undo the decrement
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_div_3_words_internal

# bn_div_words
#   In:   $4 = high dividend word, $5 = low dividend word, $6 = divisor
#   Out:  $2 = quotient word, mirrored in $4
#   Returns -1 (all ones) when the divisor is zero.
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2		# delay slot
.end	bn_div_words

# Worker for bn_div_words (also called from bn_div_3_words_internal).
#  1. Shift the divisor left until its top bit is set, counting the
#     shift in $25, and shift the dividend by the same amount.
#     `break 6' fires when the bits shifted out of $4 are non-zero.
#  2. If $4 >= $6, subtract $6 once.
#  3. Produce the quotient as two 16-bit halves.  Each half starts from
#     a divu by the divisor's upper half ($3), or from 0xffff when the
#     upper halves are equal, and is then decremented in a correction
#     loop.
#  4. On exit $2/$4 = quotient, $3/$5 = remainder shifted back down,
#     and $6 is shifted back down to its original value.
# There is no `.set' directive before the first instructions, so they
# are assembled under the `.set noreorder' still in effect from
# bn_div_words above; the `.-4' and `.+12' targets depend on that.
# Scratch: $1, $3, $8, $9, $12-$15, $24, $25.
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body	# top bit already set: no shift
	move	$25,$3		# delay slot: shift count = 0
	sll	$6,1
	bgtz	$6,.-4		# back to the sll until the top bit is set
	addu	$25,1		# delay slot: count each shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	sll	$14,$13		# mask of the bits about to be shifted out
	and	$14,$4
	srl	$1,$5,$13	# bits of $5 that move up into $4
	.set	noreorder
	beqz	$14,.+12	# skip the break when nothing is lost
	nop
	break	6		# signal overflow
	.set	reorder
	sll	$4,$25
	sll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	srl	$3,$6,4*4	# 4*4 = 16 bits: upper half of the divisor
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12		# skip the subtraction when $4 < $6
	nop
	subu	$4,$6
	.set	reorder

	li	$8,-1
	srl	$9,$4,4*4	# upper half of the current dividend word
	srl	$8,4*4		# q=0xffff (all-ones half word)
	beq	$3,$9,.L_bn_div_words_skip_div1
	divu	$0,$4,$3
	mflo	$8
.L_bn_div_words_skip_div1:
	multu	$6,$8
	sll	$15,$4,4*4	# dividend word moved up by half a word ...
	srl	$1,$5,4*4	# ... with the upper half of $5 shifted in
	or	$15,$1
	mflo	$12
	mfhi	$13
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6	# borrow for the product decrement below
	or	$1,$14		# $1 != 0: product still too large
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	subu	$13,$2		# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop1
	subu	$8,1		# delay slot: q--
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	sll	$5,4*4		# expose the lower half of the low word
	subu	$4,$15,$12
	sll	$2,$8,4*4	# upper half of the quotient

	li	$8,-1
	srl	$9,$4,4*4	# upper half of the current dividend word
	srl	$8,4*4		# q=0xffff (all-ones half word)
	beq	$3,$9,.L_bn_div_words_skip_div2
	divu	$0,$4,$3
	mflo	$8
.L_bn_div_words_skip_div2:
	multu	$6,$8
	sll	$15,$4,4*4	# same construction as in the first round
	srl	$1,$5,4*4
	or	$15,$1
	mflo	$12
	mfhi	$13
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6	# $3 is the borrow here: $2 holds the quotient
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	subu	$13,$3		# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop2
	subu	$8,1		# delay slot: q--
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	subu	$4,$15,$12
	or	$2,$8		# merge the lower half of the quotient
	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
	srl	$6,$25		# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2		# delay slot
.end	bn_div_words_internal

# bn_mul_comba8
#   In:   $4 = pointer r, 16 result words are stored (r[0]..r[15])
#         $5 = pointer a, 8 words are loaded
#         $6 = pointer b, 8 words are loaded
#   Saves and restores $16-$21 in a 6-word stack frame.
#   Register use:
#     a[0..7] = $12, $13, $14, $15, $16, $18, $20, $5
#     b[0..7] = $8,  $9,  $10, $11, $17, $19, $21, $6
#       ($5 and $6 are overwritten by a[7] and b[7] once the other
#        words have been loaded)
#     c1/c2/c3 rotate through $2, $3, $7; $24/$25 = low/high half of
#     the product being added; $1 = carry scratch.
#   The comment on each multu names the product it starts; that product
#   is added in by the mflo/mfhi group that follows the multu.
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*4,$31
	.mask	0x003f0000,-4
	subu	$29,6*4
	sw	$21,5*4($29)
	sw	$20,4*4($29)
	sw	$19,3*4($29)
	sw	$18,2*4($29)
	sw	$17,1*4($29)
	sw	$16,0*4($29)

	.set	reorder
	lw	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# line with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning. If anybody out there
				# has a clue about how to circumvent
				# this do send me a note.
				#		<appro@fy.chalmers.se>

	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	$2
	mfhi	$3

	lw	$16,4*4($5)
	lw	$18,5*4($5)
	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
	lw	$20,6*4($5)
	lw	$5,7*4($5)
	lw	$17,4*4($6)
	lw	$19,5*4($6)
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	lw	$21,6*4($6)
	lw	$6,7*4($6)
	sw	$2,0($4)	# r[0]=c1;
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)	# r[8]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)	# r[9]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)	# r[10]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)	# r[11]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)	# r[12]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)	# r[13]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)	# r[14]=c3;
	sw	$2,15*4($4)	# r[15]=c1;

	.set	noreorder
	lw	$21,5*4($29)
	lw	$20,4*4($29)
	lw	$19,3*4($29)
	lw	$18,2*4($29)
	lw	$17,1*4($29)
	lw	$16,0*4($29)
	jr	$31
	addu	$29,6*4		# delay slot: release the frame
.end	bn_mul_comba8

# bn_mul_comba4
#   In:   $4 = pointer r, 8 result words are stored (r[0]..r[7])
#         $5 = pointer a, 4 words are loaded
#         $6 = pointer b, 4 words are loaded
#   No stack frame.  Register use:
#     a[0..3] = $12, $13, $14, $15      b[0..3] = $8, $9, $10, $11
#     c1/c2/c3 rotate through $2, $3, $7; $24/$25 = low/high half of
#     the product being added; $1 = carry scratch.
#   The comment on each multu names the product it starts.
.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	$2
	mfhi	$3
	sw	$2,0($4)	# r[0]=c1;

	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)	# r[6]=c1;
	sw	$3,7*4($4)	# r[7]=c2;

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

# bn_sqr_comba8
#   In:   $4 = pointer r (result words are stored through it)
#         $5 = pointer a, 8 words are loaded
#   Register use in the code below:
#     a[0..7] = $12, $13, $14, $15, $8, $9, $10, $11
#     c1/c2/c3 rotate through $2, $3, $7; $24/$25 = low/high half of
#     the product being added; $1 and $6 = carry scratch.
#   A product marked mul_add_c2 is doubled before it is added: `slt
#   ..,$25,$0' captures the top bit of the high half as a carry into
#   the third accumulator word, `slt $6,$24,$0' captures the top bit of
#   the low half and moves it into the high half, and both halves are
#   shifted left by one.
.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,2*4($5)
	lw	$15,3*4($5)

	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$8,4*4($5)
	lw	$9,5*4($5)
	lw	$10,6*4($5)
	lw	$11,7*4($5)
	mflo	$2
	mfhi	$3
	sw	$2,0($4)	# r[0]=c1;

	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0
	sll	$25,1
	multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$7,$25,$1
	sw	$3,4($4)	# r[1]=c2;

	mflo	$24
	mfhi	$25
	slt	$3,$25,$0
	sll	$25,1
	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	$24
	mfhi	$25
	slt	$7,$25,$0
	sll	$25,1
	multu	$13,$14		# mul_add_c2(a[1],b[2],c1,c2,c3);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$8,$12		# mul_add_c2(a[4],b[0],c2,c3,c1);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	$24
	mfhi	$25
	slt	$2,$25,$0
	sll	$25,1
	multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	$24
	mfhi	$25
	slt	$3,$25,$0
	sll	$25,1
	multu	$13,$8		# mul_add_c2(a[1],b[4],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	multu	$10,$12		# mul_add_c2(a[6],b[0],c1,c2,c3);
	addu	$3,$1
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	$24
	mfhi	$25
	slt	$7,$25,$0
	sll	$25,1
	multu	$9,$13		# mul_add_c2(a[5],b[1],c1,c2,c3);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$8,$14		# mul_add_c2(a[4],b[2],c1,c2,c3);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1
	multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	$24
	mfhi	$25
	slt	$2,$25,$0
	sll	$25,1
	multu	$13,$10		# mul_add_c2(a[1],b[6],c2,c3,c1);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$14,$9		# mul_add_c2(a[2],b[5],c2,c3,c1);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$15,$8		# mul_add_c2(a[3],b[4],c2,c3,c1);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$2,$1
	multu	$11,$13		# mul_add_c2(a[7],b[1],c3,c1,c2);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	$24
	mfhi	$25
	slt	$3,$25,$0
	sll	$25,1
	multu	$10,$14		# mul_add_c2(a[6],b[2],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$9,$15		# mul_add_c2(a[5],b[3],c3,c1,c2);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$8,$8		# mul_add_c(a[4],b[4],c3,c1,c2);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
mflo $24 1856 mfhi $25 1857 addu $7,$24 1858 sltu $1,$7,$24 1859 multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); 1860 addu $25,$1 1861 addu $2,$25 1862 sltu $1,$2,$25 1863 addu $3,$1 1864 sw $7,8*4($4) 1865 1866 mflo $24 1867 mfhi $25 1868 slt $7,$25,$0 1869 sll $25,1 1870 multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); 1871 slt $6,$24,$0 1872 addu $25,$6 1873 sll $24,1 1874 addu $2,$24 1875 sltu $1,$2,$24 1876 addu $25,$1 1877 addu $3,$25 1878 sltu $1,$3,$25 1879 addu $7,$1 1880 mflo $24 1881 mfhi $25 1882 slt $1,$25,$0 1883 addu $7,$1 1884 multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); 1885 sll $25,1 1886 slt $6,$24,$0 1887 addu $25,$6 1888 sll $24,1 1889 addu $2,$24 1890 sltu $1,$2,$24 1891 addu $25,$1 1892 addu $3,$25 1893 sltu $1,$3,$25 1894 addu $7,$1 1895 mflo $24 1896 mfhi $25 1897 slt $1,$25,$0 1898 addu $7,$1 1899 multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); 1900 sll $25,1 1901 slt $6,$24,$0 1902 addu $25,$6 1903 sll $24,1 1904 addu $2,$24 1905 sltu $1,$2,$24 1906 addu $25,$1 1907 addu $3,$25 1908 sltu $1,$3,$25 1909 addu $7,$1 1910 sw $2,9*4($4) 1911 1912 mflo $24 1913 mfhi $25 1914 slt $2,$25,$0 1915 sll $25,1 1916 multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); 1917 slt $6,$24,$0 1918 addu $25,$6 1919 sll $24,1 1920 addu $3,$24 1921 sltu $1,$3,$24 1922 addu $25,$1 1923 addu $7,$25 1924 sltu $1,$7,$25 1925 addu $2,$1 1926 mflo $24 1927 mfhi $25 1928 slt $1,$25,$0 1929 addu $2,$1 1930 multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); 1931 sll $25,1 1932 slt $6,$24,$0 1933 addu $25,$6 1934 sll $24,1 1935 addu $3,$24 1936 sltu $1,$3,$24 1937 addu $25,$1 1938 addu $7,$25 1939 sltu $1,$7,$25 1940 addu $2,$1 1941 mflo $24 1942 mfhi $25 1943 addu $3,$24 1944 sltu $1,$3,$24 1945 multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); 1946 addu $25,$1 1947 addu $7,$25 1948 sltu $1,$7,$25 1949 addu $2,$1 1950 sw $3,10*4($4) 1951 1952 mflo $24 1953 mfhi $25 1954 slt $3,$25,$0 1955 sll $25,1 1956 multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); 1957 slt $6,$24,$0 1958 addu $25,$6 
# Final columns of bn_sqr_comba8 (stores r[11]..r[15]); instructions kept as-is.
# NOTE(review): the doubled-product steps here add a carry into the doubled
# high word ("addu $25,$1") without a wrap check; worth auditing together
# with the part of this routine that precedes this point.
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	slt	$1,$25,$0
	addu	$3,$1
	multu	$11,$9		# mul_add_c2(a[7],b[5],c1,c2,c3);
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)

	mflo	$24
	mfhi	$25
	slt	$7,$25,$0
	sll	$25,1
	multu	$10,$10		# mul_add_c(a[6],b[6],c1,c2,c3);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)

	mflo	$24
	mfhi	$25
	slt	$2,$25,$0
	sll	$25,1
	multu	$11,$11		# mul_add_c(a[7],b[7],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)
	sw	$2,15*4($4)

	.set	noreorder
	jr	$31
	nop
.end	bn_sqr_comba8

#-----------------------------------------------------------------------
# bn_sqr_comba4: square a 4-word (128-bit) number into 8 words.
#
# In:    $4 = destination, eight 32-bit words are stored at 0..7*4($4)
#        $5 = source, four 32-bit words are loaded from 0..3*4($5)
#        (presumably least-significant word first, as the column
#        order below implies -- confirm against the C prototype)
# Out:   nothing is returned deliberately; $2/$3 hold leftovers.
# Clobb: $1 (AT), $2, $3, $7, $12-$15, $24, $25, HI, LO
#
# Method: column-wise ("comba") accumulation.  Three registers rotate
# through the roles low/middle/top word of a running 96-bit sum; after
# each column the low word is stored and its register is zeroed to
# become the next top word.
#
# Cross products a[i]*a[j] (i != j) count twice.  They are added into
# the accumulator two times, each time with full carry propagation.
# Doubling the 64-bit product by shifting and then adding the low-word
# carry into the doubled high word is NOT safe: the doubled high word
# can be 0xFFFFFFFF (e.g. 0x80000001 * 0xFFFFFFFE), so the carry wraps
# it to zero and is silently lost.
#
# In every step "hi + carry" cannot wrap, because the high word of a
# 32x32 product is at most 0xFFFFFFFE.
#
# The multiply for the following step is issued early ("forward") so
# it overlaps with the carry arithmetic of the current one.
#-----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3			# $3 = carry-in for column 1
	sw	$2,0($4)		# r[0]

	# Column 1: r[1] += 2*a[0]*a[1]      accumulator = ($3,$7,$2)
	multu	$12,$13			# a[0]*a[1]
	move	$7,$0			# middle word starts empty
	move	$2,$0			# top word starts empty
	mflo	$24
	mfhi	$25
	addu	$3,$24			# first copy of the product
	sltu	$1,$3,$24
	multu	$14,$12			# forward: a[2]*a[0]
	addu	$1,$25			# hi + carry
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	addu	$3,$24			# second copy of the product
	sltu	$1,$3,$24
	addu	$1,$25
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	sw	$3,4($4)		# r[1]

	# Column 2: 2*a[2]*a[0] + a[1]*a[1]  accumulator = ($7,$2,$3)
	move	$3,$0			# new top word
	mflo	$24
	mfhi	$25
	addu	$7,$24			# first copy of a[2]*a[0]
	sltu	$1,$7,$24
	multu	$13,$13			# forward: a[1]*a[1]
	addu	$1,$25
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	addu	$7,$24			# second copy of a[2]*a[0]
	sltu	$1,$7,$24
	addu	$1,$25
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24			# a[1]*a[1], counted once
	sltu	$1,$7,$24
	multu	$12,$15			# forward: a[0]*a[3]
	addu	$1,$25
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2]  accumulator = ($2,$3,$7)
	move	$7,$0			# new top word
	mflo	$24
	mfhi	$25
	addu	$2,$24			# first copy of a[0]*a[3]
	sltu	$1,$2,$24
	multu	$13,$14			# forward: a[1]*a[2]
	addu	$1,$25
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	addu	$2,$24			# second copy of a[0]*a[3]
	sltu	$1,$2,$24
	addu	$1,$25
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24			# first copy of a[1]*a[2]
	sltu	$1,$2,$24
	multu	$15,$13			# forward: a[3]*a[1]
	addu	$1,$25
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	addu	$2,$24			# second copy of a[1]*a[2]
	sltu	$1,$2,$24
	addu	$1,$25
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	sw	$2,3*4($4)		# r[3]

	# Column 4: 2*a[3]*a[1] + a[2]*a[2]  accumulator = ($3,$7,$2)
	move	$2,$0			# new top word
	mflo	$24
	mfhi	$25
	addu	$3,$24			# first copy of a[3]*a[1]
	sltu	$1,$3,$24
	multu	$14,$14			# forward: a[2]*a[2]
	addu	$1,$25
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	addu	$3,$24			# second copy of a[3]*a[1]
	sltu	$1,$3,$24
	addu	$1,$25
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24			# a[2]*a[2], counted once
	sltu	$1,$3,$24
	multu	$14,$15			# forward: a[2]*a[3]
	addu	$1,$25
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# Column 5: 2*a[2]*a[3]              accumulator = ($7,$2,$3)
	move	$3,$0			# new top word
	mflo	$24
	mfhi	$25
	addu	$7,$24			# first copy of a[2]*a[3]
	sltu	$1,$7,$24
	multu	$15,$15			# forward: a[3]*a[3]
	addu	$1,$25
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	addu	$7,$24			# second copy of a[2]*a[3]
	sltu	$1,$7,$24
	addu	$1,$25
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	sw	$7,5*4($4)		# r[5]

	# Column 6/7: a[3]*a[3]; no third word is tracked for the last column
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba4