// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64,!arm gccgo appengine

package poly1305

// Based on original, public domain implementation from NaCl by D. J.
// Bernstein.

import "math"

// Floating-point constants used by Sum.
//
// Each alphaN below has the value 3·2^(N+51), i.e. 1.5·2^(N+52); an "m" in
// the name stands for a negative N (alpham80 is N = -80). Sum uses them in
// two ways:
//   - it adds and then subtracts an alphaN to split a float64 into a high
//     part and a low remainder (y = h + alphaN; y -= alphaN; x = h - y);
//   - it subtracts alpha0/alpha32/alpha64/alpha96 from values built with
//     math.Float64frombits out of the integers 2151<<51, 2215<<51, 2279<<51
//     and 2343<<51 plus packed input bytes.
//
// scale has the value 5·2^-130. Sum multiplies the key-derived values and the
// top parts of the accumulator by it (presumably the usual Poly1305 wrap of
// 2^130 onto 5 — confirm against the NaCl original).
//
// offset0..offset3 are added to the four combined accumulator values in the
// final step of Sum, right before their bit patterns are read back with
// math.Float64bits.
const (
	alpham80 = 0.00000000558793544769287109375 // 3·2^-29
	alpham48 = 24.0                            // 3·2^3
	alpham16 = 103079215104.0                  // 3·2^35
	alpha0   = 6755399441055744.0              // 3·2^51
	alpha18  = 1770887431076116955136.0        // 3·2^69
	alpha32  = 29014219670751100192948224.0    // 3·2^83
	alpha50  = 7605903601369376408980219232256.0
	alpha64  = 124615124604835863084731911901282304.0
	alpha82  = 32667107224410092492483962313449748299776.0
	alpha96  = 535217884764734955396857238543560676143529984.0
	alpha112 = 35076039295941670036888435985190792471742381031424.0
	alpha130 = 9194973245195333150150082162901855101712434733101613056.0
	scale    = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
	offset0  = 6755408030990331.0
	offset1  = 29014256564239239022116864.0
	offset2  = 124615283061160854719918951570079744.0
	offset3  = 535219245894202480694386063513315216128475136.0
)

// Sum generates an authenticator for m using a one-time key and puts the
// 16-byte result into out. Authenticating two different messages with the same
// key allows an attacker to forge messages at will.
37 func Sum(out *[16]byte, m []byte, key *[32]byte) { 38 r := key 39 s := key[16:] 40 var ( 41 y7 float64 42 y6 float64 43 y1 float64 44 y0 float64 45 y5 float64 46 y4 float64 47 x7 float64 48 x6 float64 49 x1 float64 50 x0 float64 51 y3 float64 52 y2 float64 53 x5 float64 54 r3lowx0 float64 55 x4 float64 56 r0lowx6 float64 57 x3 float64 58 r3highx0 float64 59 x2 float64 60 r0highx6 float64 61 r0lowx0 float64 62 sr1lowx6 float64 63 r0highx0 float64 64 sr1highx6 float64 65 sr3low float64 66 r1lowx0 float64 67 sr2lowx6 float64 68 r1highx0 float64 69 sr2highx6 float64 70 r2lowx0 float64 71 sr3lowx6 float64 72 r2highx0 float64 73 sr3highx6 float64 74 r1highx4 float64 75 r1lowx4 float64 76 r0highx4 float64 77 r0lowx4 float64 78 sr3highx4 float64 79 sr3lowx4 float64 80 sr2highx4 float64 81 sr2lowx4 float64 82 r0lowx2 float64 83 r0highx2 float64 84 r1lowx2 float64 85 r1highx2 float64 86 r2lowx2 float64 87 r2highx2 float64 88 sr3lowx2 float64 89 sr3highx2 float64 90 z0 float64 91 z1 float64 92 z2 float64 93 z3 float64 94 m0 int64 95 m1 int64 96 m2 int64 97 m3 int64 98 m00 uint32 99 m01 uint32 100 m02 uint32 101 m03 uint32 102 m10 uint32 103 m11 uint32 104 m12 uint32 105 m13 uint32 106 m20 uint32 107 m21 uint32 108 m22 uint32 109 m23 uint32 110 m30 uint32 111 m31 uint32 112 m32 uint32 113 m33 uint64 114 lbelow2 int32 115 lbelow3 int32 116 lbelow4 int32 117 lbelow5 int32 118 lbelow6 int32 119 lbelow7 int32 120 lbelow8 int32 121 lbelow9 int32 122 lbelow10 int32 123 lbelow11 int32 124 lbelow12 int32 125 lbelow13 int32 126 lbelow14 int32 127 lbelow15 int32 128 s00 uint32 129 s01 uint32 130 s02 uint32 131 s03 uint32 132 s10 uint32 133 s11 uint32 134 s12 uint32 135 s13 uint32 136 s20 uint32 137 s21 uint32 138 s22 uint32 139 s23 uint32 140 s30 uint32 141 s31 uint32 142 s32 uint32 143 s33 uint32 144 bits32 uint64 145 f uint64 146 f0 uint64 147 f1 uint64 148 f2 uint64 149 f3 uint64 150 f4 uint64 151 g uint64 152 g0 uint64 153 g1 uint64 154 g2 uint64 155 g3 uint64 156 g4 uint64 157 ) 
158 159 var p int32 160 161 l := int32(len(m)) 162 163 r00 := uint32(r[0]) 164 165 r01 := uint32(r[1]) 166 167 r02 := uint32(r[2]) 168 r0 := int64(2151) 169 170 r03 := uint32(r[3]) 171 r03 &= 15 172 r0 <<= 51 173 174 r10 := uint32(r[4]) 175 r10 &= 252 176 r01 <<= 8 177 r0 += int64(r00) 178 179 r11 := uint32(r[5]) 180 r02 <<= 16 181 r0 += int64(r01) 182 183 r12 := uint32(r[6]) 184 r03 <<= 24 185 r0 += int64(r02) 186 187 r13 := uint32(r[7]) 188 r13 &= 15 189 r1 := int64(2215) 190 r0 += int64(r03) 191 192 d0 := r0 193 r1 <<= 51 194 r2 := int64(2279) 195 196 r20 := uint32(r[8]) 197 r20 &= 252 198 r11 <<= 8 199 r1 += int64(r10) 200 201 r21 := uint32(r[9]) 202 r12 <<= 16 203 r1 += int64(r11) 204 205 r22 := uint32(r[10]) 206 r13 <<= 24 207 r1 += int64(r12) 208 209 r23 := uint32(r[11]) 210 r23 &= 15 211 r2 <<= 51 212 r1 += int64(r13) 213 214 d1 := r1 215 r21 <<= 8 216 r2 += int64(r20) 217 218 r30 := uint32(r[12]) 219 r30 &= 252 220 r22 <<= 16 221 r2 += int64(r21) 222 223 r31 := uint32(r[13]) 224 r23 <<= 24 225 r2 += int64(r22) 226 227 r32 := uint32(r[14]) 228 r2 += int64(r23) 229 r3 := int64(2343) 230 231 d2 := r2 232 r3 <<= 51 233 234 r33 := uint32(r[15]) 235 r33 &= 15 236 r31 <<= 8 237 r3 += int64(r30) 238 239 r32 <<= 16 240 r3 += int64(r31) 241 242 r33 <<= 24 243 r3 += int64(r32) 244 245 r3 += int64(r33) 246 h0 := alpha32 - alpha32 247 248 d3 := r3 249 h1 := alpha32 - alpha32 250 251 h2 := alpha32 - alpha32 252 253 h3 := alpha32 - alpha32 254 255 h4 := alpha32 - alpha32 256 257 r0low := math.Float64frombits(uint64(d0)) 258 h5 := alpha32 - alpha32 259 260 r1low := math.Float64frombits(uint64(d1)) 261 h6 := alpha32 - alpha32 262 263 r2low := math.Float64frombits(uint64(d2)) 264 h7 := alpha32 - alpha32 265 266 r0low -= alpha0 267 268 r1low -= alpha32 269 270 r2low -= alpha64 271 272 r0high := r0low + alpha18 273 274 r3low := math.Float64frombits(uint64(d3)) 275 276 r1high := r1low + alpha50 277 sr1low := scale * r1low 278 279 r2high := r2low + alpha82 280 sr2low := scale * 
r2low 281 282 r0high -= alpha18 283 r0high_stack := r0high 284 285 r3low -= alpha96 286 287 r1high -= alpha50 288 r1high_stack := r1high 289 290 sr1high := sr1low + alpham80 291 292 r0low -= r0high 293 294 r2high -= alpha82 295 sr3low = scale * r3low 296 297 sr2high := sr2low + alpham48 298 299 r1low -= r1high 300 r1low_stack := r1low 301 302 sr1high -= alpham80 303 sr1high_stack := sr1high 304 305 r2low -= r2high 306 r2low_stack := r2low 307 308 sr2high -= alpham48 309 sr2high_stack := sr2high 310 311 r3high := r3low + alpha112 312 r0low_stack := r0low 313 314 sr1low -= sr1high 315 sr1low_stack := sr1low 316 317 sr3high := sr3low + alpham16 318 r2high_stack := r2high 319 320 sr2low -= sr2high 321 sr2low_stack := sr2low 322 323 r3high -= alpha112 324 r3high_stack := r3high 325 326 sr3high -= alpham16 327 sr3high_stack := sr3high 328 329 r3low -= r3high 330 r3low_stack := r3low 331 332 sr3low -= sr3high 333 sr3low_stack := sr3low 334 335 if l < 16 { 336 goto addatmost15bytes 337 } 338 339 m00 = uint32(m[p+0]) 340 m0 = 2151 341 342 m0 <<= 51 343 m1 = 2215 344 m01 = uint32(m[p+1]) 345 346 m1 <<= 51 347 m2 = 2279 348 m02 = uint32(m[p+2]) 349 350 m2 <<= 51 351 m3 = 2343 352 m03 = uint32(m[p+3]) 353 354 m10 = uint32(m[p+4]) 355 m01 <<= 8 356 m0 += int64(m00) 357 358 m11 = uint32(m[p+5]) 359 m02 <<= 16 360 m0 += int64(m01) 361 362 m12 = uint32(m[p+6]) 363 m03 <<= 24 364 m0 += int64(m02) 365 366 m13 = uint32(m[p+7]) 367 m3 <<= 51 368 m0 += int64(m03) 369 370 m20 = uint32(m[p+8]) 371 m11 <<= 8 372 m1 += int64(m10) 373 374 m21 = uint32(m[p+9]) 375 m12 <<= 16 376 m1 += int64(m11) 377 378 m22 = uint32(m[p+10]) 379 m13 <<= 24 380 m1 += int64(m12) 381 382 m23 = uint32(m[p+11]) 383 m1 += int64(m13) 384 385 m30 = uint32(m[p+12]) 386 m21 <<= 8 387 m2 += int64(m20) 388 389 m31 = uint32(m[p+13]) 390 m22 <<= 16 391 m2 += int64(m21) 392 393 m32 = uint32(m[p+14]) 394 m23 <<= 24 395 m2 += int64(m22) 396 397 m33 = uint64(m[p+15]) 398 m2 += int64(m23) 399 400 d0 = m0 401 m31 <<= 8 402 m3 
+= int64(m30) 403 404 d1 = m1 405 m32 <<= 16 406 m3 += int64(m31) 407 408 d2 = m2 409 m33 += 256 410 411 m33 <<= 24 412 m3 += int64(m32) 413 414 m3 += int64(m33) 415 d3 = m3 416 417 p += 16 418 l -= 16 419 420 z0 = math.Float64frombits(uint64(d0)) 421 422 z1 = math.Float64frombits(uint64(d1)) 423 424 z2 = math.Float64frombits(uint64(d2)) 425 426 z3 = math.Float64frombits(uint64(d3)) 427 428 z0 -= alpha0 429 430 z1 -= alpha32 431 432 z2 -= alpha64 433 434 z3 -= alpha96 435 436 h0 += z0 437 438 h1 += z1 439 440 h3 += z2 441 442 h5 += z3 443 444 if l < 16 { 445 goto multiplyaddatmost15bytes 446 } 447 448 multiplyaddatleast16bytes: 449 450 m2 = 2279 451 m20 = uint32(m[p+8]) 452 y7 = h7 + alpha130 453 454 m2 <<= 51 455 m3 = 2343 456 m21 = uint32(m[p+9]) 457 y6 = h6 + alpha130 458 459 m3 <<= 51 460 m0 = 2151 461 m22 = uint32(m[p+10]) 462 y1 = h1 + alpha32 463 464 m0 <<= 51 465 m1 = 2215 466 m23 = uint32(m[p+11]) 467 y0 = h0 + alpha32 468 469 m1 <<= 51 470 m30 = uint32(m[p+12]) 471 y7 -= alpha130 472 473 m21 <<= 8 474 m2 += int64(m20) 475 m31 = uint32(m[p+13]) 476 y6 -= alpha130 477 478 m22 <<= 16 479 m2 += int64(m21) 480 m32 = uint32(m[p+14]) 481 y1 -= alpha32 482 483 m23 <<= 24 484 m2 += int64(m22) 485 m33 = uint64(m[p+15]) 486 y0 -= alpha32 487 488 m2 += int64(m23) 489 m00 = uint32(m[p+0]) 490 y5 = h5 + alpha96 491 492 m31 <<= 8 493 m3 += int64(m30) 494 m01 = uint32(m[p+1]) 495 y4 = h4 + alpha96 496 497 m32 <<= 16 498 m02 = uint32(m[p+2]) 499 x7 = h7 - y7 500 y7 *= scale 501 502 m33 += 256 503 m03 = uint32(m[p+3]) 504 x6 = h6 - y6 505 y6 *= scale 506 507 m33 <<= 24 508 m3 += int64(m31) 509 m10 = uint32(m[p+4]) 510 x1 = h1 - y1 511 512 m01 <<= 8 513 m3 += int64(m32) 514 m11 = uint32(m[p+5]) 515 x0 = h0 - y0 516 517 m3 += int64(m33) 518 m0 += int64(m00) 519 m12 = uint32(m[p+6]) 520 y5 -= alpha96 521 522 m02 <<= 16 523 m0 += int64(m01) 524 m13 = uint32(m[p+7]) 525 y4 -= alpha96 526 527 m03 <<= 24 528 m0 += int64(m02) 529 d2 = m2 530 x1 += y7 531 532 m0 += int64(m03) 533 
d3 = m3 534 x0 += y6 535 536 m11 <<= 8 537 m1 += int64(m10) 538 d0 = m0 539 x7 += y5 540 541 m12 <<= 16 542 m1 += int64(m11) 543 x6 += y4 544 545 m13 <<= 24 546 m1 += int64(m12) 547 y3 = h3 + alpha64 548 549 m1 += int64(m13) 550 d1 = m1 551 y2 = h2 + alpha64 552 553 x0 += x1 554 555 x6 += x7 556 557 y3 -= alpha64 558 r3low = r3low_stack 559 560 y2 -= alpha64 561 r0low = r0low_stack 562 563 x5 = h5 - y5 564 r3lowx0 = r3low * x0 565 r3high = r3high_stack 566 567 x4 = h4 - y4 568 r0lowx6 = r0low * x6 569 r0high = r0high_stack 570 571 x3 = h3 - y3 572 r3highx0 = r3high * x0 573 sr1low = sr1low_stack 574 575 x2 = h2 - y2 576 r0highx6 = r0high * x6 577 sr1high = sr1high_stack 578 579 x5 += y3 580 r0lowx0 = r0low * x0 581 r1low = r1low_stack 582 583 h6 = r3lowx0 + r0lowx6 584 sr1lowx6 = sr1low * x6 585 r1high = r1high_stack 586 587 x4 += y2 588 r0highx0 = r0high * x0 589 sr2low = sr2low_stack 590 591 h7 = r3highx0 + r0highx6 592 sr1highx6 = sr1high * x6 593 sr2high = sr2high_stack 594 595 x3 += y1 596 r1lowx0 = r1low * x0 597 r2low = r2low_stack 598 599 h0 = r0lowx0 + sr1lowx6 600 sr2lowx6 = sr2low * x6 601 r2high = r2high_stack 602 603 x2 += y0 604 r1highx0 = r1high * x0 605 sr3low = sr3low_stack 606 607 h1 = r0highx0 + sr1highx6 608 sr2highx6 = sr2high * x6 609 sr3high = sr3high_stack 610 611 x4 += x5 612 r2lowx0 = r2low * x0 613 z2 = math.Float64frombits(uint64(d2)) 614 615 h2 = r1lowx0 + sr2lowx6 616 sr3lowx6 = sr3low * x6 617 618 x2 += x3 619 r2highx0 = r2high * x0 620 z3 = math.Float64frombits(uint64(d3)) 621 622 h3 = r1highx0 + sr2highx6 623 sr3highx6 = sr3high * x6 624 625 r1highx4 = r1high * x4 626 z2 -= alpha64 627 628 h4 = r2lowx0 + sr3lowx6 629 r1lowx4 = r1low * x4 630 631 r0highx4 = r0high * x4 632 z3 -= alpha96 633 634 h5 = r2highx0 + sr3highx6 635 r0lowx4 = r0low * x4 636 637 h7 += r1highx4 638 sr3highx4 = sr3high * x4 639 640 h6 += r1lowx4 641 sr3lowx4 = sr3low * x4 642 643 h5 += r0highx4 644 sr2highx4 = sr2high * x4 645 646 h4 += r0lowx4 647 sr2lowx4 = 
sr2low * x4 648 649 h3 += sr3highx4 650 r0lowx2 = r0low * x2 651 652 h2 += sr3lowx4 653 r0highx2 = r0high * x2 654 655 h1 += sr2highx4 656 r1lowx2 = r1low * x2 657 658 h0 += sr2lowx4 659 r1highx2 = r1high * x2 660 661 h2 += r0lowx2 662 r2lowx2 = r2low * x2 663 664 h3 += r0highx2 665 r2highx2 = r2high * x2 666 667 h4 += r1lowx2 668 sr3lowx2 = sr3low * x2 669 670 h5 += r1highx2 671 sr3highx2 = sr3high * x2 672 673 p += 16 674 l -= 16 675 h6 += r2lowx2 676 677 h7 += r2highx2 678 679 z1 = math.Float64frombits(uint64(d1)) 680 h0 += sr3lowx2 681 682 z0 = math.Float64frombits(uint64(d0)) 683 h1 += sr3highx2 684 685 z1 -= alpha32 686 687 z0 -= alpha0 688 689 h5 += z3 690 691 h3 += z2 692 693 h1 += z1 694 695 h0 += z0 696 697 if l >= 16 { 698 goto multiplyaddatleast16bytes 699 } 700 701 multiplyaddatmost15bytes: 702 703 y7 = h7 + alpha130 704 705 y6 = h6 + alpha130 706 707 y1 = h1 + alpha32 708 709 y0 = h0 + alpha32 710 711 y7 -= alpha130 712 713 y6 -= alpha130 714 715 y1 -= alpha32 716 717 y0 -= alpha32 718 719 y5 = h5 + alpha96 720 721 y4 = h4 + alpha96 722 723 x7 = h7 - y7 724 y7 *= scale 725 726 x6 = h6 - y6 727 y6 *= scale 728 729 x1 = h1 - y1 730 731 x0 = h0 - y0 732 733 y5 -= alpha96 734 735 y4 -= alpha96 736 737 x1 += y7 738 739 x0 += y6 740 741 x7 += y5 742 743 x6 += y4 744 745 y3 = h3 + alpha64 746 747 y2 = h2 + alpha64 748 749 x0 += x1 750 751 x6 += x7 752 753 y3 -= alpha64 754 r3low = r3low_stack 755 756 y2 -= alpha64 757 r0low = r0low_stack 758 759 x5 = h5 - y5 760 r3lowx0 = r3low * x0 761 r3high = r3high_stack 762 763 x4 = h4 - y4 764 r0lowx6 = r0low * x6 765 r0high = r0high_stack 766 767 x3 = h3 - y3 768 r3highx0 = r3high * x0 769 sr1low = sr1low_stack 770 771 x2 = h2 - y2 772 r0highx6 = r0high * x6 773 sr1high = sr1high_stack 774 775 x5 += y3 776 r0lowx0 = r0low * x0 777 r1low = r1low_stack 778 779 h6 = r3lowx0 + r0lowx6 780 sr1lowx6 = sr1low * x6 781 r1high = r1high_stack 782 783 x4 += y2 784 r0highx0 = r0high * x0 785 sr2low = sr2low_stack 786 787 h7 = 
r3highx0 + r0highx6 788 sr1highx6 = sr1high * x6 789 sr2high = sr2high_stack 790 791 x3 += y1 792 r1lowx0 = r1low * x0 793 r2low = r2low_stack 794 795 h0 = r0lowx0 + sr1lowx6 796 sr2lowx6 = sr2low * x6 797 r2high = r2high_stack 798 799 x2 += y0 800 r1highx0 = r1high * x0 801 sr3low = sr3low_stack 802 803 h1 = r0highx0 + sr1highx6 804 sr2highx6 = sr2high * x6 805 sr3high = sr3high_stack 806 807 x4 += x5 808 r2lowx0 = r2low * x0 809 810 h2 = r1lowx0 + sr2lowx6 811 sr3lowx6 = sr3low * x6 812 813 x2 += x3 814 r2highx0 = r2high * x0 815 816 h3 = r1highx0 + sr2highx6 817 sr3highx6 = sr3high * x6 818 819 r1highx4 = r1high * x4 820 821 h4 = r2lowx0 + sr3lowx6 822 r1lowx4 = r1low * x4 823 824 r0highx4 = r0high * x4 825 826 h5 = r2highx0 + sr3highx6 827 r0lowx4 = r0low * x4 828 829 h7 += r1highx4 830 sr3highx4 = sr3high * x4 831 832 h6 += r1lowx4 833 sr3lowx4 = sr3low * x4 834 835 h5 += r0highx4 836 sr2highx4 = sr2high * x4 837 838 h4 += r0lowx4 839 sr2lowx4 = sr2low * x4 840 841 h3 += sr3highx4 842 r0lowx2 = r0low * x2 843 844 h2 += sr3lowx4 845 r0highx2 = r0high * x2 846 847 h1 += sr2highx4 848 r1lowx2 = r1low * x2 849 850 h0 += sr2lowx4 851 r1highx2 = r1high * x2 852 853 h2 += r0lowx2 854 r2lowx2 = r2low * x2 855 856 h3 += r0highx2 857 r2highx2 = r2high * x2 858 859 h4 += r1lowx2 860 sr3lowx2 = sr3low * x2 861 862 h5 += r1highx2 863 sr3highx2 = sr3high * x2 864 865 h6 += r2lowx2 866 867 h7 += r2highx2 868 869 h0 += sr3lowx2 870 871 h1 += sr3highx2 872 873 addatmost15bytes: 874 875 if l == 0 { 876 goto nomorebytes 877 } 878 879 lbelow2 = l - 2 880 881 lbelow3 = l - 3 882 883 lbelow2 >>= 31 884 lbelow4 = l - 4 885 886 m00 = uint32(m[p+0]) 887 lbelow3 >>= 31 888 p += lbelow2 889 890 m01 = uint32(m[p+1]) 891 lbelow4 >>= 31 892 p += lbelow3 893 894 m02 = uint32(m[p+2]) 895 p += lbelow4 896 m0 = 2151 897 898 m03 = uint32(m[p+3]) 899 m0 <<= 51 900 m1 = 2215 901 902 m0 += int64(m00) 903 m01 &^= uint32(lbelow2) 904 905 m02 &^= uint32(lbelow3) 906 m01 -= uint32(lbelow2) 907 908 m01 
<<= 8 909 m03 &^= uint32(lbelow4) 910 911 m0 += int64(m01) 912 lbelow2 -= lbelow3 913 914 m02 += uint32(lbelow2) 915 lbelow3 -= lbelow4 916 917 m02 <<= 16 918 m03 += uint32(lbelow3) 919 920 m03 <<= 24 921 m0 += int64(m02) 922 923 m0 += int64(m03) 924 lbelow5 = l - 5 925 926 lbelow6 = l - 6 927 lbelow7 = l - 7 928 929 lbelow5 >>= 31 930 lbelow8 = l - 8 931 932 lbelow6 >>= 31 933 p += lbelow5 934 935 m10 = uint32(m[p+4]) 936 lbelow7 >>= 31 937 p += lbelow6 938 939 m11 = uint32(m[p+5]) 940 lbelow8 >>= 31 941 p += lbelow7 942 943 m12 = uint32(m[p+6]) 944 m1 <<= 51 945 p += lbelow8 946 947 m13 = uint32(m[p+7]) 948 m10 &^= uint32(lbelow5) 949 lbelow4 -= lbelow5 950 951 m10 += uint32(lbelow4) 952 lbelow5 -= lbelow6 953 954 m11 &^= uint32(lbelow6) 955 m11 += uint32(lbelow5) 956 957 m11 <<= 8 958 m1 += int64(m10) 959 960 m1 += int64(m11) 961 m12 &^= uint32(lbelow7) 962 963 lbelow6 -= lbelow7 964 m13 &^= uint32(lbelow8) 965 966 m12 += uint32(lbelow6) 967 lbelow7 -= lbelow8 968 969 m12 <<= 16 970 m13 += uint32(lbelow7) 971 972 m13 <<= 24 973 m1 += int64(m12) 974 975 m1 += int64(m13) 976 m2 = 2279 977 978 lbelow9 = l - 9 979 m3 = 2343 980 981 lbelow10 = l - 10 982 lbelow11 = l - 11 983 984 lbelow9 >>= 31 985 lbelow12 = l - 12 986 987 lbelow10 >>= 31 988 p += lbelow9 989 990 m20 = uint32(m[p+8]) 991 lbelow11 >>= 31 992 p += lbelow10 993 994 m21 = uint32(m[p+9]) 995 lbelow12 >>= 31 996 p += lbelow11 997 998 m22 = uint32(m[p+10]) 999 m2 <<= 51 1000 p += lbelow12 1001 1002 m23 = uint32(m[p+11]) 1003 m20 &^= uint32(lbelow9) 1004 lbelow8 -= lbelow9 1005 1006 m20 += uint32(lbelow8) 1007 lbelow9 -= lbelow10 1008 1009 m21 &^= uint32(lbelow10) 1010 m21 += uint32(lbelow9) 1011 1012 m21 <<= 8 1013 m2 += int64(m20) 1014 1015 m2 += int64(m21) 1016 m22 &^= uint32(lbelow11) 1017 1018 lbelow10 -= lbelow11 1019 m23 &^= uint32(lbelow12) 1020 1021 m22 += uint32(lbelow10) 1022 lbelow11 -= lbelow12 1023 1024 m22 <<= 16 1025 m23 += uint32(lbelow11) 1026 1027 m23 <<= 24 1028 m2 += int64(m22) 1029 
1030 m3 <<= 51 1031 lbelow13 = l - 13 1032 1033 lbelow13 >>= 31 1034 lbelow14 = l - 14 1035 1036 lbelow14 >>= 31 1037 p += lbelow13 1038 lbelow15 = l - 15 1039 1040 m30 = uint32(m[p+12]) 1041 lbelow15 >>= 31 1042 p += lbelow14 1043 1044 m31 = uint32(m[p+13]) 1045 p += lbelow15 1046 m2 += int64(m23) 1047 1048 m32 = uint32(m[p+14]) 1049 m30 &^= uint32(lbelow13) 1050 lbelow12 -= lbelow13 1051 1052 m30 += uint32(lbelow12) 1053 lbelow13 -= lbelow14 1054 1055 m3 += int64(m30) 1056 m31 &^= uint32(lbelow14) 1057 1058 m31 += uint32(lbelow13) 1059 m32 &^= uint32(lbelow15) 1060 1061 m31 <<= 8 1062 lbelow14 -= lbelow15 1063 1064 m3 += int64(m31) 1065 m32 += uint32(lbelow14) 1066 d0 = m0 1067 1068 m32 <<= 16 1069 m33 = uint64(lbelow15 + 1) 1070 d1 = m1 1071 1072 m33 <<= 24 1073 m3 += int64(m32) 1074 d2 = m2 1075 1076 m3 += int64(m33) 1077 d3 = m3 1078 1079 z3 = math.Float64frombits(uint64(d3)) 1080 1081 z2 = math.Float64frombits(uint64(d2)) 1082 1083 z1 = math.Float64frombits(uint64(d1)) 1084 1085 z0 = math.Float64frombits(uint64(d0)) 1086 1087 z3 -= alpha96 1088 1089 z2 -= alpha64 1090 1091 z1 -= alpha32 1092 1093 z0 -= alpha0 1094 1095 h5 += z3 1096 1097 h3 += z2 1098 1099 h1 += z1 1100 1101 h0 += z0 1102 1103 y7 = h7 + alpha130 1104 1105 y6 = h6 + alpha130 1106 1107 y1 = h1 + alpha32 1108 1109 y0 = h0 + alpha32 1110 1111 y7 -= alpha130 1112 1113 y6 -= alpha130 1114 1115 y1 -= alpha32 1116 1117 y0 -= alpha32 1118 1119 y5 = h5 + alpha96 1120 1121 y4 = h4 + alpha96 1122 1123 x7 = h7 - y7 1124 y7 *= scale 1125 1126 x6 = h6 - y6 1127 y6 *= scale 1128 1129 x1 = h1 - y1 1130 1131 x0 = h0 - y0 1132 1133 y5 -= alpha96 1134 1135 y4 -= alpha96 1136 1137 x1 += y7 1138 1139 x0 += y6 1140 1141 x7 += y5 1142 1143 x6 += y4 1144 1145 y3 = h3 + alpha64 1146 1147 y2 = h2 + alpha64 1148 1149 x0 += x1 1150 1151 x6 += x7 1152 1153 y3 -= alpha64 1154 r3low = r3low_stack 1155 1156 y2 -= alpha64 1157 r0low = r0low_stack 1158 1159 x5 = h5 - y5 1160 r3lowx0 = r3low * x0 1161 r3high = r3high_stack 1162 
1163 x4 = h4 - y4 1164 r0lowx6 = r0low * x6 1165 r0high = r0high_stack 1166 1167 x3 = h3 - y3 1168 r3highx0 = r3high * x0 1169 sr1low = sr1low_stack 1170 1171 x2 = h2 - y2 1172 r0highx6 = r0high * x6 1173 sr1high = sr1high_stack 1174 1175 x5 += y3 1176 r0lowx0 = r0low * x0 1177 r1low = r1low_stack 1178 1179 h6 = r3lowx0 + r0lowx6 1180 sr1lowx6 = sr1low * x6 1181 r1high = r1high_stack 1182 1183 x4 += y2 1184 r0highx0 = r0high * x0 1185 sr2low = sr2low_stack 1186 1187 h7 = r3highx0 + r0highx6 1188 sr1highx6 = sr1high * x6 1189 sr2high = sr2high_stack 1190 1191 x3 += y1 1192 r1lowx0 = r1low * x0 1193 r2low = r2low_stack 1194 1195 h0 = r0lowx0 + sr1lowx6 1196 sr2lowx6 = sr2low * x6 1197 r2high = r2high_stack 1198 1199 x2 += y0 1200 r1highx0 = r1high * x0 1201 sr3low = sr3low_stack 1202 1203 h1 = r0highx0 + sr1highx6 1204 sr2highx6 = sr2high * x6 1205 sr3high = sr3high_stack 1206 1207 x4 += x5 1208 r2lowx0 = r2low * x0 1209 1210 h2 = r1lowx0 + sr2lowx6 1211 sr3lowx6 = sr3low * x6 1212 1213 x2 += x3 1214 r2highx0 = r2high * x0 1215 1216 h3 = r1highx0 + sr2highx6 1217 sr3highx6 = sr3high * x6 1218 1219 r1highx4 = r1high * x4 1220 1221 h4 = r2lowx0 + sr3lowx6 1222 r1lowx4 = r1low * x4 1223 1224 r0highx4 = r0high * x4 1225 1226 h5 = r2highx0 + sr3highx6 1227 r0lowx4 = r0low * x4 1228 1229 h7 += r1highx4 1230 sr3highx4 = sr3high * x4 1231 1232 h6 += r1lowx4 1233 sr3lowx4 = sr3low * x4 1234 1235 h5 += r0highx4 1236 sr2highx4 = sr2high * x4 1237 1238 h4 += r0lowx4 1239 sr2lowx4 = sr2low * x4 1240 1241 h3 += sr3highx4 1242 r0lowx2 = r0low * x2 1243 1244 h2 += sr3lowx4 1245 r0highx2 = r0high * x2 1246 1247 h1 += sr2highx4 1248 r1lowx2 = r1low * x2 1249 1250 h0 += sr2lowx4 1251 r1highx2 = r1high * x2 1252 1253 h2 += r0lowx2 1254 r2lowx2 = r2low * x2 1255 1256 h3 += r0highx2 1257 r2highx2 = r2high * x2 1258 1259 h4 += r1lowx2 1260 sr3lowx2 = sr3low * x2 1261 1262 h5 += r1highx2 1263 sr3highx2 = sr3high * x2 1264 1265 h6 += r2lowx2 1266 1267 h7 += r2highx2 1268 1269 h0 += sr3lowx2 
1270 1271 h1 += sr3highx2 1272 1273 nomorebytes: 1274 1275 y7 = h7 + alpha130 1276 1277 y0 = h0 + alpha32 1278 1279 y1 = h1 + alpha32 1280 1281 y2 = h2 + alpha64 1282 1283 y7 -= alpha130 1284 1285 y3 = h3 + alpha64 1286 1287 y4 = h4 + alpha96 1288 1289 y5 = h5 + alpha96 1290 1291 x7 = h7 - y7 1292 y7 *= scale 1293 1294 y0 -= alpha32 1295 1296 y1 -= alpha32 1297 1298 y2 -= alpha64 1299 1300 h6 += x7 1301 1302 y3 -= alpha64 1303 1304 y4 -= alpha96 1305 1306 y5 -= alpha96 1307 1308 y6 = h6 + alpha130 1309 1310 x0 = h0 - y0 1311 1312 x1 = h1 - y1 1313 1314 x2 = h2 - y2 1315 1316 y6 -= alpha130 1317 1318 x0 += y7 1319 1320 x3 = h3 - y3 1321 1322 x4 = h4 - y4 1323 1324 x5 = h5 - y5 1325 1326 x6 = h6 - y6 1327 1328 y6 *= scale 1329 1330 x2 += y0 1331 1332 x3 += y1 1333 1334 x4 += y2 1335 1336 x0 += y6 1337 1338 x5 += y3 1339 1340 x6 += y4 1341 1342 x2 += x3 1343 1344 x0 += x1 1345 1346 x4 += x5 1347 1348 x6 += y5 1349 1350 x2 += offset1 1351 d1 = int64(math.Float64bits(x2)) 1352 1353 x0 += offset0 1354 d0 = int64(math.Float64bits(x0)) 1355 1356 x4 += offset2 1357 d2 = int64(math.Float64bits(x4)) 1358 1359 x6 += offset3 1360 d3 = int64(math.Float64bits(x6)) 1361 1362 f0 = uint64(d0) 1363 1364 f1 = uint64(d1) 1365 bits32 = math.MaxUint64 1366 1367 f2 = uint64(d2) 1368 bits32 >>= 32 1369 1370 f3 = uint64(d3) 1371 f = f0 >> 32 1372 1373 f0 &= bits32 1374 f &= 255 1375 1376 f1 += f 1377 g0 = f0 + 5 1378 1379 g = g0 >> 32 1380 g0 &= bits32 1381 1382 f = f1 >> 32 1383 f1 &= bits32 1384 1385 f &= 255 1386 g1 = f1 + g 1387 1388 g = g1 >> 32 1389 f2 += f 1390 1391 f = f2 >> 32 1392 g1 &= bits32 1393 1394 f2 &= bits32 1395 f &= 255 1396 1397 f3 += f 1398 g2 = f2 + g 1399 1400 g = g2 >> 32 1401 g2 &= bits32 1402 1403 f4 = f3 >> 32 1404 f3 &= bits32 1405 1406 f4 &= 255 1407 g3 = f3 + g 1408 1409 g = g3 >> 32 1410 g3 &= bits32 1411 1412 g4 = f4 + g 1413 1414 g4 = g4 - 4 1415 s00 = uint32(s[0]) 1416 1417 f = uint64(int64(g4) >> 63) 1418 s01 = uint32(s[1]) 1419 1420 f0 &= f 1421 g0 &^= f 
1422 s02 = uint32(s[2]) 1423 1424 f1 &= f 1425 f0 |= g0 1426 s03 = uint32(s[3]) 1427 1428 g1 &^= f 1429 f2 &= f 1430 s10 = uint32(s[4]) 1431 1432 f3 &= f 1433 g2 &^= f 1434 s11 = uint32(s[5]) 1435 1436 g3 &^= f 1437 f1 |= g1 1438 s12 = uint32(s[6]) 1439 1440 f2 |= g2 1441 f3 |= g3 1442 s13 = uint32(s[7]) 1443 1444 s01 <<= 8 1445 f0 += uint64(s00) 1446 s20 = uint32(s[8]) 1447 1448 s02 <<= 16 1449 f0 += uint64(s01) 1450 s21 = uint32(s[9]) 1451 1452 s03 <<= 24 1453 f0 += uint64(s02) 1454 s22 = uint32(s[10]) 1455 1456 s11 <<= 8 1457 f1 += uint64(s10) 1458 s23 = uint32(s[11]) 1459 1460 s12 <<= 16 1461 f1 += uint64(s11) 1462 s30 = uint32(s[12]) 1463 1464 s13 <<= 24 1465 f1 += uint64(s12) 1466 s31 = uint32(s[13]) 1467 1468 f0 += uint64(s03) 1469 f1 += uint64(s13) 1470 s32 = uint32(s[14]) 1471 1472 s21 <<= 8 1473 f2 += uint64(s20) 1474 s33 = uint32(s[15]) 1475 1476 s22 <<= 16 1477 f2 += uint64(s21) 1478 1479 s23 <<= 24 1480 f2 += uint64(s22) 1481 1482 s31 <<= 8 1483 f3 += uint64(s30) 1484 1485 s32 <<= 16 1486 f3 += uint64(s31) 1487 1488 s33 <<= 24 1489 f3 += uint64(s32) 1490 1491 f2 += uint64(s23) 1492 f3 += uint64(s33) 1493 1494 out[0] = byte(f0) 1495 f0 >>= 8 1496 out[1] = byte(f0) 1497 f0 >>= 8 1498 out[2] = byte(f0) 1499 f0 >>= 8 1500 out[3] = byte(f0) 1501 f0 >>= 8 1502 f1 += f0 1503 1504 out[4] = byte(f1) 1505 f1 >>= 8 1506 out[5] = byte(f1) 1507 f1 >>= 8 1508 out[6] = byte(f1) 1509 f1 >>= 8 1510 out[7] = byte(f1) 1511 f1 >>= 8 1512 f2 += f1 1513 1514 out[8] = byte(f2) 1515 f2 >>= 8 1516 out[9] = byte(f2) 1517 f2 >>= 8 1518 out[10] = byte(f2) 1519 f2 >>= 8 1520 out[11] = byte(f2) 1521 f2 >>= 8 1522 f3 += f2 1523 1524 out[12] = byte(f3) 1525 f3 >>= 8 1526 out[13] = byte(f3) 1527 f3 >>= 8 1528 out[14] = byte(f3) 1529 f3 >>= 8 1530 out[15] = byte(f3) 1531 } 1532