Home | History | Annotate | Download | only in poly1305
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build !amd64,!arm gccgo appengine nacl
      6 
      7 package poly1305
      8 
      9 // Based on original, public domain implementation from NaCl by D. J.
     10 // Bernstein.
     11 
     12 import "math"
     13 
     14 const (
     15 	alpham80 = 0.00000000558793544769287109375
     16 	alpham48 = 24.0
     17 	alpham16 = 103079215104.0
     18 	alpha0   = 6755399441055744.0
     19 	alpha18  = 1770887431076116955136.0
     20 	alpha32  = 29014219670751100192948224.0
     21 	alpha50  = 7605903601369376408980219232256.0
     22 	alpha64  = 124615124604835863084731911901282304.0
     23 	alpha82  = 32667107224410092492483962313449748299776.0
     24 	alpha96  = 535217884764734955396857238543560676143529984.0
     25 	alpha112 = 35076039295941670036888435985190792471742381031424.0
     26 	alpha130 = 9194973245195333150150082162901855101712434733101613056.0
     27 	scale    = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
     28 	offset0  = 6755408030990331.0
     29 	offset1  = 29014256564239239022116864.0
     30 	offset2  = 124615283061160854719918951570079744.0
     31 	offset3  = 535219245894202480694386063513315216128475136.0
     32 )
     33 
     34 // Sum generates an authenticator for m using a one-time key and puts the
     35 // 16-byte result into out. Authenticating two different messages with the same
     36 // key allows an attacker to forge messages at will.
     37 func Sum(out *[16]byte, m []byte, key *[32]byte) {
     38 	r := key
     39 	s := key[16:]
     40 	var (
     41 		y7        float64
     42 		y6        float64
     43 		y1        float64
     44 		y0        float64
     45 		y5        float64
     46 		y4        float64
     47 		x7        float64
     48 		x6        float64
     49 		x1        float64
     50 		x0        float64
     51 		y3        float64
     52 		y2        float64
     53 		x5        float64
     54 		r3lowx0   float64
     55 		x4        float64
     56 		r0lowx6   float64
     57 		x3        float64
     58 		r3highx0  float64
     59 		x2        float64
     60 		r0highx6  float64
     61 		r0lowx0   float64
     62 		sr1lowx6  float64
     63 		r0highx0  float64
     64 		sr1highx6 float64
     65 		sr3low    float64
     66 		r1lowx0   float64
     67 		sr2lowx6  float64
     68 		r1highx0  float64
     69 		sr2highx6 float64
     70 		r2lowx0   float64
     71 		sr3lowx6  float64
     72 		r2highx0  float64
     73 		sr3highx6 float64
     74 		r1highx4  float64
     75 		r1lowx4   float64
     76 		r0highx4  float64
     77 		r0lowx4   float64
     78 		sr3highx4 float64
     79 		sr3lowx4  float64
     80 		sr2highx4 float64
     81 		sr2lowx4  float64
     82 		r0lowx2   float64
     83 		r0highx2  float64
     84 		r1lowx2   float64
     85 		r1highx2  float64
     86 		r2lowx2   float64
     87 		r2highx2  float64
     88 		sr3lowx2  float64
     89 		sr3highx2 float64
     90 		z0        float64
     91 		z1        float64
     92 		z2        float64
     93 		z3        float64
     94 		m0        int64
     95 		m1        int64
     96 		m2        int64
     97 		m3        int64
     98 		m00       uint32
     99 		m01       uint32
    100 		m02       uint32
    101 		m03       uint32
    102 		m10       uint32
    103 		m11       uint32
    104 		m12       uint32
    105 		m13       uint32
    106 		m20       uint32
    107 		m21       uint32
    108 		m22       uint32
    109 		m23       uint32
    110 		m30       uint32
    111 		m31       uint32
    112 		m32       uint32
    113 		m33       uint64
    114 		lbelow2   int32
    115 		lbelow3   int32
    116 		lbelow4   int32
    117 		lbelow5   int32
    118 		lbelow6   int32
    119 		lbelow7   int32
    120 		lbelow8   int32
    121 		lbelow9   int32
    122 		lbelow10  int32
    123 		lbelow11  int32
    124 		lbelow12  int32
    125 		lbelow13  int32
    126 		lbelow14  int32
    127 		lbelow15  int32
    128 		s00       uint32
    129 		s01       uint32
    130 		s02       uint32
    131 		s03       uint32
    132 		s10       uint32
    133 		s11       uint32
    134 		s12       uint32
    135 		s13       uint32
    136 		s20       uint32
    137 		s21       uint32
    138 		s22       uint32
    139 		s23       uint32
    140 		s30       uint32
    141 		s31       uint32
    142 		s32       uint32
    143 		s33       uint32
    144 		bits32    uint64
    145 		f         uint64
    146 		f0        uint64
    147 		f1        uint64
    148 		f2        uint64
    149 		f3        uint64
    150 		f4        uint64
    151 		g         uint64
    152 		g0        uint64
    153 		g1        uint64
    154 		g2        uint64
    155 		g3        uint64
    156 		g4        uint64
    157 	)
    158 
    159 	var p int32
    160 
    161 	l := int32(len(m))
    162 
    163 	r00 := uint32(r[0])
    164 
    165 	r01 := uint32(r[1])
    166 
    167 	r02 := uint32(r[2])
    168 	r0 := int64(2151)
    169 
    170 	r03 := uint32(r[3])
    171 	r03 &= 15
    172 	r0 <<= 51
    173 
    174 	r10 := uint32(r[4])
    175 	r10 &= 252
    176 	r01 <<= 8
    177 	r0 += int64(r00)
    178 
    179 	r11 := uint32(r[5])
    180 	r02 <<= 16
    181 	r0 += int64(r01)
    182 
    183 	r12 := uint32(r[6])
    184 	r03 <<= 24
    185 	r0 += int64(r02)
    186 
    187 	r13 := uint32(r[7])
    188 	r13 &= 15
    189 	r1 := int64(2215)
    190 	r0 += int64(r03)
    191 
    192 	d0 := r0
    193 	r1 <<= 51
    194 	r2 := int64(2279)
    195 
    196 	r20 := uint32(r[8])
    197 	r20 &= 252
    198 	r11 <<= 8
    199 	r1 += int64(r10)
    200 
    201 	r21 := uint32(r[9])
    202 	r12 <<= 16
    203 	r1 += int64(r11)
    204 
    205 	r22 := uint32(r[10])
    206 	r13 <<= 24
    207 	r1 += int64(r12)
    208 
    209 	r23 := uint32(r[11])
    210 	r23 &= 15
    211 	r2 <<= 51
    212 	r1 += int64(r13)
    213 
    214 	d1 := r1
    215 	r21 <<= 8
    216 	r2 += int64(r20)
    217 
    218 	r30 := uint32(r[12])
    219 	r30 &= 252
    220 	r22 <<= 16
    221 	r2 += int64(r21)
    222 
    223 	r31 := uint32(r[13])
    224 	r23 <<= 24
    225 	r2 += int64(r22)
    226 
    227 	r32 := uint32(r[14])
    228 	r2 += int64(r23)
    229 	r3 := int64(2343)
    230 
    231 	d2 := r2
    232 	r3 <<= 51
    233 
    234 	r33 := uint32(r[15])
    235 	r33 &= 15
    236 	r31 <<= 8
    237 	r3 += int64(r30)
    238 
    239 	r32 <<= 16
    240 	r3 += int64(r31)
    241 
    242 	r33 <<= 24
    243 	r3 += int64(r32)
    244 
    245 	r3 += int64(r33)
    246 	h0 := alpha32 - alpha32
    247 
    248 	d3 := r3
    249 	h1 := alpha32 - alpha32
    250 
    251 	h2 := alpha32 - alpha32
    252 
    253 	h3 := alpha32 - alpha32
    254 
    255 	h4 := alpha32 - alpha32
    256 
    257 	r0low := math.Float64frombits(uint64(d0))
    258 	h5 := alpha32 - alpha32
    259 
    260 	r1low := math.Float64frombits(uint64(d1))
    261 	h6 := alpha32 - alpha32
    262 
    263 	r2low := math.Float64frombits(uint64(d2))
    264 	h7 := alpha32 - alpha32
    265 
    266 	r0low -= alpha0
    267 
    268 	r1low -= alpha32
    269 
    270 	r2low -= alpha64
    271 
    272 	r0high := r0low + alpha18
    273 
    274 	r3low := math.Float64frombits(uint64(d3))
    275 
    276 	r1high := r1low + alpha50
    277 	sr1low := scale * r1low
    278 
    279 	r2high := r2low + alpha82
    280 	sr2low := scale * r2low
    281 
    282 	r0high -= alpha18
    283 	r0high_stack := r0high
    284 
    285 	r3low -= alpha96
    286 
    287 	r1high -= alpha50
    288 	r1high_stack := r1high
    289 
    290 	sr1high := sr1low + alpham80
    291 
    292 	r0low -= r0high
    293 
    294 	r2high -= alpha82
    295 	sr3low = scale * r3low
    296 
    297 	sr2high := sr2low + alpham48
    298 
    299 	r1low -= r1high
    300 	r1low_stack := r1low
    301 
    302 	sr1high -= alpham80
    303 	sr1high_stack := sr1high
    304 
    305 	r2low -= r2high
    306 	r2low_stack := r2low
    307 
    308 	sr2high -= alpham48
    309 	sr2high_stack := sr2high
    310 
    311 	r3high := r3low + alpha112
    312 	r0low_stack := r0low
    313 
    314 	sr1low -= sr1high
    315 	sr1low_stack := sr1low
    316 
    317 	sr3high := sr3low + alpham16
    318 	r2high_stack := r2high
    319 
    320 	sr2low -= sr2high
    321 	sr2low_stack := sr2low
    322 
    323 	r3high -= alpha112
    324 	r3high_stack := r3high
    325 
    326 	sr3high -= alpham16
    327 	sr3high_stack := sr3high
    328 
    329 	r3low -= r3high
    330 	r3low_stack := r3low
    331 
    332 	sr3low -= sr3high
    333 	sr3low_stack := sr3low
    334 
    335 	if l < 16 {
    336 		goto addatmost15bytes
    337 	}
    338 
    339 	m00 = uint32(m[p+0])
    340 	m0 = 2151
    341 
    342 	m0 <<= 51
    343 	m1 = 2215
    344 	m01 = uint32(m[p+1])
    345 
    346 	m1 <<= 51
    347 	m2 = 2279
    348 	m02 = uint32(m[p+2])
    349 
    350 	m2 <<= 51
    351 	m3 = 2343
    352 	m03 = uint32(m[p+3])
    353 
    354 	m10 = uint32(m[p+4])
    355 	m01 <<= 8
    356 	m0 += int64(m00)
    357 
    358 	m11 = uint32(m[p+5])
    359 	m02 <<= 16
    360 	m0 += int64(m01)
    361 
    362 	m12 = uint32(m[p+6])
    363 	m03 <<= 24
    364 	m0 += int64(m02)
    365 
    366 	m13 = uint32(m[p+7])
    367 	m3 <<= 51
    368 	m0 += int64(m03)
    369 
    370 	m20 = uint32(m[p+8])
    371 	m11 <<= 8
    372 	m1 += int64(m10)
    373 
    374 	m21 = uint32(m[p+9])
    375 	m12 <<= 16
    376 	m1 += int64(m11)
    377 
    378 	m22 = uint32(m[p+10])
    379 	m13 <<= 24
    380 	m1 += int64(m12)
    381 
    382 	m23 = uint32(m[p+11])
    383 	m1 += int64(m13)
    384 
    385 	m30 = uint32(m[p+12])
    386 	m21 <<= 8
    387 	m2 += int64(m20)
    388 
    389 	m31 = uint32(m[p+13])
    390 	m22 <<= 16
    391 	m2 += int64(m21)
    392 
    393 	m32 = uint32(m[p+14])
    394 	m23 <<= 24
    395 	m2 += int64(m22)
    396 
    397 	m33 = uint64(m[p+15])
    398 	m2 += int64(m23)
    399 
    400 	d0 = m0
    401 	m31 <<= 8
    402 	m3 += int64(m30)
    403 
    404 	d1 = m1
    405 	m32 <<= 16
    406 	m3 += int64(m31)
    407 
    408 	d2 = m2
    409 	m33 += 256
    410 
    411 	m33 <<= 24
    412 	m3 += int64(m32)
    413 
    414 	m3 += int64(m33)
    415 	d3 = m3
    416 
    417 	p += 16
    418 	l -= 16
    419 
    420 	z0 = math.Float64frombits(uint64(d0))
    421 
    422 	z1 = math.Float64frombits(uint64(d1))
    423 
    424 	z2 = math.Float64frombits(uint64(d2))
    425 
    426 	z3 = math.Float64frombits(uint64(d3))
    427 
    428 	z0 -= alpha0
    429 
    430 	z1 -= alpha32
    431 
    432 	z2 -= alpha64
    433 
    434 	z3 -= alpha96
    435 
    436 	h0 += z0
    437 
    438 	h1 += z1
    439 
    440 	h3 += z2
    441 
    442 	h5 += z3
    443 
    444 	if l < 16 {
    445 		goto multiplyaddatmost15bytes
    446 	}
    447 
    448 multiplyaddatleast16bytes:
    449 
    450 	m2 = 2279
    451 	m20 = uint32(m[p+8])
    452 	y7 = h7 + alpha130
    453 
    454 	m2 <<= 51
    455 	m3 = 2343
    456 	m21 = uint32(m[p+9])
    457 	y6 = h6 + alpha130
    458 
    459 	m3 <<= 51
    460 	m0 = 2151
    461 	m22 = uint32(m[p+10])
    462 	y1 = h1 + alpha32
    463 
    464 	m0 <<= 51
    465 	m1 = 2215
    466 	m23 = uint32(m[p+11])
    467 	y0 = h0 + alpha32
    468 
    469 	m1 <<= 51
    470 	m30 = uint32(m[p+12])
    471 	y7 -= alpha130
    472 
    473 	m21 <<= 8
    474 	m2 += int64(m20)
    475 	m31 = uint32(m[p+13])
    476 	y6 -= alpha130
    477 
    478 	m22 <<= 16
    479 	m2 += int64(m21)
    480 	m32 = uint32(m[p+14])
    481 	y1 -= alpha32
    482 
    483 	m23 <<= 24
    484 	m2 += int64(m22)
    485 	m33 = uint64(m[p+15])
    486 	y0 -= alpha32
    487 
    488 	m2 += int64(m23)
    489 	m00 = uint32(m[p+0])
    490 	y5 = h5 + alpha96
    491 
    492 	m31 <<= 8
    493 	m3 += int64(m30)
    494 	m01 = uint32(m[p+1])
    495 	y4 = h4 + alpha96
    496 
    497 	m32 <<= 16
    498 	m02 = uint32(m[p+2])
    499 	x7 = h7 - y7
    500 	y7 *= scale
    501 
    502 	m33 += 256
    503 	m03 = uint32(m[p+3])
    504 	x6 = h6 - y6
    505 	y6 *= scale
    506 
    507 	m33 <<= 24
    508 	m3 += int64(m31)
    509 	m10 = uint32(m[p+4])
    510 	x1 = h1 - y1
    511 
    512 	m01 <<= 8
    513 	m3 += int64(m32)
    514 	m11 = uint32(m[p+5])
    515 	x0 = h0 - y0
    516 
    517 	m3 += int64(m33)
    518 	m0 += int64(m00)
    519 	m12 = uint32(m[p+6])
    520 	y5 -= alpha96
    521 
    522 	m02 <<= 16
    523 	m0 += int64(m01)
    524 	m13 = uint32(m[p+7])
    525 	y4 -= alpha96
    526 
    527 	m03 <<= 24
    528 	m0 += int64(m02)
    529 	d2 = m2
    530 	x1 += y7
    531 
    532 	m0 += int64(m03)
    533 	d3 = m3
    534 	x0 += y6
    535 
    536 	m11 <<= 8
    537 	m1 += int64(m10)
    538 	d0 = m0
    539 	x7 += y5
    540 
    541 	m12 <<= 16
    542 	m1 += int64(m11)
    543 	x6 += y4
    544 
    545 	m13 <<= 24
    546 	m1 += int64(m12)
    547 	y3 = h3 + alpha64
    548 
    549 	m1 += int64(m13)
    550 	d1 = m1
    551 	y2 = h2 + alpha64
    552 
    553 	x0 += x1
    554 
    555 	x6 += x7
    556 
    557 	y3 -= alpha64
    558 	r3low = r3low_stack
    559 
    560 	y2 -= alpha64
    561 	r0low = r0low_stack
    562 
    563 	x5 = h5 - y5
    564 	r3lowx0 = r3low * x0
    565 	r3high = r3high_stack
    566 
    567 	x4 = h4 - y4
    568 	r0lowx6 = r0low * x6
    569 	r0high = r0high_stack
    570 
    571 	x3 = h3 - y3
    572 	r3highx0 = r3high * x0
    573 	sr1low = sr1low_stack
    574 
    575 	x2 = h2 - y2
    576 	r0highx6 = r0high * x6
    577 	sr1high = sr1high_stack
    578 
    579 	x5 += y3
    580 	r0lowx0 = r0low * x0
    581 	r1low = r1low_stack
    582 
    583 	h6 = r3lowx0 + r0lowx6
    584 	sr1lowx6 = sr1low * x6
    585 	r1high = r1high_stack
    586 
    587 	x4 += y2
    588 	r0highx0 = r0high * x0
    589 	sr2low = sr2low_stack
    590 
    591 	h7 = r3highx0 + r0highx6
    592 	sr1highx6 = sr1high * x6
    593 	sr2high = sr2high_stack
    594 
    595 	x3 += y1
    596 	r1lowx0 = r1low * x0
    597 	r2low = r2low_stack
    598 
    599 	h0 = r0lowx0 + sr1lowx6
    600 	sr2lowx6 = sr2low * x6
    601 	r2high = r2high_stack
    602 
    603 	x2 += y0
    604 	r1highx0 = r1high * x0
    605 	sr3low = sr3low_stack
    606 
    607 	h1 = r0highx0 + sr1highx6
    608 	sr2highx6 = sr2high * x6
    609 	sr3high = sr3high_stack
    610 
    611 	x4 += x5
    612 	r2lowx0 = r2low * x0
    613 	z2 = math.Float64frombits(uint64(d2))
    614 
    615 	h2 = r1lowx0 + sr2lowx6
    616 	sr3lowx6 = sr3low * x6
    617 
    618 	x2 += x3
    619 	r2highx0 = r2high * x0
    620 	z3 = math.Float64frombits(uint64(d3))
    621 
    622 	h3 = r1highx0 + sr2highx6
    623 	sr3highx6 = sr3high * x6
    624 
    625 	r1highx4 = r1high * x4
    626 	z2 -= alpha64
    627 
    628 	h4 = r2lowx0 + sr3lowx6
    629 	r1lowx4 = r1low * x4
    630 
    631 	r0highx4 = r0high * x4
    632 	z3 -= alpha96
    633 
    634 	h5 = r2highx0 + sr3highx6
    635 	r0lowx4 = r0low * x4
    636 
    637 	h7 += r1highx4
    638 	sr3highx4 = sr3high * x4
    639 
    640 	h6 += r1lowx4
    641 	sr3lowx4 = sr3low * x4
    642 
    643 	h5 += r0highx4
    644 	sr2highx4 = sr2high * x4
    645 
    646 	h4 += r0lowx4
    647 	sr2lowx4 = sr2low * x4
    648 
    649 	h3 += sr3highx4
    650 	r0lowx2 = r0low * x2
    651 
    652 	h2 += sr3lowx4
    653 	r0highx2 = r0high * x2
    654 
    655 	h1 += sr2highx4
    656 	r1lowx2 = r1low * x2
    657 
    658 	h0 += sr2lowx4
    659 	r1highx2 = r1high * x2
    660 
    661 	h2 += r0lowx2
    662 	r2lowx2 = r2low * x2
    663 
    664 	h3 += r0highx2
    665 	r2highx2 = r2high * x2
    666 
    667 	h4 += r1lowx2
    668 	sr3lowx2 = sr3low * x2
    669 
    670 	h5 += r1highx2
    671 	sr3highx2 = sr3high * x2
    672 
    673 	p += 16
    674 	l -= 16
    675 	h6 += r2lowx2
    676 
    677 	h7 += r2highx2
    678 
    679 	z1 = math.Float64frombits(uint64(d1))
    680 	h0 += sr3lowx2
    681 
    682 	z0 = math.Float64frombits(uint64(d0))
    683 	h1 += sr3highx2
    684 
    685 	z1 -= alpha32
    686 
    687 	z0 -= alpha0
    688 
    689 	h5 += z3
    690 
    691 	h3 += z2
    692 
    693 	h1 += z1
    694 
    695 	h0 += z0
    696 
    697 	if l >= 16 {
    698 		goto multiplyaddatleast16bytes
    699 	}
    700 
    701 multiplyaddatmost15bytes:
    702 
    703 	y7 = h7 + alpha130
    704 
    705 	y6 = h6 + alpha130
    706 
    707 	y1 = h1 + alpha32
    708 
    709 	y0 = h0 + alpha32
    710 
    711 	y7 -= alpha130
    712 
    713 	y6 -= alpha130
    714 
    715 	y1 -= alpha32
    716 
    717 	y0 -= alpha32
    718 
    719 	y5 = h5 + alpha96
    720 
    721 	y4 = h4 + alpha96
    722 
    723 	x7 = h7 - y7
    724 	y7 *= scale
    725 
    726 	x6 = h6 - y6
    727 	y6 *= scale
    728 
    729 	x1 = h1 - y1
    730 
    731 	x0 = h0 - y0
    732 
    733 	y5 -= alpha96
    734 
    735 	y4 -= alpha96
    736 
    737 	x1 += y7
    738 
    739 	x0 += y6
    740 
    741 	x7 += y5
    742 
    743 	x6 += y4
    744 
    745 	y3 = h3 + alpha64
    746 
    747 	y2 = h2 + alpha64
    748 
    749 	x0 += x1
    750 
    751 	x6 += x7
    752 
    753 	y3 -= alpha64
    754 	r3low = r3low_stack
    755 
    756 	y2 -= alpha64
    757 	r0low = r0low_stack
    758 
    759 	x5 = h5 - y5
    760 	r3lowx0 = r3low * x0
    761 	r3high = r3high_stack
    762 
    763 	x4 = h4 - y4
    764 	r0lowx6 = r0low * x6
    765 	r0high = r0high_stack
    766 
    767 	x3 = h3 - y3
    768 	r3highx0 = r3high * x0
    769 	sr1low = sr1low_stack
    770 
    771 	x2 = h2 - y2
    772 	r0highx6 = r0high * x6
    773 	sr1high = sr1high_stack
    774 
    775 	x5 += y3
    776 	r0lowx0 = r0low * x0
    777 	r1low = r1low_stack
    778 
    779 	h6 = r3lowx0 + r0lowx6
    780 	sr1lowx6 = sr1low * x6
    781 	r1high = r1high_stack
    782 
    783 	x4 += y2
    784 	r0highx0 = r0high * x0
    785 	sr2low = sr2low_stack
    786 
    787 	h7 = r3highx0 + r0highx6
    788 	sr1highx6 = sr1high * x6
    789 	sr2high = sr2high_stack
    790 
    791 	x3 += y1
    792 	r1lowx0 = r1low * x0
    793 	r2low = r2low_stack
    794 
    795 	h0 = r0lowx0 + sr1lowx6
    796 	sr2lowx6 = sr2low * x6
    797 	r2high = r2high_stack
    798 
    799 	x2 += y0
    800 	r1highx0 = r1high * x0
    801 	sr3low = sr3low_stack
    802 
    803 	h1 = r0highx0 + sr1highx6
    804 	sr2highx6 = sr2high * x6
    805 	sr3high = sr3high_stack
    806 
    807 	x4 += x5
    808 	r2lowx0 = r2low * x0
    809 
    810 	h2 = r1lowx0 + sr2lowx6
    811 	sr3lowx6 = sr3low * x6
    812 
    813 	x2 += x3
    814 	r2highx0 = r2high * x0
    815 
    816 	h3 = r1highx0 + sr2highx6
    817 	sr3highx6 = sr3high * x6
    818 
    819 	r1highx4 = r1high * x4
    820 
    821 	h4 = r2lowx0 + sr3lowx6
    822 	r1lowx4 = r1low * x4
    823 
    824 	r0highx4 = r0high * x4
    825 
    826 	h5 = r2highx0 + sr3highx6
    827 	r0lowx4 = r0low * x4
    828 
    829 	h7 += r1highx4
    830 	sr3highx4 = sr3high * x4
    831 
    832 	h6 += r1lowx4
    833 	sr3lowx4 = sr3low * x4
    834 
    835 	h5 += r0highx4
    836 	sr2highx4 = sr2high * x4
    837 
    838 	h4 += r0lowx4
    839 	sr2lowx4 = sr2low * x4
    840 
    841 	h3 += sr3highx4
    842 	r0lowx2 = r0low * x2
    843 
    844 	h2 += sr3lowx4
    845 	r0highx2 = r0high * x2
    846 
    847 	h1 += sr2highx4
    848 	r1lowx2 = r1low * x2
    849 
    850 	h0 += sr2lowx4
    851 	r1highx2 = r1high * x2
    852 
    853 	h2 += r0lowx2
    854 	r2lowx2 = r2low * x2
    855 
    856 	h3 += r0highx2
    857 	r2highx2 = r2high * x2
    858 
    859 	h4 += r1lowx2
    860 	sr3lowx2 = sr3low * x2
    861 
    862 	h5 += r1highx2
    863 	sr3highx2 = sr3high * x2
    864 
    865 	h6 += r2lowx2
    866 
    867 	h7 += r2highx2
    868 
    869 	h0 += sr3lowx2
    870 
    871 	h1 += sr3highx2
    872 
    873 addatmost15bytes:
    874 
    875 	if l == 0 {
    876 		goto nomorebytes
    877 	}
    878 
    879 	lbelow2 = l - 2
    880 
    881 	lbelow3 = l - 3
    882 
    883 	lbelow2 >>= 31
    884 	lbelow4 = l - 4
    885 
    886 	m00 = uint32(m[p+0])
    887 	lbelow3 >>= 31
    888 	p += lbelow2
    889 
    890 	m01 = uint32(m[p+1])
    891 	lbelow4 >>= 31
    892 	p += lbelow3
    893 
    894 	m02 = uint32(m[p+2])
    895 	p += lbelow4
    896 	m0 = 2151
    897 
    898 	m03 = uint32(m[p+3])
    899 	m0 <<= 51
    900 	m1 = 2215
    901 
    902 	m0 += int64(m00)
    903 	m01 &^= uint32(lbelow2)
    904 
    905 	m02 &^= uint32(lbelow3)
    906 	m01 -= uint32(lbelow2)
    907 
    908 	m01 <<= 8
    909 	m03 &^= uint32(lbelow4)
    910 
    911 	m0 += int64(m01)
    912 	lbelow2 -= lbelow3
    913 
    914 	m02 += uint32(lbelow2)
    915 	lbelow3 -= lbelow4
    916 
    917 	m02 <<= 16
    918 	m03 += uint32(lbelow3)
    919 
    920 	m03 <<= 24
    921 	m0 += int64(m02)
    922 
    923 	m0 += int64(m03)
    924 	lbelow5 = l - 5
    925 
    926 	lbelow6 = l - 6
    927 	lbelow7 = l - 7
    928 
    929 	lbelow5 >>= 31
    930 	lbelow8 = l - 8
    931 
    932 	lbelow6 >>= 31
    933 	p += lbelow5
    934 
    935 	m10 = uint32(m[p+4])
    936 	lbelow7 >>= 31
    937 	p += lbelow6
    938 
    939 	m11 = uint32(m[p+5])
    940 	lbelow8 >>= 31
    941 	p += lbelow7
    942 
    943 	m12 = uint32(m[p+6])
    944 	m1 <<= 51
    945 	p += lbelow8
    946 
    947 	m13 = uint32(m[p+7])
    948 	m10 &^= uint32(lbelow5)
    949 	lbelow4 -= lbelow5
    950 
    951 	m10 += uint32(lbelow4)
    952 	lbelow5 -= lbelow6
    953 
    954 	m11 &^= uint32(lbelow6)
    955 	m11 += uint32(lbelow5)
    956 
    957 	m11 <<= 8
    958 	m1 += int64(m10)
    959 
    960 	m1 += int64(m11)
    961 	m12 &^= uint32(lbelow7)
    962 
    963 	lbelow6 -= lbelow7
    964 	m13 &^= uint32(lbelow8)
    965 
    966 	m12 += uint32(lbelow6)
    967 	lbelow7 -= lbelow8
    968 
    969 	m12 <<= 16
    970 	m13 += uint32(lbelow7)
    971 
    972 	m13 <<= 24
    973 	m1 += int64(m12)
    974 
    975 	m1 += int64(m13)
    976 	m2 = 2279
    977 
    978 	lbelow9 = l - 9
    979 	m3 = 2343
    980 
    981 	lbelow10 = l - 10
    982 	lbelow11 = l - 11
    983 
    984 	lbelow9 >>= 31
    985 	lbelow12 = l - 12
    986 
    987 	lbelow10 >>= 31
    988 	p += lbelow9
    989 
    990 	m20 = uint32(m[p+8])
    991 	lbelow11 >>= 31
    992 	p += lbelow10
    993 
    994 	m21 = uint32(m[p+9])
    995 	lbelow12 >>= 31
    996 	p += lbelow11
    997 
    998 	m22 = uint32(m[p+10])
    999 	m2 <<= 51
   1000 	p += lbelow12
   1001 
   1002 	m23 = uint32(m[p+11])
   1003 	m20 &^= uint32(lbelow9)
   1004 	lbelow8 -= lbelow9
   1005 
   1006 	m20 += uint32(lbelow8)
   1007 	lbelow9 -= lbelow10
   1008 
   1009 	m21 &^= uint32(lbelow10)
   1010 	m21 += uint32(lbelow9)
   1011 
   1012 	m21 <<= 8
   1013 	m2 += int64(m20)
   1014 
   1015 	m2 += int64(m21)
   1016 	m22 &^= uint32(lbelow11)
   1017 
   1018 	lbelow10 -= lbelow11
   1019 	m23 &^= uint32(lbelow12)
   1020 
   1021 	m22 += uint32(lbelow10)
   1022 	lbelow11 -= lbelow12
   1023 
   1024 	m22 <<= 16
   1025 	m23 += uint32(lbelow11)
   1026 
   1027 	m23 <<= 24
   1028 	m2 += int64(m22)
   1029 
   1030 	m3 <<= 51
   1031 	lbelow13 = l - 13
   1032 
   1033 	lbelow13 >>= 31
   1034 	lbelow14 = l - 14
   1035 
   1036 	lbelow14 >>= 31
   1037 	p += lbelow13
   1038 	lbelow15 = l - 15
   1039 
   1040 	m30 = uint32(m[p+12])
   1041 	lbelow15 >>= 31
   1042 	p += lbelow14
   1043 
   1044 	m31 = uint32(m[p+13])
   1045 	p += lbelow15
   1046 	m2 += int64(m23)
   1047 
   1048 	m32 = uint32(m[p+14])
   1049 	m30 &^= uint32(lbelow13)
   1050 	lbelow12 -= lbelow13
   1051 
   1052 	m30 += uint32(lbelow12)
   1053 	lbelow13 -= lbelow14
   1054 
   1055 	m3 += int64(m30)
   1056 	m31 &^= uint32(lbelow14)
   1057 
   1058 	m31 += uint32(lbelow13)
   1059 	m32 &^= uint32(lbelow15)
   1060 
   1061 	m31 <<= 8
   1062 	lbelow14 -= lbelow15
   1063 
   1064 	m3 += int64(m31)
   1065 	m32 += uint32(lbelow14)
   1066 	d0 = m0
   1067 
   1068 	m32 <<= 16
   1069 	m33 = uint64(lbelow15 + 1)
   1070 	d1 = m1
   1071 
   1072 	m33 <<= 24
   1073 	m3 += int64(m32)
   1074 	d2 = m2
   1075 
   1076 	m3 += int64(m33)
   1077 	d3 = m3
   1078 
   1079 	z3 = math.Float64frombits(uint64(d3))
   1080 
   1081 	z2 = math.Float64frombits(uint64(d2))
   1082 
   1083 	z1 = math.Float64frombits(uint64(d1))
   1084 
   1085 	z0 = math.Float64frombits(uint64(d0))
   1086 
   1087 	z3 -= alpha96
   1088 
   1089 	z2 -= alpha64
   1090 
   1091 	z1 -= alpha32
   1092 
   1093 	z0 -= alpha0
   1094 
   1095 	h5 += z3
   1096 
   1097 	h3 += z2
   1098 
   1099 	h1 += z1
   1100 
   1101 	h0 += z0
   1102 
   1103 	y7 = h7 + alpha130
   1104 
   1105 	y6 = h6 + alpha130
   1106 
   1107 	y1 = h1 + alpha32
   1108 
   1109 	y0 = h0 + alpha32
   1110 
   1111 	y7 -= alpha130
   1112 
   1113 	y6 -= alpha130
   1114 
   1115 	y1 -= alpha32
   1116 
   1117 	y0 -= alpha32
   1118 
   1119 	y5 = h5 + alpha96
   1120 
   1121 	y4 = h4 + alpha96
   1122 
   1123 	x7 = h7 - y7
   1124 	y7 *= scale
   1125 
   1126 	x6 = h6 - y6
   1127 	y6 *= scale
   1128 
   1129 	x1 = h1 - y1
   1130 
   1131 	x0 = h0 - y0
   1132 
   1133 	y5 -= alpha96
   1134 
   1135 	y4 -= alpha96
   1136 
   1137 	x1 += y7
   1138 
   1139 	x0 += y6
   1140 
   1141 	x7 += y5
   1142 
   1143 	x6 += y4
   1144 
   1145 	y3 = h3 + alpha64
   1146 
   1147 	y2 = h2 + alpha64
   1148 
   1149 	x0 += x1
   1150 
   1151 	x6 += x7
   1152 
   1153 	y3 -= alpha64
   1154 	r3low = r3low_stack
   1155 
   1156 	y2 -= alpha64
   1157 	r0low = r0low_stack
   1158 
   1159 	x5 = h5 - y5
   1160 	r3lowx0 = r3low * x0
   1161 	r3high = r3high_stack
   1162 
   1163 	x4 = h4 - y4
   1164 	r0lowx6 = r0low * x6
   1165 	r0high = r0high_stack
   1166 
   1167 	x3 = h3 - y3
   1168 	r3highx0 = r3high * x0
   1169 	sr1low = sr1low_stack
   1170 
   1171 	x2 = h2 - y2
   1172 	r0highx6 = r0high * x6
   1173 	sr1high = sr1high_stack
   1174 
   1175 	x5 += y3
   1176 	r0lowx0 = r0low * x0
   1177 	r1low = r1low_stack
   1178 
   1179 	h6 = r3lowx0 + r0lowx6
   1180 	sr1lowx6 = sr1low * x6
   1181 	r1high = r1high_stack
   1182 
   1183 	x4 += y2
   1184 	r0highx0 = r0high * x0
   1185 	sr2low = sr2low_stack
   1186 
   1187 	h7 = r3highx0 + r0highx6
   1188 	sr1highx6 = sr1high * x6
   1189 	sr2high = sr2high_stack
   1190 
   1191 	x3 += y1
   1192 	r1lowx0 = r1low * x0
   1193 	r2low = r2low_stack
   1194 
   1195 	h0 = r0lowx0 + sr1lowx6
   1196 	sr2lowx6 = sr2low * x6
   1197 	r2high = r2high_stack
   1198 
   1199 	x2 += y0
   1200 	r1highx0 = r1high * x0
   1201 	sr3low = sr3low_stack
   1202 
   1203 	h1 = r0highx0 + sr1highx6
   1204 	sr2highx6 = sr2high * x6
   1205 	sr3high = sr3high_stack
   1206 
   1207 	x4 += x5
   1208 	r2lowx0 = r2low * x0
   1209 
   1210 	h2 = r1lowx0 + sr2lowx6
   1211 	sr3lowx6 = sr3low * x6
   1212 
   1213 	x2 += x3
   1214 	r2highx0 = r2high * x0
   1215 
   1216 	h3 = r1highx0 + sr2highx6
   1217 	sr3highx6 = sr3high * x6
   1218 
   1219 	r1highx4 = r1high * x4
   1220 
   1221 	h4 = r2lowx0 + sr3lowx6
   1222 	r1lowx4 = r1low * x4
   1223 
   1224 	r0highx4 = r0high * x4
   1225 
   1226 	h5 = r2highx0 + sr3highx6
   1227 	r0lowx4 = r0low * x4
   1228 
   1229 	h7 += r1highx4
   1230 	sr3highx4 = sr3high * x4
   1231 
   1232 	h6 += r1lowx4
   1233 	sr3lowx4 = sr3low * x4
   1234 
   1235 	h5 += r0highx4
   1236 	sr2highx4 = sr2high * x4
   1237 
   1238 	h4 += r0lowx4
   1239 	sr2lowx4 = sr2low * x4
   1240 
   1241 	h3 += sr3highx4
   1242 	r0lowx2 = r0low * x2
   1243 
   1244 	h2 += sr3lowx4
   1245 	r0highx2 = r0high * x2
   1246 
   1247 	h1 += sr2highx4
   1248 	r1lowx2 = r1low * x2
   1249 
   1250 	h0 += sr2lowx4
   1251 	r1highx2 = r1high * x2
   1252 
   1253 	h2 += r0lowx2
   1254 	r2lowx2 = r2low * x2
   1255 
   1256 	h3 += r0highx2
   1257 	r2highx2 = r2high * x2
   1258 
   1259 	h4 += r1lowx2
   1260 	sr3lowx2 = sr3low * x2
   1261 
   1262 	h5 += r1highx2
   1263 	sr3highx2 = sr3high * x2
   1264 
   1265 	h6 += r2lowx2
   1266 
   1267 	h7 += r2highx2
   1268 
   1269 	h0 += sr3lowx2
   1270 
   1271 	h1 += sr3highx2
   1272 
   1273 nomorebytes:
   1274 
   1275 	y7 = h7 + alpha130
   1276 
   1277 	y0 = h0 + alpha32
   1278 
   1279 	y1 = h1 + alpha32
   1280 
   1281 	y2 = h2 + alpha64
   1282 
   1283 	y7 -= alpha130
   1284 
   1285 	y3 = h3 + alpha64
   1286 
   1287 	y4 = h4 + alpha96
   1288 
   1289 	y5 = h5 + alpha96
   1290 
   1291 	x7 = h7 - y7
   1292 	y7 *= scale
   1293 
   1294 	y0 -= alpha32
   1295 
   1296 	y1 -= alpha32
   1297 
   1298 	y2 -= alpha64
   1299 
   1300 	h6 += x7
   1301 
   1302 	y3 -= alpha64
   1303 
   1304 	y4 -= alpha96
   1305 
   1306 	y5 -= alpha96
   1307 
   1308 	y6 = h6 + alpha130
   1309 
   1310 	x0 = h0 - y0
   1311 
   1312 	x1 = h1 - y1
   1313 
   1314 	x2 = h2 - y2
   1315 
   1316 	y6 -= alpha130
   1317 
   1318 	x0 += y7
   1319 
   1320 	x3 = h3 - y3
   1321 
   1322 	x4 = h4 - y4
   1323 
   1324 	x5 = h5 - y5
   1325 
   1326 	x6 = h6 - y6
   1327 
   1328 	y6 *= scale
   1329 
   1330 	x2 += y0
   1331 
   1332 	x3 += y1
   1333 
   1334 	x4 += y2
   1335 
   1336 	x0 += y6
   1337 
   1338 	x5 += y3
   1339 
   1340 	x6 += y4
   1341 
   1342 	x2 += x3
   1343 
   1344 	x0 += x1
   1345 
   1346 	x4 += x5
   1347 
   1348 	x6 += y5
   1349 
   1350 	x2 += offset1
   1351 	d1 = int64(math.Float64bits(x2))
   1352 
   1353 	x0 += offset0
   1354 	d0 = int64(math.Float64bits(x0))
   1355 
   1356 	x4 += offset2
   1357 	d2 = int64(math.Float64bits(x4))
   1358 
   1359 	x6 += offset3
   1360 	d3 = int64(math.Float64bits(x6))
   1361 
   1362 	f0 = uint64(d0)
   1363 
   1364 	f1 = uint64(d1)
   1365 	bits32 = math.MaxUint64
   1366 
   1367 	f2 = uint64(d2)
   1368 	bits32 >>= 32
   1369 
   1370 	f3 = uint64(d3)
   1371 	f = f0 >> 32
   1372 
   1373 	f0 &= bits32
   1374 	f &= 255
   1375 
   1376 	f1 += f
   1377 	g0 = f0 + 5
   1378 
   1379 	g = g0 >> 32
   1380 	g0 &= bits32
   1381 
   1382 	f = f1 >> 32
   1383 	f1 &= bits32
   1384 
   1385 	f &= 255
   1386 	g1 = f1 + g
   1387 
   1388 	g = g1 >> 32
   1389 	f2 += f
   1390 
   1391 	f = f2 >> 32
   1392 	g1 &= bits32
   1393 
   1394 	f2 &= bits32
   1395 	f &= 255
   1396 
   1397 	f3 += f
   1398 	g2 = f2 + g
   1399 
   1400 	g = g2 >> 32
   1401 	g2 &= bits32
   1402 
   1403 	f4 = f3 >> 32
   1404 	f3 &= bits32
   1405 
   1406 	f4 &= 255
   1407 	g3 = f3 + g
   1408 
   1409 	g = g3 >> 32
   1410 	g3 &= bits32
   1411 
   1412 	g4 = f4 + g
   1413 
   1414 	g4 = g4 - 4
   1415 	s00 = uint32(s[0])
   1416 
   1417 	f = uint64(int64(g4) >> 63)
   1418 	s01 = uint32(s[1])
   1419 
   1420 	f0 &= f
   1421 	g0 &^= f
   1422 	s02 = uint32(s[2])
   1423 
   1424 	f1 &= f
   1425 	f0 |= g0
   1426 	s03 = uint32(s[3])
   1427 
   1428 	g1 &^= f
   1429 	f2 &= f
   1430 	s10 = uint32(s[4])
   1431 
   1432 	f3 &= f
   1433 	g2 &^= f
   1434 	s11 = uint32(s[5])
   1435 
   1436 	g3 &^= f
   1437 	f1 |= g1
   1438 	s12 = uint32(s[6])
   1439 
   1440 	f2 |= g2
   1441 	f3 |= g3
   1442 	s13 = uint32(s[7])
   1443 
   1444 	s01 <<= 8
   1445 	f0 += uint64(s00)
   1446 	s20 = uint32(s[8])
   1447 
   1448 	s02 <<= 16
   1449 	f0 += uint64(s01)
   1450 	s21 = uint32(s[9])
   1451 
   1452 	s03 <<= 24
   1453 	f0 += uint64(s02)
   1454 	s22 = uint32(s[10])
   1455 
   1456 	s11 <<= 8
   1457 	f1 += uint64(s10)
   1458 	s23 = uint32(s[11])
   1459 
   1460 	s12 <<= 16
   1461 	f1 += uint64(s11)
   1462 	s30 = uint32(s[12])
   1463 
   1464 	s13 <<= 24
   1465 	f1 += uint64(s12)
   1466 	s31 = uint32(s[13])
   1467 
   1468 	f0 += uint64(s03)
   1469 	f1 += uint64(s13)
   1470 	s32 = uint32(s[14])
   1471 
   1472 	s21 <<= 8
   1473 	f2 += uint64(s20)
   1474 	s33 = uint32(s[15])
   1475 
   1476 	s22 <<= 16
   1477 	f2 += uint64(s21)
   1478 
   1479 	s23 <<= 24
   1480 	f2 += uint64(s22)
   1481 
   1482 	s31 <<= 8
   1483 	f3 += uint64(s30)
   1484 
   1485 	s32 <<= 16
   1486 	f3 += uint64(s31)
   1487 
   1488 	s33 <<= 24
   1489 	f3 += uint64(s32)
   1490 
   1491 	f2 += uint64(s23)
   1492 	f3 += uint64(s33)
   1493 
   1494 	out[0] = byte(f0)
   1495 	f0 >>= 8
   1496 	out[1] = byte(f0)
   1497 	f0 >>= 8
   1498 	out[2] = byte(f0)
   1499 	f0 >>= 8
   1500 	out[3] = byte(f0)
   1501 	f0 >>= 8
   1502 	f1 += f0
   1503 
   1504 	out[4] = byte(f1)
   1505 	f1 >>= 8
   1506 	out[5] = byte(f1)
   1507 	f1 >>= 8
   1508 	out[6] = byte(f1)
   1509 	f1 >>= 8
   1510 	out[7] = byte(f1)
   1511 	f1 >>= 8
   1512 	f2 += f1
   1513 
   1514 	out[8] = byte(f2)
   1515 	f2 >>= 8
   1516 	out[9] = byte(f2)
   1517 	f2 >>= 8
   1518 	out[10] = byte(f2)
   1519 	f2 >>= 8
   1520 	out[11] = byte(f2)
   1521 	f2 >>= 8
   1522 	f3 += f2
   1523 
   1524 	out[12] = byte(f3)
   1525 	f3 >>= 8
   1526 	out[13] = byte(f3)
   1527 	f3 >>= 8
   1528 	out[14] = byte(f3)
   1529 	f3 >>= 8
   1530 	out[15] = byte(f3)
   1531 }
   1532