Home | History | Annotate | Download | only in big
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This file implements multi-precision floating-point numbers.
      6 // Like in the GNU MPFR library (http://www.mpfr.org/), operands
      7 // can be of mixed precision. Unlike MPFR, the rounding mode is
      8 // not specified with each operation, but with each operand. The
      9 // rounding mode of the result operand determines the rounding
     10 // mode of an operation. This is a from-scratch implementation.
     11 
     12 package big
     13 
     14 import (
     15 	"fmt"
     16 	"math"
     17 )
     18 
     19 const debugFloat = false // enable for debugging
     20 
     21 // A nonzero finite Float represents a multi-precision floating point number
     22 //
     23 //   sign  mantissa  2**exponent
     24 //
     25 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
     26 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
     27 // All Floats are ordered, and the ordering of two Floats x and y
     28 // is defined by x.Cmp(y).
     29 //
     30 // Each Float value also has a precision, rounding mode, and accuracy.
     31 // The precision is the maximum number of mantissa bits available to
     32 // represent the value. The rounding mode specifies how a result should
     33 // be rounded to fit into the mantissa bits, and accuracy describes the
     34 // rounding error with respect to the exact result.
     35 //
     36 // Unless specified otherwise, all operations (including setters) that
     37 // specify a *Float variable for the result (usually via the receiver
     38 // with the exception of MantExp), round the numeric result according
     39 // to the precision and rounding mode of the result variable.
     40 //
     41 // If the provided result precision is 0 (see below), it is set to the
     42 // precision of the argument with the largest precision value before any
     43 // rounding takes place, and the rounding mode remains unchanged. Thus,
     44 // uninitialized Floats provided as result arguments will have their
     45 // precision set to a reasonable value determined by the operands and
     46 // their mode is the zero value for RoundingMode (ToNearestEven).
     47 //
     48 // By setting the desired precision to 24 or 53 and using matching rounding
     49 // mode (typically ToNearestEven), Float operations produce the same results
     50 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
     51 // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
     52 // Exponent underflow and overflow lead to a 0 or an Infinity for different
     53 // values than IEEE-754 because Float exponents have a much larger range.
     54 //
     55 // The zero (uninitialized) value for a Float is ready to use and represents
     56 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
     57 //
     58 type Float struct {
     59 	prec uint32
     60 	mode RoundingMode
     61 	acc  Accuracy
     62 	form form
     63 	neg  bool
     64 	mant nat
     65 	exp  int32
     66 }
     67 
     68 // An ErrNaN panic is raised by a Float operation that would lead to
     69 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
     70 type ErrNaN struct {
     71 	msg string
     72 }
     73 
     74 func (err ErrNaN) Error() string {
     75 	return err.msg
     76 }
     77 
     78 // NewFloat allocates and returns a new Float set to x,
     79 // with precision 53 and rounding mode ToNearestEven.
     80 // NewFloat panics with ErrNaN if x is a NaN.
     81 func NewFloat(x float64) *Float {
     82 	if math.IsNaN(x) {
     83 		panic(ErrNaN{"NewFloat(NaN)"})
     84 	}
     85 	return new(Float).SetFloat64(x)
     86 }
     87 
     88 // Exponent and precision limits.
     89 const (
     90 	MaxExp  = math.MaxInt32  // largest supported exponent
     91 	MinExp  = math.MinInt32  // smallest supported exponent
     92 	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
     93 )
     94 
     95 // Internal representation: The mantissa bits x.mant of a nonzero finite
     96 // Float x are stored in a nat slice long enough to hold up to x.prec bits;
     97 // the slice may (but doesn't have to) be shorter if the mantissa contains
     98 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
     99 // the msb is shifted all the way "to the left"). Thus, if the mantissa has
    100 // trailing 0 bits or x.prec is not a multiple of the the Word size _W,
    101 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
    102 // to the value 0.5; the exponent x.exp shifts the binary point as needed.
    103 //
    104 // A zero or non-finite Float x ignores x.mant and x.exp.
    105 //
    106 // x                 form      neg      mant         exp
    107 // ----------------------------------------------------------
    108 // 0                zero      sign     -            -
    109 // 0 < |x| < +Inf    finite    sign     mantissa     exponent
    110 // Inf              inf       sign     -            -
    111 
    112 // A form value describes the internal representation.
    113 type form byte
    114 
    115 // The form value order is relevant - do not change!
    116 const (
    117 	zero form = iota
    118 	finite
    119 	inf
    120 )
    121 
    122 // RoundingMode determines how a Float value is rounded to the
    123 // desired precision. Rounding may change the Float value; the
    124 // rounding error is described by the Float's Accuracy.
    125 type RoundingMode byte
    126 
    127 // These constants define supported rounding modes.
    128 const (
    129 	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
    130 	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
    131 	ToZero                            // == IEEE 754-2008 roundTowardZero
    132 	AwayFromZero                      // no IEEE 754-2008 equivalent
    133 	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
    134 	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
    135 )
    136 
    137 //go:generate stringer -type=RoundingMode
    138 
    139 // Accuracy describes the rounding error produced by the most recent
    140 // operation that generated a Float value, relative to the exact value.
    141 type Accuracy int8
    142 
    143 // Constants describing the Accuracy of a Float.
    144 const (
    145 	Below Accuracy = -1
    146 	Exact Accuracy = 0
    147 	Above Accuracy = +1
    148 )
    149 
    150 //go:generate stringer -type=Accuracy
    151 
    152 // SetPrec sets z's precision to prec and returns the (possibly) rounded
    153 // value of z. Rounding occurs according to z's rounding mode if the mantissa
    154 // cannot be represented in prec bits without loss of precision.
    155 // SetPrec(0) maps all finite values to 0; infinite values remain unchanged.
    156 // If prec > MaxPrec, it is set to MaxPrec.
    157 func (z *Float) SetPrec(prec uint) *Float {
    158 	z.acc = Exact // optimistically assume no rounding is needed
    159 
    160 	// special case
    161 	if prec == 0 {
    162 		z.prec = 0
    163 		if z.form == finite {
    164 			// truncate z to 0
    165 			z.acc = makeAcc(z.neg)
    166 			z.form = zero
    167 		}
    168 		return z
    169 	}
    170 
    171 	// general case
    172 	if prec > MaxPrec {
    173 		prec = MaxPrec
    174 	}
    175 	old := z.prec
    176 	z.prec = uint32(prec)
    177 	if z.prec < old {
    178 		z.round(0)
    179 	}
    180 	return z
    181 }
    182 
    183 func makeAcc(above bool) Accuracy {
    184 	if above {
    185 		return Above
    186 	}
    187 	return Below
    188 }
    189 
    190 // SetMode sets z's rounding mode to mode and returns an exact z.
    191 // z remains unchanged otherwise.
    192 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
    193 func (z *Float) SetMode(mode RoundingMode) *Float {
    194 	z.mode = mode
    195 	z.acc = Exact
    196 	return z
    197 }
    198 
    199 // Prec returns the mantissa precision of x in bits.
    200 // The result may be 0 for |x| == 0 and |x| == Inf.
    201 func (x *Float) Prec() uint {
    202 	return uint(x.prec)
    203 }
    204 
    205 // MinPrec returns the minimum precision required to represent x exactly
    206 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
    207 // The result is 0 for |x| == 0 and |x| == Inf.
    208 func (x *Float) MinPrec() uint {
    209 	if x.form != finite {
    210 		return 0
    211 	}
    212 	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
    213 }
    214 
    215 // Mode returns the rounding mode of x.
    216 func (x *Float) Mode() RoundingMode {
    217 	return x.mode
    218 }
    219 
    220 // Acc returns the accuracy of x produced by the most recent operation.
    221 func (x *Float) Acc() Accuracy {
    222 	return x.acc
    223 }
    224 
    225 // Sign returns:
    226 //
    227 //	-1 if x <   0
    228 //	 0 if x is 0
    229 //	+1 if x >   0
    230 //
    231 func (x *Float) Sign() int {
    232 	if debugFloat {
    233 		x.validate()
    234 	}
    235 	if x.form == zero {
    236 		return 0
    237 	}
    238 	if x.neg {
    239 		return -1
    240 	}
    241 	return 1
    242 }
    243 
    244 // MantExp breaks x into its mantissa and exponent components
    245 // and returns the exponent. If a non-nil mant argument is
    246 // provided its value is set to the mantissa of x, with the
    247 // same precision and rounding mode as x. The components
    248 // satisfy x == mant  2**exp, with 0.5 <= |mant| < 1.0.
    249 // Calling MantExp with a nil argument is an efficient way to
    250 // get the exponent of the receiver.
    251 //
    252 // Special cases are:
    253 //
    254 //	(  0).MantExp(mant) = 0, with mant set to   0
    255 //	(Inf).MantExp(mant) = 0, with mant set to Inf
    256 //
    257 // x and mant may be the same in which case x is set to its
    258 // mantissa value.
    259 func (x *Float) MantExp(mant *Float) (exp int) {
    260 	if debugFloat {
    261 		x.validate()
    262 	}
    263 	if x.form == finite {
    264 		exp = int(x.exp)
    265 	}
    266 	if mant != nil {
    267 		mant.Copy(x)
    268 		if mant.form == finite {
    269 			mant.exp = 0
    270 		}
    271 	}
    272 	return
    273 }
    274 
    275 func (z *Float) setExpAndRound(exp int64, sbit uint) {
    276 	if exp < MinExp {
    277 		// underflow
    278 		z.acc = makeAcc(z.neg)
    279 		z.form = zero
    280 		return
    281 	}
    282 
    283 	if exp > MaxExp {
    284 		// overflow
    285 		z.acc = makeAcc(!z.neg)
    286 		z.form = inf
    287 		return
    288 	}
    289 
    290 	z.form = finite
    291 	z.exp = int32(exp)
    292 	z.round(sbit)
    293 }
    294 
    295 // SetMantExp sets z to mant  2**exp and and returns z.
    296 // The result z has the same precision and rounding mode
    297 // as mant. SetMantExp is an inverse of MantExp but does
    298 // not require 0.5 <= |mant| < 1.0. Specifically:
    299 //
    300 //	mant := new(Float)
    301 //	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
    302 //
    303 // Special cases are:
    304 //
    305 //	z.SetMantExp(  0, exp) =   0
    306 //	z.SetMantExp(Inf, exp) = Inf
    307 //
    308 // z and mant may be the same in which case z's exponent
    309 // is set to exp.
    310 func (z *Float) SetMantExp(mant *Float, exp int) *Float {
    311 	if debugFloat {
    312 		z.validate()
    313 		mant.validate()
    314 	}
    315 	z.Copy(mant)
    316 	if z.form != finite {
    317 		return z
    318 	}
    319 	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
    320 	return z
    321 }
    322 
    323 // Signbit returns true if x is negative or negative zero.
    324 func (x *Float) Signbit() bool {
    325 	return x.neg
    326 }
    327 
    328 // IsInf reports whether x is +Inf or -Inf.
    329 func (x *Float) IsInf() bool {
    330 	return x.form == inf
    331 }
    332 
    333 // IsInt reports whether x is an integer.
    334 // Inf values are not integers.
    335 func (x *Float) IsInt() bool {
    336 	if debugFloat {
    337 		x.validate()
    338 	}
    339 	// special cases
    340 	if x.form != finite {
    341 		return x.form == zero
    342 	}
    343 	// x.form == finite
    344 	if x.exp <= 0 {
    345 		return false
    346 	}
    347 	// x.exp > 0
    348 	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
    349 }
    350 
    351 // debugging support
    352 func (x *Float) validate() {
    353 	if !debugFloat {
    354 		// avoid performance bugs
    355 		panic("validate called but debugFloat is not set")
    356 	}
    357 	if x.form != finite {
    358 		return
    359 	}
    360 	m := len(x.mant)
    361 	if m == 0 {
    362 		panic("nonzero finite number with empty mantissa")
    363 	}
    364 	const msb = 1 << (_W - 1)
    365 	if x.mant[m-1]&msb == 0 {
    366 		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
    367 	}
    368 	if x.prec == 0 {
    369 		panic("zero precision finite number")
    370 	}
    371 }
    372 
    373 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
    374 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
    375 // have before calling round. z's mantissa must be normalized (with the msb set)
    376 // or empty.
    377 //
    378 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
    379 // sign of z. For correct rounding, the sign of z must be set correctly before
    380 // calling round.
    381 func (z *Float) round(sbit uint) {
    382 	if debugFloat {
    383 		z.validate()
    384 	}
    385 
    386 	z.acc = Exact
    387 	if z.form != finite {
    388 		// 0 or Inf => nothing left to do
    389 		return
    390 	}
    391 	// z.form == finite && len(z.mant) > 0
    392 	// m > 0 implies z.prec > 0 (checked by validate)
    393 
    394 	m := uint32(len(z.mant)) // present mantissa length in words
    395 	bits := m * _W           // present mantissa bits; bits > 0
    396 	if bits <= z.prec {
    397 		// mantissa fits => nothing to do
    398 		return
    399 	}
    400 	// bits > z.prec
    401 
    402 	// Rounding is based on two bits: the rounding bit (rbit) and the
    403 	// sticky bit (sbit). The rbit is the bit immediately before the
    404 	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
    405 	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
    406 	//
    407 	//   rbit  sbit  => "fractional part"
    408 	//
    409 	//   0     0        == 0
    410 	//   0     1        >  0  , < 0.5
    411 	//   1     0        == 0.5
    412 	//   1     1        >  0.5, < 1.0
    413 
    414 	// bits > z.prec: mantissa too large => round
    415 	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
    416 	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
    417 	if sbit == 0 {
    418 		// TODO(gri) if rbit != 0 we don't need to compute sbit for some rounding modes (optimization)
    419 		sbit = z.mant.sticky(r)
    420 	}
    421 	sbit &= 1 // be safe and ensure it's a single bit
    422 
    423 	// cut off extra words
    424 	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
    425 	if m > n {
    426 		copy(z.mant, z.mant[m-n:]) // move n last words to front
    427 		z.mant = z.mant[:n]
    428 	}
    429 
    430 	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
    431 	ntz := n*_W - z.prec // 0 <= ntz < _W
    432 	lsb := Word(1) << ntz
    433 
    434 	// round if result is inexact
    435 	if rbit|sbit != 0 {
    436 		// Make rounding decision: The result mantissa is truncated ("rounded down")
    437 		// by default. Decide if we need to increment, or "round up", the (unsigned)
    438 		// mantissa.
    439 		inc := false
    440 		switch z.mode {
    441 		case ToNegativeInf:
    442 			inc = z.neg
    443 		case ToZero:
    444 			// nothing to do
    445 		case ToNearestEven:
    446 			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
    447 		case ToNearestAway:
    448 			inc = rbit != 0
    449 		case AwayFromZero:
    450 			inc = true
    451 		case ToPositiveInf:
    452 			inc = !z.neg
    453 		default:
    454 			panic("unreachable")
    455 		}
    456 
    457 		// A positive result (!z.neg) is Above the exact result if we increment,
    458 		// and it's Below if we truncate (Exact results require no rounding).
    459 		// For a negative result (z.neg) it is exactly the opposite.
    460 		z.acc = makeAcc(inc != z.neg)
    461 
    462 		if inc {
    463 			// add 1 to mantissa
    464 			if addVW(z.mant, z.mant, lsb) != 0 {
    465 				// mantissa overflow => adjust exponent
    466 				if z.exp >= MaxExp {
    467 					// exponent overflow
    468 					z.form = inf
    469 					return
    470 				}
    471 				z.exp++
    472 				// adjust mantissa: divide by 2 to compensate for exponent adjustment
    473 				shrVU(z.mant, z.mant, 1)
    474 				// set msb == carry == 1 from the mantissa overflow above
    475 				const msb = 1 << (_W - 1)
    476 				z.mant[n-1] |= msb
    477 			}
    478 		}
    479 	}
    480 
    481 	// zero out trailing bits in least-significant word
    482 	z.mant[0] &^= lsb - 1
    483 
    484 	if debugFloat {
    485 		z.validate()
    486 	}
    487 }
    488 
    489 func (z *Float) setBits64(neg bool, x uint64) *Float {
    490 	if z.prec == 0 {
    491 		z.prec = 64
    492 	}
    493 	z.acc = Exact
    494 	z.neg = neg
    495 	if x == 0 {
    496 		z.form = zero
    497 		return z
    498 	}
    499 	// x != 0
    500 	z.form = finite
    501 	s := nlz64(x)
    502 	z.mant = z.mant.setUint64(x << s)
    503 	z.exp = int32(64 - s) // always fits
    504 	if z.prec < 64 {
    505 		z.round(0)
    506 	}
    507 	return z
    508 }
    509 
    510 // SetUint64 sets z to the (possibly rounded) value of x and returns z.
    511 // If z's precision is 0, it is changed to 64 (and rounding will have
    512 // no effect).
    513 func (z *Float) SetUint64(x uint64) *Float {
    514 	return z.setBits64(false, x)
    515 }
    516 
    517 // SetInt64 sets z to the (possibly rounded) value of x and returns z.
    518 // If z's precision is 0, it is changed to 64 (and rounding will have
    519 // no effect).
    520 func (z *Float) SetInt64(x int64) *Float {
    521 	u := x
    522 	if u < 0 {
    523 		u = -u
    524 	}
    525 	// We cannot simply call z.SetUint64(uint64(u)) and change
    526 	// the sign afterwards because the sign affects rounding.
    527 	return z.setBits64(x < 0, uint64(u))
    528 }
    529 
    530 // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
    531 // If z's precision is 0, it is changed to 53 (and rounding will have
    532 // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
    533 func (z *Float) SetFloat64(x float64) *Float {
    534 	if z.prec == 0 {
    535 		z.prec = 53
    536 	}
    537 	if math.IsNaN(x) {
    538 		panic(ErrNaN{"Float.SetFloat64(NaN)"})
    539 	}
    540 	z.acc = Exact
    541 	z.neg = math.Signbit(x) // handle -0, -Inf correctly
    542 	if x == 0 {
    543 		z.form = zero
    544 		return z
    545 	}
    546 	if math.IsInf(x, 0) {
    547 		z.form = inf
    548 		return z
    549 	}
    550 	// normalized x != 0
    551 	z.form = finite
    552 	fmant, exp := math.Frexp(x) // get normalized mantissa
    553 	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
    554 	z.exp = int32(exp) // always fits
    555 	if z.prec < 53 {
    556 		z.round(0)
    557 	}
    558 	return z
    559 }
    560 
    561 // fnorm normalizes mantissa m by shifting it to the left
    562 // such that the msb of the most-significant word (msw) is 1.
    563 // It returns the shift amount. It assumes that len(m) != 0.
    564 func fnorm(m nat) int64 {
    565 	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
    566 		panic("msw of mantissa is 0")
    567 	}
    568 	s := nlz(m[len(m)-1])
    569 	if s > 0 {
    570 		c := shlVU(m, m, s)
    571 		if debugFloat && c != 0 {
    572 			panic("nlz or shlVU incorrect")
    573 		}
    574 	}
    575 	return int64(s)
    576 }
    577 
    578 // SetInt sets z to the (possibly rounded) value of x and returns z.
    579 // If z's precision is 0, it is changed to the larger of x.BitLen()
    580 // or 64 (and rounding will have no effect).
    581 func (z *Float) SetInt(x *Int) *Float {
    582 	// TODO(gri) can be more efficient if z.prec > 0
    583 	// but small compared to the size of x, or if there
    584 	// are many trailing 0's.
    585 	bits := uint32(x.BitLen())
    586 	if z.prec == 0 {
    587 		z.prec = umax32(bits, 64)
    588 	}
    589 	z.acc = Exact
    590 	z.neg = x.neg
    591 	if len(x.abs) == 0 {
    592 		z.form = zero
    593 		return z
    594 	}
    595 	// x != 0
    596 	z.mant = z.mant.set(x.abs)
    597 	fnorm(z.mant)
    598 	z.setExpAndRound(int64(bits), 0)
    599 	return z
    600 }
    601 
    602 // SetRat sets z to the (possibly rounded) value of x and returns z.
    603 // If z's precision is 0, it is changed to the largest of a.BitLen(),
    604 // b.BitLen(), or 64; with x = a/b.
    605 func (z *Float) SetRat(x *Rat) *Float {
    606 	if x.IsInt() {
    607 		return z.SetInt(x.Num())
    608 	}
    609 	var a, b Float
    610 	a.SetInt(x.Num())
    611 	b.SetInt(x.Denom())
    612 	if z.prec == 0 {
    613 		z.prec = umax32(a.prec, b.prec)
    614 	}
    615 	return z.Quo(&a, &b)
    616 }
    617 
    618 // SetInf sets z to the infinite Float -Inf if signbit is
    619 // set, or +Inf if signbit is not set, and returns z. The
    620 // precision of z is unchanged and the result is always
    621 // Exact.
    622 func (z *Float) SetInf(signbit bool) *Float {
    623 	z.acc = Exact
    624 	z.form = inf
    625 	z.neg = signbit
    626 	return z
    627 }
    628 
    629 // Set sets z to the (possibly rounded) value of x and returns z.
    630 // If z's precision is 0, it is changed to the precision of x
    631 // before setting z (and rounding will have no effect).
    632 // Rounding is performed according to z's precision and rounding
    633 // mode; and z's accuracy reports the result error relative to the
    634 // exact (not rounded) result.
    635 func (z *Float) Set(x *Float) *Float {
    636 	if debugFloat {
    637 		x.validate()
    638 	}
    639 	z.acc = Exact
    640 	if z != x {
    641 		z.form = x.form
    642 		z.neg = x.neg
    643 		if x.form == finite {
    644 			z.exp = x.exp
    645 			z.mant = z.mant.set(x.mant)
    646 		}
    647 		if z.prec == 0 {
    648 			z.prec = x.prec
    649 		} else if z.prec < x.prec {
    650 			z.round(0)
    651 		}
    652 	}
    653 	return z
    654 }
    655 
    656 // Copy sets z to x, with the same precision, rounding mode, and
    657 // accuracy as x, and returns z. x is not changed even if z and
    658 // x are the same.
    659 func (z *Float) Copy(x *Float) *Float {
    660 	if debugFloat {
    661 		x.validate()
    662 	}
    663 	if z != x {
    664 		z.prec = x.prec
    665 		z.mode = x.mode
    666 		z.acc = x.acc
    667 		z.form = x.form
    668 		z.neg = x.neg
    669 		if z.form == finite {
    670 			z.mant = z.mant.set(x.mant)
    671 			z.exp = x.exp
    672 		}
    673 	}
    674 	return z
    675 }
    676 
    677 // msb32 returns the 32 most significant bits of x.
    678 func msb32(x nat) uint32 {
    679 	i := len(x) - 1
    680 	if i < 0 {
    681 		return 0
    682 	}
    683 	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
    684 		panic("x not normalized")
    685 	}
    686 	switch _W {
    687 	case 32:
    688 		return uint32(x[i])
    689 	case 64:
    690 		return uint32(x[i] >> 32)
    691 	}
    692 	panic("unreachable")
    693 }
    694 
    695 // msb64 returns the 64 most significant bits of x.
    696 func msb64(x nat) uint64 {
    697 	i := len(x) - 1
    698 	if i < 0 {
    699 		return 0
    700 	}
    701 	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
    702 		panic("x not normalized")
    703 	}
    704 	switch _W {
    705 	case 32:
    706 		v := uint64(x[i]) << 32
    707 		if i > 0 {
    708 			v |= uint64(x[i-1])
    709 		}
    710 		return v
    711 	case 64:
    712 		return uint64(x[i])
    713 	}
    714 	panic("unreachable")
    715 }
    716 
    717 // Uint64 returns the unsigned integer resulting from truncating x
    718 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
    719 // if x is an integer and Below otherwise.
    720 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
    721 // for x > math.MaxUint64.
    722 func (x *Float) Uint64() (uint64, Accuracy) {
    723 	if debugFloat {
    724 		x.validate()
    725 	}
    726 
    727 	switch x.form {
    728 	case finite:
    729 		if x.neg {
    730 			return 0, Above
    731 		}
    732 		// 0 < x < +Inf
    733 		if x.exp <= 0 {
    734 			// 0 < x < 1
    735 			return 0, Below
    736 		}
    737 		// 1 <= x < Inf
    738 		if x.exp <= 64 {
    739 			// u = trunc(x) fits into a uint64
    740 			u := msb64(x.mant) >> (64 - uint32(x.exp))
    741 			if x.MinPrec() <= 64 {
    742 				return u, Exact
    743 			}
    744 			return u, Below // x truncated
    745 		}
    746 		// x too large
    747 		return math.MaxUint64, Below
    748 
    749 	case zero:
    750 		return 0, Exact
    751 
    752 	case inf:
    753 		if x.neg {
    754 			return 0, Above
    755 		}
    756 		return math.MaxUint64, Below
    757 	}
    758 
    759 	panic("unreachable")
    760 }
    761 
    762 // Int64 returns the integer resulting from truncating x towards zero.
    763 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
    764 // an integer, and Above (x < 0) or Below (x > 0) otherwise.
    765 // The result is (math.MinInt64, Above) for x < math.MinInt64,
    766 // and (math.MaxInt64, Below) for x > math.MaxInt64.
    767 func (x *Float) Int64() (int64, Accuracy) {
    768 	if debugFloat {
    769 		x.validate()
    770 	}
    771 
    772 	switch x.form {
    773 	case finite:
    774 		// 0 < |x| < +Inf
    775 		acc := makeAcc(x.neg)
    776 		if x.exp <= 0 {
    777 			// 0 < |x| < 1
    778 			return 0, acc
    779 		}
    780 		// x.exp > 0
    781 
    782 		// 1 <= |x| < +Inf
    783 		if x.exp <= 63 {
    784 			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
    785 			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
    786 			if x.neg {
    787 				i = -i
    788 			}
    789 			if x.MinPrec() <= uint(x.exp) {
    790 				return i, Exact
    791 			}
    792 			return i, acc // x truncated
    793 		}
    794 		if x.neg {
    795 			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
    796 			if x.exp == 64 && x.MinPrec() == 1 {
    797 				acc = Exact
    798 			}
    799 			return math.MinInt64, acc
    800 		}
    801 		// x too large
    802 		return math.MaxInt64, Below
    803 
    804 	case zero:
    805 		return 0, Exact
    806 
    807 	case inf:
    808 		if x.neg {
    809 			return math.MinInt64, Above
    810 		}
    811 		return math.MaxInt64, Below
    812 	}
    813 
    814 	panic("unreachable")
    815 }
    816 
    817 // Float32 returns the float32 value nearest to x. If x is too small to be
    818 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
    819 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
    820 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
    821 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
    822 func (x *Float) Float32() (float32, Accuracy) {
    823 	if debugFloat {
    824 		x.validate()
    825 	}
    826 
    827 	switch x.form {
    828 	case finite:
    829 		// 0 < |x| < +Inf
    830 
    831 		const (
    832 			fbits = 32                //        float size
    833 			mbits = 23                //        mantissa size (excluding implicit msb)
    834 			ebits = fbits - mbits - 1 //     8  exponent size
    835 			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
    836 			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
    837 			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
    838 			emax  = bias              //   127  largest unbiased exponent (normal)
    839 		)
    840 
    841 		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
    842 		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
    843 
    844 		// Compute precision p for float32 mantissa.
    845 		// If the exponent is too small, we have a denormal number before
    846 		// rounding and fewer than p mantissa bits of precision available
    847 		// (the exponent remains fixed but the mantissa gets shifted right).
    848 		p := mbits + 1 // precision of normal float
    849 		if e < emin {
    850 			// recompute precision
    851 			p = mbits + 1 - emin + int(e)
    852 			// If p == 0, the mantissa of x is shifted so much to the right
    853 			// that its msb falls immediately to the right of the float32
    854 			// mantissa space. In other words, if the smallest denormal is
    855 			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
    856 			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
    857 			// If m == 0.5, it is rounded down to even, i.e., 0.0.
    858 			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
    859 			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
    860 				// underflow to 0
    861 				if x.neg {
    862 					var z float32
    863 					return -z, Above
    864 				}
    865 				return 0.0, Below
    866 			}
    867 			// otherwise, round up
    868 			// We handle p == 0 explicitly because it's easy and because
    869 			// Float.round doesn't support rounding to 0 bits of precision.
    870 			if p == 0 {
    871 				if x.neg {
    872 					return -math.SmallestNonzeroFloat32, Below
    873 				}
    874 				return math.SmallestNonzeroFloat32, Above
    875 			}
    876 		}
    877 		// p > 0
    878 
    879 		// round
    880 		var r Float
    881 		r.prec = uint32(p)
    882 		r.Set(x)
    883 		e = r.exp - 1
    884 
    885 		// Rounding may have caused r to overflow to Inf
    886 		// (rounding never causes underflows to 0).
    887 		// If the exponent is too large, also overflow to Inf.
    888 		if r.form == inf || e > emax {
    889 			// overflow
    890 			if x.neg {
    891 				return float32(math.Inf(-1)), Below
    892 			}
    893 			return float32(math.Inf(+1)), Above
    894 		}
    895 		// e <= emax
    896 
    897 		// Determine sign, biased exponent, and mantissa.
    898 		var sign, bexp, mant uint32
    899 		if x.neg {
    900 			sign = 1 << (fbits - 1)
    901 		}
    902 
    903 		// Rounding may have caused a denormal number to
    904 		// become normal. Check again.
    905 		if e < emin {
    906 			// denormal number: recompute precision
    907 			// Since rounding may have at best increased precision
    908 			// and we have eliminated p <= 0 early, we know p > 0.
    909 			// bexp == 0 for denormals
    910 			p = mbits + 1 - emin + int(e)
    911 			mant = msb32(r.mant) >> uint(fbits-p)
    912 		} else {
    913 			// normal number: emin <= e <= emax
    914 			bexp = uint32(e+bias) << mbits
    915 			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
    916 		}
    917 
    918 		return math.Float32frombits(sign | bexp | mant), r.acc
    919 
    920 	case zero:
    921 		if x.neg {
    922 			var z float32
    923 			return -z, Exact
    924 		}
    925 		return 0.0, Exact
    926 
    927 	case inf:
    928 		if x.neg {
    929 			return float32(math.Inf(-1)), Exact
    930 		}
    931 		return float32(math.Inf(+1)), Exact
    932 	}
    933 
    934 	panic("unreachable")
    935 }
    936 
    937 // Float64 returns the float64 value nearest to x. If x is too small to be
    938 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
    939 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
    940 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
    941 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
    942 func (x *Float) Float64() (float64, Accuracy) {
    943 	if debugFloat {
    944 		x.validate()
    945 	}
    946 
    947 	switch x.form {
    948 	case finite:
    949 		// 0 < |x| < +Inf
    950 
    951 		const (
    952 			fbits = 64                //        float size
    953 			mbits = 52                //        mantissa size (excluding implicit msb)
    954 			ebits = fbits - mbits - 1 //    11  exponent size
    955 			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
    956 			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
    957 			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
    958 			emax  = bias              //  1023  largest unbiased exponent (normal)
    959 		)
    960 
    961 		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
    962 		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
    963 
    964 		// Compute precision p for float64 mantissa.
    965 		// If the exponent is too small, we have a denormal number before
    966 		// rounding and fewer than p mantissa bits of precision available
    967 		// (the exponent remains fixed but the mantissa gets shifted right).
    968 		p := mbits + 1 // precision of normal float
    969 		if e < emin {
    970 			// recompute precision
    971 			p = mbits + 1 - emin + int(e)
    972 			// If p == 0, the mantissa of x is shifted so much to the right
    973 			// that its msb falls immediately to the right of the float64
    974 			// mantissa space. In other words, if the smallest denormal is
    975 			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
    976 			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
    977 			// If m == 0.5, it is rounded down to even, i.e., 0.0.
    978 			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
    979 			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
    980 				// underflow to 0
    981 				if x.neg {
    982 					var z float64
    983 					return -z, Above
    984 				}
    985 				return 0.0, Below
    986 			}
    987 			// otherwise, round up
    988 			// We handle p == 0 explicitly because it's easy and because
    989 			// Float.round doesn't support rounding to 0 bits of precision.
    990 			if p == 0 {
    991 				if x.neg {
    992 					return -math.SmallestNonzeroFloat64, Below
    993 				}
    994 				return math.SmallestNonzeroFloat64, Above
    995 			}
    996 		}
    997 		// p > 0
    998 
    999 		// round
   1000 		var r Float
   1001 		r.prec = uint32(p)
   1002 		r.Set(x)
   1003 		e = r.exp - 1
   1004 
   1005 		// Rounding may have caused r to overflow to Inf
   1006 		// (rounding never causes underflows to 0).
   1007 		// If the exponent is too large, also overflow to Inf.
   1008 		if r.form == inf || e > emax {
   1009 			// overflow
   1010 			if x.neg {
   1011 				return math.Inf(-1), Below
   1012 			}
   1013 			return math.Inf(+1), Above
   1014 		}
   1015 		// e <= emax
   1016 
   1017 		// Determine sign, biased exponent, and mantissa.
   1018 		var sign, bexp, mant uint64
   1019 		if x.neg {
   1020 			sign = 1 << (fbits - 1)
   1021 		}
   1022 
   1023 		// Rounding may have caused a denormal number to
   1024 		// become normal. Check again.
   1025 		if e < emin {
   1026 			// denormal number: recompute precision
   1027 			// Since rounding may have at best increased precision
   1028 			// and we have eliminated p <= 0 early, we know p > 0.
   1029 			// bexp == 0 for denormals
   1030 			p = mbits + 1 - emin + int(e)
   1031 			mant = msb64(r.mant) >> uint(fbits-p)
   1032 		} else {
   1033 			// normal number: emin <= e <= emax
   1034 			bexp = uint64(e+bias) << mbits
   1035 			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   1036 		}
   1037 
   1038 		return math.Float64frombits(sign | bexp | mant), r.acc
   1039 
   1040 	case zero:
   1041 		if x.neg {
   1042 			var z float64
   1043 			return -z, Exact
   1044 		}
   1045 		return 0.0, Exact
   1046 
   1047 	case inf:
   1048 		if x.neg {
   1049 			return math.Inf(-1), Exact
   1050 		}
   1051 		return math.Inf(+1), Exact
   1052 	}
   1053 
   1054 	panic("unreachable")
   1055 }
   1056 
   1057 // Int returns the result of truncating x towards zero;
   1058 // or nil if x is an infinity.
   1059 // The result is Exact if x.IsInt(); otherwise it is Below
   1060 // for x > 0, and Above for x < 0.
   1061 // If a non-nil *Int argument z is provided, Int stores
   1062 // the result in z instead of allocating a new Int.
   1063 func (x *Float) Int(z *Int) (*Int, Accuracy) {
   1064 	if debugFloat {
   1065 		x.validate()
   1066 	}
   1067 
   1068 	if z == nil && x.form <= finite {
   1069 		z = new(Int)
   1070 	}
   1071 
   1072 	switch x.form {
   1073 	case finite:
   1074 		// 0 < |x| < +Inf
   1075 		acc := makeAcc(x.neg)
   1076 		if x.exp <= 0 {
   1077 			// 0 < |x| < 1
   1078 			return z.SetInt64(0), acc
   1079 		}
   1080 		// x.exp > 0
   1081 
   1082 		// 1 <= |x| < +Inf
   1083 		// determine minimum required precision for x
   1084 		allBits := uint(len(x.mant)) * _W
   1085 		exp := uint(x.exp)
   1086 		if x.MinPrec() <= exp {
   1087 			acc = Exact
   1088 		}
   1089 		// shift mantissa as needed
   1090 		if z == nil {
   1091 			z = new(Int)
   1092 		}
   1093 		z.neg = x.neg
   1094 		switch {
   1095 		case exp > allBits:
   1096 			z.abs = z.abs.shl(x.mant, exp-allBits)
   1097 		default:
   1098 			z.abs = z.abs.set(x.mant)
   1099 		case exp < allBits:
   1100 			z.abs = z.abs.shr(x.mant, allBits-exp)
   1101 		}
   1102 		return z, acc
   1103 
   1104 	case zero:
   1105 		return z.SetInt64(0), Exact
   1106 
   1107 	case inf:
   1108 		return nil, makeAcc(x.neg)
   1109 	}
   1110 
   1111 	panic("unreachable")
   1112 }
   1113 
   1114 // Rat returns the rational number corresponding to x;
   1115 // or nil if x is an infinity.
   1116 // The result is Exact if x is not an Inf.
   1117 // If a non-nil *Rat argument z is provided, Rat stores
   1118 // the result in z instead of allocating a new Rat.
   1119 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
   1120 	if debugFloat {
   1121 		x.validate()
   1122 	}
   1123 
   1124 	if z == nil && x.form <= finite {
   1125 		z = new(Rat)
   1126 	}
   1127 
   1128 	switch x.form {
   1129 	case finite:
   1130 		// 0 < |x| < +Inf
   1131 		allBits := int32(len(x.mant)) * _W
   1132 		// build up numerator and denominator
   1133 		z.a.neg = x.neg
   1134 		switch {
   1135 		case x.exp > allBits:
   1136 			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
   1137 			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
   1138 			// z already in normal form
   1139 		default:
   1140 			z.a.abs = z.a.abs.set(x.mant)
   1141 			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
   1142 			// z already in normal form
   1143 		case x.exp < allBits:
   1144 			z.a.abs = z.a.abs.set(x.mant)
   1145 			t := z.b.abs.setUint64(1)
   1146 			z.b.abs = t.shl(t, uint(allBits-x.exp))
   1147 			z.norm()
   1148 		}
   1149 		return z, Exact
   1150 
   1151 	case zero:
   1152 		return z.SetInt64(0), Exact
   1153 
   1154 	case inf:
   1155 		return nil, makeAcc(x.neg)
   1156 	}
   1157 
   1158 	panic("unreachable")
   1159 }
   1160 
   1161 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
   1162 // and returns z.
   1163 func (z *Float) Abs(x *Float) *Float {
   1164 	z.Set(x)
   1165 	z.neg = false
   1166 	return z
   1167 }
   1168 
   1169 // Neg sets z to the (possibly rounded) value of x with its sign negated,
   1170 // and returns z.
   1171 func (z *Float) Neg(x *Float) *Float {
   1172 	z.Set(x)
   1173 	z.neg = !z.neg
   1174 	return z
   1175 }
   1176 
   1177 func validateBinaryOperands(x, y *Float) {
   1178 	if !debugFloat {
   1179 		// avoid performance bugs
   1180 		panic("validateBinaryOperands called but debugFloat is not set")
   1181 	}
   1182 	if len(x.mant) == 0 {
   1183 		panic("empty mantissa for x")
   1184 	}
   1185 	if len(y.mant) == 0 {
   1186 		panic("empty mantissa for y")
   1187 	}
   1188 }
   1189 
   1190 // z = x + y, ignoring signs of x and y for the addition
   1191 // but using the sign of z for rounding the result.
   1192 // x and y must have a non-empty mantissa and valid exponent.
   1193 func (z *Float) uadd(x, y *Float) {
   1194 	// Note: This implementation requires 2 shifts most of the
   1195 	// time. It is also inefficient if exponents or precisions
   1196 	// differ by wide margins. The following article describes
   1197 	// an efficient (but much more complicated) implementation
   1198 	// compatible with the internal representation used here:
   1199 	//
   1200 	// Vincent Lefvre: "The Generic Multiple-Precision Floating-
   1201 	// Point Addition With Exact Rounding (as in the MPFR Library)"
   1202 	// http://www.vinc17.net/research/papers/rnc6.pdf
   1203 
   1204 	if debugFloat {
   1205 		validateBinaryOperands(x, y)
   1206 	}
   1207 
   1208 	// compute exponents ex, ey for mantissa with "binary point"
   1209 	// on the right (mantissa.0) - use int64 to avoid overflow
   1210 	ex := int64(x.exp) - int64(len(x.mant))*_W
   1211 	ey := int64(y.exp) - int64(len(y.mant))*_W
   1212 
   1213 	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
   1214 
   1215 	// TODO(gri) having a combined add-and-shift primitive
   1216 	//           could make this code significantly faster
   1217 	switch {
   1218 	case ex < ey:
   1219 		if al {
   1220 			t := nat(nil).shl(y.mant, uint(ey-ex))
   1221 			z.mant = z.mant.add(x.mant, t)
   1222 		} else {
   1223 			z.mant = z.mant.shl(y.mant, uint(ey-ex))
   1224 			z.mant = z.mant.add(x.mant, z.mant)
   1225 		}
   1226 	default:
   1227 		// ex == ey, no shift needed
   1228 		z.mant = z.mant.add(x.mant, y.mant)
   1229 	case ex > ey:
   1230 		if al {
   1231 			t := nat(nil).shl(x.mant, uint(ex-ey))
   1232 			z.mant = z.mant.add(t, y.mant)
   1233 		} else {
   1234 			z.mant = z.mant.shl(x.mant, uint(ex-ey))
   1235 			z.mant = z.mant.add(z.mant, y.mant)
   1236 		}
   1237 		ex = ey
   1238 	}
   1239 	// len(z.mant) > 0
   1240 
   1241 	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
   1242 }
   1243 
   1244 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
   1245 // but using the sign of z for rounding the result.
   1246 // x and y must have a non-empty mantissa and valid exponent.
   1247 func (z *Float) usub(x, y *Float) {
   1248 	// This code is symmetric to uadd.
   1249 	// We have not factored the common code out because
   1250 	// eventually uadd (and usub) should be optimized
   1251 	// by special-casing, and the code will diverge.
   1252 
   1253 	if debugFloat {
   1254 		validateBinaryOperands(x, y)
   1255 	}
   1256 
   1257 	ex := int64(x.exp) - int64(len(x.mant))*_W
   1258 	ey := int64(y.exp) - int64(len(y.mant))*_W
   1259 
   1260 	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
   1261 
   1262 	switch {
   1263 	case ex < ey:
   1264 		if al {
   1265 			t := nat(nil).shl(y.mant, uint(ey-ex))
   1266 			z.mant = t.sub(x.mant, t)
   1267 		} else {
   1268 			z.mant = z.mant.shl(y.mant, uint(ey-ex))
   1269 			z.mant = z.mant.sub(x.mant, z.mant)
   1270 		}
   1271 	default:
   1272 		// ex == ey, no shift needed
   1273 		z.mant = z.mant.sub(x.mant, y.mant)
   1274 	case ex > ey:
   1275 		if al {
   1276 			t := nat(nil).shl(x.mant, uint(ex-ey))
   1277 			z.mant = t.sub(t, y.mant)
   1278 		} else {
   1279 			z.mant = z.mant.shl(x.mant, uint(ex-ey))
   1280 			z.mant = z.mant.sub(z.mant, y.mant)
   1281 		}
   1282 		ex = ey
   1283 	}
   1284 
   1285 	// operands may have canceled each other out
   1286 	if len(z.mant) == 0 {
   1287 		z.acc = Exact
   1288 		z.form = zero
   1289 		z.neg = false
   1290 		return
   1291 	}
   1292 	// len(z.mant) > 0
   1293 
   1294 	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
   1295 }
   1296 
   1297 // z = x * y, ignoring signs of x and y for the multiplication
   1298 // but using the sign of z for rounding the result.
   1299 // x and y must have a non-empty mantissa and valid exponent.
   1300 func (z *Float) umul(x, y *Float) {
   1301 	if debugFloat {
   1302 		validateBinaryOperands(x, y)
   1303 	}
   1304 
   1305 	// Note: This is doing too much work if the precision
   1306 	// of z is less than the sum of the precisions of x
   1307 	// and y which is often the case (e.g., if all floats
   1308 	// have the same precision).
   1309 	// TODO(gri) Optimize this for the common case.
   1310 
   1311 	e := int64(x.exp) + int64(y.exp)
   1312 	z.mant = z.mant.mul(x.mant, y.mant)
   1313 
   1314 	z.setExpAndRound(e-fnorm(z.mant), 0)
   1315 }
   1316 
   1317 // z = x / y, ignoring signs of x and y for the division
   1318 // but using the sign of z for rounding the result.
   1319 // x and y must have a non-empty mantissa and valid exponent.
   1320 func (z *Float) uquo(x, y *Float) {
   1321 	if debugFloat {
   1322 		validateBinaryOperands(x, y)
   1323 	}
   1324 
   1325 	// mantissa length in words for desired result precision + 1
   1326 	// (at least one extra bit so we get the rounding bit after
   1327 	// the division)
   1328 	n := int(z.prec/_W) + 1
   1329 
   1330 	// compute adjusted x.mant such that we get enough result precision
   1331 	xadj := x.mant
   1332 	if d := n - len(x.mant) + len(y.mant); d > 0 {
   1333 		// d extra words needed => add d "0 digits" to x
   1334 		xadj = make(nat, len(x.mant)+d)
   1335 		copy(xadj[d:], x.mant)
   1336 	}
   1337 	// TODO(gri): If we have too many digits (d < 0), we should be able
   1338 	// to shorten x for faster division. But we must be extra careful
   1339 	// with rounding in that case.
   1340 
   1341 	// Compute d before division since there may be aliasing of x.mant
   1342 	// (via xadj) or y.mant with z.mant.
   1343 	d := len(xadj) - len(y.mant)
   1344 
   1345 	// divide
   1346 	var r nat
   1347 	z.mant, r = z.mant.div(nil, xadj, y.mant)
   1348 	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
   1349 
   1350 	// The result is long enough to include (at least) the rounding bit.
   1351 	// If there's a non-zero remainder, the corresponding fractional part
   1352 	// (if it were computed), would have a non-zero sticky bit (if it were
   1353 	// zero, it couldn't have a non-zero remainder).
   1354 	var sbit uint
   1355 	if len(r) > 0 {
   1356 		sbit = 1
   1357 	}
   1358 
   1359 	z.setExpAndRound(e-fnorm(z.mant), sbit)
   1360 }
   1361 
   1362 // ucmp returns -1, 0, or +1, depending on whether
   1363 // |x| < |y|, |x| == |y|, or |x| > |y|.
   1364 // x and y must have a non-empty mantissa and valid exponent.
   1365 func (x *Float) ucmp(y *Float) int {
   1366 	if debugFloat {
   1367 		validateBinaryOperands(x, y)
   1368 	}
   1369 
   1370 	switch {
   1371 	case x.exp < y.exp:
   1372 		return -1
   1373 	case x.exp > y.exp:
   1374 		return +1
   1375 	}
   1376 	// x.exp == y.exp
   1377 
   1378 	// compare mantissas
   1379 	i := len(x.mant)
   1380 	j := len(y.mant)
   1381 	for i > 0 || j > 0 {
   1382 		var xm, ym Word
   1383 		if i > 0 {
   1384 			i--
   1385 			xm = x.mant[i]
   1386 		}
   1387 		if j > 0 {
   1388 			j--
   1389 			ym = y.mant[j]
   1390 		}
   1391 		switch {
   1392 		case xm < ym:
   1393 			return -1
   1394 		case xm > ym:
   1395 			return +1
   1396 		}
   1397 	}
   1398 
   1399 	return 0
   1400 }
   1401 
   1402 // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
   1403 //
   1404 // When neither the inputs nor result are NaN, the sign of a product or
   1405 // quotient is the exclusive OR of the operands signs; the sign of a sum,
   1406 // or of a difference xy regarded as a sum x+(y), differs from at most
   1407 // one of the addends signs; and the sign of the result of conversions,
   1408 // the quantize operation, the roundToIntegral operations, and the
   1409 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
   1410 // These rules shall apply even when operands or results are zero or infinite.
   1411 //
   1412 // When the sum of two operands with opposite signs (or the difference of
   1413 // two operands with like signs) is exactly zero, the sign of that sum (or
   1414 // difference) shall be +0 in all rounding-direction attributes except
   1415 // roundTowardNegative; under that attribute, the sign of an exact zero
   1416 // sum (or difference) shall be 0. However, x+x = x(x) retains the same
   1417 // sign as x even when x is zero.
   1418 //
   1419 // See also: https://play.golang.org/p/RtH3UCt5IH
   1420 
   1421 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
   1422 // it is changed to the larger of x's or y's precision before the operation.
   1423 // Rounding is performed according to z's precision and rounding mode; and
   1424 // z's accuracy reports the result error relative to the exact (not rounded)
   1425 // result. Add panics with ErrNaN if x and y are infinities with opposite
   1426 // signs. The value of z is undefined in that case.
   1427 //
   1428 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
   1429 func (z *Float) Add(x, y *Float) *Float {
   1430 	if debugFloat {
   1431 		x.validate()
   1432 		y.validate()
   1433 	}
   1434 
   1435 	if z.prec == 0 {
   1436 		z.prec = umax32(x.prec, y.prec)
   1437 	}
   1438 
   1439 	if x.form == finite && y.form == finite {
   1440 		// x + y (common case)
   1441 		z.neg = x.neg
   1442 		if x.neg == y.neg {
   1443 			// x + y == x + y
   1444 			// (-x) + (-y) == -(x + y)
   1445 			z.uadd(x, y)
   1446 		} else {
   1447 			// x + (-y) == x - y == -(y - x)
   1448 			// (-x) + y == y - x == -(x - y)
   1449 			if x.ucmp(y) > 0 {
   1450 				z.usub(x, y)
   1451 			} else {
   1452 				z.neg = !z.neg
   1453 				z.usub(y, x)
   1454 			}
   1455 		}
   1456 		return z
   1457 	}
   1458 
   1459 	if x.form == inf && y.form == inf && x.neg != y.neg {
   1460 		// +Inf + -Inf
   1461 		// -Inf + +Inf
   1462 		// value of z is undefined but make sure it's valid
   1463 		z.acc = Exact
   1464 		z.form = zero
   1465 		z.neg = false
   1466 		panic(ErrNaN{"addition of infinities with opposite signs"})
   1467 	}
   1468 
   1469 	if x.form == zero && y.form == zero {
   1470 		// 0 + 0
   1471 		z.acc = Exact
   1472 		z.form = zero
   1473 		z.neg = x.neg && y.neg // -0 + -0 == -0
   1474 		return z
   1475 	}
   1476 
   1477 	if x.form == inf || y.form == zero {
   1478 		// Inf + y
   1479 		// x + 0
   1480 		return z.Set(x)
   1481 	}
   1482 
   1483 	// 0 + y
   1484 	// x + Inf
   1485 	return z.Set(y)
   1486 }
   1487 
   1488 // Sub sets z to the rounded difference x-y and returns z.
   1489 // Precision, rounding, and accuracy reporting are as for Add.
   1490 // Sub panics with ErrNaN if x and y are infinities with equal
   1491 // signs. The value of z is undefined in that case.
   1492 func (z *Float) Sub(x, y *Float) *Float {
   1493 	if debugFloat {
   1494 		x.validate()
   1495 		y.validate()
   1496 	}
   1497 
   1498 	if z.prec == 0 {
   1499 		z.prec = umax32(x.prec, y.prec)
   1500 	}
   1501 
   1502 	if x.form == finite && y.form == finite {
   1503 		// x - y (common case)
   1504 		z.neg = x.neg
   1505 		if x.neg != y.neg {
   1506 			// x - (-y) == x + y
   1507 			// (-x) - y == -(x + y)
   1508 			z.uadd(x, y)
   1509 		} else {
   1510 			// x - y == x - y == -(y - x)
   1511 			// (-x) - (-y) == y - x == -(x - y)
   1512 			if x.ucmp(y) > 0 {
   1513 				z.usub(x, y)
   1514 			} else {
   1515 				z.neg = !z.neg
   1516 				z.usub(y, x)
   1517 			}
   1518 		}
   1519 		return z
   1520 	}
   1521 
   1522 	if x.form == inf && y.form == inf && x.neg == y.neg {
   1523 		// +Inf - +Inf
   1524 		// -Inf - -Inf
   1525 		// value of z is undefined but make sure it's valid
   1526 		z.acc = Exact
   1527 		z.form = zero
   1528 		z.neg = false
   1529 		panic(ErrNaN{"subtraction of infinities with equal signs"})
   1530 	}
   1531 
   1532 	if x.form == zero && y.form == zero {
   1533 		// 0 - 0
   1534 		z.acc = Exact
   1535 		z.form = zero
   1536 		z.neg = x.neg && !y.neg // -0 - +0 == -0
   1537 		return z
   1538 	}
   1539 
   1540 	if x.form == inf || y.form == zero {
   1541 		// Inf - y
   1542 		// x - 0
   1543 		return z.Set(x)
   1544 	}
   1545 
   1546 	// 0 - y
   1547 	// x - Inf
   1548 	return z.Neg(y)
   1549 }
   1550 
   1551 // Mul sets z to the rounded product x*y and returns z.
   1552 // Precision, rounding, and accuracy reporting are as for Add.
   1553 // Mul panics with ErrNaN if one operand is zero and the other
   1554 // operand an infinity. The value of z is undefined in that case.
   1555 func (z *Float) Mul(x, y *Float) *Float {
   1556 	if debugFloat {
   1557 		x.validate()
   1558 		y.validate()
   1559 	}
   1560 
   1561 	if z.prec == 0 {
   1562 		z.prec = umax32(x.prec, y.prec)
   1563 	}
   1564 
   1565 	z.neg = x.neg != y.neg
   1566 
   1567 	if x.form == finite && y.form == finite {
   1568 		// x * y (common case)
   1569 		z.umul(x, y)
   1570 		return z
   1571 	}
   1572 
   1573 	z.acc = Exact
   1574 	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
   1575 		// 0 * Inf
   1576 		// Inf * 0
   1577 		// value of z is undefined but make sure it's valid
   1578 		z.form = zero
   1579 		z.neg = false
   1580 		panic(ErrNaN{"multiplication of zero with infinity"})
   1581 	}
   1582 
   1583 	if x.form == inf || y.form == inf {
   1584 		// Inf * y
   1585 		// x * Inf
   1586 		z.form = inf
   1587 		return z
   1588 	}
   1589 
   1590 	// 0 * y
   1591 	// x * 0
   1592 	z.form = zero
   1593 	return z
   1594 }
   1595 
   1596 // Quo sets z to the rounded quotient x/y and returns z.
   1597 // Precision, rounding, and accuracy reporting are as for Add.
   1598 // Quo panics with ErrNaN if both operands are zero or infinities.
   1599 // The value of z is undefined in that case.
   1600 func (z *Float) Quo(x, y *Float) *Float {
   1601 	if debugFloat {
   1602 		x.validate()
   1603 		y.validate()
   1604 	}
   1605 
   1606 	if z.prec == 0 {
   1607 		z.prec = umax32(x.prec, y.prec)
   1608 	}
   1609 
   1610 	z.neg = x.neg != y.neg
   1611 
   1612 	if x.form == finite && y.form == finite {
   1613 		// x / y (common case)
   1614 		z.uquo(x, y)
   1615 		return z
   1616 	}
   1617 
   1618 	z.acc = Exact
   1619 	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
   1620 		// 0 / 0
   1621 		// Inf / Inf
   1622 		// value of z is undefined but make sure it's valid
   1623 		z.form = zero
   1624 		z.neg = false
   1625 		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
   1626 	}
   1627 
   1628 	if x.form == zero || y.form == inf {
   1629 		// 0 / y
   1630 		// x / Inf
   1631 		z.form = zero
   1632 		return z
   1633 	}
   1634 
   1635 	// x / 0
   1636 	// Inf / y
   1637 	z.form = inf
   1638 	return z
   1639 }
   1640 
   1641 // Cmp compares x and y and returns:
   1642 //
   1643 //   -1 if x <  y
   1644 //    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
   1645 //   +1 if x >  y
   1646 //
   1647 func (x *Float) Cmp(y *Float) int {
   1648 	if debugFloat {
   1649 		x.validate()
   1650 		y.validate()
   1651 	}
   1652 
   1653 	mx := x.ord()
   1654 	my := y.ord()
   1655 	switch {
   1656 	case mx < my:
   1657 		return -1
   1658 	case mx > my:
   1659 		return +1
   1660 	}
   1661 	// mx == my
   1662 
   1663 	// only if |mx| == 1 we have to compare the mantissae
   1664 	switch mx {
   1665 	case -1:
   1666 		return y.ucmp(x)
   1667 	case +1:
   1668 		return x.ucmp(y)
   1669 	}
   1670 
   1671 	return 0
   1672 }
   1673 
   1674 // ord classifies x and returns:
   1675 //
   1676 //	-2 if -Inf == x
   1677 //	-1 if -Inf < x < 0
   1678 //	 0 if x == 0 (signed or unsigned)
   1679 //	+1 if 0 < x < +Inf
   1680 //	+2 if x == +Inf
   1681 //
   1682 func (x *Float) ord() int {
   1683 	var m int
   1684 	switch x.form {
   1685 	case finite:
   1686 		m = 1
   1687 	case zero:
   1688 		return 0
   1689 	case inf:
   1690 		m = 2
   1691 	}
   1692 	if x.neg {
   1693 		m = -m
   1694 	}
   1695 	return m
   1696 }
   1697 
   1698 func umax32(x, y uint32) uint32 {
   1699 	if x > y {
   1700 		return x
   1701 	}
   1702 	return y
   1703 }
   1704