Home | History | Annotate | Download | only in big
      1 // Do not edit. Bootstrap copy of /Volumes/Android/buildbot/src/android/build-tools/out/obj/go/src/cmd/compile/internal/big/float.go
      2 
      3 //line /Volumes/Android/buildbot/src/android/build-tools/out/obj/go/src/cmd/compile/internal/big/float.go:1
      4 // Copyright 2014 The Go Authors. All rights reserved.
      5 // Use of this source code is governed by a BSD-style
      6 // license that can be found in the LICENSE file.
      7 
      8 // This file implements multi-precision floating-point numbers.
      9 // Like in the GNU MPFR library (http://www.mpfr.org/), operands
     10 // can be of mixed precision. Unlike MPFR, the rounding mode is
     11 // not specified with each operation, but with each operand. The
     12 // rounding mode of the result operand determines the rounding
     13 // mode of an operation. This is a from-scratch implementation.
     14 
     15 package big
     16 
     17 import (
     18 	"fmt"
     19 	"math"
     20 )
     21 
     22 const debugFloat = false // enable for debugging
     23 
     24 // A nonzero finite Float represents a multi-precision floating point number
     25 //
     26 //   sign  mantissa  2**exponent
     27 //
     28 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
     29 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
     30 // All Floats are ordered, and the ordering of two Floats x and y
     31 // is defined by x.Cmp(y).
     32 //
     33 // Each Float value also has a precision, rounding mode, and accuracy.
     34 // The precision is the maximum number of mantissa bits available to
     35 // represent the value. The rounding mode specifies how a result should
     36 // be rounded to fit into the mantissa bits, and accuracy describes the
     37 // rounding error with respect to the exact result.
     38 //
     39 // Unless specified otherwise, all operations (including setters) that
     40 // specify a *Float variable for the result (usually via the receiver
     41 // with the exception of MantExp), round the numeric result according
     42 // to the precision and rounding mode of the result variable.
     43 //
     44 // If the provided result precision is 0 (see below), it is set to the
     45 // precision of the argument with the largest precision value before any
     46 // rounding takes place, and the rounding mode remains unchanged. Thus,
     47 // uninitialized Floats provided as result arguments will have their
     48 // precision set to a reasonable value determined by the operands and
     49 // their mode is the zero value for RoundingMode (ToNearestEven).
     50 //
     51 // By setting the desired precision to 24 or 53 and using matching rounding
     52 // mode (typically ToNearestEven), Float operations produce the same results
     53 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
     54 // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
     55 // Exponent underflow and overflow lead to a 0 or an Infinity for different
     56 // values than IEEE-754 because Float exponents have a much larger range.
     57 //
     58 // The zero (uninitialized) value for a Float is ready to use and represents
     59 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
     60 //
     61 type Float struct {
     62 	prec uint32
     63 	mode RoundingMode
     64 	acc  Accuracy
     65 	form form
     66 	neg  bool
     67 	mant nat
     68 	exp  int32
     69 }
     70 
     71 // An ErrNaN panic is raised by a Float operation that would lead to
     72 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
     73 type ErrNaN struct {
     74 	msg string
     75 }
     76 
     77 func (err ErrNaN) Error() string {
     78 	return err.msg
     79 }
     80 
     81 // NewFloat allocates and returns a new Float set to x,
     82 // with precision 53 and rounding mode ToNearestEven.
     83 // NewFloat panics with ErrNaN if x is a NaN.
     84 func NewFloat(x float64) *Float {
     85 	if math.IsNaN(x) {
     86 		panic(ErrNaN{"NewFloat(NaN)"})
     87 	}
     88 	return new(Float).SetFloat64(x)
     89 }
     90 
     91 // Exponent and precision limits.
     92 const (
     93 	MaxExp  = math.MaxInt32  // largest supported exponent
     94 	MinExp  = math.MinInt32  // smallest supported exponent
     95 	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
     96 )
     97 
     98 // Internal representation: The mantissa bits x.mant of a nonzero finite
     99 // Float x are stored in a nat slice long enough to hold up to x.prec bits;
    100 // the slice may (but doesn't have to) be shorter if the mantissa contains
    101 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
    102 // the msb is shifted all the way "to the left"). Thus, if the mantissa has
    103 // trailing 0 bits or x.prec is not a multiple of the the Word size _W,
    104 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
    105 // to the value 0.5; the exponent x.exp shifts the binary point as needed.
    106 //
    107 // A zero or non-finite Float x ignores x.mant and x.exp.
    108 //
    109 // x                 form      neg      mant         exp
    110 // ----------------------------------------------------------
    111 // 0                zero      sign     -            -
    112 // 0 < |x| < +Inf    finite    sign     mantissa     exponent
    113 // Inf              inf       sign     -            -
    114 
    115 // A form value describes the internal representation.
    116 type form byte
    117 
    118 // The form value order is relevant - do not change!
    119 const (
    120 	zero form = iota
    121 	finite
    122 	inf
    123 )
    124 
    125 // RoundingMode determines how a Float value is rounded to the
    126 // desired precision. Rounding may change the Float value; the
    127 // rounding error is described by the Float's Accuracy.
    128 type RoundingMode byte
    129 
    130 // The following rounding modes are supported.
    131 const (
    132 	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
    133 	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
    134 	ToZero                            // == IEEE 754-2008 roundTowardZero
    135 	AwayFromZero                      // no IEEE 754-2008 equivalent
    136 	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
    137 	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
    138 )
    139 
    140 //go:generate stringer -type=RoundingMode
    141 
    142 // Accuracy describes the rounding error produced by the most recent
    143 // operation that generated a Float value, relative to the exact value.
    144 type Accuracy int8
    145 
    146 // Constants describing the Accuracy of a Float.
    147 const (
    148 	Below Accuracy = -1
    149 	Exact Accuracy = 0
    150 	Above Accuracy = +1
    151 )
    152 
    153 //go:generate stringer -type=Accuracy
    154 
    155 // SetPrec sets z's precision to prec and returns the (possibly) rounded
    156 // value of z. Rounding occurs according to z's rounding mode if the mantissa
    157 // cannot be represented in prec bits without loss of precision.
    158 // SetPrec(0) maps all finite values to 0; infinite values remain unchanged.
    159 // If prec > MaxPrec, it is set to MaxPrec.
    160 func (z *Float) SetPrec(prec uint) *Float {
    161 	z.acc = Exact // optimistically assume no rounding is needed
    162 
    163 	// special case
    164 	if prec == 0 {
    165 		z.prec = 0
    166 		if z.form == finite {
    167 			// truncate z to 0
    168 			z.acc = makeAcc(z.neg)
    169 			z.form = zero
    170 		}
    171 		return z
    172 	}
    173 
    174 	// general case
    175 	if prec > MaxPrec {
    176 		prec = MaxPrec
    177 	}
    178 	old := z.prec
    179 	z.prec = uint32(prec)
    180 	if z.prec < old {
    181 		z.round(0)
    182 	}
    183 	return z
    184 }
    185 
    186 func makeAcc(above bool) Accuracy {
    187 	if above {
    188 		return Above
    189 	}
    190 	return Below
    191 }
    192 
    193 // SetMode sets z's rounding mode to mode and returns an exact z.
    194 // z remains unchanged otherwise.
    195 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
    196 func (z *Float) SetMode(mode RoundingMode) *Float {
    197 	z.mode = mode
    198 	z.acc = Exact
    199 	return z
    200 }
    201 
    202 // Prec returns the mantissa precision of x in bits.
    203 // The result may be 0 for |x| == 0 and |x| == Inf.
    204 func (x *Float) Prec() uint {
    205 	return uint(x.prec)
    206 }
    207 
    208 // MinPrec returns the minimum precision required to represent x exactly
    209 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
    210 // The result is 0 for |x| == 0 and |x| == Inf.
    211 func (x *Float) MinPrec() uint {
    212 	if x.form != finite {
    213 		return 0
    214 	}
    215 	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
    216 }
    217 
    218 // Mode returns the rounding mode of x.
    219 func (x *Float) Mode() RoundingMode {
    220 	return x.mode
    221 }
    222 
    223 // Acc returns the accuracy of x produced by the most recent operation.
    224 func (x *Float) Acc() Accuracy {
    225 	return x.acc
    226 }
    227 
    228 // Sign returns:
    229 //
    230 //	-1 if x <   0
    231 //	 0 if x is 0
    232 //	+1 if x >   0
    233 //
    234 func (x *Float) Sign() int {
    235 	if debugFloat {
    236 		x.validate()
    237 	}
    238 	if x.form == zero {
    239 		return 0
    240 	}
    241 	if x.neg {
    242 		return -1
    243 	}
    244 	return 1
    245 }
    246 
    247 // MantExp breaks x into its mantissa and exponent components
    248 // and returns the exponent. If a non-nil mant argument is
    249 // provided its value is set to the mantissa of x, with the
    250 // same precision and rounding mode as x. The components
    251 // satisfy x == mant  2**exp, with 0.5 <= |mant| < 1.0.
    252 // Calling MantExp with a nil argument is an efficient way to
    253 // get the exponent of the receiver.
    254 //
    255 // Special cases are:
    256 //
    257 //	(  0).MantExp(mant) = 0, with mant set to   0
    258 //	(Inf).MantExp(mant) = 0, with mant set to Inf
    259 //
    260 // x and mant may be the same in which case x is set to its
    261 // mantissa value.
    262 func (x *Float) MantExp(mant *Float) (exp int) {
    263 	if debugFloat {
    264 		x.validate()
    265 	}
    266 	if x.form == finite {
    267 		exp = int(x.exp)
    268 	}
    269 	if mant != nil {
    270 		mant.Copy(x)
    271 		if mant.form == finite {
    272 			mant.exp = 0
    273 		}
    274 	}
    275 	return
    276 }
    277 
    278 func (z *Float) setExpAndRound(exp int64, sbit uint) {
    279 	if exp < MinExp {
    280 		// underflow
    281 		z.acc = makeAcc(z.neg)
    282 		z.form = zero
    283 		return
    284 	}
    285 
    286 	if exp > MaxExp {
    287 		// overflow
    288 		z.acc = makeAcc(!z.neg)
    289 		z.form = inf
    290 		return
    291 	}
    292 
    293 	z.form = finite
    294 	z.exp = int32(exp)
    295 	z.round(sbit)
    296 }
    297 
    298 // SetMantExp sets z to mant  2**exp and and returns z.
    299 // The result z has the same precision and rounding mode
    300 // as mant. SetMantExp is an inverse of MantExp but does
    301 // not require 0.5 <= |mant| < 1.0. Specifically:
    302 //
    303 //	mant := new(Float)
    304 //	new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true
    305 //
    306 // Special cases are:
    307 //
    308 //	z.SetMantExp(  0, exp) =   0
    309 //	z.SetMantExp(Inf, exp) = Inf
    310 //
    311 // z and mant may be the same in which case z's exponent
    312 // is set to exp.
    313 func (z *Float) SetMantExp(mant *Float, exp int) *Float {
    314 	if debugFloat {
    315 		z.validate()
    316 		mant.validate()
    317 	}
    318 	z.Copy(mant)
    319 	if z.form != finite {
    320 		return z
    321 	}
    322 	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
    323 	return z
    324 }
    325 
    326 // Signbit returns true if x is negative or negative zero.
    327 func (x *Float) Signbit() bool {
    328 	return x.neg
    329 }
    330 
    331 // IsInf reports whether x is +Inf or -Inf.
    332 func (x *Float) IsInf() bool {
    333 	return x.form == inf
    334 }
    335 
    336 // IsInt reports whether x is an integer.
    337 // Inf values are not integers.
    338 func (x *Float) IsInt() bool {
    339 	if debugFloat {
    340 		x.validate()
    341 	}
    342 	// special cases
    343 	if x.form != finite {
    344 		return x.form == zero
    345 	}
    346 	// x.form == finite
    347 	if x.exp <= 0 {
    348 		return false
    349 	}
    350 	// x.exp > 0
    351 	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
    352 }
    353 
    354 // debugging support
    355 func (x *Float) validate() {
    356 	if !debugFloat {
    357 		// avoid performance bugs
    358 		panic("validate called but debugFloat is not set")
    359 	}
    360 	if x.form != finite {
    361 		return
    362 	}
    363 	m := len(x.mant)
    364 	if m == 0 {
    365 		panic("nonzero finite number with empty mantissa")
    366 	}
    367 	const msb = 1 << (_W - 1)
    368 	if x.mant[m-1]&msb == 0 {
    369 		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
    370 	}
    371 	if x.prec == 0 {
    372 		panic("zero precision finite number")
    373 	}
    374 }
    375 
    376 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
    377 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
    378 // have before calling round. z's mantissa must be normalized (with the msb set)
    379 // or empty.
    380 //
    381 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
    382 // sign of z. For correct rounding, the sign of z must be set correctly before
    383 // calling round.
    384 func (z *Float) round(sbit uint) {
    385 	if debugFloat {
    386 		z.validate()
    387 	}
    388 
    389 	z.acc = Exact
    390 	if z.form != finite {
    391 		// 0 or Inf => nothing left to do
    392 		return
    393 	}
    394 	// z.form == finite && len(z.mant) > 0
    395 	// m > 0 implies z.prec > 0 (checked by validate)
    396 
    397 	m := uint32(len(z.mant)) // present mantissa length in words
    398 	bits := m * _W           // present mantissa bits
    399 	if bits <= z.prec {
    400 		// mantissa fits => nothing to do
    401 		return
    402 	}
    403 	// bits > z.prec
    404 
    405 	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
    406 
    407 	// Rounding is based on two bits: the rounding bit (rbit) and the
    408 	// sticky bit (sbit). The rbit is the bit immediately before the
    409 	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
    410 	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
    411 	//
    412 	//   rbit  sbit  => "fractional part"
    413 	//
    414 	//   0     0        == 0
    415 	//   0     1        >  0  , < 0.5
    416 	//   1     0        == 0.5
    417 	//   1     1        >  0.5, < 1.0
    418 
    419 	// bits > z.prec: mantissa too large => round
    420 	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
    421 	rbit := z.mant.bit(r)        // rounding bit
    422 	if sbit == 0 {
    423 		sbit = z.mant.sticky(r)
    424 	}
    425 	if debugFloat && sbit&^1 != 0 {
    426 		panic(fmt.Sprintf("invalid sbit %#x", sbit))
    427 	}
    428 
    429 	// convert ToXInf rounding modes
    430 	mode := z.mode
    431 	switch mode {
    432 	case ToNegativeInf:
    433 		mode = ToZero
    434 		if z.neg {
    435 			mode = AwayFromZero
    436 		}
    437 	case ToPositiveInf:
    438 		mode = AwayFromZero
    439 		if z.neg {
    440 			mode = ToZero
    441 		}
    442 	}
    443 
    444 	// cut off extra words
    445 	if m > n {
    446 		copy(z.mant, z.mant[m-n:]) // move n last words to front
    447 		z.mant = z.mant[:n]
    448 	}
    449 
    450 	// determine number of trailing zero bits t
    451 	t := n*_W - z.prec // 0 <= t < _W
    452 	lsb := Word(1) << t
    453 
    454 	// make rounding decision
    455 	// TODO(gri) This can be simplified (see Bits.round in bits_test.go).
    456 	switch mode {
    457 	case ToZero:
    458 		// nothing to do
    459 	case ToNearestEven, ToNearestAway:
    460 		if rbit == 0 {
    461 			// rounding bits == 0b0x
    462 			mode = ToZero
    463 		} else if sbit == 1 {
    464 			// rounding bits == 0b11
    465 			mode = AwayFromZero
    466 		}
    467 	case AwayFromZero:
    468 		if rbit|sbit == 0 {
    469 			mode = ToZero
    470 		}
    471 	default:
    472 		// ToXInf modes have been converted to ToZero or AwayFromZero
    473 		panic("unreachable")
    474 	}
    475 
    476 	// round and determine accuracy
    477 	switch mode {
    478 	case ToZero:
    479 		if rbit|sbit != 0 {
    480 			z.acc = Below
    481 		}
    482 
    483 	case ToNearestEven, ToNearestAway:
    484 		if debugFloat && rbit != 1 {
    485 			panic("internal error in rounding")
    486 		}
    487 		if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
    488 			z.acc = Below
    489 			break
    490 		}
    491 		// mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
    492 		fallthrough
    493 
    494 	case AwayFromZero:
    495 		// add 1 to mantissa
    496 		if addVW(z.mant, z.mant, lsb) != 0 {
    497 			// overflow => shift mantissa right by 1 and add msb
    498 			shrVU(z.mant, z.mant, 1)
    499 			z.mant[n-1] |= 1 << (_W - 1)
    500 			// adjust exponent
    501 			if z.exp < MaxExp {
    502 				z.exp++
    503 			} else {
    504 				// exponent overflow
    505 				z.acc = makeAcc(!z.neg)
    506 				z.form = inf
    507 				return
    508 			}
    509 		}
    510 		z.acc = Above
    511 	}
    512 
    513 	// zero out trailing bits in least-significant word
    514 	z.mant[0] &^= lsb - 1
    515 
    516 	// update accuracy
    517 	if z.acc != Exact && z.neg {
    518 		z.acc = -z.acc
    519 	}
    520 
    521 	if debugFloat {
    522 		z.validate()
    523 	}
    524 
    525 	return
    526 }
    527 
    528 func (z *Float) setBits64(neg bool, x uint64) *Float {
    529 	if z.prec == 0 {
    530 		z.prec = 64
    531 	}
    532 	z.acc = Exact
    533 	z.neg = neg
    534 	if x == 0 {
    535 		z.form = zero
    536 		return z
    537 	}
    538 	// x != 0
    539 	z.form = finite
    540 	s := nlz64(x)
    541 	z.mant = z.mant.setUint64(x << s)
    542 	z.exp = int32(64 - s) // always fits
    543 	if z.prec < 64 {
    544 		z.round(0)
    545 	}
    546 	return z
    547 }
    548 
    549 // SetUint64 sets z to the (possibly rounded) value of x and returns z.
    550 // If z's precision is 0, it is changed to 64 (and rounding will have
    551 // no effect).
    552 func (z *Float) SetUint64(x uint64) *Float {
    553 	return z.setBits64(false, x)
    554 }
    555 
    556 // SetInt64 sets z to the (possibly rounded) value of x and returns z.
    557 // If z's precision is 0, it is changed to 64 (and rounding will have
    558 // no effect).
    559 func (z *Float) SetInt64(x int64) *Float {
    560 	u := x
    561 	if u < 0 {
    562 		u = -u
    563 	}
    564 	// We cannot simply call z.SetUint64(uint64(u)) and change
    565 	// the sign afterwards because the sign affects rounding.
    566 	return z.setBits64(x < 0, uint64(u))
    567 }
    568 
    569 // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
    570 // If z's precision is 0, it is changed to 53 (and rounding will have
    571 // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
    572 func (z *Float) SetFloat64(x float64) *Float {
    573 	if z.prec == 0 {
    574 		z.prec = 53
    575 	}
    576 	if math.IsNaN(x) {
    577 		panic(ErrNaN{"Float.SetFloat64(NaN)"})
    578 	}
    579 	z.acc = Exact
    580 	z.neg = math.Signbit(x) // handle -0, -Inf correctly
    581 	if x == 0 {
    582 		z.form = zero
    583 		return z
    584 	}
    585 	if math.IsInf(x, 0) {
    586 		z.form = inf
    587 		return z
    588 	}
    589 	// normalized x != 0
    590 	z.form = finite
    591 	fmant, exp := math.Frexp(x) // get normalized mantissa
    592 	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
    593 	z.exp = int32(exp) // always fits
    594 	if z.prec < 53 {
    595 		z.round(0)
    596 	}
    597 	return z
    598 }
    599 
    600 // fnorm normalizes mantissa m by shifting it to the left
    601 // such that the msb of the most-significant word (msw) is 1.
    602 // It returns the shift amount. It assumes that len(m) != 0.
    603 func fnorm(m nat) int64 {
    604 	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
    605 		panic("msw of mantissa is 0")
    606 	}
    607 	s := nlz(m[len(m)-1])
    608 	if s > 0 {
    609 		c := shlVU(m, m, s)
    610 		if debugFloat && c != 0 {
    611 			panic("nlz or shlVU incorrect")
    612 		}
    613 	}
    614 	return int64(s)
    615 }
    616 
    617 // SetInt sets z to the (possibly rounded) value of x and returns z.
    618 // If z's precision is 0, it is changed to the larger of x.BitLen()
    619 // or 64 (and rounding will have no effect).
    620 func (z *Float) SetInt(x *Int) *Float {
    621 	// TODO(gri) can be more efficient if z.prec > 0
    622 	// but small compared to the size of x, or if there
    623 	// are many trailing 0's.
    624 	bits := uint32(x.BitLen())
    625 	if z.prec == 0 {
    626 		z.prec = umax32(bits, 64)
    627 	}
    628 	z.acc = Exact
    629 	z.neg = x.neg
    630 	if len(x.abs) == 0 {
    631 		z.form = zero
    632 		return z
    633 	}
    634 	// x != 0
    635 	z.mant = z.mant.set(x.abs)
    636 	fnorm(z.mant)
    637 	z.setExpAndRound(int64(bits), 0)
    638 	return z
    639 }
    640 
    641 // SetRat sets z to the (possibly rounded) value of x and returns z.
    642 // If z's precision is 0, it is changed to the largest of a.BitLen(),
    643 // b.BitLen(), or 64; with x = a/b.
    644 func (z *Float) SetRat(x *Rat) *Float {
    645 	if x.IsInt() {
    646 		return z.SetInt(x.Num())
    647 	}
    648 	var a, b Float
    649 	a.SetInt(x.Num())
    650 	b.SetInt(x.Denom())
    651 	if z.prec == 0 {
    652 		z.prec = umax32(a.prec, b.prec)
    653 	}
    654 	return z.Quo(&a, &b)
    655 }
    656 
    657 // SetInf sets z to the infinite Float -Inf if signbit is
    658 // set, or +Inf if signbit is not set, and returns z. The
    659 // precision of z is unchanged and the result is always
    660 // Exact.
    661 func (z *Float) SetInf(signbit bool) *Float {
    662 	z.acc = Exact
    663 	z.form = inf
    664 	z.neg = signbit
    665 	return z
    666 }
    667 
    668 // Set sets z to the (possibly rounded) value of x and returns z.
    669 // If z's precision is 0, it is changed to the precision of x
    670 // before setting z (and rounding will have no effect).
    671 // Rounding is performed according to z's precision and rounding
    672 // mode; and z's accuracy reports the result error relative to the
    673 // exact (not rounded) result.
    674 func (z *Float) Set(x *Float) *Float {
    675 	if debugFloat {
    676 		x.validate()
    677 	}
    678 	z.acc = Exact
    679 	if z != x {
    680 		z.form = x.form
    681 		z.neg = x.neg
    682 		if x.form == finite {
    683 			z.exp = x.exp
    684 			z.mant = z.mant.set(x.mant)
    685 		}
    686 		if z.prec == 0 {
    687 			z.prec = x.prec
    688 		} else if z.prec < x.prec {
    689 			z.round(0)
    690 		}
    691 	}
    692 	return z
    693 }
    694 
    695 // Copy sets z to x, with the same precision, rounding mode, and
    696 // accuracy as x, and returns z. x is not changed even if z and
    697 // x are the same.
    698 func (z *Float) Copy(x *Float) *Float {
    699 	if debugFloat {
    700 		x.validate()
    701 	}
    702 	if z != x {
    703 		z.prec = x.prec
    704 		z.mode = x.mode
    705 		z.acc = x.acc
    706 		z.form = x.form
    707 		z.neg = x.neg
    708 		if z.form == finite {
    709 			z.mant = z.mant.set(x.mant)
    710 			z.exp = x.exp
    711 		}
    712 	}
    713 	return z
    714 }
    715 
    716 // msb32 returns the 32 most significant bits of x.
    717 func msb32(x nat) uint32 {
    718 	i := len(x) - 1
    719 	if i < 0 {
    720 		return 0
    721 	}
    722 	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
    723 		panic("x not normalized")
    724 	}
    725 	switch _W {
    726 	case 32:
    727 		return uint32(x[i])
    728 	case 64:
    729 		return uint32(x[i] >> 32)
    730 	}
    731 	panic("unreachable")
    732 }
    733 
    734 // msb64 returns the 64 most significant bits of x.
    735 func msb64(x nat) uint64 {
    736 	i := len(x) - 1
    737 	if i < 0 {
    738 		return 0
    739 	}
    740 	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
    741 		panic("x not normalized")
    742 	}
    743 	switch _W {
    744 	case 32:
    745 		v := uint64(x[i]) << 32
    746 		if i > 0 {
    747 			v |= uint64(x[i-1])
    748 		}
    749 		return v
    750 	case 64:
    751 		return uint64(x[i])
    752 	}
    753 	panic("unreachable")
    754 }
    755 
    756 // Uint64 returns the unsigned integer resulting from truncating x
    757 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
    758 // if x is an integer and Below otherwise.
    759 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
    760 // for x > math.MaxUint64.
    761 func (x *Float) Uint64() (uint64, Accuracy) {
    762 	if debugFloat {
    763 		x.validate()
    764 	}
    765 
    766 	switch x.form {
    767 	case finite:
    768 		if x.neg {
    769 			return 0, Above
    770 		}
    771 		// 0 < x < +Inf
    772 		if x.exp <= 0 {
    773 			// 0 < x < 1
    774 			return 0, Below
    775 		}
    776 		// 1 <= x < Inf
    777 		if x.exp <= 64 {
    778 			// u = trunc(x) fits into a uint64
    779 			u := msb64(x.mant) >> (64 - uint32(x.exp))
    780 			if x.MinPrec() <= 64 {
    781 				return u, Exact
    782 			}
    783 			return u, Below // x truncated
    784 		}
    785 		// x too large
    786 		return math.MaxUint64, Below
    787 
    788 	case zero:
    789 		return 0, Exact
    790 
    791 	case inf:
    792 		if x.neg {
    793 			return 0, Above
    794 		}
    795 		return math.MaxUint64, Below
    796 	}
    797 
    798 	panic("unreachable")
    799 }
    800 
    801 // Int64 returns the integer resulting from truncating x towards zero.
    802 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
    803 // an integer, and Above (x < 0) or Below (x > 0) otherwise.
    804 // The result is (math.MinInt64, Above) for x < math.MinInt64,
    805 // and (math.MaxInt64, Below) for x > math.MaxInt64.
    806 func (x *Float) Int64() (int64, Accuracy) {
    807 	if debugFloat {
    808 		x.validate()
    809 	}
    810 
    811 	switch x.form {
    812 	case finite:
    813 		// 0 < |x| < +Inf
    814 		acc := makeAcc(x.neg)
    815 		if x.exp <= 0 {
    816 			// 0 < |x| < 1
    817 			return 0, acc
    818 		}
    819 		// x.exp > 0
    820 
    821 		// 1 <= |x| < +Inf
    822 		if x.exp <= 63 {
    823 			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
    824 			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
    825 			if x.neg {
    826 				i = -i
    827 			}
    828 			if x.MinPrec() <= uint(x.exp) {
    829 				return i, Exact
    830 			}
    831 			return i, acc // x truncated
    832 		}
    833 		if x.neg {
    834 			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
    835 			if x.exp == 64 && x.MinPrec() == 1 {
    836 				acc = Exact
    837 			}
    838 			return math.MinInt64, acc
    839 		}
    840 		// x too large
    841 		return math.MaxInt64, Below
    842 
    843 	case zero:
    844 		return 0, Exact
    845 
    846 	case inf:
    847 		if x.neg {
    848 			return math.MinInt64, Above
    849 		}
    850 		return math.MaxInt64, Below
    851 	}
    852 
    853 	panic("unreachable")
    854 }
    855 
    856 // Float32 returns the float32 value nearest to x. If x is too small to be
    857 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
    858 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
    859 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
    860 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
    861 func (x *Float) Float32() (float32, Accuracy) {
    862 	if debugFloat {
    863 		x.validate()
    864 	}
    865 
    866 	switch x.form {
    867 	case finite:
    868 		// 0 < |x| < +Inf
    869 
    870 		const (
    871 			fbits = 32                //        float size
    872 			mbits = 23                //        mantissa size (excluding implicit msb)
    873 			ebits = fbits - mbits - 1 //     8  exponent size
    874 			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
    875 			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
    876 			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
    877 			emax  = bias              //   127  largest unbiased exponent (normal)
    878 		)
    879 
    880 		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
    881 		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
    882 		p := mbits + 1 // precision of normal float
    883 
    884 		// If the exponent is too small, we may have a denormal number
    885 		// in which case we have fewer mantissa bits available: reduce
    886 		// precision accordingly.
    887 		if e < emin {
    888 			p -= emin - int(e)
    889 			// Make sure we have at least 1 bit so that we don't
    890 			// lose numbers rounded up to the smallest denormal.
    891 			if p < 1 {
    892 				p = 1
    893 			}
    894 		}
    895 
    896 		// round
    897 		var r Float
    898 		r.prec = uint32(p)
    899 		r.Set(x)
    900 		e = r.exp - 1
    901 
    902 		// Rounding may have caused r to overflow to Inf
    903 		// (rounding never causes underflows to 0).
    904 		if r.form == inf {
    905 			e = emax + 1 // cause overflow below
    906 		}
    907 
    908 		// If the exponent is too large, overflow to Inf.
    909 		if e > emax {
    910 			// overflow
    911 			if x.neg {
    912 				return float32(math.Inf(-1)), Below
    913 			}
    914 			return float32(math.Inf(+1)), Above
    915 		}
    916 		// e <= emax
    917 
    918 		// Determine sign, biased exponent, and mantissa.
    919 		var sign, bexp, mant uint32
    920 		if x.neg {
    921 			sign = 1 << (fbits - 1)
    922 		}
    923 
    924 		// Rounding may have caused a denormal number to
    925 		// become normal. Check again.
    926 		if e < emin {
    927 			// denormal number
    928 			if e < dmin {
    929 				// underflow to 0
    930 				if x.neg {
    931 					var z float32
    932 					return -z, Above
    933 				}
    934 				return 0.0, Below
    935 			}
    936 			// bexp = 0
    937 			mant = msb32(r.mant) >> (fbits - r.prec)
    938 		} else {
    939 			// normal number: emin <= e <= emax
    940 			bexp = uint32(e+bias) << mbits
    941 			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
    942 		}
    943 
    944 		return math.Float32frombits(sign | bexp | mant), r.acc
    945 
    946 	case zero:
    947 		if x.neg {
    948 			var z float32
    949 			return -z, Exact
    950 		}
    951 		return 0.0, Exact
    952 
    953 	case inf:
    954 		if x.neg {
    955 			return float32(math.Inf(-1)), Exact
    956 		}
    957 		return float32(math.Inf(+1)), Exact
    958 	}
    959 
    960 	panic("unreachable")
    961 }
    962 
    963 // Float64 returns the float64 value nearest to x. If x is too small to be
    964 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
    965 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
    966 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
    967 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
    968 func (x *Float) Float64() (float64, Accuracy) {
    969 	if debugFloat {
    970 		x.validate()
    971 	}
    972 
    973 	switch x.form {
    974 	case finite:
    975 		// 0 < |x| < +Inf
    976 
    977 		const (
    978 			fbits = 64                //        float size
    979 			mbits = 52                //        mantissa size (excluding implicit msb)
    980 			ebits = fbits - mbits - 1 //    11  exponent size
    981 			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
    982 			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
    983 			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
    984 			emax  = bias              //  1023  largest unbiased exponent (normal)
    985 		)
    986 
    987 		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
    988 		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
    989 		p := mbits + 1 // precision of normal float
    990 
    991 		// If the exponent is too small, we may have a denormal number
    992 		// in which case we have fewer mantissa bits available: reduce
    993 		// precision accordingly.
    994 		if e < emin {
    995 			p -= emin - int(e)
    996 			// Make sure we have at least 1 bit so that we don't
    997 			// lose numbers rounded up to the smallest denormal.
    998 			if p < 1 {
    999 				p = 1
   1000 			}
   1001 		}
   1002 
   1003 		// round
   1004 		var r Float
   1005 		r.prec = uint32(p)
   1006 		r.Set(x)
   1007 		e = r.exp - 1
   1008 
   1009 		// Rounding may have caused r to overflow to Inf
   1010 		// (rounding never causes underflows to 0).
   1011 		if r.form == inf {
   1012 			e = emax + 1 // cause overflow below
   1013 		}
   1014 
   1015 		// If the exponent is too large, overflow to Inf.
   1016 		if e > emax {
   1017 			// overflow
   1018 			if x.neg {
   1019 				return math.Inf(-1), Below
   1020 			}
   1021 			return math.Inf(+1), Above
   1022 		}
   1023 		// e <= emax
   1024 
   1025 		// Determine sign, biased exponent, and mantissa.
   1026 		var sign, bexp, mant uint64
   1027 		if x.neg {
   1028 			sign = 1 << (fbits - 1)
   1029 		}
   1030 
   1031 		// Rounding may have caused a denormal number to
   1032 		// become normal. Check again.
   1033 		if e < emin {
   1034 			// denormal number
   1035 			if e < dmin {
   1036 				// underflow to 0
   1037 				if x.neg {
   1038 					var z float64
   1039 					return -z, Above
   1040 				}
   1041 				return 0.0, Below
   1042 			}
   1043 			// bexp = 0
   1044 			mant = msb64(r.mant) >> (fbits - r.prec)
   1045 		} else {
   1046 			// normal number: emin <= e <= emax
   1047 			bexp = uint64(e+bias) << mbits
   1048 			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   1049 		}
   1050 
   1051 		return math.Float64frombits(sign | bexp | mant), r.acc
   1052 
   1053 	case zero:
   1054 		if x.neg {
   1055 			var z float64
   1056 			return -z, Exact
   1057 		}
   1058 		return 0.0, Exact
   1059 
   1060 	case inf:
   1061 		if x.neg {
   1062 			return math.Inf(-1), Exact
   1063 		}
   1064 		return math.Inf(+1), Exact
   1065 	}
   1066 
   1067 	panic("unreachable")
   1068 }
   1069 
   1070 // Int returns the result of truncating x towards zero;
   1071 // or nil if x is an infinity.
   1072 // The result is Exact if x.IsInt(); otherwise it is Below
   1073 // for x > 0, and Above for x < 0.
   1074 // If a non-nil *Int argument z is provided, Int stores
   1075 // the result in z instead of allocating a new Int.
   1076 func (x *Float) Int(z *Int) (*Int, Accuracy) {
   1077 	if debugFloat {
   1078 		x.validate()
   1079 	}
   1080 
   1081 	if z == nil && x.form <= finite {
   1082 		z = new(Int)
   1083 	}
   1084 
   1085 	switch x.form {
   1086 	case finite:
   1087 		// 0 < |x| < +Inf
   1088 		acc := makeAcc(x.neg)
   1089 		if x.exp <= 0 {
   1090 			// 0 < |x| < 1
   1091 			return z.SetInt64(0), acc
   1092 		}
   1093 		// x.exp > 0
   1094 
   1095 		// 1 <= |x| < +Inf
   1096 		// determine minimum required precision for x
   1097 		allBits := uint(len(x.mant)) * _W
   1098 		exp := uint(x.exp)
   1099 		if x.MinPrec() <= exp {
   1100 			acc = Exact
   1101 		}
   1102 		// shift mantissa as needed
   1103 		if z == nil {
   1104 			z = new(Int)
   1105 		}
   1106 		z.neg = x.neg
   1107 		switch {
   1108 		case exp > allBits:
   1109 			z.abs = z.abs.shl(x.mant, exp-allBits)
   1110 		default:
   1111 			z.abs = z.abs.set(x.mant)
   1112 		case exp < allBits:
   1113 			z.abs = z.abs.shr(x.mant, allBits-exp)
   1114 		}
   1115 		return z, acc
   1116 
   1117 	case zero:
   1118 		return z.SetInt64(0), Exact
   1119 
   1120 	case inf:
   1121 		return nil, makeAcc(x.neg)
   1122 	}
   1123 
   1124 	panic("unreachable")
   1125 }
   1126 
   1127 // Rat returns the rational number corresponding to x;
   1128 // or nil if x is an infinity.
   1129 // The result is Exact is x is not an Inf.
   1130 // If a non-nil *Rat argument z is provided, Rat stores
   1131 // the result in z instead of allocating a new Rat.
   1132 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
   1133 	if debugFloat {
   1134 		x.validate()
   1135 	}
   1136 
   1137 	if z == nil && x.form <= finite {
   1138 		z = new(Rat)
   1139 	}
   1140 
   1141 	switch x.form {
   1142 	case finite:
   1143 		// 0 < |x| < +Inf
   1144 		allBits := int32(len(x.mant)) * _W
   1145 		// build up numerator and denominator
   1146 		z.a.neg = x.neg
   1147 		switch {
   1148 		case x.exp > allBits:
   1149 			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
   1150 			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
   1151 			// z already in normal form
   1152 		default:
   1153 			z.a.abs = z.a.abs.set(x.mant)
   1154 			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
   1155 			// z already in normal form
   1156 		case x.exp < allBits:
   1157 			z.a.abs = z.a.abs.set(x.mant)
   1158 			t := z.b.abs.setUint64(1)
   1159 			z.b.abs = t.shl(t, uint(allBits-x.exp))
   1160 			z.norm()
   1161 		}
   1162 		return z, Exact
   1163 
   1164 	case zero:
   1165 		return z.SetInt64(0), Exact
   1166 
   1167 	case inf:
   1168 		return nil, makeAcc(x.neg)
   1169 	}
   1170 
   1171 	panic("unreachable")
   1172 }
   1173 
   1174 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
   1175 // and returns z.
   1176 func (z *Float) Abs(x *Float) *Float {
   1177 	z.Set(x)
   1178 	z.neg = false
   1179 	return z
   1180 }
   1181 
   1182 // Neg sets z to the (possibly rounded) value of x with its sign negated,
   1183 // and returns z.
   1184 func (z *Float) Neg(x *Float) *Float {
   1185 	z.Set(x)
   1186 	z.neg = !z.neg
   1187 	return z
   1188 }
   1189 
   1190 func validateBinaryOperands(x, y *Float) {
   1191 	if !debugFloat {
   1192 		// avoid performance bugs
   1193 		panic("validateBinaryOperands called but debugFloat is not set")
   1194 	}
   1195 	if len(x.mant) == 0 {
   1196 		panic("empty mantissa for x")
   1197 	}
   1198 	if len(y.mant) == 0 {
   1199 		panic("empty mantissa for y")
   1200 	}
   1201 }
   1202 
   1203 // z = x + y, ignoring signs of x and y for the addition
   1204 // but using the sign of z for rounding the result.
   1205 // x and y must have a non-empty mantissa and valid exponent.
   1206 func (z *Float) uadd(x, y *Float) {
   1207 	// Note: This implementation requires 2 shifts most of the
   1208 	// time. It is also inefficient if exponents or precisions
   1209 	// differ by wide margins. The following article describes
   1210 	// an efficient (but much more complicated) implementation
   1211 	// compatible with the internal representation used here:
   1212 	//
   1213 	// Vincent Lefvre: "The Generic Multiple-Precision Floating-
   1214 	// Point Addition With Exact Rounding (as in the MPFR Library)"
   1215 	// http://www.vinc17.net/research/papers/rnc6.pdf
   1216 
   1217 	if debugFloat {
   1218 		validateBinaryOperands(x, y)
   1219 	}
   1220 
   1221 	// compute exponents ex, ey for mantissa with "binary point"
   1222 	// on the right (mantissa.0) - use int64 to avoid overflow
   1223 	ex := int64(x.exp) - int64(len(x.mant))*_W
   1224 	ey := int64(y.exp) - int64(len(y.mant))*_W
   1225 
   1226 	// TODO(gri) having a combined add-and-shift primitive
   1227 	//           could make this code significantly faster
   1228 	switch {
   1229 	case ex < ey:
   1230 		// cannot re-use z.mant w/o testing for aliasing
   1231 		t := nat(nil).shl(y.mant, uint(ey-ex))
   1232 		z.mant = z.mant.add(x.mant, t)
   1233 	default:
   1234 		// ex == ey, no shift needed
   1235 		z.mant = z.mant.add(x.mant, y.mant)
   1236 	case ex > ey:
   1237 		// cannot re-use z.mant w/o testing for aliasing
   1238 		t := nat(nil).shl(x.mant, uint(ex-ey))
   1239 		z.mant = z.mant.add(t, y.mant)
   1240 		ex = ey
   1241 	}
   1242 	// len(z.mant) > 0
   1243 
   1244 	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
   1245 }
   1246 
   1247 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
   1248 // but using the sign of z for rounding the result.
   1249 // x and y must have a non-empty mantissa and valid exponent.
   1250 func (z *Float) usub(x, y *Float) {
   1251 	// This code is symmetric to uadd.
   1252 	// We have not factored the common code out because
   1253 	// eventually uadd (and usub) should be optimized
   1254 	// by special-casing, and the code will diverge.
   1255 
   1256 	if debugFloat {
   1257 		validateBinaryOperands(x, y)
   1258 	}
   1259 
   1260 	ex := int64(x.exp) - int64(len(x.mant))*_W
   1261 	ey := int64(y.exp) - int64(len(y.mant))*_W
   1262 
   1263 	switch {
   1264 	case ex < ey:
   1265 		// cannot re-use z.mant w/o testing for aliasing
   1266 		t := nat(nil).shl(y.mant, uint(ey-ex))
   1267 		z.mant = t.sub(x.mant, t)
   1268 	default:
   1269 		// ex == ey, no shift needed
   1270 		z.mant = z.mant.sub(x.mant, y.mant)
   1271 	case ex > ey:
   1272 		// cannot re-use z.mant w/o testing for aliasing
   1273 		t := nat(nil).shl(x.mant, uint(ex-ey))
   1274 		z.mant = t.sub(t, y.mant)
   1275 		ex = ey
   1276 	}
   1277 
   1278 	// operands may have cancelled each other out
   1279 	if len(z.mant) == 0 {
   1280 		z.acc = Exact
   1281 		z.form = zero
   1282 		z.neg = false
   1283 		return
   1284 	}
   1285 	// len(z.mant) > 0
   1286 
   1287 	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
   1288 }
   1289 
   1290 // z = x * y, ignoring signs of x and y for the multiplication
   1291 // but using the sign of z for rounding the result.
   1292 // x and y must have a non-empty mantissa and valid exponent.
   1293 func (z *Float) umul(x, y *Float) {
   1294 	if debugFloat {
   1295 		validateBinaryOperands(x, y)
   1296 	}
   1297 
   1298 	// Note: This is doing too much work if the precision
   1299 	// of z is less than the sum of the precisions of x
   1300 	// and y which is often the case (e.g., if all floats
   1301 	// have the same precision).
   1302 	// TODO(gri) Optimize this for the common case.
   1303 
   1304 	e := int64(x.exp) + int64(y.exp)
   1305 	z.mant = z.mant.mul(x.mant, y.mant)
   1306 
   1307 	z.setExpAndRound(e-fnorm(z.mant), 0)
   1308 }
   1309 
   1310 // z = x / y, ignoring signs of x and y for the division
   1311 // but using the sign of z for rounding the result.
   1312 // x and y must have a non-empty mantissa and valid exponent.
   1313 func (z *Float) uquo(x, y *Float) {
   1314 	if debugFloat {
   1315 		validateBinaryOperands(x, y)
   1316 	}
   1317 
   1318 	// mantissa length in words for desired result precision + 1
   1319 	// (at least one extra bit so we get the rounding bit after
   1320 	// the division)
   1321 	n := int(z.prec/_W) + 1
   1322 
   1323 	// compute adjusted x.mant such that we get enough result precision
   1324 	xadj := x.mant
   1325 	if d := n - len(x.mant) + len(y.mant); d > 0 {
   1326 		// d extra words needed => add d "0 digits" to x
   1327 		xadj = make(nat, len(x.mant)+d)
   1328 		copy(xadj[d:], x.mant)
   1329 	}
   1330 	// TODO(gri): If we have too many digits (d < 0), we should be able
   1331 	// to shorten x for faster division. But we must be extra careful
   1332 	// with rounding in that case.
   1333 
   1334 	// Compute d before division since there may be aliasing of x.mant
   1335 	// (via xadj) or y.mant with z.mant.
   1336 	d := len(xadj) - len(y.mant)
   1337 
   1338 	// divide
   1339 	var r nat
   1340 	z.mant, r = z.mant.div(nil, xadj, y.mant)
   1341 	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
   1342 
   1343 	// The result is long enough to include (at least) the rounding bit.
   1344 	// If there's a non-zero remainder, the corresponding fractional part
   1345 	// (if it were computed), would have a non-zero sticky bit (if it were
   1346 	// zero, it couldn't have a non-zero remainder).
   1347 	var sbit uint
   1348 	if len(r) > 0 {
   1349 		sbit = 1
   1350 	}
   1351 
   1352 	z.setExpAndRound(e-fnorm(z.mant), sbit)
   1353 }
   1354 
   1355 // ucmp returns -1, 0, or +1, depending on whether
   1356 // |x| < |y|, |x| == |y|, or |x| > |y|.
   1357 // x and y must have a non-empty mantissa and valid exponent.
   1358 func (x *Float) ucmp(y *Float) int {
   1359 	if debugFloat {
   1360 		validateBinaryOperands(x, y)
   1361 	}
   1362 
   1363 	switch {
   1364 	case x.exp < y.exp:
   1365 		return -1
   1366 	case x.exp > y.exp:
   1367 		return +1
   1368 	}
   1369 	// x.exp == y.exp
   1370 
   1371 	// compare mantissas
   1372 	i := len(x.mant)
   1373 	j := len(y.mant)
   1374 	for i > 0 || j > 0 {
   1375 		var xm, ym Word
   1376 		if i > 0 {
   1377 			i--
   1378 			xm = x.mant[i]
   1379 		}
   1380 		if j > 0 {
   1381 			j--
   1382 			ym = y.mant[j]
   1383 		}
   1384 		switch {
   1385 		case xm < ym:
   1386 			return -1
   1387 		case xm > ym:
   1388 			return +1
   1389 		}
   1390 	}
   1391 
   1392 	return 0
   1393 }
   1394 
   1395 // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
   1396 //
   1397 // When neither the inputs nor result are NaN, the sign of a product or
   1398 // quotient is the exclusive OR of the operands signs; the sign of a sum,
   1399 // or of a difference xy regarded as a sum x+(y), differs from at most
   1400 // one of the addends signs; and the sign of the result of conversions,
   1401 // the quantize operation, the roundToIntegral operations, and the
   1402 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
   1403 // These rules shall apply even when operands or results are zero or infinite.
   1404 //
   1405 // When the sum of two operands with opposite signs (or the difference of
   1406 // two operands with like signs) is exactly zero, the sign of that sum (or
   1407 // difference) shall be +0 in all rounding-direction attributes except
   1408 // roundTowardNegative; under that attribute, the sign of an exact zero
   1409 // sum (or difference) shall be 0. However, x+x = x(x) retains the same
   1410 // sign as x even when x is zero.
   1411 //
   1412 // See also: https://play.golang.org/p/RtH3UCt5IH
   1413 
   1414 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
   1415 // it is changed to the larger of x's or y's precision before the operation.
   1416 // Rounding is performed according to z's precision and rounding mode; and
   1417 // z's accuracy reports the result error relative to the exact (not rounded)
   1418 // result. Add panics with ErrNaN if x and y are infinities with opposite
   1419 // signs. The value of z is undefined in that case.
   1420 //
   1421 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
   1422 func (z *Float) Add(x, y *Float) *Float {
   1423 	if debugFloat {
   1424 		x.validate()
   1425 		y.validate()
   1426 	}
   1427 
   1428 	if z.prec == 0 {
   1429 		z.prec = umax32(x.prec, y.prec)
   1430 	}
   1431 
   1432 	if x.form == finite && y.form == finite {
   1433 		// x + y (commom case)
   1434 		z.neg = x.neg
   1435 		if x.neg == y.neg {
   1436 			// x + y == x + y
   1437 			// (-x) + (-y) == -(x + y)
   1438 			z.uadd(x, y)
   1439 		} else {
   1440 			// x + (-y) == x - y == -(y - x)
   1441 			// (-x) + y == y - x == -(x - y)
   1442 			if x.ucmp(y) > 0 {
   1443 				z.usub(x, y)
   1444 			} else {
   1445 				z.neg = !z.neg
   1446 				z.usub(y, x)
   1447 			}
   1448 		}
   1449 		return z
   1450 	}
   1451 
   1452 	if x.form == inf && y.form == inf && x.neg != y.neg {
   1453 		// +Inf + -Inf
   1454 		// -Inf + +Inf
   1455 		// value of z is undefined but make sure it's valid
   1456 		z.acc = Exact
   1457 		z.form = zero
   1458 		z.neg = false
   1459 		panic(ErrNaN{"addition of infinities with opposite signs"})
   1460 	}
   1461 
   1462 	if x.form == zero && y.form == zero {
   1463 		// 0 + 0
   1464 		z.acc = Exact
   1465 		z.form = zero
   1466 		z.neg = x.neg && y.neg // -0 + -0 == -0
   1467 		return z
   1468 	}
   1469 
   1470 	if x.form == inf || y.form == zero {
   1471 		// Inf + y
   1472 		// x + 0
   1473 		return z.Set(x)
   1474 	}
   1475 
   1476 	// 0 + y
   1477 	// x + Inf
   1478 	return z.Set(y)
   1479 }
   1480 
   1481 // Sub sets z to the rounded difference x-y and returns z.
   1482 // Precision, rounding, and accuracy reporting are as for Add.
   1483 // Sub panics with ErrNaN if x and y are infinities with equal
   1484 // signs. The value of z is undefined in that case.
   1485 func (z *Float) Sub(x, y *Float) *Float {
   1486 	if debugFloat {
   1487 		x.validate()
   1488 		y.validate()
   1489 	}
   1490 
   1491 	if z.prec == 0 {
   1492 		z.prec = umax32(x.prec, y.prec)
   1493 	}
   1494 
   1495 	if x.form == finite && y.form == finite {
   1496 		// x - y (common case)
   1497 		z.neg = x.neg
   1498 		if x.neg != y.neg {
   1499 			// x - (-y) == x + y
   1500 			// (-x) - y == -(x + y)
   1501 			z.uadd(x, y)
   1502 		} else {
   1503 			// x - y == x - y == -(y - x)
   1504 			// (-x) - (-y) == y - x == -(x - y)
   1505 			if x.ucmp(y) > 0 {
   1506 				z.usub(x, y)
   1507 			} else {
   1508 				z.neg = !z.neg
   1509 				z.usub(y, x)
   1510 			}
   1511 		}
   1512 		return z
   1513 	}
   1514 
   1515 	if x.form == inf && y.form == inf && x.neg == y.neg {
   1516 		// +Inf - +Inf
   1517 		// -Inf - -Inf
   1518 		// value of z is undefined but make sure it's valid
   1519 		z.acc = Exact
   1520 		z.form = zero
   1521 		z.neg = false
   1522 		panic(ErrNaN{"subtraction of infinities with equal signs"})
   1523 	}
   1524 
   1525 	if x.form == zero && y.form == zero {
   1526 		// 0 - 0
   1527 		z.acc = Exact
   1528 		z.form = zero
   1529 		z.neg = x.neg && !y.neg // -0 - +0 == -0
   1530 		return z
   1531 	}
   1532 
   1533 	if x.form == inf || y.form == zero {
   1534 		// Inf - y
   1535 		// x - 0
   1536 		return z.Set(x)
   1537 	}
   1538 
   1539 	// 0 - y
   1540 	// x - Inf
   1541 	return z.Neg(y)
   1542 }
   1543 
   1544 // Mul sets z to the rounded product x*y and returns z.
   1545 // Precision, rounding, and accuracy reporting are as for Add.
   1546 // Mul panics with ErrNaN if one operand is zero and the other
   1547 // operand an infinity. The value of z is undefined in that case.
   1548 func (z *Float) Mul(x, y *Float) *Float {
   1549 	if debugFloat {
   1550 		x.validate()
   1551 		y.validate()
   1552 	}
   1553 
   1554 	if z.prec == 0 {
   1555 		z.prec = umax32(x.prec, y.prec)
   1556 	}
   1557 
   1558 	z.neg = x.neg != y.neg
   1559 
   1560 	if x.form == finite && y.form == finite {
   1561 		// x * y (common case)
   1562 		z.umul(x, y)
   1563 		return z
   1564 	}
   1565 
   1566 	z.acc = Exact
   1567 	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
   1568 		// 0 * Inf
   1569 		// Inf * 0
   1570 		// value of z is undefined but make sure it's valid
   1571 		z.form = zero
   1572 		z.neg = false
   1573 		panic(ErrNaN{"multiplication of zero with infinity"})
   1574 	}
   1575 
   1576 	if x.form == inf || y.form == inf {
   1577 		// Inf * y
   1578 		// x * Inf
   1579 		z.form = inf
   1580 		return z
   1581 	}
   1582 
   1583 	// 0 * y
   1584 	// x * 0
   1585 	z.form = zero
   1586 	return z
   1587 }
   1588 
   1589 // Quo sets z to the rounded quotient x/y and returns z.
   1590 // Precision, rounding, and accuracy reporting are as for Add.
   1591 // Quo panics with ErrNaN if both operands are zero or infinities.
   1592 // The value of z is undefined in that case.
   1593 func (z *Float) Quo(x, y *Float) *Float {
   1594 	if debugFloat {
   1595 		x.validate()
   1596 		y.validate()
   1597 	}
   1598 
   1599 	if z.prec == 0 {
   1600 		z.prec = umax32(x.prec, y.prec)
   1601 	}
   1602 
   1603 	z.neg = x.neg != y.neg
   1604 
   1605 	if x.form == finite && y.form == finite {
   1606 		// x / y (common case)
   1607 		z.uquo(x, y)
   1608 		return z
   1609 	}
   1610 
   1611 	z.acc = Exact
   1612 	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
   1613 		// 0 / 0
   1614 		// Inf / Inf
   1615 		// value of z is undefined but make sure it's valid
   1616 		z.form = zero
   1617 		z.neg = false
   1618 		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
   1619 	}
   1620 
   1621 	if x.form == zero || y.form == inf {
   1622 		// 0 / y
   1623 		// x / Inf
   1624 		z.form = zero
   1625 		return z
   1626 	}
   1627 
   1628 	// x / 0
   1629 	// Inf / y
   1630 	z.form = inf
   1631 	return z
   1632 }
   1633 
   1634 // Cmp compares x and y and returns:
   1635 //
   1636 //   -1 if x <  y
   1637 //    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
   1638 //   +1 if x >  y
   1639 //
   1640 func (x *Float) Cmp(y *Float) int {
   1641 	if debugFloat {
   1642 		x.validate()
   1643 		y.validate()
   1644 	}
   1645 
   1646 	mx := x.ord()
   1647 	my := y.ord()
   1648 	switch {
   1649 	case mx < my:
   1650 		return -1
   1651 	case mx > my:
   1652 		return +1
   1653 	}
   1654 	// mx == my
   1655 
   1656 	// only if |mx| == 1 we have to compare the mantissae
   1657 	switch mx {
   1658 	case -1:
   1659 		return y.ucmp(x)
   1660 	case +1:
   1661 		return x.ucmp(y)
   1662 	}
   1663 
   1664 	return 0
   1665 }
   1666 
   1667 // ord classifies x and returns:
   1668 //
   1669 //	-2 if -Inf == x
   1670 //	-1 if -Inf < x < 0
   1671 //	 0 if x == 0 (signed or unsigned)
   1672 //	+1 if 0 < x < +Inf
   1673 //	+2 if x == +Inf
   1674 //
   1675 func (x *Float) ord() int {
   1676 	var m int
   1677 	switch x.form {
   1678 	case finite:
   1679 		m = 1
   1680 	case zero:
   1681 		return 0
   1682 	case inf:
   1683 		m = 2
   1684 	}
   1685 	if x.neg {
   1686 		m = -m
   1687 	}
   1688 	return m
   1689 }
   1690 
   1691 func umax32(x, y uint32) uint32 {
   1692 	if x > y {
   1693 		return x
   1694 	}
   1695 	return y
   1696 }
   1697