1 // Do not edit. Bootstrap copy of /Volumes/Android/buildbot/src/android/build-tools/out/obj/go/src/cmd/compile/internal/big/float.go 2 3 //line /Volumes/Android/buildbot/src/android/build-tools/out/obj/go/src/cmd/compile/internal/big/float.go:1 4 // Copyright 2014 The Go Authors. All rights reserved. 5 // Use of this source code is governed by a BSD-style 6 // license that can be found in the LICENSE file. 7 8 // This file implements multi-precision floating-point numbers. 9 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 10 // can be of mixed precision. Unlike MPFR, the rounding mode is 11 // not specified with each operation, but with each operand. The 12 // rounding mode of the result operand determines the rounding 13 // mode of an operation. This is a from-scratch implementation. 14 15 package big 16 17 import ( 18 "fmt" 19 "math" 20 ) 21 22 const debugFloat = false // enable for debugging 23 24 // A nonzero finite Float represents a multi-precision floating point number 25 // 26 // sign mantissa 2**exponent 27 // 28 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 29 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 30 // All Floats are ordered, and the ordering of two Floats x and y 31 // is defined by x.Cmp(y). 32 // 33 // Each Float value also has a precision, rounding mode, and accuracy. 34 // The precision is the maximum number of mantissa bits available to 35 // represent the value. The rounding mode specifies how a result should 36 // be rounded to fit into the mantissa bits, and accuracy describes the 37 // rounding error with respect to the exact result. 38 // 39 // Unless specified otherwise, all operations (including setters) that 40 // specify a *Float variable for the result (usually via the receiver 41 // with the exception of MantExp), round the numeric result according 42 // to the precision and rounding mode of the result variable. 43 // 44 // If the provided result precision is 0 (see below), it is set to the 45 // precision of the argument with the largest precision value before any 46 // rounding takes place, and the rounding mode remains unchanged. Thus, 47 // uninitialized Floats provided as result arguments will have their 48 // precision set to a reasonable value determined by the operands and 49 // their mode is the zero value for RoundingMode (ToNearestEven). 50 // 51 // By setting the desired precision to 24 or 53 and using matching rounding 52 // mode (typically ToNearestEven), Float operations produce the same results 53 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 54 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 55 // Exponent underflow and overflow lead to a 0 or an Infinity for different 56 // values than IEEE-754 because Float exponents have a much larger range. 57 // 58 // The zero (uninitialized) value for a Float is ready to use and represents 59 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 60 // 61 type Float struct { 62 prec uint32 63 mode RoundingMode 64 acc Accuracy 65 form form 66 neg bool 67 mant nat 68 exp int32 69 } 70 71 // An ErrNaN panic is raised by a Float operation that would lead to 72 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 73 type ErrNaN struct { 74 msg string 75 } 76 77 func (err ErrNaN) Error() string { 78 return err.msg 79 } 80 81 // NewFloat allocates and returns a new Float set to x, 82 // with precision 53 and rounding mode ToNearestEven. 83 // NewFloat panics with ErrNaN if x is a NaN. 84 func NewFloat(x float64) *Float { 85 if math.IsNaN(x) { 86 panic(ErrNaN{"NewFloat(NaN)"}) 87 } 88 return new(Float).SetFloat64(x) 89 } 90 91 // Exponent and precision limits. 92 const ( 93 MaxExp = math.MaxInt32 // largest supported exponent 94 MinExp = math.MinInt32 // smallest supported exponent 95 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 96 ) 97 98 // Internal representation: The mantissa bits x.mant of a nonzero finite 99 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 100 // the slice may (but doesn't have to) be shorter if the mantissa contains 101 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 102 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 103 // trailing 0 bits or x.prec is not a multiple of the the Word size _W, 104 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 105 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 106 // 107 // A zero or non-finite Float x ignores x.mant and x.exp. 108 // 109 // x form neg mant exp 110 // ---------------------------------------------------------- 111 // 0 zero sign - - 112 // 0 < |x| < +Inf finite sign mantissa exponent 113 // Inf inf sign - - 114 115 // A form value describes the internal representation. 116 type form byte 117 118 // The form value order is relevant - do not change! 119 const ( 120 zero form = iota 121 finite 122 inf 123 ) 124 125 // RoundingMode determines how a Float value is rounded to the 126 // desired precision. Rounding may change the Float value; the 127 // rounding error is described by the Float's Accuracy. 128 type RoundingMode byte 129 130 // The following rounding modes are supported. 131 const ( 132 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 133 ToNearestAway // == IEEE 754-2008 roundTiesToAway 134 ToZero // == IEEE 754-2008 roundTowardZero 135 AwayFromZero // no IEEE 754-2008 equivalent 136 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 137 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 138 ) 139 140 //go:generate stringer -type=RoundingMode 141 142 // Accuracy describes the rounding error produced by the most recent 143 // operation that generated a Float value, relative to the exact value. 144 type Accuracy int8 145 146 // Constants describing the Accuracy of a Float. 147 const ( 148 Below Accuracy = -1 149 Exact Accuracy = 0 150 Above Accuracy = +1 151 ) 152 153 //go:generate stringer -type=Accuracy 154 155 // SetPrec sets z's precision to prec and returns the (possibly) rounded 156 // value of z. Rounding occurs according to z's rounding mode if the mantissa 157 // cannot be represented in prec bits without loss of precision. 158 // SetPrec(0) maps all finite values to 0; infinite values remain unchanged. 159 // If prec > MaxPrec, it is set to MaxPrec. 160 func (z *Float) SetPrec(prec uint) *Float { 161 z.acc = Exact // optimistically assume no rounding is needed 162 163 // special case 164 if prec == 0 { 165 z.prec = 0 166 if z.form == finite { 167 // truncate z to 0 168 z.acc = makeAcc(z.neg) 169 z.form = zero 170 } 171 return z 172 } 173 174 // general case 175 if prec > MaxPrec { 176 prec = MaxPrec 177 } 178 old := z.prec 179 z.prec = uint32(prec) 180 if z.prec < old { 181 z.round(0) 182 } 183 return z 184 } 185 186 func makeAcc(above bool) Accuracy { 187 if above { 188 return Above 189 } 190 return Below 191 } 192 193 // SetMode sets z's rounding mode to mode and returns an exact z. 194 // z remains unchanged otherwise. 195 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 196 func (z *Float) SetMode(mode RoundingMode) *Float { 197 z.mode = mode 198 z.acc = Exact 199 return z 200 } 201 202 // Prec returns the mantissa precision of x in bits. 203 // The result may be 0 for |x| == 0 and |x| == Inf. 204 func (x *Float) Prec() uint { 205 return uint(x.prec) 206 } 207 208 // MinPrec returns the minimum precision required to represent x exactly 209 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 210 // The result is 0 for |x| == 0 and |x| == Inf. 211 func (x *Float) MinPrec() uint { 212 if x.form != finite { 213 return 0 214 } 215 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 216 } 217 218 // Mode returns the rounding mode of x. 219 func (x *Float) Mode() RoundingMode { 220 return x.mode 221 } 222 223 // Acc returns the accuracy of x produced by the most recent operation. 224 func (x *Float) Acc() Accuracy { 225 return x.acc 226 } 227 228 // Sign returns: 229 // 230 // -1 if x < 0 231 // 0 if x is 0 232 // +1 if x > 0 233 // 234 func (x *Float) Sign() int { 235 if debugFloat { 236 x.validate() 237 } 238 if x.form == zero { 239 return 0 240 } 241 if x.neg { 242 return -1 243 } 244 return 1 245 } 246 247 // MantExp breaks x into its mantissa and exponent components 248 // and returns the exponent. If a non-nil mant argument is 249 // provided its value is set to the mantissa of x, with the 250 // same precision and rounding mode as x. The components 251 // satisfy x == mant 2**exp, with 0.5 <= |mant| < 1.0. 252 // Calling MantExp with a nil argument is an efficient way to 253 // get the exponent of the receiver. 254 // 255 // Special cases are: 256 // 257 // ( 0).MantExp(mant) = 0, with mant set to 0 258 // (Inf).MantExp(mant) = 0, with mant set to Inf 259 // 260 // x and mant may be the same in which case x is set to its 261 // mantissa value. 262 func (x *Float) MantExp(mant *Float) (exp int) { 263 if debugFloat { 264 x.validate() 265 } 266 if x.form == finite { 267 exp = int(x.exp) 268 } 269 if mant != nil { 270 mant.Copy(x) 271 if mant.form == finite { 272 mant.exp = 0 273 } 274 } 275 return 276 } 277 278 func (z *Float) setExpAndRound(exp int64, sbit uint) { 279 if exp < MinExp { 280 // underflow 281 z.acc = makeAcc(z.neg) 282 z.form = zero 283 return 284 } 285 286 if exp > MaxExp { 287 // overflow 288 z.acc = makeAcc(!z.neg) 289 z.form = inf 290 return 291 } 292 293 z.form = finite 294 z.exp = int32(exp) 295 z.round(sbit) 296 } 297 298 // SetMantExp sets z to mant 2**exp and and returns z. 299 // The result z has the same precision and rounding mode 300 // as mant. SetMantExp is an inverse of MantExp but does 301 // not require 0.5 <= |mant| < 1.0. Specifically: 302 // 303 // mant := new(Float) 304 // new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true 305 // 306 // Special cases are: 307 // 308 // z.SetMantExp( 0, exp) = 0 309 // z.SetMantExp(Inf, exp) = Inf 310 // 311 // z and mant may be the same in which case z's exponent 312 // is set to exp. 313 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 314 if debugFloat { 315 z.validate() 316 mant.validate() 317 } 318 z.Copy(mant) 319 if z.form != finite { 320 return z 321 } 322 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 323 return z 324 } 325 326 // Signbit returns true if x is negative or negative zero. 327 func (x *Float) Signbit() bool { 328 return x.neg 329 } 330 331 // IsInf reports whether x is +Inf or -Inf. 332 func (x *Float) IsInf() bool { 333 return x.form == inf 334 } 335 336 // IsInt reports whether x is an integer. 337 // Inf values are not integers. 338 func (x *Float) IsInt() bool { 339 if debugFloat { 340 x.validate() 341 } 342 // special cases 343 if x.form != finite { 344 return x.form == zero 345 } 346 // x.form == finite 347 if x.exp <= 0 { 348 return false 349 } 350 // x.exp > 0 351 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 352 } 353 354 // debugging support 355 func (x *Float) validate() { 356 if !debugFloat { 357 // avoid performance bugs 358 panic("validate called but debugFloat is not set") 359 } 360 if x.form != finite { 361 return 362 } 363 m := len(x.mant) 364 if m == 0 { 365 panic("nonzero finite number with empty mantissa") 366 } 367 const msb = 1 << (_W - 1) 368 if x.mant[m-1]&msb == 0 { 369 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 370 } 371 if x.prec == 0 { 372 panic("zero precision finite number") 373 } 374 } 375 376 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 377 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 378 // have before calling round. z's mantissa must be normalized (with the msb set) 379 // or empty. 380 // 381 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 382 // sign of z. For correct rounding, the sign of z must be set correctly before 383 // calling round. 384 func (z *Float) round(sbit uint) { 385 if debugFloat { 386 z.validate() 387 } 388 389 z.acc = Exact 390 if z.form != finite { 391 // 0 or Inf => nothing left to do 392 return 393 } 394 // z.form == finite && len(z.mant) > 0 395 // m > 0 implies z.prec > 0 (checked by validate) 396 397 m := uint32(len(z.mant)) // present mantissa length in words 398 bits := m * _W // present mantissa bits 399 if bits <= z.prec { 400 // mantissa fits => nothing to do 401 return 402 } 403 // bits > z.prec 404 405 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 406 407 // Rounding is based on two bits: the rounding bit (rbit) and the 408 // sticky bit (sbit). The rbit is the bit immediately before the 409 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 410 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 411 // 412 // rbit sbit => "fractional part" 413 // 414 // 0 0 == 0 415 // 0 1 > 0 , < 0.5 416 // 1 0 == 0.5 417 // 1 1 > 0.5, < 1.0 418 419 // bits > z.prec: mantissa too large => round 420 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 421 rbit := z.mant.bit(r) // rounding bit 422 if sbit == 0 { 423 sbit = z.mant.sticky(r) 424 } 425 if debugFloat && sbit&^1 != 0 { 426 panic(fmt.Sprintf("invalid sbit %#x", sbit)) 427 } 428 429 // convert ToXInf rounding modes 430 mode := z.mode 431 switch mode { 432 case ToNegativeInf: 433 mode = ToZero 434 if z.neg { 435 mode = AwayFromZero 436 } 437 case ToPositiveInf: 438 mode = AwayFromZero 439 if z.neg { 440 mode = ToZero 441 } 442 } 443 444 // cut off extra words 445 if m > n { 446 copy(z.mant, z.mant[m-n:]) // move n last words to front 447 z.mant = z.mant[:n] 448 } 449 450 // determine number of trailing zero bits t 451 t := n*_W - z.prec // 0 <= t < _W 452 lsb := Word(1) << t 453 454 // make rounding decision 455 // TODO(gri) This can be simplified (see Bits.round in bits_test.go). 456 switch mode { 457 case ToZero: 458 // nothing to do 459 case ToNearestEven, ToNearestAway: 460 if rbit == 0 { 461 // rounding bits == 0b0x 462 mode = ToZero 463 } else if sbit == 1 { 464 // rounding bits == 0b11 465 mode = AwayFromZero 466 } 467 case AwayFromZero: 468 if rbit|sbit == 0 { 469 mode = ToZero 470 } 471 default: 472 // ToXInf modes have been converted to ToZero or AwayFromZero 473 panic("unreachable") 474 } 475 476 // round and determine accuracy 477 switch mode { 478 case ToZero: 479 if rbit|sbit != 0 { 480 z.acc = Below 481 } 482 483 case ToNearestEven, ToNearestAway: 484 if debugFloat && rbit != 1 { 485 panic("internal error in rounding") 486 } 487 if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 { 488 z.acc = Below 489 break 490 } 491 // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0 492 fallthrough 493 494 case AwayFromZero: 495 // add 1 to mantissa 496 if addVW(z.mant, z.mant, lsb) != 0 { 497 // overflow => shift mantissa right by 1 and add msb 498 shrVU(z.mant, z.mant, 1) 499 z.mant[n-1] |= 1 << (_W - 1) 500 // adjust exponent 501 if z.exp < MaxExp { 502 z.exp++ 503 } else { 504 // exponent overflow 505 z.acc = makeAcc(!z.neg) 506 z.form = inf 507 return 508 } 509 } 510 z.acc = Above 511 } 512 513 // zero out trailing bits in least-significant word 514 z.mant[0] &^= lsb - 1 515 516 // update accuracy 517 if z.acc != Exact && z.neg { 518 z.acc = -z.acc 519 } 520 521 if debugFloat { 522 z.validate() 523 } 524 525 return 526 } 527 528 func (z *Float) setBits64(neg bool, x uint64) *Float { 529 if z.prec == 0 { 530 z.prec = 64 531 } 532 z.acc = Exact 533 z.neg = neg 534 if x == 0 { 535 z.form = zero 536 return z 537 } 538 // x != 0 539 z.form = finite 540 s := nlz64(x) 541 z.mant = z.mant.setUint64(x << s) 542 z.exp = int32(64 - s) // always fits 543 if z.prec < 64 { 544 z.round(0) 545 } 546 return z 547 } 548 549 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 550 // If z's precision is 0, it is changed to 64 (and rounding will have 551 // no effect). 552 func (z *Float) SetUint64(x uint64) *Float { 553 return z.setBits64(false, x) 554 } 555 556 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 557 // If z's precision is 0, it is changed to 64 (and rounding will have 558 // no effect). 559 func (z *Float) SetInt64(x int64) *Float { 560 u := x 561 if u < 0 { 562 u = -u 563 } 564 // We cannot simply call z.SetUint64(uint64(u)) and change 565 // the sign afterwards because the sign affects rounding. 566 return z.setBits64(x < 0, uint64(u)) 567 } 568 569 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 570 // If z's precision is 0, it is changed to 53 (and rounding will have 571 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 572 func (z *Float) SetFloat64(x float64) *Float { 573 if z.prec == 0 { 574 z.prec = 53 575 } 576 if math.IsNaN(x) { 577 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 578 } 579 z.acc = Exact 580 z.neg = math.Signbit(x) // handle -0, -Inf correctly 581 if x == 0 { 582 z.form = zero 583 return z 584 } 585 if math.IsInf(x, 0) { 586 z.form = inf 587 return z 588 } 589 // normalized x != 0 590 z.form = finite 591 fmant, exp := math.Frexp(x) // get normalized mantissa 592 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 593 z.exp = int32(exp) // always fits 594 if z.prec < 53 { 595 z.round(0) 596 } 597 return z 598 } 599 600 // fnorm normalizes mantissa m by shifting it to the left 601 // such that the msb of the most-significant word (msw) is 1. 602 // It returns the shift amount. It assumes that len(m) != 0. 603 func fnorm(m nat) int64 { 604 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 605 panic("msw of mantissa is 0") 606 } 607 s := nlz(m[len(m)-1]) 608 if s > 0 { 609 c := shlVU(m, m, s) 610 if debugFloat && c != 0 { 611 panic("nlz or shlVU incorrect") 612 } 613 } 614 return int64(s) 615 } 616 617 // SetInt sets z to the (possibly rounded) value of x and returns z. 618 // If z's precision is 0, it is changed to the larger of x.BitLen() 619 // or 64 (and rounding will have no effect). 620 func (z *Float) SetInt(x *Int) *Float { 621 // TODO(gri) can be more efficient if z.prec > 0 622 // but small compared to the size of x, or if there 623 // are many trailing 0's. 624 bits := uint32(x.BitLen()) 625 if z.prec == 0 { 626 z.prec = umax32(bits, 64) 627 } 628 z.acc = Exact 629 z.neg = x.neg 630 if len(x.abs) == 0 { 631 z.form = zero 632 return z 633 } 634 // x != 0 635 z.mant = z.mant.set(x.abs) 636 fnorm(z.mant) 637 z.setExpAndRound(int64(bits), 0) 638 return z 639 } 640 641 // SetRat sets z to the (possibly rounded) value of x and returns z. 642 // If z's precision is 0, it is changed to the largest of a.BitLen(), 643 // b.BitLen(), or 64; with x = a/b. 644 func (z *Float) SetRat(x *Rat) *Float { 645 if x.IsInt() { 646 return z.SetInt(x.Num()) 647 } 648 var a, b Float 649 a.SetInt(x.Num()) 650 b.SetInt(x.Denom()) 651 if z.prec == 0 { 652 z.prec = umax32(a.prec, b.prec) 653 } 654 return z.Quo(&a, &b) 655 } 656 657 // SetInf sets z to the infinite Float -Inf if signbit is 658 // set, or +Inf if signbit is not set, and returns z. The 659 // precision of z is unchanged and the result is always 660 // Exact. 661 func (z *Float) SetInf(signbit bool) *Float { 662 z.acc = Exact 663 z.form = inf 664 z.neg = signbit 665 return z 666 } 667 668 // Set sets z to the (possibly rounded) value of x and returns z. 669 // If z's precision is 0, it is changed to the precision of x 670 // before setting z (and rounding will have no effect). 671 // Rounding is performed according to z's precision and rounding 672 // mode; and z's accuracy reports the result error relative to the 673 // exact (not rounded) result. 674 func (z *Float) Set(x *Float) *Float { 675 if debugFloat { 676 x.validate() 677 } 678 z.acc = Exact 679 if z != x { 680 z.form = x.form 681 z.neg = x.neg 682 if x.form == finite { 683 z.exp = x.exp 684 z.mant = z.mant.set(x.mant) 685 } 686 if z.prec == 0 { 687 z.prec = x.prec 688 } else if z.prec < x.prec { 689 z.round(0) 690 } 691 } 692 return z 693 } 694 695 // Copy sets z to x, with the same precision, rounding mode, and 696 // accuracy as x, and returns z. x is not changed even if z and 697 // x are the same. 698 func (z *Float) Copy(x *Float) *Float { 699 if debugFloat { 700 x.validate() 701 } 702 if z != x { 703 z.prec = x.prec 704 z.mode = x.mode 705 z.acc = x.acc 706 z.form = x.form 707 z.neg = x.neg 708 if z.form == finite { 709 z.mant = z.mant.set(x.mant) 710 z.exp = x.exp 711 } 712 } 713 return z 714 } 715 716 // msb32 returns the 32 most significant bits of x. 717 func msb32(x nat) uint32 { 718 i := len(x) - 1 719 if i < 0 { 720 return 0 721 } 722 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 723 panic("x not normalized") 724 } 725 switch _W { 726 case 32: 727 return uint32(x[i]) 728 case 64: 729 return uint32(x[i] >> 32) 730 } 731 panic("unreachable") 732 } 733 734 // msb64 returns the 64 most significant bits of x. 735 func msb64(x nat) uint64 { 736 i := len(x) - 1 737 if i < 0 { 738 return 0 739 } 740 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 741 panic("x not normalized") 742 } 743 switch _W { 744 case 32: 745 v := uint64(x[i]) << 32 746 if i > 0 { 747 v |= uint64(x[i-1]) 748 } 749 return v 750 case 64: 751 return uint64(x[i]) 752 } 753 panic("unreachable") 754 } 755 756 // Uint64 returns the unsigned integer resulting from truncating x 757 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 758 // if x is an integer and Below otherwise. 759 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 760 // for x > math.MaxUint64. 761 func (x *Float) Uint64() (uint64, Accuracy) { 762 if debugFloat { 763 x.validate() 764 } 765 766 switch x.form { 767 case finite: 768 if x.neg { 769 return 0, Above 770 } 771 // 0 < x < +Inf 772 if x.exp <= 0 { 773 // 0 < x < 1 774 return 0, Below 775 } 776 // 1 <= x < Inf 777 if x.exp <= 64 { 778 // u = trunc(x) fits into a uint64 779 u := msb64(x.mant) >> (64 - uint32(x.exp)) 780 if x.MinPrec() <= 64 { 781 return u, Exact 782 } 783 return u, Below // x truncated 784 } 785 // x too large 786 return math.MaxUint64, Below 787 788 case zero: 789 return 0, Exact 790 791 case inf: 792 if x.neg { 793 return 0, Above 794 } 795 return math.MaxUint64, Below 796 } 797 798 panic("unreachable") 799 } 800 801 // Int64 returns the integer resulting from truncating x towards zero. 802 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 803 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 804 // The result is (math.MinInt64, Above) for x < math.MinInt64, 805 // and (math.MaxInt64, Below) for x > math.MaxInt64. 806 func (x *Float) Int64() (int64, Accuracy) { 807 if debugFloat { 808 x.validate() 809 } 810 811 switch x.form { 812 case finite: 813 // 0 < |x| < +Inf 814 acc := makeAcc(x.neg) 815 if x.exp <= 0 { 816 // 0 < |x| < 1 817 return 0, acc 818 } 819 // x.exp > 0 820 821 // 1 <= |x| < +Inf 822 if x.exp <= 63 { 823 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 824 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 825 if x.neg { 826 i = -i 827 } 828 if x.MinPrec() <= uint(x.exp) { 829 return i, Exact 830 } 831 return i, acc // x truncated 832 } 833 if x.neg { 834 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 835 if x.exp == 64 && x.MinPrec() == 1 { 836 acc = Exact 837 } 838 return math.MinInt64, acc 839 } 840 // x too large 841 return math.MaxInt64, Below 842 843 case zero: 844 return 0, Exact 845 846 case inf: 847 if x.neg { 848 return math.MinInt64, Above 849 } 850 return math.MaxInt64, Below 851 } 852 853 panic("unreachable") 854 } 855 856 // Float32 returns the float32 value nearest to x. If x is too small to be 857 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 858 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 859 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 860 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 861 func (x *Float) Float32() (float32, Accuracy) { 862 if debugFloat { 863 x.validate() 864 } 865 866 switch x.form { 867 case finite: 868 // 0 < |x| < +Inf 869 870 const ( 871 fbits = 32 // float size 872 mbits = 23 // mantissa size (excluding implicit msb) 873 ebits = fbits - mbits - 1 // 8 exponent size 874 bias = 1<<(ebits-1) - 1 // 127 exponent bias 875 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 876 emin = 1 - bias // -126 smallest unbiased exponent (normal) 877 emax = bias // 127 largest unbiased exponent (normal) 878 ) 879 880 // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa. 881 e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0 882 p := mbits + 1 // precision of normal float 883 884 // If the exponent is too small, we may have a denormal number 885 // in which case we have fewer mantissa bits available: reduce 886 // precision accordingly. 887 if e < emin { 888 p -= emin - int(e) 889 // Make sure we have at least 1 bit so that we don't 890 // lose numbers rounded up to the smallest denormal. 891 if p < 1 { 892 p = 1 893 } 894 } 895 896 // round 897 var r Float 898 r.prec = uint32(p) 899 r.Set(x) 900 e = r.exp - 1 901 902 // Rounding may have caused r to overflow to Inf 903 // (rounding never causes underflows to 0). 904 if r.form == inf { 905 e = emax + 1 // cause overflow below 906 } 907 908 // If the exponent is too large, overflow to Inf. 909 if e > emax { 910 // overflow 911 if x.neg { 912 return float32(math.Inf(-1)), Below 913 } 914 return float32(math.Inf(+1)), Above 915 } 916 // e <= emax 917 918 // Determine sign, biased exponent, and mantissa. 919 var sign, bexp, mant uint32 920 if x.neg { 921 sign = 1 << (fbits - 1) 922 } 923 924 // Rounding may have caused a denormal number to 925 // become normal. Check again. 926 if e < emin { 927 // denormal number 928 if e < dmin { 929 // underflow to 0 930 if x.neg { 931 var z float32 932 return -z, Above 933 } 934 return 0.0, Below 935 } 936 // bexp = 0 937 mant = msb32(r.mant) >> (fbits - r.prec) 938 } else { 939 // normal number: emin <= e <= emax 940 bexp = uint32(e+bias) << mbits 941 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 942 } 943 944 return math.Float32frombits(sign | bexp | mant), r.acc 945 946 case zero: 947 if x.neg { 948 var z float32 949 return -z, Exact 950 } 951 return 0.0, Exact 952 953 case inf: 954 if x.neg { 955 return float32(math.Inf(-1)), Exact 956 } 957 return float32(math.Inf(+1)), Exact 958 } 959 960 panic("unreachable") 961 } 962 963 // Float64 returns the float64 value nearest to x. If x is too small to be 964 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 965 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 966 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 967 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 968 func (x *Float) Float64() (float64, Accuracy) { 969 if debugFloat { 970 x.validate() 971 } 972 973 switch x.form { 974 case finite: 975 // 0 < |x| < +Inf 976 977 const ( 978 fbits = 64 // float size 979 mbits = 52 // mantissa size (excluding implicit msb) 980 ebits = fbits - mbits - 1 // 11 exponent size 981 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 982 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 983 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 984 emax = bias // 1023 largest unbiased exponent (normal) 985 ) 986 987 // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa. 988 e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0 989 p := mbits + 1 // precision of normal float 990 991 // If the exponent is too small, we may have a denormal number 992 // in which case we have fewer mantissa bits available: reduce 993 // precision accordingly. 994 if e < emin { 995 p -= emin - int(e) 996 // Make sure we have at least 1 bit so that we don't 997 // lose numbers rounded up to the smallest denormal. 998 if p < 1 { 999 p = 1 1000 } 1001 } 1002 1003 // round 1004 var r Float 1005 r.prec = uint32(p) 1006 r.Set(x) 1007 e = r.exp - 1 1008 1009 // Rounding may have caused r to overflow to Inf 1010 // (rounding never causes underflows to 0). 1011 if r.form == inf { 1012 e = emax + 1 // cause overflow below 1013 } 1014 1015 // If the exponent is too large, overflow to Inf. 1016 if e > emax { 1017 // overflow 1018 if x.neg { 1019 return math.Inf(-1), Below 1020 } 1021 return math.Inf(+1), Above 1022 } 1023 // e <= emax 1024 1025 // Determine sign, biased exponent, and mantissa. 1026 var sign, bexp, mant uint64 1027 if x.neg { 1028 sign = 1 << (fbits - 1) 1029 } 1030 1031 // Rounding may have caused a denormal number to 1032 // become normal. Check again. 1033 if e < emin { 1034 // denormal number 1035 if e < dmin { 1036 // underflow to 0 1037 if x.neg { 1038 var z float64 1039 return -z, Above 1040 } 1041 return 0.0, Below 1042 } 1043 // bexp = 0 1044 mant = msb64(r.mant) >> (fbits - r.prec) 1045 } else { 1046 // normal number: emin <= e <= emax 1047 bexp = uint64(e+bias) << mbits 1048 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1049 } 1050 1051 return math.Float64frombits(sign | bexp | mant), r.acc 1052 1053 case zero: 1054 if x.neg { 1055 var z float64 1056 return -z, Exact 1057 } 1058 return 0.0, Exact 1059 1060 case inf: 1061 if x.neg { 1062 return math.Inf(-1), Exact 1063 } 1064 return math.Inf(+1), Exact 1065 } 1066 1067 panic("unreachable") 1068 } 1069 1070 // Int returns the result of truncating x towards zero; 1071 // or nil if x is an infinity. 1072 // The result is Exact if x.IsInt(); otherwise it is Below 1073 // for x > 0, and Above for x < 0. 1074 // If a non-nil *Int argument z is provided, Int stores 1075 // the result in z instead of allocating a new Int. 1076 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1077 if debugFloat { 1078 x.validate() 1079 } 1080 1081 if z == nil && x.form <= finite { 1082 z = new(Int) 1083 } 1084 1085 switch x.form { 1086 case finite: 1087 // 0 < |x| < +Inf 1088 acc := makeAcc(x.neg) 1089 if x.exp <= 0 { 1090 // 0 < |x| < 1 1091 return z.SetInt64(0), acc 1092 } 1093 // x.exp > 0 1094 1095 // 1 <= |x| < +Inf 1096 // determine minimum required precision for x 1097 allBits := uint(len(x.mant)) * _W 1098 exp := uint(x.exp) 1099 if x.MinPrec() <= exp { 1100 acc = Exact 1101 } 1102 // shift mantissa as needed 1103 if z == nil { 1104 z = new(Int) 1105 } 1106 z.neg = x.neg 1107 switch { 1108 case exp > allBits: 1109 z.abs = z.abs.shl(x.mant, exp-allBits) 1110 default: 1111 z.abs = z.abs.set(x.mant) 1112 case exp < allBits: 1113 z.abs = z.abs.shr(x.mant, allBits-exp) 1114 } 1115 return z, acc 1116 1117 case zero: 1118 return z.SetInt64(0), Exact 1119 1120 case inf: 1121 return nil, makeAcc(x.neg) 1122 } 1123 1124 panic("unreachable") 1125 } 1126 1127 // Rat returns the rational number corresponding to x; 1128 // or nil if x is an infinity. 1129 // The result is Exact is x is not an Inf. 1130 // If a non-nil *Rat argument z is provided, Rat stores 1131 // the result in z instead of allocating a new Rat. 1132 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1133 if debugFloat { 1134 x.validate() 1135 } 1136 1137 if z == nil && x.form <= finite { 1138 z = new(Rat) 1139 } 1140 1141 switch x.form { 1142 case finite: 1143 // 0 < |x| < +Inf 1144 allBits := int32(len(x.mant)) * _W 1145 // build up numerator and denominator 1146 z.a.neg = x.neg 1147 switch { 1148 case x.exp > allBits: 1149 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1150 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1151 // z already in normal form 1152 default: 1153 z.a.abs = z.a.abs.set(x.mant) 1154 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1155 // z already in normal form 1156 case x.exp < allBits: 1157 z.a.abs = z.a.abs.set(x.mant) 1158 t := z.b.abs.setUint64(1) 1159 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1160 z.norm() 1161 } 1162 return z, Exact 1163 1164 case zero: 1165 return z.SetInt64(0), Exact 1166 1167 case inf: 1168 return nil, makeAcc(x.neg) 1169 } 1170 1171 panic("unreachable") 1172 } 1173 1174 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1175 // and returns z. 1176 func (z *Float) Abs(x *Float) *Float { 1177 z.Set(x) 1178 z.neg = false 1179 return z 1180 } 1181 1182 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1183 // and returns z. 1184 func (z *Float) Neg(x *Float) *Float { 1185 z.Set(x) 1186 z.neg = !z.neg 1187 return z 1188 } 1189 1190 func validateBinaryOperands(x, y *Float) { 1191 if !debugFloat { 1192 // avoid performance bugs 1193 panic("validateBinaryOperands called but debugFloat is not set") 1194 } 1195 if len(x.mant) == 0 { 1196 panic("empty mantissa for x") 1197 } 1198 if len(y.mant) == 0 { 1199 panic("empty mantissa for y") 1200 } 1201 } 1202 1203 // z = x + y, ignoring signs of x and y for the addition 1204 // but using the sign of z for rounding the result. 1205 // x and y must have a non-empty mantissa and valid exponent. 1206 func (z *Float) uadd(x, y *Float) { 1207 // Note: This implementation requires 2 shifts most of the 1208 // time. It is also inefficient if exponents or precisions 1209 // differ by wide margins. The following article describes 1210 // an efficient (but much more complicated) implementation 1211 // compatible with the internal representation used here: 1212 // 1213 // Vincent Lefvre: "The Generic Multiple-Precision Floating- 1214 // Point Addition With Exact Rounding (as in the MPFR Library)" 1215 // http://www.vinc17.net/research/papers/rnc6.pdf 1216 1217 if debugFloat { 1218 validateBinaryOperands(x, y) 1219 } 1220 1221 // compute exponents ex, ey for mantissa with "binary point" 1222 // on the right (mantissa.0) - use int64 to avoid overflow 1223 ex := int64(x.exp) - int64(len(x.mant))*_W 1224 ey := int64(y.exp) - int64(len(y.mant))*_W 1225 1226 // TODO(gri) having a combined add-and-shift primitive 1227 // could make this code significantly faster 1228 switch { 1229 case ex < ey: 1230 // cannot re-use z.mant w/o testing for aliasing 1231 t := nat(nil).shl(y.mant, uint(ey-ex)) 1232 z.mant = z.mant.add(x.mant, t) 1233 default: 1234 // ex == ey, no shift needed 1235 z.mant = z.mant.add(x.mant, y.mant) 1236 case ex > ey: 1237 // cannot re-use z.mant w/o testing for aliasing 1238 t := nat(nil).shl(x.mant, uint(ex-ey)) 1239 z.mant = z.mant.add(t, y.mant) 1240 ex = ey 1241 } 1242 // len(z.mant) > 0 1243 1244 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1245 } 1246 1247 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1248 // but using the sign of z for rounding the result. 1249 // x and y must have a non-empty mantissa and valid exponent. 1250 func (z *Float) usub(x, y *Float) { 1251 // This code is symmetric to uadd. 1252 // We have not factored the common code out because 1253 // eventually uadd (and usub) should be optimized 1254 // by special-casing, and the code will diverge. 1255 1256 if debugFloat { 1257 validateBinaryOperands(x, y) 1258 } 1259 1260 ex := int64(x.exp) - int64(len(x.mant))*_W 1261 ey := int64(y.exp) - int64(len(y.mant))*_W 1262 1263 switch { 1264 case ex < ey: 1265 // cannot re-use z.mant w/o testing for aliasing 1266 t := nat(nil).shl(y.mant, uint(ey-ex)) 1267 z.mant = t.sub(x.mant, t) 1268 default: 1269 // ex == ey, no shift needed 1270 z.mant = z.mant.sub(x.mant, y.mant) 1271 case ex > ey: 1272 // cannot re-use z.mant w/o testing for aliasing 1273 t := nat(nil).shl(x.mant, uint(ex-ey)) 1274 z.mant = t.sub(t, y.mant) 1275 ex = ey 1276 } 1277 1278 // operands may have cancelled each other out 1279 if len(z.mant) == 0 { 1280 z.acc = Exact 1281 z.form = zero 1282 z.neg = false 1283 return 1284 } 1285 // len(z.mant) > 0 1286 1287 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1288 } 1289 1290 // z = x * y, ignoring signs of x and y for the multiplication 1291 // but using the sign of z for rounding the result. 1292 // x and y must have a non-empty mantissa and valid exponent. 1293 func (z *Float) umul(x, y *Float) { 1294 if debugFloat { 1295 validateBinaryOperands(x, y) 1296 } 1297 1298 // Note: This is doing too much work if the precision 1299 // of z is less than the sum of the precisions of x 1300 // and y which is often the case (e.g., if all floats 1301 // have the same precision). 1302 // TODO(gri) Optimize this for the common case. 1303 1304 e := int64(x.exp) + int64(y.exp) 1305 z.mant = z.mant.mul(x.mant, y.mant) 1306 1307 z.setExpAndRound(e-fnorm(z.mant), 0) 1308 } 1309 1310 // z = x / y, ignoring signs of x and y for the division 1311 // but using the sign of z for rounding the result. 1312 // x and y must have a non-empty mantissa and valid exponent. 1313 func (z *Float) uquo(x, y *Float) { 1314 if debugFloat { 1315 validateBinaryOperands(x, y) 1316 } 1317 1318 // mantissa length in words for desired result precision + 1 1319 // (at least one extra bit so we get the rounding bit after 1320 // the division) 1321 n := int(z.prec/_W) + 1 1322 1323 // compute adjusted x.mant such that we get enough result precision 1324 xadj := x.mant 1325 if d := n - len(x.mant) + len(y.mant); d > 0 { 1326 // d extra words needed => add d "0 digits" to x 1327 xadj = make(nat, len(x.mant)+d) 1328 copy(xadj[d:], x.mant) 1329 } 1330 // TODO(gri): If we have too many digits (d < 0), we should be able 1331 // to shorten x for faster division. But we must be extra careful 1332 // with rounding in that case. 1333 1334 // Compute d before division since there may be aliasing of x.mant 1335 // (via xadj) or y.mant with z.mant. 1336 d := len(xadj) - len(y.mant) 1337 1338 // divide 1339 var r nat 1340 z.mant, r = z.mant.div(nil, xadj, y.mant) 1341 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1342 1343 // The result is long enough to include (at least) the rounding bit. 1344 // If there's a non-zero remainder, the corresponding fractional part 1345 // (if it were computed), would have a non-zero sticky bit (if it were 1346 // zero, it couldn't have a non-zero remainder). 1347 var sbit uint 1348 if len(r) > 0 { 1349 sbit = 1 1350 } 1351 1352 z.setExpAndRound(e-fnorm(z.mant), sbit) 1353 } 1354 1355 // ucmp returns -1, 0, or +1, depending on whether 1356 // |x| < |y|, |x| == |y|, or |x| > |y|. 1357 // x and y must have a non-empty mantissa and valid exponent. 1358 func (x *Float) ucmp(y *Float) int { 1359 if debugFloat { 1360 validateBinaryOperands(x, y) 1361 } 1362 1363 switch { 1364 case x.exp < y.exp: 1365 return -1 1366 case x.exp > y.exp: 1367 return +1 1368 } 1369 // x.exp == y.exp 1370 1371 // compare mantissas 1372 i := len(x.mant) 1373 j := len(y.mant) 1374 for i > 0 || j > 0 { 1375 var xm, ym Word 1376 if i > 0 { 1377 i-- 1378 xm = x.mant[i] 1379 } 1380 if j > 0 { 1381 j-- 1382 ym = y.mant[j] 1383 } 1384 switch { 1385 case xm < ym: 1386 return -1 1387 case xm > ym: 1388 return +1 1389 } 1390 } 1391 1392 return 0 1393 } 1394 1395 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1396 // 1397 // When neither the inputs nor result are NaN, the sign of a product or 1398 // quotient is the exclusive OR of the operands signs; the sign of a sum, 1399 // or of a difference xy regarded as a sum x+(y), differs from at most 1400 // one of the addends signs; and the sign of the result of conversions, 1401 // the quantize operation, the roundToIntegral operations, and the 1402 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1403 // These rules shall apply even when operands or results are zero or infinite. 1404 // 1405 // When the sum of two operands with opposite signs (or the difference of 1406 // two operands with like signs) is exactly zero, the sign of that sum (or 1407 // difference) shall be +0 in all rounding-direction attributes except 1408 // roundTowardNegative; under that attribute, the sign of an exact zero 1409 // sum (or difference) shall be 0. However, x+x = x(x) retains the same 1410 // sign as x even when x is zero. 1411 // 1412 // See also: https://play.golang.org/p/RtH3UCt5IH 1413 1414 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1415 // it is changed to the larger of x's or y's precision before the operation. 1416 // Rounding is performed according to z's precision and rounding mode; and 1417 // z's accuracy reports the result error relative to the exact (not rounded) 1418 // result. Add panics with ErrNaN if x and y are infinities with opposite 1419 // signs. The value of z is undefined in that case. 1420 // 1421 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1422 func (z *Float) Add(x, y *Float) *Float { 1423 if debugFloat { 1424 x.validate() 1425 y.validate() 1426 } 1427 1428 if z.prec == 0 { 1429 z.prec = umax32(x.prec, y.prec) 1430 } 1431 1432 if x.form == finite && y.form == finite { 1433 // x + y (commom case) 1434 z.neg = x.neg 1435 if x.neg == y.neg { 1436 // x + y == x + y 1437 // (-x) + (-y) == -(x + y) 1438 z.uadd(x, y) 1439 } else { 1440 // x + (-y) == x - y == -(y - x) 1441 // (-x) + y == y - x == -(x - y) 1442 if x.ucmp(y) > 0 { 1443 z.usub(x, y) 1444 } else { 1445 z.neg = !z.neg 1446 z.usub(y, x) 1447 } 1448 } 1449 return z 1450 } 1451 1452 if x.form == inf && y.form == inf && x.neg != y.neg { 1453 // +Inf + -Inf 1454 // -Inf + +Inf 1455 // value of z is undefined but make sure it's valid 1456 z.acc = Exact 1457 z.form = zero 1458 z.neg = false 1459 panic(ErrNaN{"addition of infinities with opposite signs"}) 1460 } 1461 1462 if x.form == zero && y.form == zero { 1463 // 0 + 0 1464 z.acc = Exact 1465 z.form = zero 1466 z.neg = x.neg && y.neg // -0 + -0 == -0 1467 return z 1468 } 1469 1470 if x.form == inf || y.form == zero { 1471 // Inf + y 1472 // x + 0 1473 return z.Set(x) 1474 } 1475 1476 // 0 + y 1477 // x + Inf 1478 return z.Set(y) 1479 } 1480 1481 // Sub sets z to the rounded difference x-y and returns z. 1482 // Precision, rounding, and accuracy reporting are as for Add. 1483 // Sub panics with ErrNaN if x and y are infinities with equal 1484 // signs. The value of z is undefined in that case. 1485 func (z *Float) Sub(x, y *Float) *Float { 1486 if debugFloat { 1487 x.validate() 1488 y.validate() 1489 } 1490 1491 if z.prec == 0 { 1492 z.prec = umax32(x.prec, y.prec) 1493 } 1494 1495 if x.form == finite && y.form == finite { 1496 // x - y (common case) 1497 z.neg = x.neg 1498 if x.neg != y.neg { 1499 // x - (-y) == x + y 1500 // (-x) - y == -(x + y) 1501 z.uadd(x, y) 1502 } else { 1503 // x - y == x - y == -(y - x) 1504 // (-x) - (-y) == y - x == -(x - y) 1505 if x.ucmp(y) > 0 { 1506 z.usub(x, y) 1507 } else { 1508 z.neg = !z.neg 1509 z.usub(y, x) 1510 } 1511 } 1512 return z 1513 } 1514 1515 if x.form == inf && y.form == inf && x.neg == y.neg { 1516 // +Inf - +Inf 1517 // -Inf - -Inf 1518 // value of z is undefined but make sure it's valid 1519 z.acc = Exact 1520 z.form = zero 1521 z.neg = false 1522 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1523 } 1524 1525 if x.form == zero && y.form == zero { 1526 // 0 - 0 1527 z.acc = Exact 1528 z.form = zero 1529 z.neg = x.neg && !y.neg // -0 - +0 == -0 1530 return z 1531 } 1532 1533 if x.form == inf || y.form == zero { 1534 // Inf - y 1535 // x - 0 1536 return z.Set(x) 1537 } 1538 1539 // 0 - y 1540 // x - Inf 1541 return z.Neg(y) 1542 } 1543 1544 // Mul sets z to the rounded product x*y and returns z. 1545 // Precision, rounding, and accuracy reporting are as for Add. 1546 // Mul panics with ErrNaN if one operand is zero and the other 1547 // operand an infinity. The value of z is undefined in that case. 1548 func (z *Float) Mul(x, y *Float) *Float { 1549 if debugFloat { 1550 x.validate() 1551 y.validate() 1552 } 1553 1554 if z.prec == 0 { 1555 z.prec = umax32(x.prec, y.prec) 1556 } 1557 1558 z.neg = x.neg != y.neg 1559 1560 if x.form == finite && y.form == finite { 1561 // x * y (common case) 1562 z.umul(x, y) 1563 return z 1564 } 1565 1566 z.acc = Exact 1567 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1568 // 0 * Inf 1569 // Inf * 0 1570 // value of z is undefined but make sure it's valid 1571 z.form = zero 1572 z.neg = false 1573 panic(ErrNaN{"multiplication of zero with infinity"}) 1574 } 1575 1576 if x.form == inf || y.form == inf { 1577 // Inf * y 1578 // x * Inf 1579 z.form = inf 1580 return z 1581 } 1582 1583 // 0 * y 1584 // x * 0 1585 z.form = zero 1586 return z 1587 } 1588 1589 // Quo sets z to the rounded quotient x/y and returns z. 1590 // Precision, rounding, and accuracy reporting are as for Add. 1591 // Quo panics with ErrNaN if both operands are zero or infinities. 1592 // The value of z is undefined in that case. 1593 func (z *Float) Quo(x, y *Float) *Float { 1594 if debugFloat { 1595 x.validate() 1596 y.validate() 1597 } 1598 1599 if z.prec == 0 { 1600 z.prec = umax32(x.prec, y.prec) 1601 } 1602 1603 z.neg = x.neg != y.neg 1604 1605 if x.form == finite && y.form == finite { 1606 // x / y (common case) 1607 z.uquo(x, y) 1608 return z 1609 } 1610 1611 z.acc = Exact 1612 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1613 // 0 / 0 1614 // Inf / Inf 1615 // value of z is undefined but make sure it's valid 1616 z.form = zero 1617 z.neg = false 1618 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1619 } 1620 1621 if x.form == zero || y.form == inf { 1622 // 0 / y 1623 // x / Inf 1624 z.form = zero 1625 return z 1626 } 1627 1628 // x / 0 1629 // Inf / y 1630 z.form = inf 1631 return z 1632 } 1633 1634 // Cmp compares x and y and returns: 1635 // 1636 // -1 if x < y 1637 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1638 // +1 if x > y 1639 // 1640 func (x *Float) Cmp(y *Float) int { 1641 if debugFloat { 1642 x.validate() 1643 y.validate() 1644 } 1645 1646 mx := x.ord() 1647 my := y.ord() 1648 switch { 1649 case mx < my: 1650 return -1 1651 case mx > my: 1652 return +1 1653 } 1654 // mx == my 1655 1656 // only if |mx| == 1 we have to compare the mantissae 1657 switch mx { 1658 case -1: 1659 return y.ucmp(x) 1660 case +1: 1661 return x.ucmp(y) 1662 } 1663 1664 return 0 1665 } 1666 1667 // ord classifies x and returns: 1668 // 1669 // -2 if -Inf == x 1670 // -1 if -Inf < x < 0 1671 // 0 if x == 0 (signed or unsigned) 1672 // +1 if 0 < x < +Inf 1673 // +2 if x == +Inf 1674 // 1675 func (x *Float) ord() int { 1676 var m int 1677 switch x.form { 1678 case finite: 1679 m = 1 1680 case zero: 1681 return 0 1682 case inf: 1683 m = 2 1684 } 1685 if x.neg { 1686 m = -m 1687 } 1688 return m 1689 } 1690 1691 func umax32(x, y uint32) uint32 { 1692 if x > y { 1693 return x 1694 } 1695 return y 1696 } 1697