1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/MathExtras.h" 24 #include <cstring> 25 #include <limits.h> 26 27 using namespace llvm; 28 29 /// A macro used to combine two fcCategory enums into one key which can be used 30 /// in a switch statement to classify how the interaction of two APFloat's 31 /// categories affects an operation. 32 /// 33 /// TODO: If clang source code is ever allowed to use constexpr in its own 34 /// codebase, change this into a static inline function. 35 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 36 37 /* Assumed in hexadecimal significand parsing, and conversion to 38 hexadecimal strings. */ 39 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 40 41 namespace llvm { 42 43 /* Represents floating point arithmetic semantics. */ 44 struct fltSemantics { 45 /* The largest E such that 2^E is representable; this matches the 46 definition of IEEE 754. */ 47 APFloat::ExponentType maxExponent; 48 49 /* The smallest E such that 2^E is a normalized number; this 50 matches the definition of IEEE 754. */ 51 APFloat::ExponentType minExponent; 52 53 /* Number of bits in the significand. This includes the integer 54 bit. */ 55 unsigned int precision; 56 57 /* Number of bits actually used in the semantics. */ 58 unsigned int sizeInBits; 59 }; 60 61 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; 62 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; 63 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; 64 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; 65 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; 66 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; 67 68 /* The PowerPC format consists of two doubles. It does not map cleanly 69 onto the usual format above. It is approximated using twice the 70 mantissa bits. Note that for exponents near the double minimum, 71 we no longer can represent the full 106 mantissa bits, so those 72 will be treated as denormal numbers. 73 74 FIXME: While this approximation is equivalent to what GCC uses for 75 compile-time arithmetic on PPC double-double numbers, it is not able 76 to represent all possible values held by a PPC double-double number, 77 for example: (long double) 1.0 + (long double) 0x1p-106 78 Should this be replaced by a full emulation of PPC double-double? */ 79 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; 80 81 /* A tight upper bound on number of parts required to hold the value 82 pow(5, power) is 83 84 power * 815 / (351 * integerPartWidth) + 1 85 86 However, whilst the result may require only this many parts, 87 because we are multiplying two values to get it, the 88 multiplication may require an extra part with the excess part 89 being zero (consider the trivial case of 1 * 1, tcFullMultiply 90 requires two parts to hold the single-part result). So we add an 91 extra one to guarantee enough space whilst multiplying. */ 92 const unsigned int maxExponent = 16383; 93 const unsigned int maxPrecision = 113; 94 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 95 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 96 / (351 * integerPartWidth)); 97 } 98 99 /* A bunch of private, handy routines. */ 100 101 static inline unsigned int 102 partCountForBits(unsigned int bits) 103 { 104 return ((bits) + integerPartWidth - 1) / integerPartWidth; 105 } 106 107 /* Returns 0U-9U. Return values >= 10U are not digits. */ 108 static inline unsigned int 109 decDigitValue(unsigned int c) 110 { 111 return c - '0'; 112 } 113 114 /* Return the value of a decimal exponent of the form 115 [+-]ddddddd. 116 117 If the exponent overflows, returns a large exponent with the 118 appropriate sign. */ 119 static int 120 readExponent(StringRef::iterator begin, StringRef::iterator end) 121 { 122 bool isNegative; 123 unsigned int absExponent; 124 const unsigned int overlargeExponent = 24000; /* FIXME. */ 125 StringRef::iterator p = begin; 126 127 assert(p != end && "Exponent has no digits"); 128 129 isNegative = (*p == '-'); 130 if (*p == '-' || *p == '+') { 131 p++; 132 assert(p != end && "Exponent has no digits"); 133 } 134 135 absExponent = decDigitValue(*p++); 136 assert(absExponent < 10U && "Invalid character in exponent"); 137 138 for (; p != end; ++p) { 139 unsigned int value; 140 141 value = decDigitValue(*p); 142 assert(value < 10U && "Invalid character in exponent"); 143 144 value += absExponent * 10; 145 if (absExponent >= overlargeExponent) { 146 absExponent = overlargeExponent; 147 p = end; /* outwit assert below */ 148 break; 149 } 150 absExponent = value; 151 } 152 153 assert(p == end && "Invalid exponent in exponent"); 154 155 if (isNegative) 156 return -(int) absExponent; 157 else 158 return (int) absExponent; 159 } 160 161 /* This is ugly and needs cleaning up, but I don't immediately see 162 how whilst remaining safe. */ 163 static int 164 totalExponent(StringRef::iterator p, StringRef::iterator end, 165 int exponentAdjustment) 166 { 167 int unsignedExponent; 168 bool negative, overflow; 169 int exponent = 0; 170 171 assert(p != end && "Exponent has no digits"); 172 173 negative = *p == '-'; 174 if (*p == '-' || *p == '+') { 175 p++; 176 assert(p != end && "Exponent has no digits"); 177 } 178 179 unsignedExponent = 0; 180 overflow = false; 181 for (; p != end; ++p) { 182 unsigned int value; 183 184 value = decDigitValue(*p); 185 assert(value < 10U && "Invalid character in exponent"); 186 187 unsignedExponent = unsignedExponent * 10 + value; 188 if (unsignedExponent > 32767) { 189 overflow = true; 190 break; 191 } 192 } 193 194 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 195 overflow = true; 196 197 if (!overflow) { 198 exponent = unsignedExponent; 199 if (negative) 200 exponent = -exponent; 201 exponent += exponentAdjustment; 202 if (exponent > 32767 || exponent < -32768) 203 overflow = true; 204 } 205 206 if (overflow) 207 exponent = negative ? -32768: 32767; 208 209 return exponent; 210 } 211 212 static StringRef::iterator 213 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 214 StringRef::iterator *dot) 215 { 216 StringRef::iterator p = begin; 217 *dot = end; 218 while (p != end && *p == '0') 219 p++; 220 221 if (p != end && *p == '.') { 222 *dot = p++; 223 224 assert(end - begin != 1 && "Significand has no digits"); 225 226 while (p != end && *p == '0') 227 p++; 228 } 229 230 return p; 231 } 232 233 /* Given a normal decimal floating point number of the form 234 235 dddd.dddd[eE][+-]ddd 236 237 where the decimal point and exponent are optional, fill out the 238 structure D. Exponent is appropriate if the significand is 239 treated as an integer, and normalizedExponent if the significand 240 is taken to have the decimal point after a single leading 241 non-zero digit. 242 243 If the value is zero, V->firstSigDigit points to a non-digit, and 244 the return exponent is zero. 245 */ 246 struct decimalInfo { 247 const char *firstSigDigit; 248 const char *lastSigDigit; 249 int exponent; 250 int normalizedExponent; 251 }; 252 253 static void 254 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 255 decimalInfo *D) 256 { 257 StringRef::iterator dot = end; 258 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 259 260 D->firstSigDigit = p; 261 D->exponent = 0; 262 D->normalizedExponent = 0; 263 264 for (; p != end; ++p) { 265 if (*p == '.') { 266 assert(dot == end && "String contains multiple dots"); 267 dot = p++; 268 if (p == end) 269 break; 270 } 271 if (decDigitValue(*p) >= 10U) 272 break; 273 } 274 275 if (p != end) { 276 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 277 assert(p != begin && "Significand has no digits"); 278 assert((dot == end || p - begin != 1) && "Significand has no digits"); 279 280 /* p points to the first non-digit in the string */ 281 D->exponent = readExponent(p + 1, end); 282 283 /* Implied decimal point? */ 284 if (dot == end) 285 dot = p; 286 } 287 288 /* If number is all zeroes accept any exponent. */ 289 if (p != D->firstSigDigit) { 290 /* Drop insignificant trailing zeroes. */ 291 if (p != begin) { 292 do 293 do 294 p--; 295 while (p != begin && *p == '0'); 296 while (p != begin && *p == '.'); 297 } 298 299 /* Adjust the exponents for any decimal point. */ 300 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 301 D->normalizedExponent = (D->exponent + 302 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 303 - (dot > D->firstSigDigit && dot < p))); 304 } 305 306 D->lastSigDigit = p; 307 } 308 309 /* Return the trailing fraction of a hexadecimal number. 310 DIGITVALUE is the first hex digit of the fraction, P points to 311 the next digit. */ 312 static lostFraction 313 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 314 unsigned int digitValue) 315 { 316 unsigned int hexDigit; 317 318 /* If the first trailing digit isn't 0 or 8 we can work out the 319 fraction immediately. */ 320 if (digitValue > 8) 321 return lfMoreThanHalf; 322 else if (digitValue < 8 && digitValue > 0) 323 return lfLessThanHalf; 324 325 // Otherwise we need to find the first non-zero digit. 326 while (p != end && (*p == '0' || *p == '.')) 327 p++; 328 329 assert(p != end && "Invalid trailing hexadecimal fraction!"); 330 331 hexDigit = hexDigitValue(*p); 332 333 /* If we ran off the end it is exactly zero or one-half, otherwise 334 a little more. */ 335 if (hexDigit == -1U) 336 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 337 else 338 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 339 } 340 341 /* Return the fraction lost were a bignum truncated losing the least 342 significant BITS bits. */ 343 static lostFraction 344 lostFractionThroughTruncation(const integerPart *parts, 345 unsigned int partCount, 346 unsigned int bits) 347 { 348 unsigned int lsb; 349 350 lsb = APInt::tcLSB(parts, partCount); 351 352 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 353 if (bits <= lsb) 354 return lfExactlyZero; 355 if (bits == lsb + 1) 356 return lfExactlyHalf; 357 if (bits <= partCount * integerPartWidth && 358 APInt::tcExtractBit(parts, bits - 1)) 359 return lfMoreThanHalf; 360 361 return lfLessThanHalf; 362 } 363 364 /* Shift DST right BITS bits noting lost fraction. */ 365 static lostFraction 366 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 367 { 368 lostFraction lost_fraction; 369 370 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 371 372 APInt::tcShiftRight(dst, parts, bits); 373 374 return lost_fraction; 375 } 376 377 /* Combine the effect of two lost fractions. */ 378 static lostFraction 379 combineLostFractions(lostFraction moreSignificant, 380 lostFraction lessSignificant) 381 { 382 if (lessSignificant != lfExactlyZero) { 383 if (moreSignificant == lfExactlyZero) 384 moreSignificant = lfLessThanHalf; 385 else if (moreSignificant == lfExactlyHalf) 386 moreSignificant = lfMoreThanHalf; 387 } 388 389 return moreSignificant; 390 } 391 392 /* The error from the true value, in half-ulps, on multiplying two 393 floating point numbers, which differ from the value they 394 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 395 than the returned value. 396 397 See "How to Read Floating Point Numbers Accurately" by William D 398 Clinger. */ 399 static unsigned int 400 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 401 { 402 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 403 404 if (HUerr1 + HUerr2 == 0) 405 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 406 else 407 return inexactMultiply + 2 * (HUerr1 + HUerr2); 408 } 409 410 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 411 when the least significant BITS are truncated. BITS cannot be 412 zero. */ 413 static integerPart 414 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 415 { 416 unsigned int count, partBits; 417 integerPart part, boundary; 418 419 assert(bits != 0); 420 421 bits--; 422 count = bits / integerPartWidth; 423 partBits = bits % integerPartWidth + 1; 424 425 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 426 427 if (isNearest) 428 boundary = (integerPart) 1 << (partBits - 1); 429 else 430 boundary = 0; 431 432 if (count == 0) { 433 if (part - boundary <= boundary - part) 434 return part - boundary; 435 else 436 return boundary - part; 437 } 438 439 if (part == boundary) { 440 while (--count) 441 if (parts[count]) 442 return ~(integerPart) 0; /* A lot. */ 443 444 return parts[0]; 445 } else if (part == boundary - 1) { 446 while (--count) 447 if (~parts[count]) 448 return ~(integerPart) 0; /* A lot. */ 449 450 return -parts[0]; 451 } 452 453 return ~(integerPart) 0; /* A lot. */ 454 } 455 456 /* Place pow(5, power) in DST, and return the number of parts used. 457 DST must be at least one part larger than size of the answer. */ 458 static unsigned int 459 powerOf5(integerPart *dst, unsigned int power) 460 { 461 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 462 15625, 78125 }; 463 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 464 pow5s[0] = 78125 * 5; 465 466 unsigned int partsCount[16] = { 1 }; 467 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 468 unsigned int result; 469 assert(power <= maxExponent); 470 471 p1 = dst; 472 p2 = scratch; 473 474 *p1 = firstEightPowers[power & 7]; 475 power >>= 3; 476 477 result = 1; 478 pow5 = pow5s; 479 480 for (unsigned int n = 0; power; power >>= 1, n++) { 481 unsigned int pc; 482 483 pc = partsCount[n]; 484 485 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 486 if (pc == 0) { 487 pc = partsCount[n - 1]; 488 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 489 pc *= 2; 490 if (pow5[pc - 1] == 0) 491 pc--; 492 partsCount[n] = pc; 493 } 494 495 if (power & 1) { 496 integerPart *tmp; 497 498 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 499 result += pc; 500 if (p2[result - 1] == 0) 501 result--; 502 503 /* Now result is in p1 with partsCount parts and p2 is scratch 504 space. */ 505 tmp = p1; 506 p1 = p2; 507 p2 = tmp; 508 } 509 510 pow5 += pc; 511 } 512 513 if (p1 != dst) 514 APInt::tcAssign(dst, p1, result); 515 516 return result; 517 } 518 519 /* Zero at the end to avoid modular arithmetic when adding one; used 520 when rounding up during hexadecimal output. */ 521 static const char hexDigitsLower[] = "0123456789abcdef0"; 522 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 523 static const char infinityL[] = "infinity"; 524 static const char infinityU[] = "INFINITY"; 525 static const char NaNL[] = "nan"; 526 static const char NaNU[] = "NAN"; 527 528 /* Write out an integerPart in hexadecimal, starting with the most 529 significant nibble. Write out exactly COUNT hexdigits, return 530 COUNT. */ 531 static unsigned int 532 partAsHex (char *dst, integerPart part, unsigned int count, 533 const char *hexDigitChars) 534 { 535 unsigned int result = count; 536 537 assert(count != 0 && count <= integerPartWidth / 4); 538 539 part >>= (integerPartWidth - 4 * count); 540 while (count--) { 541 dst[count] = hexDigitChars[part & 0xf]; 542 part >>= 4; 543 } 544 545 return result; 546 } 547 548 /* Write out an unsigned decimal integer. */ 549 static char * 550 writeUnsignedDecimal (char *dst, unsigned int n) 551 { 552 char buff[40], *p; 553 554 p = buff; 555 do 556 *p++ = '0' + n % 10; 557 while (n /= 10); 558 559 do 560 *dst++ = *--p; 561 while (p != buff); 562 563 return dst; 564 } 565 566 /* Write out a signed decimal integer. */ 567 static char * 568 writeSignedDecimal (char *dst, int value) 569 { 570 if (value < 0) { 571 *dst++ = '-'; 572 dst = writeUnsignedDecimal(dst, -(unsigned) value); 573 } else 574 dst = writeUnsignedDecimal(dst, value); 575 576 return dst; 577 } 578 579 /* Constructors. */ 580 void 581 APFloat::initialize(const fltSemantics *ourSemantics) 582 { 583 unsigned int count; 584 585 semantics = ourSemantics; 586 count = partCount(); 587 if (count > 1) 588 significand.parts = new integerPart[count]; 589 } 590 591 void 592 APFloat::freeSignificand() 593 { 594 if (needsCleanup()) 595 delete [] significand.parts; 596 } 597 598 void 599 APFloat::assign(const APFloat &rhs) 600 { 601 assert(semantics == rhs.semantics); 602 603 sign = rhs.sign; 604 category = rhs.category; 605 exponent = rhs.exponent; 606 if (isFiniteNonZero() || category == fcNaN) 607 copySignificand(rhs); 608 } 609 610 void 611 APFloat::copySignificand(const APFloat &rhs) 612 { 613 assert(isFiniteNonZero() || category == fcNaN); 614 assert(rhs.partCount() >= partCount()); 615 616 APInt::tcAssign(significandParts(), rhs.significandParts(), 617 partCount()); 618 } 619 620 /* Make this number a NaN, with an arbitrary but deterministic value 621 for the significand. If double or longer, this is a signalling NaN, 622 which may not be ideal. If float, this is QNaN(0). */ 623 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 624 { 625 category = fcNaN; 626 sign = Negative; 627 628 integerPart *significand = significandParts(); 629 unsigned numParts = partCount(); 630 631 // Set the significand bits to the fill. 632 if (!fill || fill->getNumWords() < numParts) 633 APInt::tcSet(significand, 0, numParts); 634 if (fill) { 635 APInt::tcAssign(significand, fill->getRawData(), 636 std::min(fill->getNumWords(), numParts)); 637 638 // Zero out the excess bits of the significand. 639 unsigned bitsToPreserve = semantics->precision - 1; 640 unsigned part = bitsToPreserve / 64; 641 bitsToPreserve %= 64; 642 significand[part] &= ((1ULL << bitsToPreserve) - 1); 643 for (part++; part != numParts; ++part) 644 significand[part] = 0; 645 } 646 647 unsigned QNaNBit = semantics->precision - 2; 648 649 if (SNaN) { 650 // We always have to clear the QNaN bit to make it an SNaN. 651 APInt::tcClearBit(significand, QNaNBit); 652 653 // If there are no bits set in the payload, we have to set 654 // *something* to make it a NaN instead of an infinity; 655 // conventionally, this is the next bit down from the QNaN bit. 656 if (APInt::tcIsZero(significand, numParts)) 657 APInt::tcSetBit(significand, QNaNBit - 1); 658 } else { 659 // We always have to set the QNaN bit to make it a QNaN. 660 APInt::tcSetBit(significand, QNaNBit); 661 } 662 663 // For x87 extended precision, we want to make a NaN, not a 664 // pseudo-NaN. Maybe we should expose the ability to make 665 // pseudo-NaNs? 666 if (semantics == &APFloat::x87DoubleExtended) 667 APInt::tcSetBit(significand, QNaNBit + 1); 668 } 669 670 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 671 const APInt *fill) { 672 APFloat value(Sem, uninitialized); 673 value.makeNaN(SNaN, Negative, fill); 674 return value; 675 } 676 677 APFloat & 678 APFloat::operator=(const APFloat &rhs) 679 { 680 if (this != &rhs) { 681 if (semantics != rhs.semantics) { 682 freeSignificand(); 683 initialize(rhs.semantics); 684 } 685 assign(rhs); 686 } 687 688 return *this; 689 } 690 691 APFloat & 692 APFloat::operator=(APFloat &&rhs) { 693 freeSignificand(); 694 695 semantics = rhs.semantics; 696 significand = rhs.significand; 697 exponent = rhs.exponent; 698 category = rhs.category; 699 sign = rhs.sign; 700 701 rhs.semantics = &Bogus; 702 return *this; 703 } 704 705 bool 706 APFloat::isDenormal() const { 707 return isFiniteNonZero() && (exponent == semantics->minExponent) && 708 (APInt::tcExtractBit(significandParts(), 709 semantics->precision - 1) == 0); 710 } 711 712 bool 713 APFloat::isSmallest() const { 714 // The smallest number by magnitude in our format will be the smallest 715 // denormal, i.e. the floating point number with exponent being minimum 716 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 717 return isFiniteNonZero() && exponent == semantics->minExponent && 718 significandMSB() == 0; 719 } 720 721 bool APFloat::isSignificandAllOnes() const { 722 // Test if the significand excluding the integral bit is all ones. This allows 723 // us to test for binade boundaries. 724 const integerPart *Parts = significandParts(); 725 const unsigned PartCount = partCount(); 726 for (unsigned i = 0; i < PartCount - 1; i++) 727 if (~Parts[i]) 728 return false; 729 730 // Set the unused high bits to all ones when we compare. 731 const unsigned NumHighBits = 732 PartCount*integerPartWidth - semantics->precision + 1; 733 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 734 "fill than integerPartWidth"); 735 const integerPart HighBitFill = 736 ~integerPart(0) << (integerPartWidth - NumHighBits); 737 if (~(Parts[PartCount - 1] | HighBitFill)) 738 return false; 739 740 return true; 741 } 742 743 bool APFloat::isSignificandAllZeros() const { 744 // Test if the significand excluding the integral bit is all zeros. This 745 // allows us to test for binade boundaries. 746 const integerPart *Parts = significandParts(); 747 const unsigned PartCount = partCount(); 748 749 for (unsigned i = 0; i < PartCount - 1; i++) 750 if (Parts[i]) 751 return false; 752 753 const unsigned NumHighBits = 754 PartCount*integerPartWidth - semantics->precision + 1; 755 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 756 "clear than integerPartWidth"); 757 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 758 759 if (Parts[PartCount - 1] & HighBitMask) 760 return false; 761 762 return true; 763 } 764 765 bool 766 APFloat::isLargest() const { 767 // The largest number by magnitude in our format will be the floating point 768 // number with maximum exponent and with significand that is all ones. 769 return isFiniteNonZero() && exponent == semantics->maxExponent 770 && isSignificandAllOnes(); 771 } 772 773 bool 774 APFloat::isInteger() const { 775 // This could be made more efficient; I'm going for obviously correct. 776 if (!isFinite()) return false; 777 APFloat truncated = *this; 778 truncated.roundToIntegral(rmTowardZero); 779 return compare(truncated) == cmpEqual; 780 } 781 782 bool 783 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 784 if (this == &rhs) 785 return true; 786 if (semantics != rhs.semantics || 787 category != rhs.category || 788 sign != rhs.sign) 789 return false; 790 if (category==fcZero || category==fcInfinity) 791 return true; 792 793 if (isFiniteNonZero() && exponent != rhs.exponent) 794 return false; 795 796 return std::equal(significandParts(), significandParts() + partCount(), 797 rhs.significandParts()); 798 } 799 800 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 801 initialize(&ourSemantics); 802 sign = 0; 803 category = fcNormal; 804 zeroSignificand(); 805 exponent = ourSemantics.precision - 1; 806 significandParts()[0] = value; 807 normalize(rmNearestTiesToEven, lfExactlyZero); 808 } 809 810 APFloat::APFloat(const fltSemantics &ourSemantics) { 811 initialize(&ourSemantics); 812 category = fcZero; 813 sign = false; 814 } 815 816 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 817 // Allocates storage if necessary but does not initialize it. 818 initialize(&ourSemantics); 819 } 820 821 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 822 initialize(&ourSemantics); 823 convertFromString(text, rmNearestTiesToEven); 824 } 825 826 APFloat::APFloat(const APFloat &rhs) { 827 initialize(rhs.semantics); 828 assign(rhs); 829 } 830 831 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 832 *this = std::move(rhs); 833 } 834 835 APFloat::~APFloat() 836 { 837 freeSignificand(); 838 } 839 840 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 841 void APFloat::Profile(FoldingSetNodeID& ID) const { 842 ID.Add(bitcastToAPInt()); 843 } 844 845 unsigned int 846 APFloat::partCount() const 847 { 848 return partCountForBits(semantics->precision + 1); 849 } 850 851 unsigned int 852 APFloat::semanticsPrecision(const fltSemantics &semantics) 853 { 854 return semantics.precision; 855 } 856 APFloat::ExponentType 857 APFloat::semanticsMaxExponent(const fltSemantics &semantics) 858 { 859 return semantics.maxExponent; 860 } 861 APFloat::ExponentType 862 APFloat::semanticsMinExponent(const fltSemantics &semantics) 863 { 864 return semantics.minExponent; 865 } 866 unsigned int 867 APFloat::semanticsSizeInBits(const fltSemantics &semantics) 868 { 869 return semantics.sizeInBits; 870 } 871 872 const integerPart * 873 APFloat::significandParts() const 874 { 875 return const_cast<APFloat *>(this)->significandParts(); 876 } 877 878 integerPart * 879 APFloat::significandParts() 880 { 881 if (partCount() > 1) 882 return significand.parts; 883 else 884 return &significand.part; 885 } 886 887 void 888 APFloat::zeroSignificand() 889 { 890 APInt::tcSet(significandParts(), 0, partCount()); 891 } 892 893 /* Increment an fcNormal floating point number's significand. */ 894 void 895 APFloat::incrementSignificand() 896 { 897 integerPart carry; 898 899 carry = APInt::tcIncrement(significandParts(), partCount()); 900 901 /* Our callers should never cause us to overflow. */ 902 assert(carry == 0); 903 (void)carry; 904 } 905 906 /* Add the significand of the RHS. Returns the carry flag. */ 907 integerPart 908 APFloat::addSignificand(const APFloat &rhs) 909 { 910 integerPart *parts; 911 912 parts = significandParts(); 913 914 assert(semantics == rhs.semantics); 915 assert(exponent == rhs.exponent); 916 917 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 918 } 919 920 /* Subtract the significand of the RHS with a borrow flag. Returns 921 the borrow flag. */ 922 integerPart 923 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 924 { 925 integerPart *parts; 926 927 parts = significandParts(); 928 929 assert(semantics == rhs.semantics); 930 assert(exponent == rhs.exponent); 931 932 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 933 partCount()); 934 } 935 936 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 937 on to the full-precision result of the multiplication. Returns the 938 lost fraction. */ 939 lostFraction 940 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 941 { 942 unsigned int omsb; // One, not zero, based MSB. 943 unsigned int partsCount, newPartsCount, precision; 944 integerPart *lhsSignificand; 945 integerPart scratch[4]; 946 integerPart *fullSignificand; 947 lostFraction lost_fraction; 948 bool ignored; 949 950 assert(semantics == rhs.semantics); 951 952 precision = semantics->precision; 953 954 // Allocate space for twice as many bits as the original significand, plus one 955 // extra bit for the addition to overflow into. 956 newPartsCount = partCountForBits(precision * 2 + 1); 957 958 if (newPartsCount > 4) 959 fullSignificand = new integerPart[newPartsCount]; 960 else 961 fullSignificand = scratch; 962 963 lhsSignificand = significandParts(); 964 partsCount = partCount(); 965 966 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 967 rhs.significandParts(), partsCount, partsCount); 968 969 lost_fraction = lfExactlyZero; 970 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 971 exponent += rhs.exponent; 972 973 // Assume the operands involved in the multiplication are single-precision 974 // FP, and the two multiplicants are: 975 // *this = a23 . a22 ... a0 * 2^e1 976 // rhs = b23 . b22 ... b0 * 2^e2 977 // the result of multiplication is: 978 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 979 // Note that there are three significant bits at the left-hand side of the 980 // radix point: two for the multiplication, and an overflow bit for the 981 // addition (that will always be zero at this point). Move the radix point 982 // toward left by two bits, and adjust exponent accordingly. 983 exponent += 2; 984 985 if (addend && addend->isNonZero()) { 986 // The intermediate result of the multiplication has "2 * precision" 987 // signicant bit; adjust the addend to be consistent with mul result. 988 // 989 Significand savedSignificand = significand; 990 const fltSemantics *savedSemantics = semantics; 991 fltSemantics extendedSemantics; 992 opStatus status; 993 unsigned int extendedPrecision; 994 995 // Normalize our MSB to one below the top bit to allow for overflow. 996 extendedPrecision = 2 * precision + 1; 997 if (omsb != extendedPrecision - 1) { 998 assert(extendedPrecision > omsb); 999 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1000 (extendedPrecision - 1) - omsb); 1001 exponent -= (extendedPrecision - 1) - omsb; 1002 } 1003 1004 /* Create new semantics. */ 1005 extendedSemantics = *semantics; 1006 extendedSemantics.precision = extendedPrecision; 1007 1008 if (newPartsCount == 1) 1009 significand.part = fullSignificand[0]; 1010 else 1011 significand.parts = fullSignificand; 1012 semantics = &extendedSemantics; 1013 1014 APFloat extendedAddend(*addend); 1015 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1016 assert(status == opOK); 1017 (void)status; 1018 1019 // Shift the significand of the addend right by one bit. This guarantees 1020 // that the high bit of the significand is zero (same as fullSignificand), 1021 // so the addition will overflow (if it does overflow at all) into the top bit. 1022 lost_fraction = extendedAddend.shiftSignificandRight(1); 1023 assert(lost_fraction == lfExactlyZero && 1024 "Lost precision while shifting addend for fused-multiply-add."); 1025 1026 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1027 1028 /* Restore our state. */ 1029 if (newPartsCount == 1) 1030 fullSignificand[0] = significand.part; 1031 significand = savedSignificand; 1032 semantics = savedSemantics; 1033 1034 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1035 } 1036 1037 // Convert the result having "2 * precision" significant-bits back to the one 1038 // having "precision" significant-bits. First, move the radix point from 1039 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1040 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1041 exponent -= precision + 1; 1042 1043 // In case MSB resides at the left-hand side of radix point, shift the 1044 // mantissa right by some amount to make sure the MSB reside right before 1045 // the radix point (i.e. "MSB . rest-significant-bits"). 1046 // 1047 // Note that the result is not normalized when "omsb < precision". So, the 1048 // caller needs to call APFloat::normalize() if normalized value is expected. 1049 if (omsb > precision) { 1050 unsigned int bits, significantParts; 1051 lostFraction lf; 1052 1053 bits = omsb - precision; 1054 significantParts = partCountForBits(omsb); 1055 lf = shiftRight(fullSignificand, significantParts, bits); 1056 lost_fraction = combineLostFractions(lf, lost_fraction); 1057 exponent += bits; 1058 } 1059 1060 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1061 1062 if (newPartsCount > 4) 1063 delete [] fullSignificand; 1064 1065 return lost_fraction; 1066 } 1067 1068 /* Multiply the significands of LHS and RHS to DST. */ 1069 lostFraction 1070 APFloat::divideSignificand(const APFloat &rhs) 1071 { 1072 unsigned int bit, i, partsCount; 1073 const integerPart *rhsSignificand; 1074 integerPart *lhsSignificand, *dividend, *divisor; 1075 integerPart scratch[4]; 1076 lostFraction lost_fraction; 1077 1078 assert(semantics == rhs.semantics); 1079 1080 lhsSignificand = significandParts(); 1081 rhsSignificand = rhs.significandParts(); 1082 partsCount = partCount(); 1083 1084 if (partsCount > 2) 1085 dividend = new integerPart[partsCount * 2]; 1086 else 1087 dividend = scratch; 1088 1089 divisor = dividend + partsCount; 1090 1091 /* Copy the dividend and divisor as they will be modified in-place. */ 1092 for (i = 0; i < partsCount; i++) { 1093 dividend[i] = lhsSignificand[i]; 1094 divisor[i] = rhsSignificand[i]; 1095 lhsSignificand[i] = 0; 1096 } 1097 1098 exponent -= rhs.exponent; 1099 1100 unsigned int precision = semantics->precision; 1101 1102 /* Normalize the divisor. */ 1103 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1104 if (bit) { 1105 exponent += bit; 1106 APInt::tcShiftLeft(divisor, partsCount, bit); 1107 } 1108 1109 /* Normalize the dividend. */ 1110 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1111 if (bit) { 1112 exponent -= bit; 1113 APInt::tcShiftLeft(dividend, partsCount, bit); 1114 } 1115 1116 /* Ensure the dividend >= divisor initially for the loop below. 1117 Incidentally, this means that the division loop below is 1118 guaranteed to set the integer bit to one. */ 1119 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1120 exponent--; 1121 APInt::tcShiftLeft(dividend, partsCount, 1); 1122 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1123 } 1124 1125 /* Long division. */ 1126 for (bit = precision; bit; bit -= 1) { 1127 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1128 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1129 APInt::tcSetBit(lhsSignificand, bit - 1); 1130 } 1131 1132 APInt::tcShiftLeft(dividend, partsCount, 1); 1133 } 1134 1135 /* Figure out the lost fraction. */ 1136 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1137 1138 if (cmp > 0) 1139 lost_fraction = lfMoreThanHalf; 1140 else if (cmp == 0) 1141 lost_fraction = lfExactlyHalf; 1142 else if (APInt::tcIsZero(dividend, partsCount)) 1143 lost_fraction = lfExactlyZero; 1144 else 1145 lost_fraction = lfLessThanHalf; 1146 1147 if (partsCount > 2) 1148 delete [] dividend; 1149 1150 return lost_fraction; 1151 } 1152 1153 unsigned int 1154 APFloat::significandMSB() const 1155 { 1156 return APInt::tcMSB(significandParts(), partCount()); 1157 } 1158 1159 unsigned int 1160 APFloat::significandLSB() const 1161 { 1162 return APInt::tcLSB(significandParts(), partCount()); 1163 } 1164 1165 /* Note that a zero result is NOT normalized to fcZero. */ 1166 lostFraction 1167 APFloat::shiftSignificandRight(unsigned int bits) 1168 { 1169 /* Our exponent should not overflow. */ 1170 assert((ExponentType) (exponent + bits) >= exponent); 1171 1172 exponent += bits; 1173 1174 return shiftRight(significandParts(), partCount(), bits); 1175 } 1176 1177 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1178 void 1179 APFloat::shiftSignificandLeft(unsigned int bits) 1180 { 1181 assert(bits < semantics->precision); 1182 1183 if (bits) { 1184 unsigned int partsCount = partCount(); 1185 1186 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1187 exponent -= bits; 1188 1189 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1190 } 1191 } 1192 1193 APFloat::cmpResult 1194 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1195 { 1196 int compare; 1197 1198 assert(semantics == rhs.semantics); 1199 assert(isFiniteNonZero()); 1200 assert(rhs.isFiniteNonZero()); 1201 1202 compare = exponent - rhs.exponent; 1203 1204 /* If exponents are equal, do an unsigned bignum comparison of the 1205 significands. */ 1206 if (compare == 0) 1207 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1208 partCount()); 1209 1210 if (compare > 0) 1211 return cmpGreaterThan; 1212 else if (compare < 0) 1213 return cmpLessThan; 1214 else 1215 return cmpEqual; 1216 } 1217 1218 /* Handle overflow. Sign is preserved. We either become infinity or 1219 the largest finite number. */ 1220 APFloat::opStatus 1221 APFloat::handleOverflow(roundingMode rounding_mode) 1222 { 1223 /* Infinity? */ 1224 if (rounding_mode == rmNearestTiesToEven || 1225 rounding_mode == rmNearestTiesToAway || 1226 (rounding_mode == rmTowardPositive && !sign) || 1227 (rounding_mode == rmTowardNegative && sign)) { 1228 category = fcInfinity; 1229 return (opStatus) (opOverflow | opInexact); 1230 } 1231 1232 /* Otherwise we become the largest finite number. */ 1233 category = fcNormal; 1234 exponent = semantics->maxExponent; 1235 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1236 semantics->precision); 1237 1238 return opInexact; 1239 } 1240 1241 /* Returns TRUE if, when truncating the current number, with BIT the 1242 new LSB, with the given lost fraction and rounding mode, the result 1243 would need to be rounded away from zero (i.e., by increasing the 1244 signficand). This routine must work for fcZero of both signs, and 1245 fcNormal numbers. */ 1246 bool 1247 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1248 lostFraction lost_fraction, 1249 unsigned int bit) const 1250 { 1251 /* NaNs and infinities should not have lost fractions. */ 1252 assert(isFiniteNonZero() || category == fcZero); 1253 1254 /* Current callers never pass this so we don't handle it. */ 1255 assert(lost_fraction != lfExactlyZero); 1256 1257 switch (rounding_mode) { 1258 case rmNearestTiesToAway: 1259 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1260 1261 case rmNearestTiesToEven: 1262 if (lost_fraction == lfMoreThanHalf) 1263 return true; 1264 1265 /* Our zeroes don't have a significand to test. */ 1266 if (lost_fraction == lfExactlyHalf && category != fcZero) 1267 return APInt::tcExtractBit(significandParts(), bit); 1268 1269 return false; 1270 1271 case rmTowardZero: 1272 return false; 1273 1274 case rmTowardPositive: 1275 return !sign; 1276 1277 case rmTowardNegative: 1278 return sign; 1279 } 1280 llvm_unreachable("Invalid rounding mode found"); 1281 } 1282 1283 APFloat::opStatus 1284 APFloat::normalize(roundingMode rounding_mode, 1285 lostFraction lost_fraction) 1286 { 1287 unsigned int omsb; /* One, not zero, based MSB. */ 1288 int exponentChange; 1289 1290 if (!isFiniteNonZero()) 1291 return opOK; 1292 1293 /* Before rounding normalize the exponent of fcNormal numbers. */ 1294 omsb = significandMSB() + 1; 1295 1296 if (omsb) { 1297 /* OMSB is numbered from 1. We want to place it in the integer 1298 bit numbered PRECISION if possible, with a compensating change in 1299 the exponent. */ 1300 exponentChange = omsb - semantics->precision; 1301 1302 /* If the resulting exponent is too high, overflow according to 1303 the rounding mode. */ 1304 if (exponent + exponentChange > semantics->maxExponent) 1305 return handleOverflow(rounding_mode); 1306 1307 /* Subnormal numbers have exponent minExponent, and their MSB 1308 is forced based on that. */ 1309 if (exponent + exponentChange < semantics->minExponent) 1310 exponentChange = semantics->minExponent - exponent; 1311 1312 /* Shifting left is easy as we don't lose precision. */ 1313 if (exponentChange < 0) { 1314 assert(lost_fraction == lfExactlyZero); 1315 1316 shiftSignificandLeft(-exponentChange); 1317 1318 return opOK; 1319 } 1320 1321 if (exponentChange > 0) { 1322 lostFraction lf; 1323 1324 /* Shift right and capture any new lost fraction. */ 1325 lf = shiftSignificandRight(exponentChange); 1326 1327 lost_fraction = combineLostFractions(lf, lost_fraction); 1328 1329 /* Keep OMSB up-to-date. */ 1330 if (omsb > (unsigned) exponentChange) 1331 omsb -= exponentChange; 1332 else 1333 omsb = 0; 1334 } 1335 } 1336 1337 /* Now round the number according to rounding_mode given the lost 1338 fraction. */ 1339 1340 /* As specified in IEEE 754, since we do not trap we do not report 1341 underflow for exact results. */ 1342 if (lost_fraction == lfExactlyZero) { 1343 /* Canonicalize zeroes. */ 1344 if (omsb == 0) 1345 category = fcZero; 1346 1347 return opOK; 1348 } 1349 1350 /* Increment the significand if we're rounding away from zero. */ 1351 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1352 if (omsb == 0) 1353 exponent = semantics->minExponent; 1354 1355 incrementSignificand(); 1356 omsb = significandMSB() + 1; 1357 1358 /* Did the significand increment overflow? */ 1359 if (omsb == (unsigned) semantics->precision + 1) { 1360 /* Renormalize by incrementing the exponent and shifting our 1361 significand right one. However if we already have the 1362 maximum exponent we overflow to infinity. */ 1363 if (exponent == semantics->maxExponent) { 1364 category = fcInfinity; 1365 1366 return (opStatus) (opOverflow | opInexact); 1367 } 1368 1369 shiftSignificandRight(1); 1370 1371 return opInexact; 1372 } 1373 } 1374 1375 /* The normal case - we were and are not denormal, and any 1376 significand increment above didn't overflow. */ 1377 if (omsb == semantics->precision) 1378 return opInexact; 1379 1380 /* We have a non-zero denormal. */ 1381 assert(omsb < semantics->precision); 1382 1383 /* Canonicalize zeroes. */ 1384 if (omsb == 0) 1385 category = fcZero; 1386 1387 /* The fcZero case is a denormal that underflowed to zero. */ 1388 return (opStatus) (opUnderflow | opInexact); 1389 } 1390 1391 APFloat::opStatus 1392 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1393 { 1394 switch (PackCategoriesIntoKey(category, rhs.category)) { 1395 default: 1396 llvm_unreachable(nullptr); 1397 1398 case PackCategoriesIntoKey(fcNaN, fcZero): 1399 case PackCategoriesIntoKey(fcNaN, fcNormal): 1400 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1401 case PackCategoriesIntoKey(fcNaN, fcNaN): 1402 case PackCategoriesIntoKey(fcNormal, fcZero): 1403 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1404 case PackCategoriesIntoKey(fcInfinity, fcZero): 1405 return opOK; 1406 1407 case PackCategoriesIntoKey(fcZero, fcNaN): 1408 case PackCategoriesIntoKey(fcNormal, fcNaN): 1409 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1410 // We need to be sure to flip the sign here for subtraction because we 1411 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1412 sign = rhs.sign ^ subtract; 1413 category = fcNaN; 1414 copySignificand(rhs); 1415 return opOK; 1416 1417 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1418 case PackCategoriesIntoKey(fcZero, fcInfinity): 1419 category = fcInfinity; 1420 sign = rhs.sign ^ subtract; 1421 return opOK; 1422 1423 case PackCategoriesIntoKey(fcZero, fcNormal): 1424 assign(rhs); 1425 sign = rhs.sign ^ subtract; 1426 return opOK; 1427 1428 case PackCategoriesIntoKey(fcZero, fcZero): 1429 /* Sign depends on rounding mode; handled by caller. */ 1430 return opOK; 1431 1432 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1433 /* Differently signed infinities can only be validly 1434 subtracted. */ 1435 if (((sign ^ rhs.sign)!=0) != subtract) { 1436 makeNaN(); 1437 return opInvalidOp; 1438 } 1439 1440 return opOK; 1441 1442 case PackCategoriesIntoKey(fcNormal, fcNormal): 1443 return opDivByZero; 1444 } 1445 } 1446 1447 /* Add or subtract two normal numbers. */ 1448 lostFraction 1449 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1450 { 1451 integerPart carry; 1452 lostFraction lost_fraction; 1453 int bits; 1454 1455 /* Determine if the operation on the absolute values is effectively 1456 an addition or subtraction. */ 1457 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1458 1459 /* Are we bigger exponent-wise than the RHS? */ 1460 bits = exponent - rhs.exponent; 1461 1462 /* Subtraction is more subtle than one might naively expect. */ 1463 if (subtract) { 1464 APFloat temp_rhs(rhs); 1465 bool reverse; 1466 1467 if (bits == 0) { 1468 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1469 lost_fraction = lfExactlyZero; 1470 } else if (bits > 0) { 1471 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1472 shiftSignificandLeft(1); 1473 reverse = false; 1474 } else { 1475 lost_fraction = shiftSignificandRight(-bits - 1); 1476 temp_rhs.shiftSignificandLeft(1); 1477 reverse = true; 1478 } 1479 1480 if (reverse) { 1481 carry = temp_rhs.subtractSignificand 1482 (*this, lost_fraction != lfExactlyZero); 1483 copySignificand(temp_rhs); 1484 sign = !sign; 1485 } else { 1486 carry = subtractSignificand 1487 (temp_rhs, lost_fraction != lfExactlyZero); 1488 } 1489 1490 /* Invert the lost fraction - it was on the RHS and 1491 subtracted. */ 1492 if (lost_fraction == lfLessThanHalf) 1493 lost_fraction = lfMoreThanHalf; 1494 else if (lost_fraction == lfMoreThanHalf) 1495 lost_fraction = lfLessThanHalf; 1496 1497 /* The code above is intended to ensure that no borrow is 1498 necessary. */ 1499 assert(!carry); 1500 (void)carry; 1501 } else { 1502 if (bits > 0) { 1503 APFloat temp_rhs(rhs); 1504 1505 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1506 carry = addSignificand(temp_rhs); 1507 } else { 1508 lost_fraction = shiftSignificandRight(-bits); 1509 carry = addSignificand(rhs); 1510 } 1511 1512 /* We have a guard bit; generating a carry cannot happen. */ 1513 assert(!carry); 1514 (void)carry; 1515 } 1516 1517 return lost_fraction; 1518 } 1519 1520 APFloat::opStatus 1521 APFloat::multiplySpecials(const APFloat &rhs) 1522 { 1523 switch (PackCategoriesIntoKey(category, rhs.category)) { 1524 default: 1525 llvm_unreachable(nullptr); 1526 1527 case PackCategoriesIntoKey(fcNaN, fcZero): 1528 case PackCategoriesIntoKey(fcNaN, fcNormal): 1529 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1530 case PackCategoriesIntoKey(fcNaN, fcNaN): 1531 sign = false; 1532 return opOK; 1533 1534 case PackCategoriesIntoKey(fcZero, fcNaN): 1535 case PackCategoriesIntoKey(fcNormal, fcNaN): 1536 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1537 sign = false; 1538 category = fcNaN; 1539 copySignificand(rhs); 1540 return opOK; 1541 1542 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1543 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1544 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1545 category = fcInfinity; 1546 return opOK; 1547 1548 case PackCategoriesIntoKey(fcZero, fcNormal): 1549 case PackCategoriesIntoKey(fcNormal, fcZero): 1550 case PackCategoriesIntoKey(fcZero, fcZero): 1551 category = fcZero; 1552 return opOK; 1553 1554 case PackCategoriesIntoKey(fcZero, fcInfinity): 1555 case PackCategoriesIntoKey(fcInfinity, fcZero): 1556 makeNaN(); 1557 return opInvalidOp; 1558 1559 case PackCategoriesIntoKey(fcNormal, fcNormal): 1560 return opOK; 1561 } 1562 } 1563 1564 APFloat::opStatus 1565 APFloat::divideSpecials(const APFloat &rhs) 1566 { 1567 switch (PackCategoriesIntoKey(category, rhs.category)) { 1568 default: 1569 llvm_unreachable(nullptr); 1570 1571 case PackCategoriesIntoKey(fcZero, fcNaN): 1572 case PackCategoriesIntoKey(fcNormal, fcNaN): 1573 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1574 category = fcNaN; 1575 copySignificand(rhs); 1576 case PackCategoriesIntoKey(fcNaN, fcZero): 1577 case PackCategoriesIntoKey(fcNaN, fcNormal): 1578 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1579 case PackCategoriesIntoKey(fcNaN, fcNaN): 1580 sign = false; 1581 case PackCategoriesIntoKey(fcInfinity, fcZero): 1582 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1583 case PackCategoriesIntoKey(fcZero, fcInfinity): 1584 case PackCategoriesIntoKey(fcZero, fcNormal): 1585 return opOK; 1586 1587 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1588 category = fcZero; 1589 return opOK; 1590 1591 case PackCategoriesIntoKey(fcNormal, fcZero): 1592 category = fcInfinity; 1593 return opDivByZero; 1594 1595 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1596 case PackCategoriesIntoKey(fcZero, fcZero): 1597 makeNaN(); 1598 return opInvalidOp; 1599 1600 case PackCategoriesIntoKey(fcNormal, fcNormal): 1601 return opOK; 1602 } 1603 } 1604 1605 APFloat::opStatus 1606 APFloat::modSpecials(const APFloat &rhs) 1607 { 1608 switch (PackCategoriesIntoKey(category, rhs.category)) { 1609 default: 1610 llvm_unreachable(nullptr); 1611 1612 case PackCategoriesIntoKey(fcNaN, fcZero): 1613 case PackCategoriesIntoKey(fcNaN, fcNormal): 1614 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1615 case PackCategoriesIntoKey(fcNaN, fcNaN): 1616 case PackCategoriesIntoKey(fcZero, fcInfinity): 1617 case PackCategoriesIntoKey(fcZero, fcNormal): 1618 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1619 return opOK; 1620 1621 case PackCategoriesIntoKey(fcZero, fcNaN): 1622 case PackCategoriesIntoKey(fcNormal, fcNaN): 1623 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1624 sign = false; 1625 category = fcNaN; 1626 copySignificand(rhs); 1627 return opOK; 1628 1629 case PackCategoriesIntoKey(fcNormal, fcZero): 1630 case PackCategoriesIntoKey(fcInfinity, fcZero): 1631 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1632 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1633 case PackCategoriesIntoKey(fcZero, fcZero): 1634 makeNaN(); 1635 return opInvalidOp; 1636 1637 case PackCategoriesIntoKey(fcNormal, fcNormal): 1638 return opOK; 1639 } 1640 } 1641 1642 /* Change sign. */ 1643 void 1644 APFloat::changeSign() 1645 { 1646 /* Look mummy, this one's easy. */ 1647 sign = !sign; 1648 } 1649 1650 void 1651 APFloat::clearSign() 1652 { 1653 /* So is this one. */ 1654 sign = 0; 1655 } 1656 1657 void 1658 APFloat::copySign(const APFloat &rhs) 1659 { 1660 /* And this one. */ 1661 sign = rhs.sign; 1662 } 1663 1664 /* Normalized addition or subtraction. */ 1665 APFloat::opStatus 1666 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1667 bool subtract) 1668 { 1669 opStatus fs; 1670 1671 fs = addOrSubtractSpecials(rhs, subtract); 1672 1673 /* This return code means it was not a simple case. */ 1674 if (fs == opDivByZero) { 1675 lostFraction lost_fraction; 1676 1677 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1678 fs = normalize(rounding_mode, lost_fraction); 1679 1680 /* Can only be zero if we lost no fraction. */ 1681 assert(category != fcZero || lost_fraction == lfExactlyZero); 1682 } 1683 1684 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1685 positive zero unless rounding to minus infinity, except that 1686 adding two like-signed zeroes gives that zero. */ 1687 if (category == fcZero) { 1688 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1689 sign = (rounding_mode == rmTowardNegative); 1690 } 1691 1692 return fs; 1693 } 1694 1695 /* Normalized addition. */ 1696 APFloat::opStatus 1697 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1698 { 1699 return addOrSubtract(rhs, rounding_mode, false); 1700 } 1701 1702 /* Normalized subtraction. */ 1703 APFloat::opStatus 1704 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1705 { 1706 return addOrSubtract(rhs, rounding_mode, true); 1707 } 1708 1709 /* Normalized multiply. */ 1710 APFloat::opStatus 1711 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1712 { 1713 opStatus fs; 1714 1715 sign ^= rhs.sign; 1716 fs = multiplySpecials(rhs); 1717 1718 if (isFiniteNonZero()) { 1719 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1720 fs = normalize(rounding_mode, lost_fraction); 1721 if (lost_fraction != lfExactlyZero) 1722 fs = (opStatus) (fs | opInexact); 1723 } 1724 1725 return fs; 1726 } 1727 1728 /* Normalized divide. */ 1729 APFloat::opStatus 1730 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1731 { 1732 opStatus fs; 1733 1734 sign ^= rhs.sign; 1735 fs = divideSpecials(rhs); 1736 1737 if (isFiniteNonZero()) { 1738 lostFraction lost_fraction = divideSignificand(rhs); 1739 fs = normalize(rounding_mode, lost_fraction); 1740 if (lost_fraction != lfExactlyZero) 1741 fs = (opStatus) (fs | opInexact); 1742 } 1743 1744 return fs; 1745 } 1746 1747 /* Normalized remainder. This is not currently correct in all cases. */ 1748 APFloat::opStatus 1749 APFloat::remainder(const APFloat &rhs) 1750 { 1751 opStatus fs; 1752 APFloat V = *this; 1753 unsigned int origSign = sign; 1754 1755 fs = V.divide(rhs, rmNearestTiesToEven); 1756 if (fs == opDivByZero) 1757 return fs; 1758 1759 int parts = partCount(); 1760 integerPart *x = new integerPart[parts]; 1761 bool ignored; 1762 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1763 rmNearestTiesToEven, &ignored); 1764 if (fs==opInvalidOp) 1765 return fs; 1766 1767 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1768 rmNearestTiesToEven); 1769 assert(fs==opOK); // should always work 1770 1771 fs = V.multiply(rhs, rmNearestTiesToEven); 1772 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1773 1774 fs = subtract(V, rmNearestTiesToEven); 1775 assert(fs==opOK || fs==opInexact); // likewise 1776 1777 if (isZero()) 1778 sign = origSign; // IEEE754 requires this 1779 delete[] x; 1780 return fs; 1781 } 1782 1783 /* Normalized llvm frem (C fmod). 1784 This is not currently correct in all cases. */ 1785 APFloat::opStatus 1786 APFloat::mod(const APFloat &rhs) 1787 { 1788 opStatus fs; 1789 fs = modSpecials(rhs); 1790 1791 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1792 APFloat V = *this; 1793 unsigned int origSign = sign; 1794 1795 fs = V.divide(rhs, rmNearestTiesToEven); 1796 if (fs == opDivByZero) 1797 return fs; 1798 1799 int parts = partCount(); 1800 integerPart *x = new integerPart[parts]; 1801 bool ignored; 1802 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1803 rmTowardZero, &ignored); 1804 if (fs==opInvalidOp) 1805 return fs; 1806 1807 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1808 rmNearestTiesToEven); 1809 assert(fs==opOK); // should always work 1810 1811 fs = V.multiply(rhs, rmNearestTiesToEven); 1812 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1813 1814 fs = subtract(V, rmNearestTiesToEven); 1815 assert(fs==opOK || fs==opInexact); // likewise 1816 1817 if (isZero()) 1818 sign = origSign; // IEEE754 requires this 1819 delete[] x; 1820 } 1821 return fs; 1822 } 1823 1824 /* Normalized fused-multiply-add. */ 1825 APFloat::opStatus 1826 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1827 const APFloat &addend, 1828 roundingMode rounding_mode) 1829 { 1830 opStatus fs; 1831 1832 /* Post-multiplication sign, before addition. */ 1833 sign ^= multiplicand.sign; 1834 1835 /* If and only if all arguments are normal do we need to do an 1836 extended-precision calculation. */ 1837 if (isFiniteNonZero() && 1838 multiplicand.isFiniteNonZero() && 1839 addend.isFinite()) { 1840 lostFraction lost_fraction; 1841 1842 lost_fraction = multiplySignificand(multiplicand, &addend); 1843 fs = normalize(rounding_mode, lost_fraction); 1844 if (lost_fraction != lfExactlyZero) 1845 fs = (opStatus) (fs | opInexact); 1846 1847 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1848 positive zero unless rounding to minus infinity, except that 1849 adding two like-signed zeroes gives that zero. */ 1850 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1851 sign = (rounding_mode == rmTowardNegative); 1852 } else { 1853 fs = multiplySpecials(multiplicand); 1854 1855 /* FS can only be opOK or opInvalidOp. There is no more work 1856 to do in the latter case. The IEEE-754R standard says it is 1857 implementation-defined in this case whether, if ADDEND is a 1858 quiet NaN, we raise invalid op; this implementation does so. 1859 1860 If we need to do the addition we can do so with normal 1861 precision. */ 1862 if (fs == opOK) 1863 fs = addOrSubtract(addend, rounding_mode, false); 1864 } 1865 1866 return fs; 1867 } 1868 1869 /* Rounding-mode corrrect round to integral value. */ 1870 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1871 opStatus fs; 1872 1873 // If the exponent is large enough, we know that this value is already 1874 // integral, and the arithmetic below would potentially cause it to saturate 1875 // to +/-Inf. Bail out early instead. 1876 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1877 return opOK; 1878 1879 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1880 // precision of our format, and then subtract it back off again. The choice 1881 // of rounding modes for the addition/subtraction determines the rounding mode 1882 // for our integral rounding as well. 1883 // NOTE: When the input value is negative, we do subtraction followed by 1884 // addition instead. 1885 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1886 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1887 APFloat MagicConstant(*semantics); 1888 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1889 rmNearestTiesToEven); 1890 MagicConstant.copySign(*this); 1891 1892 if (fs != opOK) 1893 return fs; 1894 1895 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1896 bool inputSign = isNegative(); 1897 1898 fs = add(MagicConstant, rounding_mode); 1899 if (fs != opOK && fs != opInexact) 1900 return fs; 1901 1902 fs = subtract(MagicConstant, rounding_mode); 1903 1904 // Restore the input sign. 1905 if (inputSign != isNegative()) 1906 changeSign(); 1907 1908 return fs; 1909 } 1910 1911 1912 /* Comparison requires normalized numbers. */ 1913 APFloat::cmpResult 1914 APFloat::compare(const APFloat &rhs) const 1915 { 1916 cmpResult result; 1917 1918 assert(semantics == rhs.semantics); 1919 1920 switch (PackCategoriesIntoKey(category, rhs.category)) { 1921 default: 1922 llvm_unreachable(nullptr); 1923 1924 case PackCategoriesIntoKey(fcNaN, fcZero): 1925 case PackCategoriesIntoKey(fcNaN, fcNormal): 1926 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1927 case PackCategoriesIntoKey(fcNaN, fcNaN): 1928 case PackCategoriesIntoKey(fcZero, fcNaN): 1929 case PackCategoriesIntoKey(fcNormal, fcNaN): 1930 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1931 return cmpUnordered; 1932 1933 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1934 case PackCategoriesIntoKey(fcInfinity, fcZero): 1935 case PackCategoriesIntoKey(fcNormal, fcZero): 1936 if (sign) 1937 return cmpLessThan; 1938 else 1939 return cmpGreaterThan; 1940 1941 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1942 case PackCategoriesIntoKey(fcZero, fcInfinity): 1943 case PackCategoriesIntoKey(fcZero, fcNormal): 1944 if (rhs.sign) 1945 return cmpGreaterThan; 1946 else 1947 return cmpLessThan; 1948 1949 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1950 if (sign == rhs.sign) 1951 return cmpEqual; 1952 else if (sign) 1953 return cmpLessThan; 1954 else 1955 return cmpGreaterThan; 1956 1957 case PackCategoriesIntoKey(fcZero, fcZero): 1958 return cmpEqual; 1959 1960 case PackCategoriesIntoKey(fcNormal, fcNormal): 1961 break; 1962 } 1963 1964 /* Two normal numbers. Do they have the same sign? */ 1965 if (sign != rhs.sign) { 1966 if (sign) 1967 result = cmpLessThan; 1968 else 1969 result = cmpGreaterThan; 1970 } else { 1971 /* Compare absolute values; invert result if negative. */ 1972 result = compareAbsoluteValue(rhs); 1973 1974 if (sign) { 1975 if (result == cmpLessThan) 1976 result = cmpGreaterThan; 1977 else if (result == cmpGreaterThan) 1978 result = cmpLessThan; 1979 } 1980 } 1981 1982 return result; 1983 } 1984 1985 /// APFloat::convert - convert a value of one floating point type to another. 1986 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1987 /// records whether the transformation lost information, i.e. whether 1988 /// converting the result back to the original type will produce the 1989 /// original value (this is almost the same as return value==fsOK, but there 1990 /// are edge cases where this is not so). 1991 1992 APFloat::opStatus 1993 APFloat::convert(const fltSemantics &toSemantics, 1994 roundingMode rounding_mode, bool *losesInfo) 1995 { 1996 lostFraction lostFraction; 1997 unsigned int newPartCount, oldPartCount; 1998 opStatus fs; 1999 int shift; 2000 const fltSemantics &fromSemantics = *semantics; 2001 2002 lostFraction = lfExactlyZero; 2003 newPartCount = partCountForBits(toSemantics.precision + 1); 2004 oldPartCount = partCount(); 2005 shift = toSemantics.precision - fromSemantics.precision; 2006 2007 bool X86SpecialNan = false; 2008 if (&fromSemantics == &APFloat::x87DoubleExtended && 2009 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 2010 (!(*significandParts() & 0x8000000000000000ULL) || 2011 !(*significandParts() & 0x4000000000000000ULL))) { 2012 // x86 has some unusual NaNs which cannot be represented in any other 2013 // format; note them here. 2014 X86SpecialNan = true; 2015 } 2016 2017 // If this is a truncation of a denormal number, and the target semantics 2018 // has larger exponent range than the source semantics (this can happen 2019 // when truncating from PowerPC double-double to double format), the 2020 // right shift could lose result mantissa bits. Adjust exponent instead 2021 // of performing excessive shift. 2022 if (shift < 0 && isFiniteNonZero()) { 2023 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2024 if (exponent + exponentChange < toSemantics.minExponent) 2025 exponentChange = toSemantics.minExponent - exponent; 2026 if (exponentChange < shift) 2027 exponentChange = shift; 2028 if (exponentChange < 0) { 2029 shift -= exponentChange; 2030 exponent += exponentChange; 2031 } 2032 } 2033 2034 // If this is a truncation, perform the shift before we narrow the storage. 2035 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2036 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2037 2038 // Fix the storage so it can hold to new value. 2039 if (newPartCount > oldPartCount) { 2040 // The new type requires more storage; make it available. 2041 integerPart *newParts; 2042 newParts = new integerPart[newPartCount]; 2043 APInt::tcSet(newParts, 0, newPartCount); 2044 if (isFiniteNonZero() || category==fcNaN) 2045 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2046 freeSignificand(); 2047 significand.parts = newParts; 2048 } else if (newPartCount == 1 && oldPartCount != 1) { 2049 // Switch to built-in storage for a single part. 2050 integerPart newPart = 0; 2051 if (isFiniteNonZero() || category==fcNaN) 2052 newPart = significandParts()[0]; 2053 freeSignificand(); 2054 significand.part = newPart; 2055 } 2056 2057 // Now that we have the right storage, switch the semantics. 2058 semantics = &toSemantics; 2059 2060 // If this is an extension, perform the shift now that the storage is 2061 // available. 2062 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2063 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2064 2065 if (isFiniteNonZero()) { 2066 fs = normalize(rounding_mode, lostFraction); 2067 *losesInfo = (fs != opOK); 2068 } else if (category == fcNaN) { 2069 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2070 2071 // For x87 extended precision, we want to make a NaN, not a special NaN if 2072 // the input wasn't special either. 2073 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2074 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2075 2076 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2077 // does not give you back the same bits. This is dubious, and we 2078 // don't currently do it. You're really supposed to get 2079 // an invalid operation signal at runtime, but nobody does that. 2080 fs = opOK; 2081 } else { 2082 *losesInfo = false; 2083 fs = opOK; 2084 } 2085 2086 return fs; 2087 } 2088 2089 /* Convert a floating point number to an integer according to the 2090 rounding mode. If the rounded integer value is out of range this 2091 returns an invalid operation exception and the contents of the 2092 destination parts are unspecified. If the rounded value is in 2093 range but the floating point number is not the exact integer, the C 2094 standard doesn't require an inexact exception to be raised. IEEE 2095 854 does require it so we do that. 2096 2097 Note that for conversions to integer type the C standard requires 2098 round-to-zero to always be used. */ 2099 APFloat::opStatus 2100 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2101 bool isSigned, 2102 roundingMode rounding_mode, 2103 bool *isExact) const 2104 { 2105 lostFraction lost_fraction; 2106 const integerPart *src; 2107 unsigned int dstPartsCount, truncatedBits; 2108 2109 *isExact = false; 2110 2111 /* Handle the three special cases first. */ 2112 if (category == fcInfinity || category == fcNaN) 2113 return opInvalidOp; 2114 2115 dstPartsCount = partCountForBits(width); 2116 2117 if (category == fcZero) { 2118 APInt::tcSet(parts, 0, dstPartsCount); 2119 // Negative zero can't be represented as an int. 2120 *isExact = !sign; 2121 return opOK; 2122 } 2123 2124 src = significandParts(); 2125 2126 /* Step 1: place our absolute value, with any fraction truncated, in 2127 the destination. */ 2128 if (exponent < 0) { 2129 /* Our absolute value is less than one; truncate everything. */ 2130 APInt::tcSet(parts, 0, dstPartsCount); 2131 /* For exponent -1 the integer bit represents .5, look at that. 2132 For smaller exponents leftmost truncated bit is 0. */ 2133 truncatedBits = semantics->precision -1U - exponent; 2134 } else { 2135 /* We want the most significant (exponent + 1) bits; the rest are 2136 truncated. */ 2137 unsigned int bits = exponent + 1U; 2138 2139 /* Hopelessly large in magnitude? */ 2140 if (bits > width) 2141 return opInvalidOp; 2142 2143 if (bits < semantics->precision) { 2144 /* We truncate (semantics->precision - bits) bits. */ 2145 truncatedBits = semantics->precision - bits; 2146 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2147 } else { 2148 /* We want at least as many bits as are available. */ 2149 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2150 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2151 truncatedBits = 0; 2152 } 2153 } 2154 2155 /* Step 2: work out any lost fraction, and increment the absolute 2156 value if we would round away from zero. */ 2157 if (truncatedBits) { 2158 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2159 truncatedBits); 2160 if (lost_fraction != lfExactlyZero && 2161 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2162 if (APInt::tcIncrement(parts, dstPartsCount)) 2163 return opInvalidOp; /* Overflow. */ 2164 } 2165 } else { 2166 lost_fraction = lfExactlyZero; 2167 } 2168 2169 /* Step 3: check if we fit in the destination. */ 2170 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2171 2172 if (sign) { 2173 if (!isSigned) { 2174 /* Negative numbers cannot be represented as unsigned. */ 2175 if (omsb != 0) 2176 return opInvalidOp; 2177 } else { 2178 /* It takes omsb bits to represent the unsigned integer value. 2179 We lose a bit for the sign, but care is needed as the 2180 maximally negative integer is a special case. */ 2181 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2182 return opInvalidOp; 2183 2184 /* This case can happen because of rounding. */ 2185 if (omsb > width) 2186 return opInvalidOp; 2187 } 2188 2189 APInt::tcNegate (parts, dstPartsCount); 2190 } else { 2191 if (omsb >= width + !isSigned) 2192 return opInvalidOp; 2193 } 2194 2195 if (lost_fraction == lfExactlyZero) { 2196 *isExact = true; 2197 return opOK; 2198 } else 2199 return opInexact; 2200 } 2201 2202 /* Same as convertToSignExtendedInteger, except we provide 2203 deterministic values in case of an invalid operation exception, 2204 namely zero for NaNs and the minimal or maximal value respectively 2205 for underflow or overflow. 2206 The *isExact output tells whether the result is exact, in the sense 2207 that converting it back to the original floating point type produces 2208 the original value. This is almost equivalent to result==opOK, 2209 except for negative zeroes. 2210 */ 2211 APFloat::opStatus 2212 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2213 bool isSigned, 2214 roundingMode rounding_mode, bool *isExact) const 2215 { 2216 opStatus fs; 2217 2218 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2219 isExact); 2220 2221 if (fs == opInvalidOp) { 2222 unsigned int bits, dstPartsCount; 2223 2224 dstPartsCount = partCountForBits(width); 2225 2226 if (category == fcNaN) 2227 bits = 0; 2228 else if (sign) 2229 bits = isSigned; 2230 else 2231 bits = width - isSigned; 2232 2233 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2234 if (sign && isSigned) 2235 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2236 } 2237 2238 return fs; 2239 } 2240 2241 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2242 an APSInt, whose initial bit-width and signed-ness are used to determine the 2243 precision of the conversion. 2244 */ 2245 APFloat::opStatus 2246 APFloat::convertToInteger(APSInt &result, 2247 roundingMode rounding_mode, bool *isExact) const 2248 { 2249 unsigned bitWidth = result.getBitWidth(); 2250 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2251 opStatus status = convertToInteger( 2252 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2253 // Keeps the original signed-ness. 2254 result = APInt(bitWidth, parts); 2255 return status; 2256 } 2257 2258 /* Convert an unsigned integer SRC to a floating point number, 2259 rounding according to ROUNDING_MODE. The sign of the floating 2260 point number is not modified. */ 2261 APFloat::opStatus 2262 APFloat::convertFromUnsignedParts(const integerPart *src, 2263 unsigned int srcCount, 2264 roundingMode rounding_mode) 2265 { 2266 unsigned int omsb, precision, dstCount; 2267 integerPart *dst; 2268 lostFraction lost_fraction; 2269 2270 category = fcNormal; 2271 omsb = APInt::tcMSB(src, srcCount) + 1; 2272 dst = significandParts(); 2273 dstCount = partCount(); 2274 precision = semantics->precision; 2275 2276 /* We want the most significant PRECISION bits of SRC. There may not 2277 be that many; extract what we can. */ 2278 if (precision <= omsb) { 2279 exponent = omsb - 1; 2280 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2281 omsb - precision); 2282 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2283 } else { 2284 exponent = precision - 1; 2285 lost_fraction = lfExactlyZero; 2286 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2287 } 2288 2289 return normalize(rounding_mode, lost_fraction); 2290 } 2291 2292 APFloat::opStatus 2293 APFloat::convertFromAPInt(const APInt &Val, 2294 bool isSigned, 2295 roundingMode rounding_mode) 2296 { 2297 unsigned int partCount = Val.getNumWords(); 2298 APInt api = Val; 2299 2300 sign = false; 2301 if (isSigned && api.isNegative()) { 2302 sign = true; 2303 api = -api; 2304 } 2305 2306 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2307 } 2308 2309 /* Convert a two's complement integer SRC to a floating point number, 2310 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2311 integer is signed, in which case it must be sign-extended. */ 2312 APFloat::opStatus 2313 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2314 unsigned int srcCount, 2315 bool isSigned, 2316 roundingMode rounding_mode) 2317 { 2318 opStatus status; 2319 2320 if (isSigned && 2321 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2322 integerPart *copy; 2323 2324 /* If we're signed and negative negate a copy. */ 2325 sign = true; 2326 copy = new integerPart[srcCount]; 2327 APInt::tcAssign(copy, src, srcCount); 2328 APInt::tcNegate(copy, srcCount); 2329 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2330 delete [] copy; 2331 } else { 2332 sign = false; 2333 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2334 } 2335 2336 return status; 2337 } 2338 2339 /* FIXME: should this just take a const APInt reference? */ 2340 APFloat::opStatus 2341 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2342 unsigned int width, bool isSigned, 2343 roundingMode rounding_mode) 2344 { 2345 unsigned int partCount = partCountForBits(width); 2346 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2347 2348 sign = false; 2349 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2350 sign = true; 2351 api = -api; 2352 } 2353 2354 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2355 } 2356 2357 APFloat::opStatus 2358 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2359 { 2360 lostFraction lost_fraction = lfExactlyZero; 2361 2362 category = fcNormal; 2363 zeroSignificand(); 2364 exponent = 0; 2365 2366 integerPart *significand = significandParts(); 2367 unsigned partsCount = partCount(); 2368 unsigned bitPos = partsCount * integerPartWidth; 2369 bool computedTrailingFraction = false; 2370 2371 // Skip leading zeroes and any (hexa)decimal point. 2372 StringRef::iterator begin = s.begin(); 2373 StringRef::iterator end = s.end(); 2374 StringRef::iterator dot; 2375 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2376 StringRef::iterator firstSignificantDigit = p; 2377 2378 while (p != end) { 2379 integerPart hex_value; 2380 2381 if (*p == '.') { 2382 assert(dot == end && "String contains multiple dots"); 2383 dot = p++; 2384 continue; 2385 } 2386 2387 hex_value = hexDigitValue(*p); 2388 if (hex_value == -1U) 2389 break; 2390 2391 p++; 2392 2393 // Store the number while we have space. 2394 if (bitPos) { 2395 bitPos -= 4; 2396 hex_value <<= bitPos % integerPartWidth; 2397 significand[bitPos / integerPartWidth] |= hex_value; 2398 } else if (!computedTrailingFraction) { 2399 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2400 computedTrailingFraction = true; 2401 } 2402 } 2403 2404 /* Hex floats require an exponent but not a hexadecimal point. */ 2405 assert(p != end && "Hex strings require an exponent"); 2406 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2407 assert(p != begin && "Significand has no digits"); 2408 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2409 2410 /* Ignore the exponent if we are zero. */ 2411 if (p != firstSignificantDigit) { 2412 int expAdjustment; 2413 2414 /* Implicit hexadecimal point? */ 2415 if (dot == end) 2416 dot = p; 2417 2418 /* Calculate the exponent adjustment implicit in the number of 2419 significant digits. */ 2420 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2421 if (expAdjustment < 0) 2422 expAdjustment++; 2423 expAdjustment = expAdjustment * 4 - 1; 2424 2425 /* Adjust for writing the significand starting at the most 2426 significant nibble. */ 2427 expAdjustment += semantics->precision; 2428 expAdjustment -= partsCount * integerPartWidth; 2429 2430 /* Adjust for the given exponent. */ 2431 exponent = totalExponent(p + 1, end, expAdjustment); 2432 } 2433 2434 return normalize(rounding_mode, lost_fraction); 2435 } 2436 2437 APFloat::opStatus 2438 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2439 unsigned sigPartCount, int exp, 2440 roundingMode rounding_mode) 2441 { 2442 unsigned int parts, pow5PartCount; 2443 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2444 integerPart pow5Parts[maxPowerOfFiveParts]; 2445 bool isNearest; 2446 2447 isNearest = (rounding_mode == rmNearestTiesToEven || 2448 rounding_mode == rmNearestTiesToAway); 2449 2450 parts = partCountForBits(semantics->precision + 11); 2451 2452 /* Calculate pow(5, abs(exp)). */ 2453 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2454 2455 for (;; parts *= 2) { 2456 opStatus sigStatus, powStatus; 2457 unsigned int excessPrecision, truncatedBits; 2458 2459 calcSemantics.precision = parts * integerPartWidth - 1; 2460 excessPrecision = calcSemantics.precision - semantics->precision; 2461 truncatedBits = excessPrecision; 2462 2463 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2464 APFloat pow5(calcSemantics); 2465 2466 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2467 rmNearestTiesToEven); 2468 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2469 rmNearestTiesToEven); 2470 /* Add exp, as 10^n = 5^n * 2^n. */ 2471 decSig.exponent += exp; 2472 2473 lostFraction calcLostFraction; 2474 integerPart HUerr, HUdistance; 2475 unsigned int powHUerr; 2476 2477 if (exp >= 0) { 2478 /* multiplySignificand leaves the precision-th bit set to 1. */ 2479 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2480 powHUerr = powStatus != opOK; 2481 } else { 2482 calcLostFraction = decSig.divideSignificand(pow5); 2483 /* Denormal numbers have less precision. */ 2484 if (decSig.exponent < semantics->minExponent) { 2485 excessPrecision += (semantics->minExponent - decSig.exponent); 2486 truncatedBits = excessPrecision; 2487 if (excessPrecision > calcSemantics.precision) 2488 excessPrecision = calcSemantics.precision; 2489 } 2490 /* Extra half-ulp lost in reciprocal of exponent. */ 2491 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2492 } 2493 2494 /* Both multiplySignificand and divideSignificand return the 2495 result with the integer bit set. */ 2496 assert(APInt::tcExtractBit 2497 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2498 2499 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2500 powHUerr); 2501 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2502 excessPrecision, isNearest); 2503 2504 /* Are we guaranteed to round correctly if we truncate? */ 2505 if (HUdistance >= HUerr) { 2506 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2507 calcSemantics.precision - excessPrecision, 2508 excessPrecision); 2509 /* Take the exponent of decSig. If we tcExtract-ed less bits 2510 above we must adjust our exponent to compensate for the 2511 implicit right shift. */ 2512 exponent = (decSig.exponent + semantics->precision 2513 - (calcSemantics.precision - excessPrecision)); 2514 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2515 decSig.partCount(), 2516 truncatedBits); 2517 return normalize(rounding_mode, calcLostFraction); 2518 } 2519 } 2520 } 2521 2522 APFloat::opStatus 2523 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2524 { 2525 decimalInfo D; 2526 opStatus fs; 2527 2528 /* Scan the text. */ 2529 StringRef::iterator p = str.begin(); 2530 interpretDecimal(p, str.end(), &D); 2531 2532 /* Handle the quick cases. First the case of no significant digits, 2533 i.e. zero, and then exponents that are obviously too large or too 2534 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2535 definitely overflows if 2536 2537 (exp - 1) * L >= maxExponent 2538 2539 and definitely underflows to zero where 2540 2541 (exp + 1) * L <= minExponent - precision 2542 2543 With integer arithmetic the tightest bounds for L are 2544 2545 93/28 < L < 196/59 [ numerator <= 256 ] 2546 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2547 */ 2548 2549 // Test if we have a zero number allowing for strings with no null terminators 2550 // and zero decimals with non-zero exponents. 2551 // 2552 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2553 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2554 // be at most one dot. On the other hand, if we have a zero with a non-zero 2555 // exponent, then we know that D.firstSigDigit will be non-numeric. 2556 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2557 category = fcZero; 2558 fs = opOK; 2559 2560 /* Check whether the normalized exponent is high enough to overflow 2561 max during the log-rebasing in the max-exponent check below. */ 2562 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2563 fs = handleOverflow(rounding_mode); 2564 2565 /* If it wasn't, then it also wasn't high enough to overflow max 2566 during the log-rebasing in the min-exponent check. Check that it 2567 won't overflow min in either check, then perform the min-exponent 2568 check. */ 2569 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2570 (D.normalizedExponent + 1) * 28738 <= 2571 8651 * (semantics->minExponent - (int) semantics->precision)) { 2572 /* Underflow to zero and round. */ 2573 category = fcNormal; 2574 zeroSignificand(); 2575 fs = normalize(rounding_mode, lfLessThanHalf); 2576 2577 /* We can finally safely perform the max-exponent check. */ 2578 } else if ((D.normalizedExponent - 1) * 42039 2579 >= 12655 * semantics->maxExponent) { 2580 /* Overflow and round. */ 2581 fs = handleOverflow(rounding_mode); 2582 } else { 2583 integerPart *decSignificand; 2584 unsigned int partCount; 2585 2586 /* A tight upper bound on number of bits required to hold an 2587 N-digit decimal integer is N * 196 / 59. Allocate enough space 2588 to hold the full significand, and an extra part required by 2589 tcMultiplyPart. */ 2590 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2591 partCount = partCountForBits(1 + 196 * partCount / 59); 2592 decSignificand = new integerPart[partCount + 1]; 2593 partCount = 0; 2594 2595 /* Convert to binary efficiently - we do almost all multiplication 2596 in an integerPart. When this would overflow do we do a single 2597 bignum multiplication, and then revert again to multiplication 2598 in an integerPart. */ 2599 do { 2600 integerPart decValue, val, multiplier; 2601 2602 val = 0; 2603 multiplier = 1; 2604 2605 do { 2606 if (*p == '.') { 2607 p++; 2608 if (p == str.end()) { 2609 break; 2610 } 2611 } 2612 decValue = decDigitValue(*p++); 2613 assert(decValue < 10U && "Invalid character in significand"); 2614 multiplier *= 10; 2615 val = val * 10 + decValue; 2616 /* The maximum number that can be multiplied by ten with any 2617 digit added without overflowing an integerPart. */ 2618 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2619 2620 /* Multiply out the current part. */ 2621 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2622 partCount, partCount + 1, false); 2623 2624 /* If we used another part (likely but not guaranteed), increase 2625 the count. */ 2626 if (decSignificand[partCount]) 2627 partCount++; 2628 } while (p <= D.lastSigDigit); 2629 2630 category = fcNormal; 2631 fs = roundSignificandWithExponent(decSignificand, partCount, 2632 D.exponent, rounding_mode); 2633 2634 delete [] decSignificand; 2635 } 2636 2637 return fs; 2638 } 2639 2640 bool 2641 APFloat::convertFromStringSpecials(StringRef str) { 2642 if (str.equals("inf") || str.equals("INFINITY")) { 2643 makeInf(false); 2644 return true; 2645 } 2646 2647 if (str.equals("-inf") || str.equals("-INFINITY")) { 2648 makeInf(true); 2649 return true; 2650 } 2651 2652 if (str.equals("nan") || str.equals("NaN")) { 2653 makeNaN(false, false); 2654 return true; 2655 } 2656 2657 if (str.equals("-nan") || str.equals("-NaN")) { 2658 makeNaN(false, true); 2659 return true; 2660 } 2661 2662 return false; 2663 } 2664 2665 APFloat::opStatus 2666 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2667 { 2668 assert(!str.empty() && "Invalid string length"); 2669 2670 // Handle special cases. 2671 if (convertFromStringSpecials(str)) 2672 return opOK; 2673 2674 /* Handle a leading minus sign. */ 2675 StringRef::iterator p = str.begin(); 2676 size_t slen = str.size(); 2677 sign = *p == '-' ? 1 : 0; 2678 if (*p == '-' || *p == '+') { 2679 p++; 2680 slen--; 2681 assert(slen && "String has no digits"); 2682 } 2683 2684 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2685 assert(slen - 2 && "Invalid string"); 2686 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2687 rounding_mode); 2688 } 2689 2690 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2691 } 2692 2693 /* Write out a hexadecimal representation of the floating point value 2694 to DST, which must be of sufficient size, in the C99 form 2695 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2696 excluding the terminating NUL. 2697 2698 If UPPERCASE, the output is in upper case, otherwise in lower case. 2699 2700 HEXDIGITS digits appear altogether, rounding the value if 2701 necessary. If HEXDIGITS is 0, the minimal precision to display the 2702 number precisely is used instead. If nothing would appear after 2703 the decimal point it is suppressed. 2704 2705 The decimal exponent is always printed and has at least one digit. 2706 Zero values display an exponent of zero. Infinities and NaNs 2707 appear as "infinity" or "nan" respectively. 2708 2709 The above rules are as specified by C99. There is ambiguity about 2710 what the leading hexadecimal digit should be. This implementation 2711 uses whatever is necessary so that the exponent is displayed as 2712 stored. This implies the exponent will fall within the IEEE format 2713 range, and the leading hexadecimal digit will be 0 (for denormals), 2714 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2715 any other digits zero). 2716 */ 2717 unsigned int 2718 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2719 bool upperCase, roundingMode rounding_mode) const 2720 { 2721 char *p; 2722 2723 p = dst; 2724 if (sign) 2725 *dst++ = '-'; 2726 2727 switch (category) { 2728 case fcInfinity: 2729 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2730 dst += sizeof infinityL - 1; 2731 break; 2732 2733 case fcNaN: 2734 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2735 dst += sizeof NaNU - 1; 2736 break; 2737 2738 case fcZero: 2739 *dst++ = '0'; 2740 *dst++ = upperCase ? 'X': 'x'; 2741 *dst++ = '0'; 2742 if (hexDigits > 1) { 2743 *dst++ = '.'; 2744 memset (dst, '0', hexDigits - 1); 2745 dst += hexDigits - 1; 2746 } 2747 *dst++ = upperCase ? 'P': 'p'; 2748 *dst++ = '0'; 2749 break; 2750 2751 case fcNormal: 2752 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2753 break; 2754 } 2755 2756 *dst = 0; 2757 2758 return static_cast<unsigned int>(dst - p); 2759 } 2760 2761 /* Does the hard work of outputting the correctly rounded hexadecimal 2762 form of a normal floating point number with the specified number of 2763 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2764 digits necessary to print the value precisely is output. */ 2765 char * 2766 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2767 bool upperCase, 2768 roundingMode rounding_mode) const 2769 { 2770 unsigned int count, valueBits, shift, partsCount, outputDigits; 2771 const char *hexDigitChars; 2772 const integerPart *significand; 2773 char *p; 2774 bool roundUp; 2775 2776 *dst++ = '0'; 2777 *dst++ = upperCase ? 'X': 'x'; 2778 2779 roundUp = false; 2780 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2781 2782 significand = significandParts(); 2783 partsCount = partCount(); 2784 2785 /* +3 because the first digit only uses the single integer bit, so 2786 we have 3 virtual zero most-significant-bits. */ 2787 valueBits = semantics->precision + 3; 2788 shift = integerPartWidth - valueBits % integerPartWidth; 2789 2790 /* The natural number of digits required ignoring trailing 2791 insignificant zeroes. */ 2792 outputDigits = (valueBits - significandLSB () + 3) / 4; 2793 2794 /* hexDigits of zero means use the required number for the 2795 precision. Otherwise, see if we are truncating. If we are, 2796 find out if we need to round away from zero. */ 2797 if (hexDigits) { 2798 if (hexDigits < outputDigits) { 2799 /* We are dropping non-zero bits, so need to check how to round. 2800 "bits" is the number of dropped bits. */ 2801 unsigned int bits; 2802 lostFraction fraction; 2803 2804 bits = valueBits - hexDigits * 4; 2805 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2806 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2807 } 2808 outputDigits = hexDigits; 2809 } 2810 2811 /* Write the digits consecutively, and start writing in the location 2812 of the hexadecimal point. We move the most significant digit 2813 left and add the hexadecimal point later. */ 2814 p = ++dst; 2815 2816 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2817 2818 while (outputDigits && count) { 2819 integerPart part; 2820 2821 /* Put the most significant integerPartWidth bits in "part". */ 2822 if (--count == partsCount) 2823 part = 0; /* An imaginary higher zero part. */ 2824 else 2825 part = significand[count] << shift; 2826 2827 if (count && shift) 2828 part |= significand[count - 1] >> (integerPartWidth - shift); 2829 2830 /* Convert as much of "part" to hexdigits as we can. */ 2831 unsigned int curDigits = integerPartWidth / 4; 2832 2833 if (curDigits > outputDigits) 2834 curDigits = outputDigits; 2835 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2836 outputDigits -= curDigits; 2837 } 2838 2839 if (roundUp) { 2840 char *q = dst; 2841 2842 /* Note that hexDigitChars has a trailing '0'. */ 2843 do { 2844 q--; 2845 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2846 } while (*q == '0'); 2847 assert(q >= p); 2848 } else { 2849 /* Add trailing zeroes. */ 2850 memset (dst, '0', outputDigits); 2851 dst += outputDigits; 2852 } 2853 2854 /* Move the most significant digit to before the point, and if there 2855 is something after the decimal point add it. This must come 2856 after rounding above. */ 2857 p[-1] = p[0]; 2858 if (dst -1 == p) 2859 dst--; 2860 else 2861 p[0] = '.'; 2862 2863 /* Finally output the exponent. */ 2864 *dst++ = upperCase ? 'P': 'p'; 2865 2866 return writeSignedDecimal (dst, exponent); 2867 } 2868 2869 hash_code llvm::hash_value(const APFloat &Arg) { 2870 if (!Arg.isFiniteNonZero()) 2871 return hash_combine((uint8_t)Arg.category, 2872 // NaN has no sign, fix it at zero. 2873 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2874 Arg.semantics->precision); 2875 2876 // Normal floats need their exponent and significand hashed. 2877 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2878 Arg.semantics->precision, Arg.exponent, 2879 hash_combine_range( 2880 Arg.significandParts(), 2881 Arg.significandParts() + Arg.partCount())); 2882 } 2883 2884 // Conversion from APFloat to/from host float/double. It may eventually be 2885 // possible to eliminate these and have everybody deal with APFloats, but that 2886 // will take a while. This approach will not easily extend to long double. 2887 // Current implementation requires integerPartWidth==64, which is correct at 2888 // the moment but could be made more general. 2889 2890 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2891 // the actual IEEE respresentations. We compensate for that here. 2892 2893 APInt 2894 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2895 { 2896 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2897 assert(partCount()==2); 2898 2899 uint64_t myexponent, mysignificand; 2900 2901 if (isFiniteNonZero()) { 2902 myexponent = exponent+16383; //bias 2903 mysignificand = significandParts()[0]; 2904 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2905 myexponent = 0; // denormal 2906 } else if (category==fcZero) { 2907 myexponent = 0; 2908 mysignificand = 0; 2909 } else if (category==fcInfinity) { 2910 myexponent = 0x7fff; 2911 mysignificand = 0x8000000000000000ULL; 2912 } else { 2913 assert(category == fcNaN && "Unknown category"); 2914 myexponent = 0x7fff; 2915 mysignificand = significandParts()[0]; 2916 } 2917 2918 uint64_t words[2]; 2919 words[0] = mysignificand; 2920 words[1] = ((uint64_t)(sign & 1) << 15) | 2921 (myexponent & 0x7fffLL); 2922 return APInt(80, words); 2923 } 2924 2925 APInt 2926 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2927 { 2928 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2929 assert(partCount()==2); 2930 2931 uint64_t words[2]; 2932 opStatus fs; 2933 bool losesInfo; 2934 2935 // Convert number to double. To avoid spurious underflows, we re- 2936 // normalize against the "double" minExponent first, and only *then* 2937 // truncate the mantissa. The result of that second conversion 2938 // may be inexact, but should never underflow. 2939 // Declare fltSemantics before APFloat that uses it (and 2940 // saves pointer to it) to ensure correct destruction order. 2941 fltSemantics extendedSemantics = *semantics; 2942 extendedSemantics.minExponent = IEEEdouble.minExponent; 2943 APFloat extended(*this); 2944 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2945 assert(fs == opOK && !losesInfo); 2946 (void)fs; 2947 2948 APFloat u(extended); 2949 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2950 assert(fs == opOK || fs == opInexact); 2951 (void)fs; 2952 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2953 2954 // If conversion was exact or resulted in a special case, we're done; 2955 // just set the second double to zero. Otherwise, re-convert back to 2956 // the extended format and compute the difference. This now should 2957 // convert exactly to double. 2958 if (u.isFiniteNonZero() && losesInfo) { 2959 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2960 assert(fs == opOK && !losesInfo); 2961 (void)fs; 2962 2963 APFloat v(extended); 2964 v.subtract(u, rmNearestTiesToEven); 2965 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2966 assert(fs == opOK && !losesInfo); 2967 (void)fs; 2968 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2969 } else { 2970 words[1] = 0; 2971 } 2972 2973 return APInt(128, words); 2974 } 2975 2976 APInt 2977 APFloat::convertQuadrupleAPFloatToAPInt() const 2978 { 2979 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2980 assert(partCount()==2); 2981 2982 uint64_t myexponent, mysignificand, mysignificand2; 2983 2984 if (isFiniteNonZero()) { 2985 myexponent = exponent+16383; //bias 2986 mysignificand = significandParts()[0]; 2987 mysignificand2 = significandParts()[1]; 2988 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2989 myexponent = 0; // denormal 2990 } else if (category==fcZero) { 2991 myexponent = 0; 2992 mysignificand = mysignificand2 = 0; 2993 } else if (category==fcInfinity) { 2994 myexponent = 0x7fff; 2995 mysignificand = mysignificand2 = 0; 2996 } else { 2997 assert(category == fcNaN && "Unknown category!"); 2998 myexponent = 0x7fff; 2999 mysignificand = significandParts()[0]; 3000 mysignificand2 = significandParts()[1]; 3001 } 3002 3003 uint64_t words[2]; 3004 words[0] = mysignificand; 3005 words[1] = ((uint64_t)(sign & 1) << 63) | 3006 ((myexponent & 0x7fff) << 48) | 3007 (mysignificand2 & 0xffffffffffffLL); 3008 3009 return APInt(128, words); 3010 } 3011 3012 APInt 3013 APFloat::convertDoubleAPFloatToAPInt() const 3014 { 3015 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 3016 assert(partCount()==1); 3017 3018 uint64_t myexponent, mysignificand; 3019 3020 if (isFiniteNonZero()) { 3021 myexponent = exponent+1023; //bias 3022 mysignificand = *significandParts(); 3023 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3024 myexponent = 0; // denormal 3025 } else if (category==fcZero) { 3026 myexponent = 0; 3027 mysignificand = 0; 3028 } else if (category==fcInfinity) { 3029 myexponent = 0x7ff; 3030 mysignificand = 0; 3031 } else { 3032 assert(category == fcNaN && "Unknown category!"); 3033 myexponent = 0x7ff; 3034 mysignificand = *significandParts(); 3035 } 3036 3037 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3038 ((myexponent & 0x7ff) << 52) | 3039 (mysignificand & 0xfffffffffffffLL)))); 3040 } 3041 3042 APInt 3043 APFloat::convertFloatAPFloatToAPInt() const 3044 { 3045 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3046 assert(partCount()==1); 3047 3048 uint32_t myexponent, mysignificand; 3049 3050 if (isFiniteNonZero()) { 3051 myexponent = exponent+127; //bias 3052 mysignificand = (uint32_t)*significandParts(); 3053 if (myexponent == 1 && !(mysignificand & 0x800000)) 3054 myexponent = 0; // denormal 3055 } else if (category==fcZero) { 3056 myexponent = 0; 3057 mysignificand = 0; 3058 } else if (category==fcInfinity) { 3059 myexponent = 0xff; 3060 mysignificand = 0; 3061 } else { 3062 assert(category == fcNaN && "Unknown category!"); 3063 myexponent = 0xff; 3064 mysignificand = (uint32_t)*significandParts(); 3065 } 3066 3067 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3068 (mysignificand & 0x7fffff))); 3069 } 3070 3071 APInt 3072 APFloat::convertHalfAPFloatToAPInt() const 3073 { 3074 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3075 assert(partCount()==1); 3076 3077 uint32_t myexponent, mysignificand; 3078 3079 if (isFiniteNonZero()) { 3080 myexponent = exponent+15; //bias 3081 mysignificand = (uint32_t)*significandParts(); 3082 if (myexponent == 1 && !(mysignificand & 0x400)) 3083 myexponent = 0; // denormal 3084 } else if (category==fcZero) { 3085 myexponent = 0; 3086 mysignificand = 0; 3087 } else if (category==fcInfinity) { 3088 myexponent = 0x1f; 3089 mysignificand = 0; 3090 } else { 3091 assert(category == fcNaN && "Unknown category!"); 3092 myexponent = 0x1f; 3093 mysignificand = (uint32_t)*significandParts(); 3094 } 3095 3096 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3097 (mysignificand & 0x3ff))); 3098 } 3099 3100 // This function creates an APInt that is just a bit map of the floating 3101 // point constant as it would appear in memory. It is not a conversion, 3102 // and treating the result as a normal integer is unlikely to be useful. 3103 3104 APInt 3105 APFloat::bitcastToAPInt() const 3106 { 3107 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3108 return convertHalfAPFloatToAPInt(); 3109 3110 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3111 return convertFloatAPFloatToAPInt(); 3112 3113 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3114 return convertDoubleAPFloatToAPInt(); 3115 3116 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3117 return convertQuadrupleAPFloatToAPInt(); 3118 3119 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3120 return convertPPCDoubleDoubleAPFloatToAPInt(); 3121 3122 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3123 "unknown format!"); 3124 return convertF80LongDoubleAPFloatToAPInt(); 3125 } 3126 3127 float 3128 APFloat::convertToFloat() const 3129 { 3130 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3131 "Float semantics are not IEEEsingle"); 3132 APInt api = bitcastToAPInt(); 3133 return api.bitsToFloat(); 3134 } 3135 3136 double 3137 APFloat::convertToDouble() const 3138 { 3139 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3140 "Float semantics are not IEEEdouble"); 3141 APInt api = bitcastToAPInt(); 3142 return api.bitsToDouble(); 3143 } 3144 3145 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3146 /// does not support these bit patterns: 3147 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3148 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3149 /// exponent = 0, integer bit 1 ("pseudodenormal") 3150 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3151 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3152 void 3153 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3154 { 3155 assert(api.getBitWidth()==80); 3156 uint64_t i1 = api.getRawData()[0]; 3157 uint64_t i2 = api.getRawData()[1]; 3158 uint64_t myexponent = (i2 & 0x7fff); 3159 uint64_t mysignificand = i1; 3160 3161 initialize(&APFloat::x87DoubleExtended); 3162 assert(partCount()==2); 3163 3164 sign = static_cast<unsigned int>(i2>>15); 3165 if (myexponent==0 && mysignificand==0) { 3166 // exponent, significand meaningless 3167 category = fcZero; 3168 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3169 // exponent, significand meaningless 3170 category = fcInfinity; 3171 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3172 // exponent meaningless 3173 category = fcNaN; 3174 significandParts()[0] = mysignificand; 3175 significandParts()[1] = 0; 3176 } else { 3177 category = fcNormal; 3178 exponent = myexponent - 16383; 3179 significandParts()[0] = mysignificand; 3180 significandParts()[1] = 0; 3181 if (myexponent==0) // denormal 3182 exponent = -16382; 3183 } 3184 } 3185 3186 void 3187 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3188 { 3189 assert(api.getBitWidth()==128); 3190 uint64_t i1 = api.getRawData()[0]; 3191 uint64_t i2 = api.getRawData()[1]; 3192 opStatus fs; 3193 bool losesInfo; 3194 3195 // Get the first double and convert to our format. 3196 initFromDoubleAPInt(APInt(64, i1)); 3197 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3198 assert(fs == opOK && !losesInfo); 3199 (void)fs; 3200 3201 // Unless we have a special case, add in second double. 3202 if (isFiniteNonZero()) { 3203 APFloat v(IEEEdouble, APInt(64, i2)); 3204 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3205 assert(fs == opOK && !losesInfo); 3206 (void)fs; 3207 3208 add(v, rmNearestTiesToEven); 3209 } 3210 } 3211 3212 void 3213 APFloat::initFromQuadrupleAPInt(const APInt &api) 3214 { 3215 assert(api.getBitWidth()==128); 3216 uint64_t i1 = api.getRawData()[0]; 3217 uint64_t i2 = api.getRawData()[1]; 3218 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3219 uint64_t mysignificand = i1; 3220 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3221 3222 initialize(&APFloat::IEEEquad); 3223 assert(partCount()==2); 3224 3225 sign = static_cast<unsigned int>(i2>>63); 3226 if (myexponent==0 && 3227 (mysignificand==0 && mysignificand2==0)) { 3228 // exponent, significand meaningless 3229 category = fcZero; 3230 } else if (myexponent==0x7fff && 3231 (mysignificand==0 && mysignificand2==0)) { 3232 // exponent, significand meaningless 3233 category = fcInfinity; 3234 } else if (myexponent==0x7fff && 3235 (mysignificand!=0 || mysignificand2 !=0)) { 3236 // exponent meaningless 3237 category = fcNaN; 3238 significandParts()[0] = mysignificand; 3239 significandParts()[1] = mysignificand2; 3240 } else { 3241 category = fcNormal; 3242 exponent = myexponent - 16383; 3243 significandParts()[0] = mysignificand; 3244 significandParts()[1] = mysignificand2; 3245 if (myexponent==0) // denormal 3246 exponent = -16382; 3247 else 3248 significandParts()[1] |= 0x1000000000000LL; // integer bit 3249 } 3250 } 3251 3252 void 3253 APFloat::initFromDoubleAPInt(const APInt &api) 3254 { 3255 assert(api.getBitWidth()==64); 3256 uint64_t i = *api.getRawData(); 3257 uint64_t myexponent = (i >> 52) & 0x7ff; 3258 uint64_t mysignificand = i & 0xfffffffffffffLL; 3259 3260 initialize(&APFloat::IEEEdouble); 3261 assert(partCount()==1); 3262 3263 sign = static_cast<unsigned int>(i>>63); 3264 if (myexponent==0 && mysignificand==0) { 3265 // exponent, significand meaningless 3266 category = fcZero; 3267 } else if (myexponent==0x7ff && mysignificand==0) { 3268 // exponent, significand meaningless 3269 category = fcInfinity; 3270 } else if (myexponent==0x7ff && mysignificand!=0) { 3271 // exponent meaningless 3272 category = fcNaN; 3273 *significandParts() = mysignificand; 3274 } else { 3275 category = fcNormal; 3276 exponent = myexponent - 1023; 3277 *significandParts() = mysignificand; 3278 if (myexponent==0) // denormal 3279 exponent = -1022; 3280 else 3281 *significandParts() |= 0x10000000000000LL; // integer bit 3282 } 3283 } 3284 3285 void 3286 APFloat::initFromFloatAPInt(const APInt & api) 3287 { 3288 assert(api.getBitWidth()==32); 3289 uint32_t i = (uint32_t)*api.getRawData(); 3290 uint32_t myexponent = (i >> 23) & 0xff; 3291 uint32_t mysignificand = i & 0x7fffff; 3292 3293 initialize(&APFloat::IEEEsingle); 3294 assert(partCount()==1); 3295 3296 sign = i >> 31; 3297 if (myexponent==0 && mysignificand==0) { 3298 // exponent, significand meaningless 3299 category = fcZero; 3300 } else if (myexponent==0xff && mysignificand==0) { 3301 // exponent, significand meaningless 3302 category = fcInfinity; 3303 } else if (myexponent==0xff && mysignificand!=0) { 3304 // sign, exponent, significand meaningless 3305 category = fcNaN; 3306 *significandParts() = mysignificand; 3307 } else { 3308 category = fcNormal; 3309 exponent = myexponent - 127; //bias 3310 *significandParts() = mysignificand; 3311 if (myexponent==0) // denormal 3312 exponent = -126; 3313 else 3314 *significandParts() |= 0x800000; // integer bit 3315 } 3316 } 3317 3318 void 3319 APFloat::initFromHalfAPInt(const APInt & api) 3320 { 3321 assert(api.getBitWidth()==16); 3322 uint32_t i = (uint32_t)*api.getRawData(); 3323 uint32_t myexponent = (i >> 10) & 0x1f; 3324 uint32_t mysignificand = i & 0x3ff; 3325 3326 initialize(&APFloat::IEEEhalf); 3327 assert(partCount()==1); 3328 3329 sign = i >> 15; 3330 if (myexponent==0 && mysignificand==0) { 3331 // exponent, significand meaningless 3332 category = fcZero; 3333 } else if (myexponent==0x1f && mysignificand==0) { 3334 // exponent, significand meaningless 3335 category = fcInfinity; 3336 } else if (myexponent==0x1f && mysignificand!=0) { 3337 // sign, exponent, significand meaningless 3338 category = fcNaN; 3339 *significandParts() = mysignificand; 3340 } else { 3341 category = fcNormal; 3342 exponent = myexponent - 15; //bias 3343 *significandParts() = mysignificand; 3344 if (myexponent==0) // denormal 3345 exponent = -14; 3346 else 3347 *significandParts() |= 0x400; // integer bit 3348 } 3349 } 3350 3351 /// Treat api as containing the bits of a floating point number. Currently 3352 /// we infer the floating point type from the size of the APInt. The 3353 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3354 /// when the size is anything else). 3355 void 3356 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3357 { 3358 if (Sem == &IEEEhalf) 3359 return initFromHalfAPInt(api); 3360 if (Sem == &IEEEsingle) 3361 return initFromFloatAPInt(api); 3362 if (Sem == &IEEEdouble) 3363 return initFromDoubleAPInt(api); 3364 if (Sem == &x87DoubleExtended) 3365 return initFromF80LongDoubleAPInt(api); 3366 if (Sem == &IEEEquad) 3367 return initFromQuadrupleAPInt(api); 3368 if (Sem == &PPCDoubleDouble) 3369 return initFromPPCDoubleDoubleAPInt(api); 3370 3371 llvm_unreachable(nullptr); 3372 } 3373 3374 APFloat 3375 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3376 { 3377 switch (BitWidth) { 3378 case 16: 3379 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3380 case 32: 3381 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3382 case 64: 3383 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3384 case 80: 3385 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3386 case 128: 3387 if (isIEEE) 3388 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3389 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3390 default: 3391 llvm_unreachable("Unknown floating bit width"); 3392 } 3393 } 3394 3395 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { 3396 return Sem.sizeInBits; 3397 } 3398 3399 /// Make this number the largest magnitude normal number in the given 3400 /// semantics. 3401 void APFloat::makeLargest(bool Negative) { 3402 // We want (in interchange format): 3403 // sign = {Negative} 3404 // exponent = 1..10 3405 // significand = 1..1 3406 category = fcNormal; 3407 sign = Negative; 3408 exponent = semantics->maxExponent; 3409 3410 // Use memset to set all but the highest integerPart to all ones. 3411 integerPart *significand = significandParts(); 3412 unsigned PartCount = partCount(); 3413 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3414 3415 // Set the high integerPart especially setting all unused top bits for 3416 // internal consistency. 3417 const unsigned NumUnusedHighBits = 3418 PartCount*integerPartWidth - semantics->precision; 3419 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3420 ? (~integerPart(0) >> NumUnusedHighBits) 3421 : 0; 3422 } 3423 3424 /// Make this number the smallest magnitude denormal number in the given 3425 /// semantics. 3426 void APFloat::makeSmallest(bool Negative) { 3427 // We want (in interchange format): 3428 // sign = {Negative} 3429 // exponent = 0..0 3430 // significand = 0..01 3431 category = fcNormal; 3432 sign = Negative; 3433 exponent = semantics->minExponent; 3434 APInt::tcSet(significandParts(), 1, partCount()); 3435 } 3436 3437 3438 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3439 // We want (in interchange format): 3440 // sign = {Negative} 3441 // exponent = 1..10 3442 // significand = 1..1 3443 APFloat Val(Sem, uninitialized); 3444 Val.makeLargest(Negative); 3445 return Val; 3446 } 3447 3448 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3449 // We want (in interchange format): 3450 // sign = {Negative} 3451 // exponent = 0..0 3452 // significand = 0..01 3453 APFloat Val(Sem, uninitialized); 3454 Val.makeSmallest(Negative); 3455 return Val; 3456 } 3457 3458 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3459 APFloat Val(Sem, uninitialized); 3460 3461 // We want (in interchange format): 3462 // sign = {Negative} 3463 // exponent = 0..0 3464 // significand = 10..0 3465 3466 Val.category = fcNormal; 3467 Val.zeroSignificand(); 3468 Val.sign = Negative; 3469 Val.exponent = Sem.minExponent; 3470 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3471 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3472 3473 return Val; 3474 } 3475 3476 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3477 initFromAPInt(&Sem, API); 3478 } 3479 3480 APFloat::APFloat(float f) { 3481 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3482 } 3483 3484 APFloat::APFloat(double d) { 3485 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3486 } 3487 3488 namespace { 3489 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3490 Buffer.append(Str.begin(), Str.end()); 3491 } 3492 3493 /// Removes data from the given significand until it is no more 3494 /// precise than is required for the desired precision. 3495 void AdjustToPrecision(APInt &significand, 3496 int &exp, unsigned FormatPrecision) { 3497 unsigned bits = significand.getActiveBits(); 3498 3499 // 196/59 is a very slight overestimate of lg_2(10). 3500 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3501 3502 if (bits <= bitsRequired) return; 3503 3504 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3505 if (!tensRemovable) return; 3506 3507 exp += tensRemovable; 3508 3509 APInt divisor(significand.getBitWidth(), 1); 3510 APInt powten(significand.getBitWidth(), 10); 3511 while (true) { 3512 if (tensRemovable & 1) 3513 divisor *= powten; 3514 tensRemovable >>= 1; 3515 if (!tensRemovable) break; 3516 powten *= powten; 3517 } 3518 3519 significand = significand.udiv(divisor); 3520 3521 // Truncate the significand down to its active bit count. 3522 significand = significand.trunc(significand.getActiveBits()); 3523 } 3524 3525 3526 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3527 int &exp, unsigned FormatPrecision) { 3528 unsigned N = buffer.size(); 3529 if (N <= FormatPrecision) return; 3530 3531 // The most significant figures are the last ones in the buffer. 3532 unsigned FirstSignificant = N - FormatPrecision; 3533 3534 // Round. 3535 // FIXME: this probably shouldn't use 'round half up'. 3536 3537 // Rounding down is just a truncation, except we also want to drop 3538 // trailing zeros from the new result. 3539 if (buffer[FirstSignificant - 1] < '5') { 3540 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3541 FirstSignificant++; 3542 3543 exp += FirstSignificant; 3544 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3545 return; 3546 } 3547 3548 // Rounding up requires a decimal add-with-carry. If we continue 3549 // the carry, the newly-introduced zeros will just be truncated. 3550 for (unsigned I = FirstSignificant; I != N; ++I) { 3551 if (buffer[I] == '9') { 3552 FirstSignificant++; 3553 } else { 3554 buffer[I]++; 3555 break; 3556 } 3557 } 3558 3559 // If we carried through, we have exactly one digit of precision. 3560 if (FirstSignificant == N) { 3561 exp += FirstSignificant; 3562 buffer.clear(); 3563 buffer.push_back('1'); 3564 return; 3565 } 3566 3567 exp += FirstSignificant; 3568 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3569 } 3570 } 3571 3572 void APFloat::toString(SmallVectorImpl<char> &Str, 3573 unsigned FormatPrecision, 3574 unsigned FormatMaxPadding) const { 3575 switch (category) { 3576 case fcInfinity: 3577 if (isNegative()) 3578 return append(Str, "-Inf"); 3579 else 3580 return append(Str, "+Inf"); 3581 3582 case fcNaN: return append(Str, "NaN"); 3583 3584 case fcZero: 3585 if (isNegative()) 3586 Str.push_back('-'); 3587 3588 if (!FormatMaxPadding) 3589 append(Str, "0.0E+0"); 3590 else 3591 Str.push_back('0'); 3592 return; 3593 3594 case fcNormal: 3595 break; 3596 } 3597 3598 if (isNegative()) 3599 Str.push_back('-'); 3600 3601 // Decompose the number into an APInt and an exponent. 3602 int exp = exponent - ((int) semantics->precision - 1); 3603 APInt significand(semantics->precision, 3604 makeArrayRef(significandParts(), 3605 partCountForBits(semantics->precision))); 3606 3607 // Set FormatPrecision if zero. We want to do this before we 3608 // truncate trailing zeros, as those are part of the precision. 3609 if (!FormatPrecision) { 3610 // We use enough digits so the number can be round-tripped back to an 3611 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3612 // Accurately" by Steele and White. 3613 // FIXME: Using a formula based purely on the precision is conservative; 3614 // we can print fewer digits depending on the actual value being printed. 3615 3616 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3617 FormatPrecision = 2 + semantics->precision * 59 / 196; 3618 } 3619 3620 // Ignore trailing binary zeros. 3621 int trailingZeros = significand.countTrailingZeros(); 3622 exp += trailingZeros; 3623 significand = significand.lshr(trailingZeros); 3624 3625 // Change the exponent from 2^e to 10^e. 3626 if (exp == 0) { 3627 // Nothing to do. 3628 } else if (exp > 0) { 3629 // Just shift left. 3630 significand = significand.zext(semantics->precision + exp); 3631 significand <<= exp; 3632 exp = 0; 3633 } else { /* exp < 0 */ 3634 int texp = -exp; 3635 3636 // We transform this using the identity: 3637 // (N)(2^-e) == (N)(5^e)(10^-e) 3638 // This means we have to multiply N (the significand) by 5^e. 3639 // To avoid overflow, we have to operate on numbers large 3640 // enough to store N * 5^e: 3641 // log2(N * 5^e) == log2(N) + e * log2(5) 3642 // <= semantics->precision + e * 137 / 59 3643 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3644 3645 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3646 3647 // Multiply significand by 5^e. 3648 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3649 significand = significand.zext(precision); 3650 APInt five_to_the_i(precision, 5); 3651 while (true) { 3652 if (texp & 1) significand *= five_to_the_i; 3653 3654 texp >>= 1; 3655 if (!texp) break; 3656 five_to_the_i *= five_to_the_i; 3657 } 3658 } 3659 3660 AdjustToPrecision(significand, exp, FormatPrecision); 3661 3662 SmallVector<char, 256> buffer; 3663 3664 // Fill the buffer. 3665 unsigned precision = significand.getBitWidth(); 3666 APInt ten(precision, 10); 3667 APInt digit(precision, 0); 3668 3669 bool inTrail = true; 3670 while (significand != 0) { 3671 // digit <- significand % 10 3672 // significand <- significand / 10 3673 APInt::udivrem(significand, ten, significand, digit); 3674 3675 unsigned d = digit.getZExtValue(); 3676 3677 // Drop trailing zeros. 3678 if (inTrail && !d) exp++; 3679 else { 3680 buffer.push_back((char) ('0' + d)); 3681 inTrail = false; 3682 } 3683 } 3684 3685 assert(!buffer.empty() && "no characters in buffer!"); 3686 3687 // Drop down to FormatPrecision. 3688 // TODO: don't do more precise calculations above than are required. 3689 AdjustToPrecision(buffer, exp, FormatPrecision); 3690 3691 unsigned NDigits = buffer.size(); 3692 3693 // Check whether we should use scientific notation. 3694 bool FormatScientific; 3695 if (!FormatMaxPadding) 3696 FormatScientific = true; 3697 else { 3698 if (exp >= 0) { 3699 // 765e3 --> 765000 3700 // ^^^ 3701 // But we shouldn't make the number look more precise than it is. 3702 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3703 NDigits + (unsigned) exp > FormatPrecision); 3704 } else { 3705 // Power of the most significant digit. 3706 int MSD = exp + (int) (NDigits - 1); 3707 if (MSD >= 0) { 3708 // 765e-2 == 7.65 3709 FormatScientific = false; 3710 } else { 3711 // 765e-5 == 0.00765 3712 // ^ ^^ 3713 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3714 } 3715 } 3716 } 3717 3718 // Scientific formatting is pretty straightforward. 3719 if (FormatScientific) { 3720 exp += (NDigits - 1); 3721 3722 Str.push_back(buffer[NDigits-1]); 3723 Str.push_back('.'); 3724 if (NDigits == 1) 3725 Str.push_back('0'); 3726 else 3727 for (unsigned I = 1; I != NDigits; ++I) 3728 Str.push_back(buffer[NDigits-1-I]); 3729 Str.push_back('E'); 3730 3731 Str.push_back(exp >= 0 ? '+' : '-'); 3732 if (exp < 0) exp = -exp; 3733 SmallVector<char, 6> expbuf; 3734 do { 3735 expbuf.push_back((char) ('0' + (exp % 10))); 3736 exp /= 10; 3737 } while (exp); 3738 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3739 Str.push_back(expbuf[E-1-I]); 3740 return; 3741 } 3742 3743 // Non-scientific, positive exponents. 3744 if (exp >= 0) { 3745 for (unsigned I = 0; I != NDigits; ++I) 3746 Str.push_back(buffer[NDigits-1-I]); 3747 for (unsigned I = 0; I != (unsigned) exp; ++I) 3748 Str.push_back('0'); 3749 return; 3750 } 3751 3752 // Non-scientific, negative exponents. 3753 3754 // The number of digits to the left of the decimal point. 3755 int NWholeDigits = exp + (int) NDigits; 3756 3757 unsigned I = 0; 3758 if (NWholeDigits > 0) { 3759 for (; I != (unsigned) NWholeDigits; ++I) 3760 Str.push_back(buffer[NDigits-I-1]); 3761 Str.push_back('.'); 3762 } else { 3763 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3764 3765 Str.push_back('0'); 3766 Str.push_back('.'); 3767 for (unsigned Z = 1; Z != NZeros; ++Z) 3768 Str.push_back('0'); 3769 } 3770 3771 for (; I != NDigits; ++I) 3772 Str.push_back(buffer[NDigits-I-1]); 3773 } 3774 3775 bool APFloat::getExactInverse(APFloat *inv) const { 3776 // Special floats and denormals have no exact inverse. 3777 if (!isFiniteNonZero()) 3778 return false; 3779 3780 // Check that the number is a power of two by making sure that only the 3781 // integer bit is set in the significand. 3782 if (significandLSB() != semantics->precision - 1) 3783 return false; 3784 3785 // Get the inverse. 3786 APFloat reciprocal(*semantics, 1ULL); 3787 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3788 return false; 3789 3790 // Avoid multiplication with a denormal, it is not safe on all platforms and 3791 // may be slower than a normal division. 3792 if (reciprocal.isDenormal()) 3793 return false; 3794 3795 assert(reciprocal.isFiniteNonZero() && 3796 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3797 3798 if (inv) 3799 *inv = reciprocal; 3800 3801 return true; 3802 } 3803 3804 bool APFloat::isSignaling() const { 3805 if (!isNaN()) 3806 return false; 3807 3808 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3809 // first bit of the trailing significand being 0. 3810 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3811 } 3812 3813 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3814 /// 3815 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3816 /// appropriate sign switching before/after the computation. 3817 APFloat::opStatus APFloat::next(bool nextDown) { 3818 // If we are performing nextDown, swap sign so we have -x. 3819 if (nextDown) 3820 changeSign(); 3821 3822 // Compute nextUp(x) 3823 opStatus result = opOK; 3824 3825 // Handle each float category separately. 3826 switch (category) { 3827 case fcInfinity: 3828 // nextUp(+inf) = +inf 3829 if (!isNegative()) 3830 break; 3831 // nextUp(-inf) = -getLargest() 3832 makeLargest(true); 3833 break; 3834 case fcNaN: 3835 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3836 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3837 // change the payload. 3838 if (isSignaling()) { 3839 result = opInvalidOp; 3840 // For consistency, propagate the sign of the sNaN to the qNaN. 3841 makeNaN(false, isNegative(), nullptr); 3842 } 3843 break; 3844 case fcZero: 3845 // nextUp(pm 0) = +getSmallest() 3846 makeSmallest(false); 3847 break; 3848 case fcNormal: 3849 // nextUp(-getSmallest()) = -0 3850 if (isSmallest() && isNegative()) { 3851 APInt::tcSet(significandParts(), 0, partCount()); 3852 category = fcZero; 3853 exponent = 0; 3854 break; 3855 } 3856 3857 // nextUp(getLargest()) == INFINITY 3858 if (isLargest() && !isNegative()) { 3859 APInt::tcSet(significandParts(), 0, partCount()); 3860 category = fcInfinity; 3861 exponent = semantics->maxExponent + 1; 3862 break; 3863 } 3864 3865 // nextUp(normal) == normal + inc. 3866 if (isNegative()) { 3867 // If we are negative, we need to decrement the significand. 3868 3869 // We only cross a binade boundary that requires adjusting the exponent 3870 // if: 3871 // 1. exponent != semantics->minExponent. This implies we are not in the 3872 // smallest binade or are dealing with denormals. 3873 // 2. Our significand excluding the integral bit is all zeros. 3874 bool WillCrossBinadeBoundary = 3875 exponent != semantics->minExponent && isSignificandAllZeros(); 3876 3877 // Decrement the significand. 3878 // 3879 // We always do this since: 3880 // 1. If we are dealing with a non-binade decrement, by definition we 3881 // just decrement the significand. 3882 // 2. If we are dealing with a normal -> normal binade decrement, since 3883 // we have an explicit integral bit the fact that all bits but the 3884 // integral bit are zero implies that subtracting one will yield a 3885 // significand with 0 integral bit and 1 in all other spots. Thus we 3886 // must just adjust the exponent and set the integral bit to 1. 3887 // 3. If we are dealing with a normal -> denormal binade decrement, 3888 // since we set the integral bit to 0 when we represent denormals, we 3889 // just decrement the significand. 3890 integerPart *Parts = significandParts(); 3891 APInt::tcDecrement(Parts, partCount()); 3892 3893 if (WillCrossBinadeBoundary) { 3894 // Our result is a normal number. Do the following: 3895 // 1. Set the integral bit to 1. 3896 // 2. Decrement the exponent. 3897 APInt::tcSetBit(Parts, semantics->precision - 1); 3898 exponent--; 3899 } 3900 } else { 3901 // If we are positive, we need to increment the significand. 3902 3903 // We only cross a binade boundary that requires adjusting the exponent if 3904 // the input is not a denormal and all of said input's significand bits 3905 // are set. If all of said conditions are true: clear the significand, set 3906 // the integral bit to 1, and increment the exponent. If we have a 3907 // denormal always increment since moving denormals and the numbers in the 3908 // smallest normal binade have the same exponent in our representation. 3909 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3910 3911 if (WillCrossBinadeBoundary) { 3912 integerPart *Parts = significandParts(); 3913 APInt::tcSet(Parts, 0, partCount()); 3914 APInt::tcSetBit(Parts, semantics->precision - 1); 3915 assert(exponent != semantics->maxExponent && 3916 "We can not increment an exponent beyond the maxExponent allowed" 3917 " by the given floating point semantics."); 3918 exponent++; 3919 } else { 3920 incrementSignificand(); 3921 } 3922 } 3923 break; 3924 } 3925 3926 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3927 if (nextDown) 3928 changeSign(); 3929 3930 return result; 3931 } 3932 3933 void 3934 APFloat::makeInf(bool Negative) { 3935 category = fcInfinity; 3936 sign = Negative; 3937 exponent = semantics->maxExponent + 1; 3938 APInt::tcSet(significandParts(), 0, partCount()); 3939 } 3940 3941 void 3942 APFloat::makeZero(bool Negative) { 3943 category = fcZero; 3944 sign = Negative; 3945 exponent = semantics->minExponent-1; 3946 APInt::tcSet(significandParts(), 0, partCount()); 3947 } 3948 3949 void APFloat::makeQuiet() { 3950 assert(isNaN()); 3951 APInt::tcSetBit(significandParts(), semantics->precision - 2); 3952 } 3953 3954 int llvm::ilogb(const APFloat &Arg) { 3955 if (Arg.isNaN()) 3956 return APFloat::IEK_NaN; 3957 if (Arg.isZero()) 3958 return APFloat::IEK_Zero; 3959 if (Arg.isInfinity()) 3960 return APFloat::IEK_Inf; 3961 if (!Arg.isDenormal()) 3962 return Arg.exponent; 3963 3964 APFloat Normalized(Arg); 3965 int SignificandBits = Arg.getSemantics().precision - 1; 3966 3967 Normalized.exponent += SignificandBits; 3968 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero); 3969 return Normalized.exponent - SignificandBits; 3970 } 3971 3972 APFloat llvm::scalbn(APFloat X, int Exp, APFloat::roundingMode RoundingMode) { 3973 auto MaxExp = X.getSemantics().maxExponent; 3974 auto MinExp = X.getSemantics().minExponent; 3975 3976 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 3977 // overflow; clamp it to a safe range before adding, but ensure that the range 3978 // is large enough that the clamp does not change the result. The range we 3979 // need to support is the difference between the largest possible exponent and 3980 // the normalized exponent of half the smallest denormal. 3981 3982 int SignificandBits = X.getSemantics().precision - 1; 3983 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 3984 3985 // Clamp to one past the range ends to let normalize handle overlflow. 3986 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 3987 X.normalize(RoundingMode, lfExactlyZero); 3988 if (X.isNaN()) 3989 X.makeQuiet(); 3990 return X; 3991 } 3992 3993 APFloat llvm::frexp(const APFloat &Val, int &Exp, APFloat::roundingMode RM) { 3994 Exp = ilogb(Val); 3995 3996 // Quiet signalling nans. 3997 if (Exp == APFloat::IEK_NaN) { 3998 APFloat Quiet(Val); 3999 Quiet.makeQuiet(); 4000 return Quiet; 4001 } 4002 4003 if (Exp == APFloat::IEK_Inf) 4004 return Val; 4005 4006 // 1 is added because frexp is defined to return a normalized fraction in 4007 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4008 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1; 4009 return scalbn(Val, -Exp, RM); 4010 } 4011