Home | History | Annotate | Download | only in internal
      1 // Tencent is pleased to support the open source community by making RapidJSON available.
      2 //
      3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
      4 //
      5 // Licensed under the MIT License (the "License"); you may not use this file except
      6 // in compliance with the License. You may obtain a copy of the License at
      7 //
      8 // http://opensource.org/licenses/MIT
      9 //
     10 // Unless required by applicable law or agreed to in writing, software distributed
     11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
     12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
     13 // specific language governing permissions and limitations under the License.
     14 
     15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
     16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
     17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
     18 
     19 #ifndef RAPIDJSON_DIYFP_H_
     20 #define RAPIDJSON_DIYFP_H_
     21 
     22 #include "../rapidjson.h"
     23 
     24 #if defined(_MSC_VER) && defined(_M_AMD64)
     25 #include <intrin.h>
     26 #pragma intrinsic(_BitScanReverse64)
     27 #pragma intrinsic(_umul128)
     28 #endif
     29 
     30 RAPIDJSON_NAMESPACE_BEGIN
     31 namespace internal {
     32 
     33 #ifdef __GNUC__
     34 RAPIDJSON_DIAG_PUSH
     35 RAPIDJSON_DIAG_OFF(effc++)
     36 #endif
     37 
     38 struct DiyFp {
     39     DiyFp() {}
     40 
     41     DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
     42 
     43     explicit DiyFp(double d) {
     44         union {
     45             double d;
     46             uint64_t u64;
     47         } u = { d };
     48 
     49         int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
     50         uint64_t significand = (u.u64 & kDpSignificandMask);
     51         if (biased_e != 0) {
     52             f = significand + kDpHiddenBit;
     53             e = biased_e - kDpExponentBias;
     54         }
     55         else {
     56             f = significand;
     57             e = kDpMinExponent + 1;
     58         }
     59     }
     60 
     61     DiyFp operator-(const DiyFp& rhs) const {
     62         return DiyFp(f - rhs.f, e);
     63     }
     64 
     65     DiyFp operator*(const DiyFp& rhs) const {
     66 #if defined(_MSC_VER) && defined(_M_AMD64)
     67         uint64_t h;
     68         uint64_t l = _umul128(f, rhs.f, &h);
     69         if (l & (uint64_t(1) << 63)) // rounding
     70             h++;
     71         return DiyFp(h, e + rhs.e + 64);
     72 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
     73         __extension__ typedef unsigned __int128 uint128;
     74         uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
     75         uint64_t h = static_cast<uint64_t>(p >> 64);
     76         uint64_t l = static_cast<uint64_t>(p);
     77         if (l & (uint64_t(1) << 63)) // rounding
     78             h++;
     79         return DiyFp(h, e + rhs.e + 64);
     80 #else
     81         const uint64_t M32 = 0xFFFFFFFF;
     82         const uint64_t a = f >> 32;
     83         const uint64_t b = f & M32;
     84         const uint64_t c = rhs.f >> 32;
     85         const uint64_t d = rhs.f & M32;
     86         const uint64_t ac = a * c;
     87         const uint64_t bc = b * c;
     88         const uint64_t ad = a * d;
     89         const uint64_t bd = b * d;
     90         uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
     91         tmp += 1U << 31;  /// mult_round
     92         return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
     93 #endif
     94     }
     95 
     96     DiyFp Normalize() const {
     97 #if defined(_MSC_VER) && defined(_M_AMD64)
     98         unsigned long index;
     99         _BitScanReverse64(&index, f);
    100         return DiyFp(f << (63 - index), e - (63 - index));
    101 #elif defined(__GNUC__) && __GNUC__ >= 4
    102         int s = __builtin_clzll(f);
    103         return DiyFp(f << s, e - s);
    104 #else
    105         DiyFp res = *this;
    106         while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
    107             res.f <<= 1;
    108             res.e--;
    109         }
    110         return res;
    111 #endif
    112     }
    113 
    114     DiyFp NormalizeBoundary() const {
    115         DiyFp res = *this;
    116         while (!(res.f & (kDpHiddenBit << 1))) {
    117             res.f <<= 1;
    118             res.e--;
    119         }
    120         res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
    121         res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
    122         return res;
    123     }
    124 
    125     void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
    126         DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
    127         DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
    128         mi.f <<= mi.e - pl.e;
    129         mi.e = pl.e;
    130         *plus = pl;
    131         *minus = mi;
    132     }
    133 
    134     double ToDouble() const {
    135         union {
    136             double d;
    137             uint64_t u64;
    138         }u;
    139         const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
    140             static_cast<uint64_t>(e + kDpExponentBias);
    141         u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
    142         return u.d;
    143     }
    144 
    145     static const int kDiySignificandSize = 64;
    146     static const int kDpSignificandSize = 52;
    147     static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
    148     static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
    149     static const int kDpMinExponent = -kDpExponentBias;
    150     static const int kDpDenormalExponent = -kDpExponentBias + 1;
    151     static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
    152     static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
    153     static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
    154 
    155     uint64_t f;
    156     int e;
    157 };
    158 
    159 inline DiyFp GetCachedPowerByIndex(size_t index) {
    160     // 10^-348, 10^-340, ..., 10^340
    161     static const uint64_t kCachedPowers_F[] = {
    162         RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
    163         RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
    164         RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
    165         RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
    166         RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
    167         RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
    168         RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
    169         RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
    170         RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
    171         RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
    172         RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
    173         RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
    174         RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
    175         RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
    176         RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
    177         RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
    178         RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
    179         RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
    180         RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
    181         RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
    182         RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
    183         RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
    184         RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
    185         RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
    186         RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
    187         RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
    188         RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
    189         RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
    190         RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
    191         RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
    192         RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
    193         RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
    194         RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
    195         RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
    196         RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
    197         RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
    198         RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
    199         RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
    200         RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
    201         RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
    202         RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
    203         RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
    204         RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
    205         RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
    206     };
    207     static const int16_t kCachedPowers_E[] = {
    208         -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
    209         -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
    210         -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
    211         -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
    212         -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
    213         109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
    214         375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
    215         641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
    216         907,   933,   960,   986,  1013,  1039,  1066
    217     };
    218     return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
    219 }
    220 
    221 inline DiyFp GetCachedPower(int e, int* K) {
    222 
    223     //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
    224     double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
    225     int k = static_cast<int>(dk);
    226     if (dk - k > 0.0)
    227         k++;
    228 
    229     unsigned index = static_cast<unsigned>((k >> 3) + 1);
    230     *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
    231 
    232     return GetCachedPowerByIndex(index);
    233 }
    234 
    235 inline DiyFp GetCachedPower10(int exp, int *outExp) {
    236      unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
    237      *outExp = -348 + static_cast<int>(index) * 8;
    238      return GetCachedPowerByIndex(index);
    239  }
    240 
    241 #ifdef __GNUC__
    242 RAPIDJSON_DIAG_POP
    243 #endif
    244 
    245 } // namespace internal
    246 RAPIDJSON_NAMESPACE_END
    247 
    248 #endif // RAPIDJSON_DIYFP_H_
    249