Home | History | Annotate | Download | only in test
      1 // Copyright (c) 2015-2016 The Khronos Group Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include <cfloat>
     16 #include <cmath>
     17 #include <cstdio>
     18 #include <limits>
     19 #include <sstream>
     20 #include <string>
     21 #include <tuple>
     22 #include <utility>
     23 #include <vector>
     24 
     25 #include "gmock/gmock.h"
     26 #include "source/util/hex_float.h"
     27 #include "test/unit_spirv.h"
     28 
     29 namespace spvtools {
     30 namespace utils {
     31 namespace {
     32 
     33 using ::testing::Eq;
     34 
     35 // In this file "encode" means converting a number into a string,
     36 // and "decode" means converting a string into a number.
     37 
     38 using HexFloatTest =
     39     ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
     40 using DecodeHexFloatTest =
     41     ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
     42 using HexDoubleTest =
     43     ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
     44 using DecodeHexDoubleTest =
     45     ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
     46 using RoundTripFloatTest = ::testing::TestWithParam<float>;
     47 using RoundTripDoubleTest = ::testing::TestWithParam<double>;
     48 
     49 // Hex-encodes a float value.
     50 template <typename T>
     51 std::string EncodeViaHexFloat(const T& value) {
     52   std::stringstream ss;
     53   ss << HexFloat<T>(value);
     54   return ss.str();
     55 }
     56 
     57 // The following two tests can't be DRY because they take different parameter
     58 // types.
     59 
     60 TEST_P(HexFloatTest, EncodeCorrectly) {
     61   EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
     62 }
     63 
     64 TEST_P(HexDoubleTest, EncodeCorrectly) {
     65   EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
     66 }
     67 
     68 // Decodes a hex-float string.
     69 template <typename T>
     70 FloatProxy<T> Decode(const std::string& str) {
     71   HexFloat<FloatProxy<T>> decoded(0.f);
     72   EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
     73   return decoded.value();
     74 }
     75 
     76 TEST_P(HexFloatTest, DecodeCorrectly) {
     77   EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
     78 }
     79 
     80 TEST_P(HexDoubleTest, DecodeCorrectly) {
     81   EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
     82 }
     83 
// Value/text pairs for 32-bit floats.  HexFloatTest runs each pair in both
// directions: EncodeCorrectly expects the value to print as the text, and
// DecodeCorrectly expects the text to parse back to the value.
INSTANTIATE_TEST_CASE_P(
    Float32Tests, HexFloatTest,
    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
        // Zero, small integers and simple fractions, with both signs.
        {0.f, "0x0p+0"},
        {1.f, "0x1p+0"},
        {2.f, "0x1p+1"},
        {3.f, "0x1.8p+1"},
        {0.5f, "0x1p-1"},
        {0.25f, "0x1p-2"},
        {0.75f, "0x1.8p-1"},
        {-0.f, "-0x0p+0"},
        {-1.f, "-0x1p+0"},
        {-0.5f, "-0x1p-1"},
        {-0.25f, "-0x1p-2"},
        {-0.75f, "-0x1.8p-1"},

        // Larger numbers
        {512.f, "0x1p+9"},
        {-512.f, "-0x1p+9"},
        {1024.f, "0x1p+10"},
        {-1024.f, "-0x1p+10"},
        {1024.f + 8.f, "0x1.02p+10"},
        {-1024.f - 8.f, "-0x1.02p+10"},

        // Small numbers
        {1.0f / 512.f, "0x1p-9"},
        {1.0f / -512.f, "-0x1p-9"},
        {1.0f / 1024.f, "0x1p-10"},
        {1.0f / -1024.f, "-0x1p-10"},
        {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
        {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},

        // lowest non-denorm
        {float(ldexp(1.0f, -126)), "0x1p-126"},
        {float(ldexp(-1.0f, -126)), "-0x1p-126"},

        // Denormalized values
        {float(ldexp(1.0f, -127)), "0x1p-127"},
        {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
        {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
        {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
        {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
        {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
        {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},

        // Denormalized values that are 1.5 times a power of two.
        {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
        {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
         "0x1.8p-128"},

    })), );
    135 
// Infinity and NaN pairs for 32-bit floats.  Each FloatProxy is built from a
// raw 32-bit pattern, and the expected text spells it with exponent +128.
INSTANTIATE_TEST_CASE_P(
    Float32NanTests, HexFloatTest,
    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
        // Various NAN and INF cases
        {uint32_t(0xFF800000), "-0x1p+128"},         // -inf
        {uint32_t(0x7F800000), "0x1p+128"},          // inf
        {uint32_t(0xFFC00000), "-0x1.8p+128"},       // -nan
        {uint32_t(0xFF800100), "-0x1.0002p+128"},    // -nan
        {uint32_t(0xFF800c00), "-0x1.0018p+128"},    // -nan
        {uint32_t(0xFF80F000), "-0x1.01ep+128"},     // -nan
        {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"},  // -nan
        {uint32_t(0x7FC00000), "0x1.8p+128"},        // +nan
        {uint32_t(0x7F800100), "0x1.0002p+128"},     // +nan
        {uint32_t(0x7f800c00), "0x1.0018p+128"},     // +nan
        {uint32_t(0x7F80F000), "0x1.01ep+128"},      // +nan
        {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"},   // +nan
    })), );
    153 
// Value/text pairs for 64-bit doubles.  HexDoubleTest runs each pair in both
// directions: EncodeCorrectly expects the value to print as the text, and
// DecodeCorrectly expects the text to parse back to the value.
INSTANTIATE_TEST_CASE_P(
    Float64Tests, HexDoubleTest,
    ::testing::ValuesIn(
        std::vector<std::pair<FloatProxy<double>, std::string>>({
            // Zero, small integers and simple fractions, with both signs.
            {0., "0x0p+0"},
            {1., "0x1p+0"},
            {2., "0x1p+1"},
            {3., "0x1.8p+1"},
            {0.5, "0x1p-1"},
            {0.25, "0x1p-2"},
            {0.75, "0x1.8p-1"},
            {-0., "-0x0p+0"},
            {-1., "-0x1p+0"},
            {-0.5, "-0x1p-1"},
            {-0.25, "-0x1p-2"},
            {-0.75, "-0x1.8p-1"},

            // Larger numbers
            {512., "0x1p+9"},
            {-512., "-0x1p+9"},
            {1024., "0x1p+10"},
            {-1024., "-0x1p+10"},
            {1024. + 8., "0x1.02p+10"},
            {-1024. - 8., "-0x1.02p+10"},

            // Large outside the range of normal floats
            {ldexp(1.0, 128), "0x1p+128"},
            {ldexp(1.0, 129), "0x1p+129"},
            {ldexp(-1.0, 128), "-0x1p+128"},
            {ldexp(-1.0, 129), "-0x1p+129"},
            {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
            {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
            {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
            {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},

            // Small numbers
            {1.0 / 512., "0x1p-9"},
            {1.0 / -512., "-0x1p-9"},
            {1.0 / 1024., "0x1p-10"},
            {1.0 / -1024., "-0x1p-10"},
            {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
            {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},

            // Small outside the range of normal floats
            {ldexp(1.0, -128), "0x1p-128"},
            {ldexp(1.0, -129), "0x1p-129"},
            {ldexp(-1.0, -128), "-0x1p-128"},
            {ldexp(-1.0, -129), "-0x1p-129"},
            // In the four sums below the positive term has the larger
            // magnitude, so the expected text is positive and its exponent
            // follows that term.
            {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
            {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
            {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
            {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},

            // lowest non-denorm
            {ldexp(1.0, -1022), "0x1p-1022"},
            {ldexp(-1.0, -1022), "-0x1p-1022"},

            // Denormalized values
            {ldexp(1.0, -1023), "0x1p-1023"},
            {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
            {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
            {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
            {ldexp(-1.0, -1024), "-0x1p-1024"},
            {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
            {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
            {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},

            // Denormalized values that are 1.5 times a power of two.
            {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
            {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
             "0x1.8p-1024"},

        })), );
    226 
// Infinity and NaN pairs for 64-bit doubles.  Each FloatProxy is built from a
// raw 64-bit pattern, and the expected text spells it with exponent +1024.
// Patterns starting 0xFFF... have the sign bit set; those starting 0x7FF...
// do not.
INSTANTIATE_TEST_CASE_P(
    Float64NanTests, HexDoubleTest,
    ::testing::ValuesIn(std::vector<
                        std::pair<FloatProxy<double>, std::string>>({
        // Various NAN and INF cases
        {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"},                // -inf
        {uint64_t(0x7FF0000000000000LL), "0x1p+1024"},                 // +inf
        {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"},              // -nan
        {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},             // -nan
        {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"},  // -nan
        {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"},          // -nan
        {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"},  // -nan
        {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},               // +nan
        {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"},              // +nan
        {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"},   // +nan
        {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"},           // +nan
        {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"},   // +nan
    })), );
    245 
    246 // Tests that encoding a value and decoding it again restores
    247 // the same value.
    248 TEST_P(RoundTripFloatTest, CanStoreAccurately) {
    249   std::stringstream ss;
    250   ss << FloatProxy<float>(GetParam());
    251   ss.seekg(0);
    252   FloatProxy<float> res;
    253   ss >> res;
    254   EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
    255 }
    256 
    257 TEST_P(RoundTripDoubleTest, CanStoreAccurately) {
    258   std::stringstream ss;
    259   ss << FloatProxy<double>(GetParam());
    260   ss.seekg(0);
    261   FloatProxy<double> res;
    262   ss >> res;
    263   EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
    264 }
    265 
    266 INSTANTIATE_TEST_CASE_P(
    267     Float32StoreTests, RoundTripFloatTest,
    268     ::testing::ValuesIn(std::vector<float>(
    269         {// Value requiring more than 6 digits of precision to be
    270          // represented accurately.
    271          3.0000002f})));
    272 
    273 INSTANTIATE_TEST_CASE_P(
    274     Float64StoreTests, RoundTripDoubleTest,
    275     ::testing::ValuesIn(std::vector<double>(
    276         {// Value requiring more than 15 digits of precision to be
    277          // represented accurately.
    278          1.5000000000000002})));
    279 
    280 TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
    281   std::stringstream s;
    282   s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
    283     << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
    284   EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
    285 }
    286 
    287 TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
    288   std::stringstream s;
    289   s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
    290     << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
    291     << 9;
    292   EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
    293 }
    294 
    295 TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
    296   EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
    297 }
    298 
    299 TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
    300   EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
    301 }
    302 
// Text/value pairs for DecodeHexFloatTest: spellings that are only checked in
// the decode direction.
INSTANTIATE_TEST_CASE_P(
    Float32DecodeTests, DecodeHexFloatTest,
    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
        // Alternative spellings of a zero exponent.
        {"0x0p+000", 0.f},
        {"0x0p0", 0.f},
        {"0x0p-0", 0.f},

        // flush to zero cases
        {"0x1p-500", 0.f},  // Exponent underflows.
        {"-0x1p-500", -0.f},
        {"0x0.00000000001p-126", 0.f},  // Fraction causes underflow.
        {"-0x0.0000000001p-127", -0.f},
        {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow.
        {"0x0.01p-142", 0.f},

        // Some floats that do not encode the same way as they decode.
        {"0x2p+0", 2.f},
        {"0xFFp+0", 255.f},
        {"0x0.8p+0", 0.5f},
        {"0x0.4p+0", 0.25f},
    })), );
    324 
// Text/value pairs for DecodeHexFloatTest whose expected result is the raw
// bit pattern of a 32-bit infinity.
INSTANTIATE_TEST_CASE_P(
    Float32DecodeInfTests, DecodeHexFloatTest,
    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
        // inf cases
        {"-0x1p+128", uint32_t(0xFF800000)},   // -inf
        {"0x32p+127", uint32_t(0x7F800000)},   // inf
        {"0x32p+500", uint32_t(0x7F800000)},   // inf
        {"-0x32p+127", uint32_t(0xFF800000)},  // -inf
    })), );
    334 
    335 INSTANTIATE_TEST_CASE_P(
    336     Float64DecodeTests, DecodeHexDoubleTest,
    337     ::testing::ValuesIn(
    338         std::vector<std::pair<std::string, FloatProxy<double>>>({
    339             {"0x0p+000", 0.},
    340             {"0x0p0", 0.},
    341             {"0x0p-0", 0.},
    342 
    343             // flush to zero cases
    344             {"0x1p-5000", 0.},  // Exponent underflows.
    345             {"-0x1p-5000", -0.},
    346             {"0x0.0000000000000001p-1023", 0.},  // Fraction causes underflow.
    347             {"-0x0.000000000000001p-1024", -0.},
    348             {"-0x0.01p-1090", -0.f},  // Fraction causes additional underflow.
    349             {"0x0.01p-1090", 0.},
    350 
    351             // Some floats that do not encode the same way as they decode.
    352             {"0x2p+0", 2.},
    353             {"0xFFp+0", 255.},
    354             {"0x0.8p+0", 0.5},
    355             {"0x0.4p+0", 0.25},
    356         })), );
    357 
// Text/value pairs for DecodeHexDoubleTest whose expected result is the raw
// bit pattern of a 64-bit infinity.
INSTANTIATE_TEST_CASE_P(
    Float64DecodeInfTests, DecodeHexDoubleTest,
    ::testing::ValuesIn(
        std::vector<std::pair<std::string, FloatProxy<double>>>({
            // inf cases
            {"-0x1p+1024", uint64_t(0xFFF0000000000000)},   // -inf
            {"0x32p+1023", uint64_t(0x7FF0000000000000)},   // inf
            {"0x32p+5000", uint64_t(0x7FF0000000000000)},   // inf
            {"-0x32p+1023", uint64_t(0xFFF0000000000000)},  // -inf
        })), );
    368 
    369 TEST(FloatProxy, ValidConversion) {
    370   EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
    371   EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
    372   EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
    373   EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
    374   EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
    375   EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
    376 
    377   EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
    378   EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
    379   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
    380   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
    381   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
    382   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
    383   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
    384   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
    385   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
    386   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
    387   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
    388   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
    389 
    390   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
    391   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
    392   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
    393   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
    394   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
    395   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
    396   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
    397   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
    398   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
    399   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
    400   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
    401   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
    402 }
    403 
    404 TEST(FloatProxy, Nan) {
    405   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
    406   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
    407   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
    408   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
    409   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
    410   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
    411   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
    412   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
    413   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
    414   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
    415 }
    416 
    417 TEST(FloatProxy, Negation) {
    418   EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
    419   EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
    420 
    421   EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
    422   EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
    423 
    424   EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
    425   EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
    426 
    427   EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
    428   EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
    429 
    430   EXPECT_THAT(
    431       (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
    432       Eq(-std::numeric_limits<float>::infinity()));
    433   EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
    434                   .getAsFloat(),
    435               Eq(std::numeric_limits<float>::infinity()));
    436 }
    437 
    438 // Test conversion of FloatProxy values to strings.
    439 //
    440 // In previous cases, we always wrapped the FloatProxy value in a HexFloat
    441 // before conversion to a string.  In the following cases, the FloatProxy
    442 // decides for itself whether to print as a regular number or as a hex float.
    443 
    444 using FloatProxyFloatTest =
    445     ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
    446 using FloatProxyDoubleTest =
    447     ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
    448 
    449 // Converts a float value to a string via a FloatProxy.
    450 template <typename T>
    451 std::string EncodeViaFloatProxy(const T& value) {
    452   std::stringstream ss;
    453   ss << value;
    454   return ss.str();
    455 }
    456 
    457 // Converts a floating point string so that the exponent prefix
    458 // is 'e', and the exponent value does not have leading zeros.
    459 // The Microsoft runtime library likes to write things like "2.5E+010".
    460 // Convert that to "2.5e+10".
    461 // We don't care what happens to strings that are not floating point
    462 // strings.
    463 std::string NormalizeExponentInFloatString(std::string in) {
    464   std::string result;
    465   // Reserve one spot for the terminating null, even when the sscanf fails.
    466   std::vector<char> prefix(in.size() + 1);
    467   char e;
    468   char plus_or_minus;
    469   int exponent;  // in base 10
    470   if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e,
    471                         &plus_or_minus, &exponent)) &&
    472       (e == 'e' || e == 'E') &&
    473       (plus_or_minus == '-' || plus_or_minus == '+')) {
    474     // It looks like a floating point value with exponent.
    475     std::stringstream out;
    476     out << prefix.data() << 'e' << plus_or_minus << exponent;
    477     result = out.str();
    478   } else {
    479     result = in;
    480   }
    481   return result;
    482 }
    483 
    484 TEST(NormalizeFloat, Sample) {
    485   EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
    486   EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
    487   EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
    488   EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
    489   EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
    490 }
    491 
    492 // The following two tests can't be DRY because they take different parameter
    493 // types.
    494 TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
    495   EXPECT_THAT(
    496       NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
    497       Eq(GetParam().second));
    498 }
    499 
    500 TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
    501   EXPECT_THAT(
    502       NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
    503       Eq(GetParam().second));
    504 }
    505 
// Value/text pairs for FloatProxyFloatTest.EncodeCorrectly, which streams the
// FloatProxy<float> directly.  Zero and normal numbers are expected in
// decimal notation; denormals, NaNs and infinities in hex-float notation.
INSTANTIATE_TEST_CASE_P(
    Float32Tests, FloatProxyFloatTest,
    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
        // Zero
        {0.f, "0"},
        // Normal numbers
        {1.f, "1"},
        {-0.25f, "-0.25"},
        {1000.0f, "1000"},

        // Still normal numbers, but with large magnitude exponents.
        {float(ldexp(1.f, 126)), "8.50705917e+37"},
        {float(ldexp(-1.f, -126)), "-1.17549435e-38"},

        // denormalized values are printed as hex floats.
        {float(ldexp(1.0f, -127)), "0x1p-127"},
        {float(ldexp(1.5f, -128)), "0x1.8p-128"},
        {float(ldexp(1.25, -129)), "0x1.4p-129"},
        {float(ldexp(1.125, -130)), "0x1.2p-130"},
        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
        {float(ldexp(-1.0f, -128)), "-0x1p-128"},
        {float(ldexp(-1.0f, -129)), "-0x1p-129"},
        {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},

        // NaNs
        {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
        {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},

        // Infinity
        {std::numeric_limits<float>::infinity(), "0x1p+128"},
        {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
    })), );
    537 
// Value/text pairs for FloatProxyDoubleTest.EncodeCorrectly, which streams
// the FloatProxy<double> directly.  Zero and normal numbers are expected in
// decimal notation; denormals, NaNs and infinities in hex-float notation.
INSTANTIATE_TEST_CASE_P(
    Float64Tests, FloatProxyDoubleTest,
    ::testing::ValuesIn(
        std::vector<std::pair<FloatProxy<double>, std::string>>({
            // Zero and ordinary values.
            {0., "0"},
            {1., "1"},
            {-0.25, "-0.25"},
            {1000.0, "1000"},

            // Large outside the range of normal floats
            {ldexp(1.0, 128), "3.4028236692093846e+38"},
            {ldexp(1.5, 129), "1.0208471007628154e+39"},
            {ldexp(-1.0, 128), "-3.4028236692093846e+38"},
            {ldexp(-1.5, 129), "-1.0208471007628154e+39"},

            // Small outside the range of normal floats
            {ldexp(1.5, -129), "2.2040519077917891e-39"},
            {ldexp(-1.5, -129), "-2.2040519077917891e-39"},

            // lowest non-denorm
            {ldexp(1.0, -1022), "2.2250738585072014e-308"},
            {ldexp(-1.0, -1022), "-2.2250738585072014e-308"},

            // Denormalized values
            {ldexp(1.125, -1023), "0x1.2p-1023"},
            {ldexp(-1.375, -1024), "-0x1.6p-1024"},

            // NaNs
            {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
            {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},

            // Infinity
            {std::numeric_limits<double>::infinity(), "0x1p+1024"},
            {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},

        })), );
    574 
    575 // double is used so that unbiased_exponent can be used with the output
    576 // of ldexp directly.
    577 int32_t unbiased_exponent(double f) {
    578   return HexFloat<FloatProxy<float>>(static_cast<float>(f))
    579       .getUnbiasedNormalizedExponent();
    580 }
    581 
    582 int16_t unbiased_half_exponent(uint16_t f) {
    583   return HexFloat<FloatProxy<Float16>>(f).getUnbiasedNormalizedExponent();
    584 }
    585 
    586 TEST(HexFloatOperationTest, UnbiasedExponent) {
    587   // Float cases
    588   EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
    589   EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
    590   EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
    591   EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
    592 
    593   EXPECT_EQ(128,
    594             HexFloat<FloatProxy<float>>(std::numeric_limits<float>::infinity())
    595                 .getUnbiasedNormalizedExponent());
    596 
    597   EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
    598   EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127)));  // First denorm
    599   EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
    600   EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
    601   EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
    602   // Smallest representable number
    603   EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
    604   // Should get rounded to 0 first.
    605   EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
    606 
    607   // Float16 cases
    608   // The exponent is represented in the bits 0x7C00
    609   // The offset is -15
    610   EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
    611   EXPECT_EQ(3, unbiased_half_exponent(0x4800));
    612   EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
    613   EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
    614   EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
    615   EXPECT_EQ(10, unbiased_half_exponent(0x6400));
    616 
    617   // Smallest representable number
    618   EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
    619 }
    620 
    621 // Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions
    622 float float_fractions(const std::vector<uint32_t>& fractions) {
    623   float f = 0;
    624   for (int32_t i : fractions) {
    625     f += std::ldexp(1.0f, -i);
    626   }
    627   return f;
    628 }
    629 
    630 // Returns the normalized significand of a HexFloat<FloatProxy<float>>
    631 // that was created by calling float_fractions with the input fractions,
    632 // raised to the power of exp.
    633 uint32_t normalized_significand(const std::vector<uint32_t>& fractions,
    634                                 uint32_t exp) {
    635   return HexFloat<FloatProxy<float>>(
    636              static_cast<float>(ldexp(float_fractions(fractions), exp)))
    637       .getNormalizedSignificand();
    638 }
    639 
    640 // Sets the bits from MSB to LSB of the significand part of a float.
    641 // For example 0 would set the bit 23 (counting from LSB to MSB),
    642 // and 1 would set the 22nd bit.
    643 uint32_t bits_set(const std::vector<uint32_t>& bits) {
    644   const uint32_t top_bit = 1u << 22u;
    645   uint32_t val = 0;
    646   for (uint32_t i : bits) {
    647     val |= top_bit >> i;
    648   }
    649   return val;
    650 }
    651 
    652 // The same as bits_set but for a Float16 value instead of 32-bit floating
    653 // point.
    654 uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
    655   const uint32_t top_bit = 1u << 9u;
    656   uint32_t val = 0;
    657   for (uint32_t i : bits) {
    658     val |= top_bit >> i;
    659   }
    660   return static_cast<uint16_t>(val);
    661 }
    662 
// Compares the output of the normalized_significand helper (defined earlier
// in this file) against fraction-field bit patterns built with bits_set.
TEST(HexFloatOperationTest, NormalizedSignificand) {
  // For normalized numbers (the cases below) the result should simply be the
  // input significand with the top implicit bit removed.
  EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
  EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
  EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));

  // For denormalized numbers we expect the normalized significand to
  // shift as if it were normalized. In practice this means that the
  // top-most set bit is cut off. These cases deliberately mirror the ones
  // above.
  EXPECT_EQ(bits_set({}),
            normalized_significand({0}, static_cast<uint32_t>(-127)));
  EXPECT_EQ(bits_set({3}),
            normalized_significand({0, 4}, static_cast<uint32_t>(-128)));
  EXPECT_EQ(bits_set({3}),
            normalized_significand({0, 4}, static_cast<uint32_t>(-127)));
  EXPECT_EQ(bits_set({}),
            normalized_significand({22}, static_cast<uint32_t>(-127)));
  EXPECT_EQ(bits_set({0}),
            normalized_significand({21, 22}, static_cast<uint32_t>(-127)));
}
    687 
    688 // Returns the 32-bit floating point value created by
    689 // calling setFromSignUnbiasedExponentAndNormalizedSignificand
    690 // on a HexFloat<FloatProxy<float>>
    691 float set_from_sign(bool negative, int32_t unbiased_exponent,
    692                     uint32_t significand, bool round_denorm_up) {
    693   HexFloat<FloatProxy<float>> f(0.f);
    694   f.setFromSignUnbiasedExponentAndNormalizedSignificand(
    695       negative, unbiased_exponent, significand, round_denorm_up);
    696   return f.value().getAsFloat();
    697 }
    698 
// Exercises setFromSignUnbiasedExponentAndNormalizedSignificand through the
// set_from_sign helper, comparing the resulting float against values built
// with ldexp and float_fractions.
TEST(HexFloatOperationTests,
     SetFromSignUnbiasedExponentAndNormalizedSignificand) {
  // Exponent 0 with an empty significand is exactly 1.0.
  EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));

  // Tests insertion of various denormalized numbers with and without round up.
  // 2^-149 is expected to be stored as-is; for exponent -150 the expected
  // result is 0 without round-up and 2^-149 with round-up.
  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
            set_from_sign(false, -149, 0, false));
  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
            set_from_sign(false, -149, 0, true));
  EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
            set_from_sign(false, -150, 1, true));

  // Positive values at several exponents, with and without significand bits.
  EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
  EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
  EXPECT_EQ(float_fractions({0, 1, 2, 5}),
            set_from_sign(false, 0, bits_set({0, 1, 4}), false));
  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
            set_from_sign(false, -32, bits_set({0, 1, 4}), false));
  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
            set_from_sign(false, -128, bits_set({0, 1, 4}), false));

  // The negative cases from above.
  EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
  EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
  EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
  EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
            set_from_sign(true, 0, bits_set({0, 1, 4}), false));
  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
            set_from_sign(true, -32, bits_set({0, 1, 4}), false));
  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
            set_from_sign(true, -128, bits_set({0, 1, 4}), false));
}
    732 
    733 TEST(HexFloatOperationTests, NonRounding) {
    734   // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
    735   // except in the denorm case which is a bit more complex.
    736   using HF = HexFloat<FloatProxy<float>>;
    737   bool carry_bit = false;
    738 
    739   round_direction rounding[] = {round_direction::kToZero,
    740                                 round_direction::kToNearestEven,
    741                                 round_direction::kToPositiveInfinity,
    742                                 round_direction::kToNegativeInfinity};
    743 
    744   // Everything fits, so this should be straight-forward
    745   for (round_direction round : rounding) {
    746     EXPECT_EQ(bits_set({}),
    747               HF(0.f).getRoundedNormalizedSignificand<HF>(round, &carry_bit));
    748     EXPECT_FALSE(carry_bit);
    749 
    750     EXPECT_EQ(bits_set({0}),
    751               HF(float_fractions({0, 1}))
    752                   .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
    753     EXPECT_FALSE(carry_bit);
    754 
    755     EXPECT_EQ(bits_set({1, 3}),
    756               HF(float_fractions({0, 2, 4}))
    757                   .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
    758     EXPECT_FALSE(carry_bit);
    759 
    760     EXPECT_EQ(
    761         bits_set({0, 1, 4}),
    762         HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
    763             .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
    764     EXPECT_FALSE(carry_bit);
    765 
    766     EXPECT_EQ(bits_set({0, 1, 4, 22}),
    767               HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
    768                   .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
    769     EXPECT_FALSE(carry_bit);
    770   }
    771 }
    772 
// Short alias for the rounding-mode enum, used by the parameter tables in
// this file.
using RD = round_direction;
// One parameterized case for HexFloatRoundTest.
struct RoundSignificandCase {
  // Input value whose significand gets rounded.
  float source_float;
  // Expected (rounded significand, carry flag) pair.
  std::pair<int16_t, bool> expected_results;
  // Rounding mode to apply.
  round_direction round;
};

using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>;
    781 
    782 TEST_P(HexFloatRoundTest, RoundDownToFP16) {
    783   using HF = HexFloat<FloatProxy<float>>;
    784   using HF16 = HexFloat<FloatProxy<Float16>>;
    785 
    786   HF input_value(GetParam().source_float);
    787   bool carry_bit = false;
    788   EXPECT_EQ(GetParam().expected_results.first,
    789             input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round,
    790                                                               &carry_bit));
    791   EXPECT_EQ(carry_bit, GetParam().expected_results.second);
    792 }
    793 
// Parameters for HexFloatRoundTest. Each entry is
// {32-bit source value, (expected half significand bits, expected carry),
//  rounding mode}.
// clang-format off
INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatRoundTest,
  ::testing::ValuesIn(std::vector<RoundSignificandCase>(
  {
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},

    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    // Carries: rounding up an all-ones significand is expected to produce an
    // empty significand with the carry flag set.
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},

    // Cases where the original number was a denorm. Note: this should have no
    // effect, because the number is pre-normalized.
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  })),);
// clang-format on
    843 
// One parameterized case for HexFloatRoundUpSignificandTest.
struct UpCastSignificandCase {
  // Raw 16-bit pattern used to construct the half-precision input.
  uint16_t source_half;
  // Expected significand after widening to 32-bit float.
  uint32_t expected_result;
};

using HexFloatRoundUpSignificandTest =
    ::testing::TestWithParam<UpCastSignificandCase>;
    851 TEST_P(HexFloatRoundUpSignificandTest, Widening) {
    852   using HF = HexFloat<FloatProxy<float>>;
    853   using HF16 = HexFloat<FloatProxy<Float16>>;
    854   bool carry_bit = false;
    855 
    856   round_direction rounding[] = {round_direction::kToZero,
    857                                 round_direction::kToNearestEven,
    858                                 round_direction::kToPositiveInfinity,
    859                                 round_direction::kToNegativeInfinity};
    860 
    861   // Everything fits, so everything should just be bit-shifts.
    862   for (round_direction round : rounding) {
    863     carry_bit = false;
    864     HF16 input_value(GetParam().source_half);
    865     EXPECT_EQ(
    866         GetParam().expected_result,
    867         input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
    868         << std::hex << "0x"
    869         << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
    870         << "  0x" << GetParam().expected_result;
    871     EXPECT_FALSE(carry_bit);
    872   }
    873 }
    874 
// Parameters for HexFloatRoundUpSignificandTest:
// {16-bit source pattern, expected 32-bit significand}.
INSTANTIATE_TEST_CASE_P(
    F16toF32, HexFloatRoundUpSignificandTest,
    // The bits selected by mask 0xFC00 in each 16-bit source value hold the
    // sign and the exponent. They are ignored for this test; only the
    // remaining significand bits matter.
    ::testing::ValuesIn(std::vector<UpCastSignificandCase>({
        {0x3F00, 0x600000},
        {0x0F00, 0x600000},
        {0x0F01, 0x602000},
        {0x0FFF, 0x7FE000},
    })), );
    885 
// One parameterized case for HexFloatFP32To16Tests.
struct DownCastTest {
  // 32-bit value to narrow.
  float source_float;
  // Expected raw 16-bit pattern of the narrowed value.
  uint16_t expected_half;
  // Rounding modes under which expected_half must be produced.
  std::vector<round_direction> directions;
};
    891 
    892 std::string get_round_text(round_direction direction) {
    893 #define CASE(round_direction) \
    894   case round_direction:       \
    895     return #round_direction
    896 
    897   switch (direction) {
    898     CASE(round_direction::kToZero);
    899     CASE(round_direction::kToPositiveInfinity);
    900     CASE(round_direction::kToNegativeInfinity);
    901     CASE(round_direction::kToNearestEven);
    902   }
    903 #undef CASE
    904   return "";
    905 }
    906 
    907 using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
    908 
    909 TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
    910   using HF = HexFloat<FloatProxy<float>>;
    911   using HF16 = HexFloat<FloatProxy<Float16>>;
    912   HF f(GetParam().source_float);
    913   for (auto round : GetParam().directions) {
    914     HF16 half(0);
    915     f.castTo(half, round);
    916     EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
    917         << get_round_text(round) << "  " << std::hex
    918         << BitwiseCast<uint32_t>(GetParam().source_float)
    919         << " cast to: " << half.value().getAsFloat().get_value();
    920   }
    921 }
    922 
// Half-precision bit patterns for +infinity and -infinity: all five exponent
// bits set and a zero significand, with the sign bit clear or set
// respectively.
const uint16_t positive_infinity = 0x7C00;
const uint16_t negative_infinity = 0xFC00;
    925 
// Parameters for HexFloatFP32To16Tests:
// {32-bit source value, expected 16-bit pattern, rounding modes that must
//  produce that pattern}.
INSTANTIATE_TEST_CASE_P(
    F32ToF16, HexFloatFP32To16Tests,
    ::testing::ValuesIn(std::vector<DownCastTest>({
        // Exactly representable as half.
        {0.f,
         0x0,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-0.f,
         0x8000,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {1.0f,
         0x3C00,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-1.0f,
         0xBC00,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        {float_fractions({0, 1, 10}),
         0x3E01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-float_fractions({0, 1, 10}),
         0xBE01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)),
         0x4A01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)),
         0xCA01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Underflow: 2^-25 is below the smallest half denorm (2^-24), so the
        // expected result depends on the rounding mode.
        {static_cast<float>(ldexp(1.0f, -25)),
         0x0,
         {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
        {static_cast<float>(-ldexp(1.0f, -25)),
         0x8000,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, -25)),
         0x8001,
         {RD::kToNegativeInfinity}},
        {static_cast<float>(ldexp(1.0f, -24)),
         0x1,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Overflow: these cases expect an infinity of matching sign under
        // every rounding mode.
        {static_cast<float>(ldexp(1.0f, 16)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.0f, 18)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.3f, 16)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, 16)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, 18)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.3f, 16)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Transfer of Infinities
        {std::numeric_limits<float>::infinity(),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-std::numeric_limits<float>::infinity(),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // NaNs are tested separately because they cannot be compared for
        // equality.
    })), );
   1018 
// One parameterized case for HexFloatFP16To32Tests.
struct UpCastCase {
  // Raw 16-bit pattern used to construct the half-precision input.
  uint16_t source_half;
  // Expected value after widening to 32-bit float.
  float expected_float;
};

using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
   1025 TEST_P(HexFloatFP16To32Tests, WideningCasts) {
   1026   using HF = HexFloat<FloatProxy<float>>;
   1027   using HF16 = HexFloat<FloatProxy<Float16>>;
   1028   HF16 f(GetParam().source_half);
   1029 
   1030   round_direction rounding[] = {round_direction::kToZero,
   1031                                 round_direction::kToNearestEven,
   1032                                 round_direction::kToPositiveInfinity,
   1033                                 round_direction::kToNegativeInfinity};
   1034 
   1035   // Everything fits, so everything should just be bit-shifts.
   1036   for (round_direction round : rounding) {
   1037     HF flt(0.f);
   1038     f.castTo(flt, round);
   1039     EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
   1040         << get_round_text(round) << "  " << std::hex
   1041         << BitwiseCast<uint16_t>(GetParam().source_half)
   1042         << " cast to: " << flt.value().getAsFloat();
   1043   }
   1044 }
   1045 
// Parameters for HexFloatFP16To32Tests:
// {16-bit source pattern, expected 32-bit float value}.
INSTANTIATE_TEST_CASE_P(
    F16ToF32, HexFloatFP16To32Tests,
    ::testing::ValuesIn(std::vector<UpCastCase>({
        {0x0000, 0.f},
        {0x8000, -0.f},
        {0x3C00, 1.0f},
        {0xBC00, -1.0f},
        {0x3F00, float_fractions({0, 1, 2})},
        {0xBF00, -float_fractions({0, 1, 2})},
        {0x3F01, float_fractions({0, 1, 2, 10})},
        {0xBF01, -float_fractions({0, 1, 2, 10})},

        // Half-precision denorms (zero exponent field, non-zero significand).
        {0x0001, static_cast<float>(ldexp(1.0, -24))},
        {0x0002, static_cast<float>(ldexp(1.0, -23))},
        {0x8001, static_cast<float>(-ldexp(1.0, -24))},
        {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},

        // Infinities.
        {0x7C00, std::numeric_limits<float>::infinity()},
        {0xFC00, -std::numeric_limits<float>::infinity()},
    })), );
   1068 
   1069 TEST(HexFloatOperationTests, NanTests) {
   1070   using HF = HexFloat<FloatProxy<float>>;
   1071   using HF16 = HexFloat<FloatProxy<Float16>>;
   1072   round_direction rounding[] = {round_direction::kToZero,
   1073                                 round_direction::kToNearestEven,
   1074                                 round_direction::kToPositiveInfinity,
   1075                                 round_direction::kToNegativeInfinity};
   1076 
   1077   // Everything fits, so everything should just be bit-shifts.
   1078   for (round_direction round : rounding) {
   1079     HF16 f16(0);
   1080     HF f(0.f);
   1081     HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
   1082     EXPECT_TRUE(f16.value().isNan());
   1083     HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
   1084     EXPECT_TRUE(f16.value().isNan());
   1085 
   1086     HF16(0x7C01).castTo(f, round);
   1087     EXPECT_TRUE(f.value().isNan());
   1088     HF16(0x7C11).castTo(f, round);
   1089     EXPECT_TRUE(f.value().isNan());
   1090     HF16(0xFC01).castTo(f, round);
   1091     EXPECT_TRUE(f.value().isNan());
   1092     HF16(0x7C10).castTo(f, round);
   1093     EXPECT_TRUE(f.value().isNan());
   1094     HF16(0xFF00).castTo(f, round);
   1095     EXPECT_TRUE(f.value().isNan());
   1096   }
   1097 }
   1098 
// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
template <typename T>
struct FloatParseCase {
  // Text handed to the parser.
  std::string literal;
  // Passed through as the negate_value argument of ParseNormalFloat.
  bool negate_value;
  // Whether the input stream is expected to be in a non-failed state after
  // parsing.
  bool expect_success;
  // Value the parse should produce. The tests compare it only when
  // expect_success is true.
  HexFloat<FloatProxy<T>> expected_value;
};

using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
   1109 
   1110 TEST_P(ParseNormalFloatTest, Samples) {
   1111   std::stringstream input(GetParam().literal);
   1112   HexFloat<FloatProxy<float>> parsed_value(0.0f);
   1113   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
   1114   EXPECT_NE(GetParam().expect_success, input.fail())
   1115       << " literal: " << GetParam().literal
   1116       << " negate: " << GetParam().negate_value;
   1117   if (GetParam().expect_success) {
   1118     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
   1119         << " literal: " << GetParam().literal
   1120         << " negate: " << GetParam().negate_value;
   1121   }
   1122 }
   1123 
   1124 // Returns a FloatParseCase with expected failure.
   1125 template <typename T>
   1126 FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
   1127                                     T expected_value) {
   1128   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
   1129   return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
   1130 }
   1131 
   1132 // Returns a FloatParseCase that should successfully parse to a given value.
   1133 template <typename T>
   1134 FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
   1135                                      T expected_value) {
   1136   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
   1137   return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
   1138 }
   1139 
// Parameters for ParseNormalFloatTest: (literal, negate_value, expected
// value). For the Bad* cases the expected value is carried in the parameter
// but is not compared by the test body.
INSTANTIATE_TEST_CASE_P(
    FloatParse, ParseNormalFloatTest,
    ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase("abc", false, 0.0f),
        BadFloatParseCase("abc", true, 0.0f),

        // Valid cases.
        GoodFloatParseCase("0", false, 0.0f),
        GoodFloatParseCase("0.0", false, 0.0f),
        GoodFloatParseCase("-0.0", false, -0.0f),
        GoodFloatParseCase("2.0", false, 2.0f),
        GoodFloatParseCase("-2.0", false, -2.0f),
        GoodFloatParseCase("+2.0", false, 2.0f),
        // Cases with negate_value being true.
        GoodFloatParseCase("0.0", true, -0.0f),
        GoodFloatParseCase("2.0", true, -2.0f),

        // When negate_value is true, we should not accept a
        // leading minus or plus.
        BadFloatParseCase("-0.0", true, 0.0f),
        BadFloatParseCase("-2.0", true, 0.0f),
        BadFloatParseCase("+0.0", true, 0.0f),
        BadFloatParseCase("+2.0", true, 0.0f),

        // Overflow is an error for 32-bit float parsing.
        BadFloatParseCase("1e40", false, FLT_MAX),
        BadFloatParseCase("1e40", true, -FLT_MAX),
        BadFloatParseCase("-1e40", false, -FLT_MAX),
        // We can't have -1e40 and negate_value == true since
        // that represents an original case of "--1e40" which
        // is invalid.
    }), );
   1173 
   1174 using ParseNormalFloat16Test =
   1175     ::testing::TestWithParam<FloatParseCase<Float16>>;
   1176 
   1177 TEST_P(ParseNormalFloat16Test, Samples) {
   1178   std::stringstream input(GetParam().literal);
   1179   HexFloat<FloatProxy<Float16>> parsed_value(0);
   1180   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
   1181   EXPECT_NE(GetParam().expect_success, input.fail())
   1182       << " literal: " << GetParam().literal
   1183       << " negate: " << GetParam().negate_value;
   1184   if (GetParam().expect_success) {
   1185     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
   1186         << " literal: " << GetParam().literal
   1187         << " negate: " << GetParam().negate_value;
   1188   }
   1189 }
   1190 
// Parameters for ParseNormalFloat16Test: (literal, negate_value, expected raw
// 16-bit pattern). For the Bad* cases the expected pattern is carried in the
// parameter but is not compared by the test body.
INSTANTIATE_TEST_CASE_P(
    Float16Parse, ParseNormalFloat16Test,
    ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
        BadFloatParseCase<Float16>("abc", true, uint16_t{0}),

        // Valid cases.
        GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
        GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
        GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
        // Cases with negate_value being true.
        GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),

        // When negate_value is true, we should not accept a leading minus or
        // plus.
        BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
    }), );
   1216 
// A test case for parsing a literal with operator>> where the value may
// overflow the target floating point type.
template <typename T>
struct OverflowParseCase {
  // Text to parse.
  std::string input;
  // Whether the input stream is expected to be in a non-failed state after
  // parsing.
  bool expect_success;
  // Value the parse should produce. The tests compare it only when
  // expect_success is true.
  T expected_value;
};

using FloatProxyParseOverflowFloatTest =
    ::testing::TestWithParam<OverflowParseCase<float>>;
   1227 
   1228 TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
   1229   std::istringstream input(GetParam().input);
   1230   HexFloat<FloatProxy<float>> value(0.0f);
   1231   input >> value;
   1232   EXPECT_NE(GetParam().expect_success, input.fail());
   1233   if (GetParam().expect_success) {
   1234     EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
   1235   }
   1236 }
   1237 
// Parameters for FloatProxyParseOverflowFloatTest:
// {input literal, expect_success, expected value}. The expected value of a
// failing case is not compared by the test body.
INSTANTIATE_TEST_CASE_P(
    FloatOverflow, FloatProxyParseOverflowFloatTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
        {"0", true, 0.0f},
        {"0.0", true, 0.0f},
        {"1.0", true, 1.0f},
        {"1e38", true, 1e38f},
        {"-1e38", true, -1e38f},
        {"1e40", false, FLT_MAX},
        {"-1e40", false, -FLT_MAX},
        {"1e400", false, FLT_MAX},
        {"-1e400", false, -FLT_MAX},
    })), );
   1251 
   1252 using FloatProxyParseOverflowDoubleTest =
   1253     ::testing::TestWithParam<OverflowParseCase<double>>;
   1254 
   1255 TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
   1256   std::istringstream input(GetParam().input);
   1257   HexFloat<FloatProxy<double>> value(0.0);
   1258   input >> value;
   1259   EXPECT_NE(GetParam().expect_success, input.fail());
   1260   if (GetParam().expect_success) {
   1261     EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
   1262   }
   1263 }
   1264 
// Parameters for FloatProxyParseOverflowDoubleTest:
// {input literal, expect_success, expected value}. Unlike the 32-bit table,
// 1e40 is expected to parse successfully here. The expected value of a
// failing case is not compared by the test body.
INSTANTIATE_TEST_CASE_P(
    DoubleOverflow, FloatProxyParseOverflowDoubleTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
        {"0", true, 0.0},
        {"0.0", true, 0.0},
        {"1.0", true, 1.0},
        {"1e38", true, 1e38},
        {"-1e38", true, -1e38},
        {"1e40", true, 1e40},
        {"-1e40", true, -1e40},
        {"1e400", false, DBL_MAX},
        {"-1e400", false, -DBL_MAX},
    })), );
   1278 
   1279 using FloatProxyParseOverflowFloat16Test =
   1280     ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
   1281 
   1282 TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
   1283   std::istringstream input(GetParam().input);
   1284   HexFloat<FloatProxy<Float16>> value(0);
   1285   input >> value;
   1286   EXPECT_NE(GetParam().expect_success, input.fail())
   1287       << " literal: " << GetParam().input;
   1288   if (GetParam().expect_success) {
   1289     EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
   1290         << " literal: " << GetParam().input;
   1291   }
   1292 }
   1293 
// Parameters for FloatProxyParseOverflowFloat16Test:
// {input literal, expect_success, expected raw 16-bit pattern}.
INSTANTIATE_TEST_CASE_P(
    Float16Overflow, FloatProxyParseOverflowFloat16Test,
    ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
        {"0", true, uint16_t{0}},
        {"0.0", true, uint16_t{0}},
        {"1.0", true, uint16_t{0x3c00}},
        // Overflow for 16-bit float is an error, and returns max or
        // lowest value.
        // NOTE(review): the test body only compares the value when
        // expect_success is true, so the 0x7bff / 0xfbff entries below are
        // not actually verified by it.
        {"1e38", false, uint16_t{0x7bff}},
        {"1e40", false, uint16_t{0x7bff}},
        {"1e400", false, uint16_t{0x7bff}},
        {"-1e38", false, uint16_t{0xfbff}},
        {"-1e40", false, uint16_t{0xfbff}},
        {"-1e400", false, uint16_t{0xfbff}},
    })), );
   1309 
// Checks FloatProxy<T>::max() for each supported width. The 16-bit case is
// compared against the raw pattern 0x7bff; the float and double cases are
// compared against std::numeric_limits<T>::max().
TEST(FloatProxy, Max) {
  EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
              Eq(uint16_t{0x7bff}));
  EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
              Eq(std::numeric_limits<float>::max()));
  EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
              Eq(std::numeric_limits<double>::max()));
}
   1318 
// Checks FloatProxy<T>::lowest() for each supported width. The 16-bit case is
// compared against the raw pattern 0xfbff; the float and double cases are
// compared against std::numeric_limits<T>::lowest().
TEST(FloatProxy, Lowest) {
  EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
              Eq(uint16_t{0xfbff}));
  EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
              Eq(std::numeric_limits<float>::lowest()));
  EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
              Eq(std::numeric_limits<double>::lowest()));
}
   1327 
   1328 // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
   1329 }  // namespace
   1330 }  // namespace utils
   1331 }  // namespace spvtools
   1332