Home | History | Annotate | Download | only in math
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #pragma once
     18 
     19 #include <stdint.h>
     20 #include <iosfwd>
     21 #include <limits>
     22 #include <type_traits>
     23 
     24 #ifndef LIKELY
     25 #define LIKELY_DEFINED_LOCAL
     26 #ifdef __cplusplus
     27 #   define LIKELY( exp )    (__builtin_expect( !!(exp), true ))
     28 #   define UNLIKELY( exp )  (__builtin_expect( !!(exp), false ))
     29 #else
     30 #   define LIKELY( exp )    (__builtin_expect( !!(exp), 1 ))
     31 #   define UNLIKELY( exp )  (__builtin_expect( !!(exp), 0 ))
     32 #endif
     33 #endif
     34 
     35 #if __cplusplus >= 201402L
     36 #define CONSTEXPR constexpr
     37 #else
     38 #define CONSTEXPR
     39 #endif
     40 
     41 namespace android {
     42 
     43 /*
     44  * half-float
     45  *
     46  *  1   5       10
     47  * +-+------+------------+
     48  * |s|eee.ee|mm.mmmm.mmmm|
     49  * +-+------+------------+
     50  *
     51  * minimum (denormal) value: 2^-24 = 5.96e-8
     52  * minimum (normal) value:   2^-14 = 6.10e-5
     53  * maximum value:            2-2^-10 = 65504
     54  *
     55  * Integers between 0 and 2048 can be represented exactly
     56  */
     57 class half {
     58     struct fp16 {
     59         uint16_t bits;
     60         explicit constexpr fp16() noexcept : bits(0) { }
     61         explicit constexpr fp16(uint16_t b) noexcept : bits(b) { }
     62         void setS(unsigned int s) noexcept { bits = uint16_t((bits & 0x7FFF) | (s<<15)); }
     63         void setE(unsigned int s) noexcept { bits = uint16_t((bits & 0xE3FF) | (s<<10)); }
     64         void setM(unsigned int s) noexcept { bits = uint16_t((bits & 0xFC00) | (s<< 0)); }
     65         constexpr unsigned int getS() const noexcept { return  bits >> 15u; }
     66         constexpr unsigned int getE() const noexcept { return (bits >> 10u) & 0x1Fu; }
     67         constexpr unsigned int getM() const noexcept { return  bits         & 0x3FFu; }
     68     };
     69     struct fp32 {
     70         union {
     71             uint32_t bits;
     72             float fp;
     73         };
     74         explicit constexpr fp32() noexcept : bits(0) { }
     75         explicit constexpr fp32(float f) noexcept : fp(f) { }
     76         void setS(unsigned int s) noexcept { bits = uint32_t((bits & 0x7FFFFFFF) | (s<<31)); }
     77         void setE(unsigned int s) noexcept { bits = uint32_t((bits & 0x807FFFFF) | (s<<23)); }
     78         void setM(unsigned int s) noexcept { bits = uint32_t((bits & 0xFF800000) | (s<< 0)); }
     79         constexpr unsigned int getS() const noexcept { return  bits >> 31u; }
     80         constexpr unsigned int getE() const noexcept { return (bits >> 23u) & 0xFFu; }
     81         constexpr unsigned int getM() const noexcept { return  bits         & 0x7FFFFFu; }
     82     };
     83 
     84 public:
     85     CONSTEXPR half(float v) noexcept : mBits(ftoh(v)) { }
     86     CONSTEXPR operator float() const noexcept { return htof(mBits); }
     87 
     88     uint16_t getBits() const noexcept { return mBits.bits; }
     89     unsigned int getExponent() const noexcept { return mBits.getE(); }
     90     unsigned int getMantissa() const noexcept { return mBits.getM(); }
     91 
     92 private:
     93     friend class std::numeric_limits<half>;
     94     friend CONSTEXPR half operator"" _hf(long double v);
     95 
     96     enum Binary { binary };
     97     explicit constexpr half(Binary, uint16_t bits) noexcept : mBits(bits) { }
     98     static CONSTEXPR fp16 ftoh(float v) noexcept;
     99     static CONSTEXPR float htof(fp16 v) noexcept;
    100     fp16 mBits;
    101 };
    102 
    103 inline CONSTEXPR half::fp16 half::ftoh(float v) noexcept {
    104     fp16 out;
    105     fp32 in(v);
    106     if (UNLIKELY(in.getE() == 0xFF)) { // inf or nan
    107         out.setE(0x1F);
    108         out.setM(in.getM() ? 0x200 : 0);
    109     } else {
    110         int e = static_cast<int>(in.getE()) - 127 + 15;
    111         if (e >= 0x1F) {
    112             // overflow
    113             out.setE(0x31); // +/- inf
    114         } else if (e <= 0) {
    115             // underflow
    116             // flush to +/- 0
    117         } else {
    118             unsigned int m = in.getM();
    119             out.setE(uint16_t(e));
    120             out.setM(m >> 13);
    121             if (m & 0x1000) {
    122                 // rounding
    123                 out.bits++;
    124             }
    125         }
    126     }
    127     out.setS(in.getS());
    128     return out;
    129 }
    130 
    131 inline CONSTEXPR float half::htof(half::fp16 in) noexcept {
    132     fp32 out;
    133     if (UNLIKELY(in.getE() == 0x1F)) { // inf or nan
    134         out.setE(0xFF);
    135         out.setM(in.getM() ? 0x400000 : 0);
    136     } else {
    137         if (in.getE() == 0) {
    138             if (in.getM()) {
    139                 // TODO: denormal half float, treat as zero for now
    140                 // (it's stupid because they can be represented as regular float)
    141             }
    142         } else {
    143             int e = static_cast<int>(in.getE()) - 15 + 127;
    144             unsigned int m = in.getM();
    145             out.setE(uint32_t(e));
    146             out.setM(m << 13);
    147         }
    148     }
    149     out.setS(in.getS());
    150     return out.fp;
    151 }
    152 
    153 inline CONSTEXPR android::half operator"" _hf(long double v) {
    154     return android::half(android::half::binary, android::half::ftoh(static_cast<float>(v)).bits);
    155 }
    156 
    157 } // namespace android
    158 
    159 namespace std {
    160 
    161 template<> struct is_floating_point<android::half> : public std::true_type {};
    162 
    163 template<>
    164 class numeric_limits<android::half> {
    165 public:
    166     typedef android::half type;
    167 
    168     static constexpr const bool is_specialized = true;
    169     static constexpr const bool is_signed = true;
    170     static constexpr const bool is_integer = false;
    171     static constexpr const bool is_exact = false;
    172     static constexpr const bool has_infinity = true;
    173     static constexpr const bool has_quiet_NaN = true;
    174     static constexpr const bool has_signaling_NaN = false;
    175     static constexpr const float_denorm_style has_denorm = denorm_absent;
    176     static constexpr const bool has_denorm_loss = true;
    177     static constexpr const bool is_iec559 = false;
    178     static constexpr const bool is_bounded = true;
    179     static constexpr const bool is_modulo = false;
    180     static constexpr const bool traps = false;
    181     static constexpr const bool tinyness_before = false;
    182     static constexpr const float_round_style round_style = round_indeterminate;
    183 
    184     static constexpr const int digits = 11;
    185     static constexpr const int digits10 = 3;
    186     static constexpr const int max_digits10 = 5;
    187     static constexpr const int radix = 2;
    188     static constexpr const int min_exponent = -13;
    189     static constexpr const int min_exponent10 = -4;
    190     static constexpr const int max_exponent = 16;
    191     static constexpr const int max_exponent10 = 4;
    192 
    193     inline static constexpr type round_error() noexcept { return android::half(android::half::binary, 0x3800); }
    194     inline static constexpr type min() noexcept { return android::half(android::half::binary, 0x0400); }
    195     inline static constexpr type max() noexcept { return android::half(android::half::binary, 0x7bff); }
    196     inline static constexpr type lowest() noexcept { return android::half(android::half::binary, 0xfbff); }
    197     inline static constexpr type epsilon() noexcept { return android::half(android::half::binary, 0x1400); }
    198     inline static constexpr type infinity() noexcept { return android::half(android::half::binary, 0x7c00); }
    199     inline static constexpr type quiet_NaN() noexcept { return android::half(android::half::binary, 0x7fff); }
    200     inline static constexpr type denorm_min() noexcept { return android::half(android::half::binary, 0x0001); }
    201     inline static constexpr type signaling_NaN() noexcept { return android::half(android::half::binary, 0x7dff); }
    202 };
    203 
    204 } // namespace std
    205 
// Remove this header's helper macros so they do not leak into including code.
// LIKELY / UNLIKELY are removed only when this header defined them itself,
// which is what LIKELY_DEFINED_LOCAL records.
#ifdef LIKELY_DEFINED_LOCAL
#undef LIKELY_DEFINED_LOCAL
#undef LIKELY
#undef UNLIKELY
#endif // LIKELY_DEFINED_LOCAL

// CONSTEXPR is always defined by this header, so it is always removed.
#undef CONSTEXPR
    213