Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.util;
     18 
     19 import android.annotation.HalfFloat;
     20 import android.annotation.NonNull;
     21 import android.annotation.Nullable;
     22 
     23 import sun.misc.FloatingDecimal;
     24 
     25 /**
     26  * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit
     27  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
     28  * floating point data types (also called fp16 or binary16). A half-precision float can be
     29  * created from or converted to single-precision floats, and is stored in a short data type.
     30  * To distinguish short values holding half-precision floats from regular short values,
     31  * it is recommended to use the <code>@HalfFloat</code> annotation.</p>
     32  *
     33  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
     34  * <ul>
     35  * <li>Sign bit: 1 bit</li>
     36  * <li>Exponent width: 5 bits</li>
     37  * <li>Significand: 10 bits</li>
     38  * </ul>
     39  *
     40  * <p>The format is laid out as follows:</p>
     41  * <pre>
     42  * 1   11111   1111111111
     43  * ^   --^--   -----^----
     44  * sign  |          |_______ significand
     45  *       |
     46  *       -- exponent
     47  * </pre>
     48  *
     49  * <p>Half-precision floating points can be useful to save memory and/or
     50  * bandwidth at the expense of range and precision when compared to single-precision
     51  * floating points (fp32).</p>
     52  * <p>To help you decide whether fp16 is the right storage type for your needs, please
     53  * refer to the table below that shows the available precision throughout the range of
     54  * possible values. The <em>precision</em> column indicates the step size between two
     55  * consecutive numbers in a specific part of the range.</p>
     56  *
     57  * <table summary="Precision of fp16 across the range">
     58  *     <tr><th>Range start</th><th>Precision</th></tr>
     59  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
     60  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
     61  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
     62  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
     63  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
     64  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
     65  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
     66  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
     67  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
     68  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
     69  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
     70  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
     71  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
     72  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
     73  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
     74  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
     75  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
     76  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
     77  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
     78  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
     79  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
     80  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
     81  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
     82  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
     83  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
     84  *     <tr><td>1,024</td><td>1</td></tr>
     85  *     <tr><td>2,048</td><td>2</td></tr>
     86  *     <tr><td>4,096</td><td>4</td></tr>
     87  *     <tr><td>8,192</td><td>8</td></tr>
     88  *     <tr><td>16,384</td><td>16</td></tr>
     89  *     <tr><td>32,768</td><td>32</td></tr>
     90  * </table>
     91  *
     92  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
     93  */
     94 @SuppressWarnings("SimplifiableIfStatement")
     95 public final class Half extends Number implements Comparable<Half> {
     96     /**
     97      * The number of bits used to represent a half-precision float value.
     98      */
     99     public static final int SIZE = 16;
    100 
    /**
     * Epsilon is the difference between 1.0 and the next value representable
     * by a half-precision floating-point. The bit pattern {@code 0x1400}
     * encodes 2<sup>-10</sup> (biased exponent 5, zero significand).
     */
    public static final @HalfFloat short EPSILON = (short) 0x1400;

    /**
     * Maximum (unbiased) exponent a finite half-precision float may have.
     */
    public static final int MAX_EXPONENT = 15;
    /**
     * Minimum (unbiased) exponent a normalized half-precision float may have.
     */
    public static final int MIN_EXPONENT = -14;

    /**
     * Most negative finite value a half-precision float may have
     * (-65504, i.e. {@link #MAX_VALUE} with the sign bit set).
     */
    public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff;
    /**
     * Maximum positive finite value a half-precision float may have (65504).
     */
    public static final @HalfFloat short MAX_VALUE = (short) 0x7bff;
    /**
     * Smallest positive normal value a half-precision float may have
     * (2<sup>-14</sup>).
     */
    public static final @HalfFloat short MIN_NORMAL = (short) 0x0400;
    /**
     * Smallest positive non-zero value a half-precision float may have
     * (the subnormal 2<sup>-24</sup>).
     */
    public static final @HalfFloat short MIN_VALUE = (short) 0x0001;
    /**
     * A Not-a-Number representation of a half-precision float. This is the
     * canonical NaN bit pattern: all exponent bits set and the most
     * significant significand bit set.
     */
    public static final @HalfFloat short NaN = (short) 0x7e00;
    /**
     * Negative infinity of type half-precision float (sign bit set, all
     * exponent bits set, zero significand).
     */
    public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00;
    /**
     * Negative 0 of type half-precision float (only the sign bit is set).
     */
    public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000;
    /**
     * Positive infinity of type half-precision float (all exponent bits set,
     * zero significand).
     */
    public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00;
    /**
     * Positive 0 of type half-precision float (all bits clear).
     */
    public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000;
    152 
    153     private static final int FP16_SIGN_SHIFT        = 15;
    154     private static final int FP16_SIGN_MASK         = 0x8000;
    155     private static final int FP16_EXPONENT_SHIFT    = 10;
    156     private static final int FP16_EXPONENT_MASK     = 0x1f;
    157     private static final int FP16_SIGNIFICAND_MASK  = 0x3ff;
    158     private static final int FP16_EXPONENT_BIAS     = 15;
    159     private static final int FP16_COMBINED          = 0x7fff;
    160     private static final int FP16_EXPONENT_MAX      = 0x7c00;
    161 
    162     private static final int FP32_SIGN_SHIFT        = 31;
    163     private static final int FP32_EXPONENT_SHIFT    = 23;
    164     private static final int FP32_EXPONENT_MASK     = 0xff;
    165     private static final int FP32_SIGNIFICAND_MASK  = 0x7fffff;
    166     private static final int FP32_EXPONENT_BIAS     = 127;
    167 
    168     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
    169     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
    170 
    171     private final @HalfFloat short mValue;
    172 
    /**
     * Creates a {@code Half} that wraps the given half-precision bit pattern.
     * The bits are stored exactly as supplied; no conversion takes place.
     *
     * @param value The half-precision float, encoded in a {@code short}, to be
     *              represented by the {@code Half}
     */
    public Half(@HalfFloat short value) {
        this.mValue = value;
    }
    182 
    /**
     * Creates a {@code Half} holding the half-precision conversion of the
     * given single-precision value.
     *
     * @param value The value to be represented by the {@code Half}
     *
     * @see #toHalf(float)
     */
    public Half(float value) {
        this.mValue = toHalf(value);
    }
    194 
    195     /**
    196      * Constructs a newly allocated {@code Half} object that
    197      * represents the argument converted to a half-precision float.
    198      *
    199      * @param value The value to be represented by the {@code Half}
    200      *
    201      * @see #toHalf(float)
    202      */
    203     public Half(double value) {
    204         mValue = toHalf((float) value);
    205     }
    206 
    207     /**
    208      * <p>Constructs a newly allocated {@code Half} object that represents the
    209      * half-precision float value represented by the string.
    210      * The string is converted to a half-precision float value as if by the
    211      * {@link #valueOf(String)} method.</p>
    212      *
    213      * <p>Calling this constructor is equivalent to calling:</p>
    214      * <pre>
    215      *     new Half(Float.parseFloat(value))
    216      * </pre>
    217      *
    218      * @param value A string to be converted to a {@code Half}
    219      * @throws NumberFormatException if the string does not contain a parsable number
    220      *
    221      * @see Float#valueOf(java.lang.String)
    222      * @see #toHalf(float)
    223      */
    224     public Half(@NonNull String value) throws NumberFormatException {
    225         mValue = toHalf(Float.parseFloat(value));
    226     }
    227 
    228     /**
    229      * Returns the half-precision value of this {@code Half} as a {@code short}
    230      * containing the bit representation described in {@link Half}.
    231      *
    232      * @return The half-precision float value represented by this object
    233      */
    234     public @HalfFloat short halfValue() {
    235         return mValue;
    236     }
    237 
    238     /**
    239      * Returns the value of this {@code Half} as a {@code byte} after
    240      * a narrowing primitive conversion.
    241      *
    242      * @return The half-precision float value represented by this object
    243      *         converted to type {@code byte}
    244      */
    245     @Override
    246     public byte byteValue() {
    247         return (byte) toFloat(mValue);
    248     }
    249 
    250     /**
    251      * Returns the value of this {@code Half} as a {@code short} after
    252      * a narrowing primitive conversion.
    253      *
    254      * @return The half-precision float value represented by this object
    255      *         converted to type {@code short}
    256      */
    257     @Override
    258     public short shortValue() {
    259         return (short) toFloat(mValue);
    260     }
    261 
    262     /**
    263      * Returns the value of this {@code Half} as a {@code int} after
    264      * a narrowing primitive conversion.
    265      *
    266      * @return The half-precision float value represented by this object
    267      *         converted to type {@code int}
    268      */
    269     @Override
    270     public int intValue() {
    271         return (int) toFloat(mValue);
    272     }
    273 
    274     /**
    275      * Returns the value of this {@code Half} as a {@code long} after
    276      * a narrowing primitive conversion.
    277      *
    278      * @return The half-precision float value represented by this object
    279      *         converted to type {@code long}
    280      */
    281     @Override
    282     public long longValue() {
    283         return (long) toFloat(mValue);
    284     }
    285 
    286     /**
    287      * Returns the value of this {@code Half} as a {@code float} after
    288      * a widening primitive conversion.
    289      *
    290      * @return The half-precision float value represented by this object
    291      *         converted to type {@code float}
    292      */
    293     @Override
    294     public float floatValue() {
    295         return toFloat(mValue);
    296     }
    297 
    298     /**
    299      * Returns the value of this {@code Half} as a {@code double} after
    300      * a widening primitive conversion.
    301      *
    302      * @return The half-precision float value represented by this object
    303      *         converted to type {@code double}
    304      */
    305     @Override
    306     public double doubleValue() {
    307         return toFloat(mValue);
    308     }
    309 
    310     /**
    311      * Returns true if this {@code Half} value represents a Not-a-Number,
    312      * false otherwise.
    313      *
    314      * @return True if the value is a NaN, false otherwise
    315      */
    316     public boolean isNaN() {
    317         return isNaN(mValue);
    318     }
    319 
    320     /**
    321      * Compares this object against the specified object. The result is {@code true}
    322      * if and only if the argument is not {@code null} and is a {@code Half} object
    323      * that represents the same half-precision value as the this object. Two
    324      * half-precision values are considered to be the same if and only if the method
    325      * {@link #halfToIntBits(short)} returns an identical {@code int} value for both.
    326      *
    327      * @param o The object to compare
    328      * @return True if the objects are the same, false otherwise
    329      *
    330      * @see #halfToIntBits(short)
    331      */
    332     @Override
    333     public boolean equals(@Nullable Object o) {
    334         return (o instanceof Half) &&
    335                 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue));
    336     }
    337 
    338     /**
    339      * Returns a hash code for this {@code Half} object. The result is the
    340      * integer bit representation, exactly as produced by the method
    341      * {@link #halfToIntBits(short)}, of the primitive half-precision float
    342      * value represented by this {@code Half} object.
    343      *
    344      * @return A hash code value for this object
    345      */
    346     @Override
    347     public int hashCode() {
    348         return hashCode(mValue);
    349     }
    350 
    351     /**
    352      * Returns a string representation of the specified half-precision
    353      * float value. See {@link #toString(short)} for more information.
    354      *
    355      * @return A string representation of this {@code Half} object
    356      */
    357     @NonNull
    358     @Override
    359     public String toString() {
    360         return toString(mValue);
    361     }
    362 
    363     /**
    364      * <p>Compares the two specified half-precision float values. The following
    365      * conditions apply during the comparison:</p>
    366      *
    367      * <ul>
    368      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
    369      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
    370      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
    371      * {@link #NEGATIVE_ZERO}.</li>
    372      * </ul>
    373      *
    374      * @param h The half-precision float value to compare to the half-precision value
    375      *          represented by this {@code Half} object
    376      *
    377      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}; a
    378      *          value less than {@code 0} if {@code x} is numerically less than {@code y};
    379      *          and a value greater than {@code 0} if {@code x} is numerically greater
    380      *          than {@code y}
    381      */
    382     @Override
    383     public int compareTo(@NonNull Half h) {
    384         return compare(mValue, h.mValue);
    385     }
    386 
    387     /**
    388      * Returns a hash code for a half-precision float value.
    389      *
    390      * @param h The value to hash
    391      *
    392      * @return A hash code value for a half-precision float value
    393      */
    394     public static int hashCode(@HalfFloat short h) {
    395         return halfToIntBits(h);
    396     }
    397 
    398     /**
    399      * <p>Compares the two specified half-precision float values. The following
    400      * conditions apply during the comparison:</p>
    401      *
    402      * <ul>
    403      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
    404      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
    405      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
    406      * {@link #NEGATIVE_ZERO}.</li>
    407      * </ul>
    408      *
    409      * @param x The first half-precision float value to compare.
    410      * @param y The second half-precision float value to compare
    411      *
    412      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
    413      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
    414      *          and a value greater than {@code 0} if {@code x} is numerically greater
    415      *          than {@code y}
    416      */
    417     public static int compare(@HalfFloat short x, @HalfFloat short y) {
    418         if (less(x, y)) return -1;
    419         if (greater(x, y)) return 1;
    420 
    421         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
    422         // (signed) short value types to preserve the ordering of -0.0
    423         // and +0.0
    424         short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x;
    425         short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y;
    426 
    427         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
    428     }
    429 
    430     /**
    431      * <p>Returns a representation of the specified half-precision float value
    432      * according to the bit layout described in {@link Half}.</p>
    433      *
    434      * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all
    435      * possible Not-a-Number values to a single canonical Not-a-Number value
    436      * defined by {@link #NaN}.</p>
    437      *
    438      * @param h A half-precision float value
    439      * @return The bits that represent the half-precision float value
    440      *
    441      * @see #halfToIntBits(short)
    442      */
    443     public static @HalfFloat short halfToShortBits(@HalfFloat short h) {
    444         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h;
    445     }
    446 
    447     /**
    448      * <p>Returns a representation of the specified half-precision float value
    449      * according to the bit layout described in {@link Half}.</p>
    450      *
    451      * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all
    452      * possible Not-a-Number values to a single canonical Not-a-Number value
    453      * defined by {@link #NaN}.</p>
    454      *
    455      * @param h A half-precision float value
    456      * @return The bits that represent the half-precision float value
    457      *
    458      * @see #halfToRawIntBits(short)
    459      * @see #halfToShortBits(short)
    460      * @see #intBitsToHalf(int)
    461      */
    462     public static int halfToIntBits(@HalfFloat short h) {
    463         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff;
    464     }
    465 
    466     /**
    467      * <p>Returns a representation of the specified half-precision float value
    468      * according to the bit layout described in {@link Half}.</p>
    469      *
    470      * <p>The argument is considered to be a representation of a half-precision
    471      * float value according to the bit layout described in {@link Half}. The 16
    472      * most significant bits of the returned value are set to 0.</p>
    473      *
    474      * @param h A half-precision float value
    475      * @return The bits that represent the half-precision float value
    476      *
    477      * @see #halfToIntBits(short)
    478      * @see #intBitsToHalf(int)
    479      */
    480     public static int halfToRawIntBits(@HalfFloat short h) {
    481         return h & 0xffff;
    482     }
    483 
    484     /**
    485      * <p>Returns the half-precision float value corresponding to a given
    486      * bit representation.</p>
    487      *
    488      * <p>The argument is considered to be a representation of a half-precision
    489      * float value according to the bit layout described in {@link Half}. The 16
    490      * most significant bits of the argument are ignored.</p>
    491      *
    492      * @param bits An integer
    493      * @return The half-precision float value with the same bit pattern
    494      */
    495     public static @HalfFloat short intBitsToHalf(int bits) {
    496         return (short) (bits & 0xffff);
    497     }
    498 
    499     /**
    500      * Returns the first parameter with the sign of the second parameter.
    501      * This method treats NaNs as having a sign.
    502      *
    503      * @param magnitude A half-precision float value providing the magnitude of the result
    504      * @param sign  A half-precision float value providing the sign of the result
    505      * @return A value with the magnitude of the first parameter and the sign
    506      *         of the second parameter
    507      */
    508     public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) {
    509         return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED));
    510     }
    511 
    512     /**
    513      * Returns the absolute value of the specified half-precision float.
    514      * Special values are handled in the following ways:
    515      * <ul>
    516      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    517      * <li>If the specified half-precision float is zero (negative or positive),
    518      * the result is positive zero (see {@link #POSITIVE_ZERO})</li>
    519      * <li>If the specified half-precision float is infinity (negative or positive),
    520      * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li>
    521      * </ul>
    522      *
    523      * @param h A half-precision float value
    524      * @return The absolute value of the specified half-precision float
    525      */
    526     public static @HalfFloat short abs(@HalfFloat short h) {
    527         return (short) (h & FP16_COMBINED);
    528     }
    529 
    530     /**
    531      * Returns the closest integral half-precision float value to the specified
    532      * half-precision float value. Special values are handled in the
    533      * following ways:
    534      * <ul>
    535      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    536      * <li>If the specified half-precision float is infinity (negative or positive),
    537      * the result is infinity (with the same sign)</li>
    538      * <li>If the specified half-precision float is zero (negative or positive),
    539      * the result is zero (with the same sign)</li>
    540      * </ul>
    541      *
    542      * @param h A half-precision float value
    543      * @return The value of the specified half-precision float rounded to the nearest
    544      *         half-precision float value
    545      */
    546     public static @HalfFloat short round(@HalfFloat short h) {
    547         int bits = h & 0xffff;
    548         int e = bits & 0x7fff;
    549         int result = bits;
    550 
    551         if (e < 0x3c00) {
    552             result &= FP16_SIGN_MASK;
    553             result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0));
    554         } else if (e < 0x6400) {
    555             e = 25 - (e >> 10);
    556             int mask = (1 << e) - 1;
    557             result += (1 << (e - 1));
    558             result &= ~mask;
    559         }
    560 
    561         return (short) result;
    562     }
    563 
    564     /**
    565      * Returns the smallest half-precision float value toward negative infinity
    566      * greater than or equal to the specified half-precision float value.
    567      * Special values are handled in the following ways:
    568      * <ul>
    569      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    570      * <li>If the specified half-precision float is infinity (negative or positive),
    571      * the result is infinity (with the same sign)</li>
    572      * <li>If the specified half-precision float is zero (negative or positive),
    573      * the result is zero (with the same sign)</li>
    574      * </ul>
    575      *
    576      * @param h A half-precision float value
    577      * @return The smallest half-precision float value toward negative infinity
    578      *         greater than or equal to the specified half-precision float value
    579      */
    580     public static @HalfFloat short ceil(@HalfFloat short h) {
    581         int bits = h & 0xffff;
    582         int e = bits & 0x7fff;
    583         int result = bits;
    584 
    585         if (e < 0x3c00) {
    586             result &= FP16_SIGN_MASK;
    587             result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0));
    588         } else if (e < 0x6400) {
    589             e = 25 - (e >> 10);
    590             int mask = (1 << e) - 1;
    591             result += mask & ((bits >> 15) - 1);
    592             result &= ~mask;
    593         }
    594 
    595         return (short) result;
    596     }
    597 
    598     /**
    599      * Returns the largest half-precision float value toward positive infinity
    600      * less than or equal to the specified half-precision float value.
    601      * Special values are handled in the following ways:
    602      * <ul>
    603      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    604      * <li>If the specified half-precision float is infinity (negative or positive),
    605      * the result is infinity (with the same sign)</li>
    606      * <li>If the specified half-precision float is zero (negative or positive),
    607      * the result is zero (with the same sign)</li>
    608      * </ul>
    609      *
    610      * @param h A half-precision float value
    611      * @return The largest half-precision float value toward positive infinity
    612      *         less than or equal to the specified half-precision float value
    613      */
    614     public static @HalfFloat short floor(@HalfFloat short h) {
    615         int bits = h & 0xffff;
    616         int e = bits & 0x7fff;
    617         int result = bits;
    618 
    619         if (e < 0x3c00) {
    620             result &= FP16_SIGN_MASK;
    621             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
    622         } else if (e < 0x6400) {
    623             e = 25 - (e >> 10);
    624             int mask = (1 << e) - 1;
    625             result += mask & -(bits >> 15);
    626             result &= ~mask;
    627         }
    628 
    629         return (short) result;
    630     }
    631 
    632     /**
    633      * Returns the truncated half-precision float value of the specified
    634      * half-precision float value. Special values are handled in the following ways:
    635      * <ul>
    636      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    637      * <li>If the specified half-precision float is infinity (negative or positive),
    638      * the result is infinity (with the same sign)</li>
    639      * <li>If the specified half-precision float is zero (negative or positive),
    640      * the result is zero (with the same sign)</li>
    641      * </ul>
    642      *
    643      * @param h A half-precision float value
    644      * @return The truncated half-precision float value of the specified
    645      *         half-precision float value
    646      */
    647     public static @HalfFloat short trunc(@HalfFloat short h) {
    648         int bits = h & 0xffff;
    649         int e = bits & 0x7fff;
    650         int result = bits;
    651 
    652         if (e < 0x3c00) {
    653             result &= FP16_SIGN_MASK;
    654         } else if (e < 0x6400) {
    655             e = 25 - (e >> 10);
    656             int mask = (1 << e) - 1;
    657             result &= ~mask;
    658         }
    659 
    660         return (short) result;
    661     }
    662 
    663     /**
    664      * Returns the smaller of two half-precision float values (the value closest
    665      * to negative infinity). Special values are handled in the following ways:
    666      * <ul>
    667      * <li>If either value is NaN, the result is NaN</li>
    668      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
    669      * </ul>
    670      *
    671      * @param x The first half-precision value
    672      * @param y The second half-precision value
    673      * @return The smaller of the two specified half-precision values
    674      */
    675     public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) {
    676         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    677         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    678 
    679         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
    680             return (x & FP16_SIGN_MASK) != 0 ? x : y;
    681         }
    682 
    683         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
    684                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
    685     }
    686 
    687     /**
    688      * Returns the larger of two half-precision float values (the value closest
    689      * to positive infinity). Special values are handled in the following ways:
    690      * <ul>
    691      * <li>If either value is NaN, the result is NaN</li>
    692      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
    693      * </ul>
    694      *
    695      * @param x The first half-precision value
    696      * @param y The second half-precision value
    697      *
    698      * @return The larger of the two specified half-precision values
    699      */
    700     public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) {
    701         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    702         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    703 
    704         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
    705             return (x & FP16_SIGN_MASK) != 0 ? y : x;
    706         }
    707 
    708         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
    709                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
    710     }
    711 
    712     /**
    713      * Returns true if the first half-precision float value is less (smaller
    714      * toward negative infinity) than the second half-precision float value.
    715      * If either of the values is NaN, the result is false.
    716      *
    717      * @param x The first half-precision value
    718      * @param y The second half-precision value
    719      *
    720      * @return True if x is less than y, false otherwise
    721      */
    722     public static boolean less(@HalfFloat short x, @HalfFloat short y) {
    723         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    724         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    725 
    726         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
    727                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    728     }
    729 
    730     /**
    731      * Returns true if the first half-precision float value is less (smaller
    732      * toward negative infinity) than or equal to the second half-precision
    733      * float value. If either of the values is NaN, the result is false.
    734      *
    735      * @param x The first half-precision value
    736      * @param y The second half-precision value
    737      *
    738      * @return True if x is less than or equal to y, false otherwise
    739      */
    740     public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) {
    741         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    742         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    743 
    744         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
    745                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    746     }
    747 
    748     /**
    749      * Returns true if the first half-precision float value is greater (larger
    750      * toward positive infinity) than the second half-precision float value.
    751      * If either of the values is NaN, the result is false.
    752      *
    753      * @param x The first half-precision value
    754      * @param y The second half-precision value
    755      *
    756      * @return True if x is greater than y, false otherwise
    757      */
    758     public static boolean greater(@HalfFloat short x, @HalfFloat short y) {
    759         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    760         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    761 
    762         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
    763                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    764     }
    765 
    766     /**
    767      * Returns true if the first half-precision float value is greater (larger
    768      * toward positive infinity) than or equal to the second half-precision float
    769      * value. If either of the values is NaN, the result is false.
    770      *
    771      * @param x The first half-precision value
    772      * @param y The second half-precision value
    773      *
    774      * @return True if x is greater than y, false otherwise
    775      */
    776     public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) {
    777         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    778         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    779 
    780         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
    781                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    782     }
    783 
    784     /**
    785      * Returns true if the two half-precision float values are equal.
    786      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
    787      * and {@link #NEGATIVE_ZERO} are considered equal.
    788      *
    789      * @param x The first half-precision value
    790      * @param y The second half-precision value
    791      *
    792      * @return True if x is equal to y, false otherwise
    793      */
    794     public static boolean equals(@HalfFloat short x, @HalfFloat short y) {
    795         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    796         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    797 
    798         return x == y || ((x | y) & FP16_COMBINED) == 0;
    799     }
    800 
    801     /**
    802      * Returns the sign of the specified half-precision float.
    803      *
    804      * @param h A half-precision float value
    805      * @return 1 if the value is positive, -1 if the value is negative
    806      */
    807     public static int getSign(@HalfFloat short h) {
    808         return (h & FP16_SIGN_MASK) == 0 ? 1 : -1;
    809     }
    810 
    811     /**
    812      * Returns the unbiased exponent used in the representation of
    813      * the specified  half-precision float value. if the value is NaN
    814      * or infinite, this* method returns {@link #MAX_EXPONENT} + 1.
    815      * If the argument is 0 or a subnormal representation, this method
    816      * returns {@link #MIN_EXPONENT} - 1.
    817      *
    818      * @param h A half-precision float value
    819      * @return The unbiased exponent of the specified value
    820      */
    821     public static int getExponent(@HalfFloat short h) {
    822         return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS;
    823     }
    824 
    825     /**
    826      * Returns the significand, or mantissa, used in the representation
    827      * of the specified half-precision float value.
    828      *
    829      * @param h A half-precision float value
    830      * @return The significand, or significand, of the specified vlaue
    831      */
    832     public static int getSignificand(@HalfFloat short h) {
    833         return h & FP16_SIGNIFICAND_MASK;
    834     }
    835 
    836     /**
    837      * Returns true if the specified half-precision float value represents
    838      * infinity, false otherwise.
    839      *
    840      * @param h A half-precision float value
    841      * @return True if the value is positive infinity or negative infinity,
    842      *         false otherwise
    843      */
    844     public static boolean isInfinite(@HalfFloat short h) {
    845         return (h & FP16_COMBINED) == FP16_EXPONENT_MAX;
    846     }
    847 
    848     /**
    849      * Returns true if the specified half-precision float value represents
    850      * a Not-a-Number, false otherwise.
    851      *
    852      * @param h A half-precision float value
    853      * @return True if the value is a NaN, false otherwise
    854      */
    855     public static boolean isNaN(@HalfFloat short h) {
    856         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX;
    857     }
    858 
    859     /**
    860      * Returns true if the specified half-precision float value is normalized
    861      * (does not have a subnormal representation). If the specified value is
    862      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
    863      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
    864      * number, this method returns false.
    865      *
    866      * @param h A half-precision float value
    867      * @return True if the value is normalized, false otherwise
    868      */
    869     public static boolean isNormalized(@HalfFloat short h) {
    870         return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX;
    871     }
    872 
    873     /**
    874      * <p>Converts the specified half-precision float value into a
    875      * single-precision float value. The following special cases are handled:</p>
    876      * <ul>
    877      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
    878      * <li>If the input is {@link #POSITIVE_INFINITY} or
    879      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
    880      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
    881      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
    882      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
    883      * </ul>
    884      *
    885      * @param h The half-precision float value to convert to single-precision
    886      * @return A normalized single-precision float value
    887      */
    888     public static float toFloat(@HalfFloat short h) {
    889         int bits = h & 0xffff;
    890         int s = bits & FP16_SIGN_MASK;
    891         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
    892         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
    893 
    894         int outE = 0;
    895         int outM = 0;
    896 
    897         if (e == 0) { // Denormal or 0
    898             if (m != 0) {
    899                 // Convert denorm fp16 into normalized fp32
    900                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
    901                 o -= FP32_DENORMAL_FLOAT;
    902                 return s == 0 ? o : -o;
    903             }
    904         } else {
    905             outM = m << 13;
    906             if (e == 0x1f) { // Infinite or NaN
    907                 outE = 0xff;
    908             } else {
    909                 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS;
    910             }
    911         }
    912 
    913         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
    914         return Float.intBitsToFloat(out);
    915     }
    916 
    917     /**
    918      * <p>Converts the specified single-precision float value into a
    919      * half-precision float value. The following special cases are handled:</p>
    920      * <ul>
    921      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
    922      * value is {@link #NaN}</li>
    923      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
    924      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
    925      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
    926      * <li>If the input is 0 (positive or negative), the returned value is
    927      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
    928      * <li>If the input is a less than {@link #MIN_VALUE}, the returned value
    929      * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
    930      * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value
    931      * is a denorm half-precision float</li>
    932      * <li>Otherwise, the returned value is rounded to the nearest
    933      * representable half-precision float value</li>
    934      * </ul>
    935      *
    936      * @param f The single-precision float value to convert to half-precision
    937      * @return A half-precision float value
    938      */
    939     @SuppressWarnings("StatementWithEmptyBody")
    940     public static @HalfFloat short toHalf(float f) {
    941         int bits = Float.floatToRawIntBits(f);
    942         int s = (bits >>> FP32_SIGN_SHIFT    );
    943         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK;
    944         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
    945 
    946         int outE = 0;
    947         int outM = 0;
    948 
    949         if (e == 0xff) { // Infinite or NaN
    950             outE = 0x1f;
    951             outM = m != 0 ? 0x200 : 0;
    952         } else {
    953             e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS;
    954             if (e >= 0x1f) { // Overflow
    955                 outE = 0x31;
    956             } else if (e <= 0) { // Underflow
    957                 if (e < -10) {
    958                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
    959                 } else {
    960                     // The fp32 value is a normalized float less than MIN_NORMAL,
    961                     // we convert to a denorm fp16
    962                     m = (m | 0x800000) >> (1 - e);
    963                     if ((m & 0x1000) != 0) m += 0x2000;
    964                     outM = m >> 13;
    965                 }
    966             } else {
    967                 outE = e;
    968                 outM = m >> 13;
    969                 if ((m & 0x1000) != 0) {
    970                     // Round to nearest "0.5" up
    971                     int out = (outE << FP16_EXPONENT_SHIFT) | outM;
    972                     out++;
    973                     return (short) (out | (s << FP16_SIGN_SHIFT));
    974                 }
    975             }
    976         }
    977 
    978         return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM);
    979     }
    980 
    981     /**
    982      * Returns a {@code Half} instance representing the specified
    983      * half-precision float value.
    984      *
    985      * @param h A half-precision float value
    986      * @return a {@code Half} instance representing {@code h}
    987      */
    988     public static @NonNull Half valueOf(@HalfFloat short h) {
    989         return new Half(h);
    990     }
    991 
    992     /**
    993      * Returns a {@code Half} instance representing the specified float value.
    994      *
    995      * @param f A float value
    996      * @return a {@code Half} instance representing {@code f}
    997      */
    998     public static @NonNull Half valueOf(float f) {
    999         return new Half(f);
   1000     }
   1001 
   1002     /**
   1003      * Returns a {@code Half} instance representing the specified string value.
   1004      * Calling this method is equivalent to calling
   1005      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
   1006      * for more information on the format of the string representation.
   1007      *
   1008      * @param s The string to be parsed
   1009      * @return a {@code Half} instance representing {@code h}
   1010      * @throws NumberFormatException if the string does not contain a parsable
   1011      *         half-precision float value
   1012      */
   1013     public static @NonNull Half valueOf(@NonNull String s) {
   1014         return new Half(s);
   1015     }
   1016 
   1017     /**
   1018      * Returns the half-precision float value represented by the specified string.
   1019      * Calling this method is equivalent to calling
   1020      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
   1021      * for more information on the format of the string representation.
   1022      *
   1023      * @param s The string to be parsed
   1024      * @return A half-precision float value represented by the string
   1025      * @throws NumberFormatException if the string does not contain a parsable
   1026      *         half-precision float value
   1027      */
   1028     public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException {
   1029         return toHalf(FloatingDecimal.parseFloat(s));
   1030     }
   1031 
   1032     /**
   1033      * Returns a string representation of the specified half-precision
   1034      * float value. Calling this method is equivalent to calling
   1035      * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
   1036      * for more information on the format of the string representation.
   1037      *
   1038      * @param h A half-precision float value
   1039      * @return A string representation of the specified value
   1040      */
   1041     @NonNull
   1042     public static String toString(@HalfFloat short h) {
   1043         return Float.toString(toFloat(h));
   1044     }
   1045 
   1046     /**
   1047      * <p>Returns a hexadecimal string representation of the specified half-precision
   1048      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
   1049      * otherwise the result follows this format:</p>
   1050      * <ul>
   1051      * <li>If the sign is positive, no sign character appears in the result</li>
   1052      * <li>If the sign is negative, the first character is <code>'-'</code></li>
   1053      * <li>If the value is inifinity, the string is <code>"Infinity"</code></li>
   1054      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
   1055      * <li>If the value has a normalized representation, the exponent and
   1056      * significand are represented in the string in two fields. The significand
   1057      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
   1058      * representation. Trailing zeroes are removed unless all digits are 0, then
   1059      * a single zero is used. The significand representation is followed by the
   1060      * exponent, represented by <code>"p"</code>, itself followed by a decimal
   1061      * string of the unbiased exponent</li>
   1062      * <li>If the value has a subnormal representation, the significand starts
   1063      * with <code>"0x0."</code> followed by its lowercase hexadecimal
   1064      * representation. Trailing zeroes are removed unless all digits are 0, then
   1065      * a single zero is used. The significand representation is followed by the
   1066      * exponent, represented by <code>"p-14"</code></li>
   1067      * </ul>
   1068      *
   1069      * @param h A half-precision float value
   1070      * @return A hexadecimal string representation of the specified value
   1071      */
   1072     @NonNull
   1073     public static String toHexString(@HalfFloat short h) {
   1074         StringBuilder o = new StringBuilder();
   1075 
   1076         int bits = h & 0xffff;
   1077         int s = (bits >>> FP16_SIGN_SHIFT    );
   1078         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
   1079         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
   1080 
   1081         if (e == 0x1f) { // Infinite or NaN
   1082             if (m == 0) {
   1083                 if (s != 0) o.append('-');
   1084                 o.append("Infinity");
   1085             } else {
   1086                 o.append("NaN");
   1087             }
   1088         } else {
   1089             if (s == 1) o.append('-');
   1090             if (e == 0) {
   1091                 if (m == 0) {
   1092                     o.append("0x0.0p0");
   1093                 } else {
   1094                     o.append("0x0.");
   1095                     String significand = Integer.toHexString(m);
   1096                     o.append(significand.replaceFirst("0{2,}$", ""));
   1097                     o.append("p-14");
   1098                 }
   1099             } else {
   1100                 o.append("0x1.");
   1101                 String significand = Integer.toHexString(m);
   1102                 o.append(significand.replaceFirst("0{2,}$", ""));
   1103                 o.append('p');
   1104                 o.append(Integer.toString(e - FP16_EXPONENT_BIAS));
   1105             }
   1106         }
   1107 
   1108         return o.toString();
   1109     }
   1110 }
   1111