Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.util;
     18 
     19 import android.annotation.HalfFloat;
     20 import android.annotation.NonNull;
     21 import android.annotation.Nullable;
     22 
     23 /**
     24  * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit
     25  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
     26  * floating point data types (also called fp16 or binary16). A half-precision float can be
     27  * created from or converted to single-precision floats, and is stored in a short data type.
     28  * To distinguish short values holding half-precision floats from regular short values,
     29  * it is recommended to use the <code>@HalfFloat</code> annotation.</p>
     30  *
     31  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
     32  * <ul>
     33  * <li>Sign bit: 1 bit</li>
     34  * <li>Exponent width: 5 bits</li>
     35  * <li>Significand: 10 bits</li>
     36  * </ul>
     37  *
     38  * <p>The format is laid out as follows:</p>
     39  * <pre>
     40  * 1   11111   1111111111
     41  * ^   --^--   -----^----
     42  * sign  |          |_______ significand
     43  *       |
     44  *       -- exponent
     45  * </pre>
     46  *
     47  * <p>Half-precision floating points can be useful to save memory and/or
     48  * bandwidth at the expense of range and precision when compared to single-precision
     49  * floating points (fp32).</p>
     50  * <p>To help you decide whether fp16 is the right storage type for your needs, please
     51  * refer to the table below that shows the available precision throughout the range of
     52  * possible values. The <em>precision</em> column indicates the step size between two
     53  * consecutive numbers in a specific part of the range.</p>
     54  *
     55  * <table summary="Precision of fp16 across the range">
     56  *     <tr><th>Range start</th><th>Precision</th></tr>
     57  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
     58  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
     59  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
     60  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
     61  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
     62  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
     63  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
     64  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
     65  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
     66  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
     67  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
     68  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
     69  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
     70  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
     71  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
     72  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
     73  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
     74  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
     75  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
     76  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
     77  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
     78  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
     79  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
     80  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
     81  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
     82  *     <tr><td>1,024</td><td>1</td></tr>
     83  *     <tr><td>2,048</td><td>2</td></tr>
     84  *     <tr><td>4,096</td><td>4</td></tr>
     85  *     <tr><td>8,192</td><td>8</td></tr>
     86  *     <tr><td>16,384</td><td>16</td></tr>
     87  *     <tr><td>32,768</td><td>32</td></tr>
     88  * </table>
     89  *
     90  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
     91  */
     92 @SuppressWarnings("SimplifiableIfStatement")
     93 public final class Half extends Number implements Comparable<Half> {
     94     /**
     95      * The number of bits used to represent a half-precision float value.
     96      */
     97     public static final int SIZE = 16;
     98 
     99     /**
    100      * Epsilon is the difference between 1.0 and the next value representable
    101      * by a half-precision floating-point.
    102      */
    103     public static final @HalfFloat short EPSILON = (short) 0x1400;
    104 
    105     /**
    106      * Maximum exponent a finite half-precision float may have.
    107      */
    108     public static final int MAX_EXPONENT = 15;
    109     /**
    110      * Minimum exponent a normalized half-precision float may have.
    111      */
    112     public static final int MIN_EXPONENT = -14;
    113 
    114     /**
    115      * Smallest negative value a half-precision float may have.
    116      */
    117     public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff;
    118     /**
    119      * Maximum positive finite value a half-precision float may have.
    120      */
    121     public static final @HalfFloat short MAX_VALUE = (short) 0x7bff;
    122     /**
    123      * Smallest positive normal value a half-precision float may have.
    124      */
    125     public static final @HalfFloat short MIN_NORMAL = (short) 0x0400;
    126     /**
    127      * Smallest positive non-zero value a half-precision float may have.
    128      */
    129     public static final @HalfFloat short MIN_VALUE = (short) 0x0001;
    130     /**
    131      * A Not-a-Number representation of a half-precision float.
    132      */
    133     public static final @HalfFloat short NaN = (short) 0x7e00;
    134     /**
    135      * Negative infinity of type half-precision float.
    136      */
    137     public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00;
    138     /**
    139      * Negative 0 of type half-precision float.
    140      */
    141     public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000;
    142     /**
    143      * Positive infinity of type half-precision float.
    144      */
    145     public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00;
    146     /**
    147      * Positive 0 of type half-precision float.
    148      */
    149     public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000;
    150 
    151     private static final int FP16_SIGN_SHIFT        = 15;
    152     private static final int FP16_SIGN_MASK         = 0x8000;
    153     private static final int FP16_EXPONENT_SHIFT    = 10;
    154     private static final int FP16_EXPONENT_MASK     = 0x1f;
    155     private static final int FP16_SIGNIFICAND_MASK  = 0x3ff;
    156     private static final int FP16_EXPONENT_BIAS     = 15;
    157     private static final int FP16_COMBINED          = 0x7fff;
    158     private static final int FP16_EXPONENT_MAX      = 0x7c00;
    159 
    160     private static final int FP32_SIGN_SHIFT        = 31;
    161     private static final int FP32_EXPONENT_SHIFT    = 23;
    162     private static final int FP32_EXPONENT_MASK     = 0xff;
    163     private static final int FP32_SIGNIFICAND_MASK  = 0x7fffff;
    164     private static final int FP32_EXPONENT_BIAS     = 127;
    165     private static final int FP32_QNAN_MASK         = 0x400000;
    166 
    167     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
    168     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
    169 
    170     private final @HalfFloat short mValue;
    171 
    /**
     * Creates a {@code Half} wrapping the given half-precision bits as-is;
     * no conversion is performed.
     *
     * @param value The half-precision float value, stored in a {@code short},
     *              that the new {@code Half} represents
     */
    public Half(@HalfFloat short value) {
        this.mValue = value;
    }
    181 
    /**
     * Creates a {@code Half} from a single-precision float by converting it
     * with {@link #toHalf(float)}.
     *
     * @param value The single-precision value to convert and wrap
     *
     * @see #toHalf(float)
     */
    public Half(float value) {
        this.mValue = toHalf(value);
    }
    193 
    194     /**
    195      * Constructs a newly allocated {@code Half} object that
    196      * represents the argument converted to a half-precision float.
    197      *
    198      * @param value The value to be represented by the {@code Half}
    199      *
    200      * @see #toHalf(float)
    201      */
    202     public Half(double value) {
    203         mValue = toHalf((float) value);
    204     }
    205 
    206     /**
    207      * <p>Constructs a newly allocated {@code Half} object that represents the
    208      * half-precision float value represented by the string.
    209      * The string is converted to a half-precision float value as if by the
    210      * {@link #valueOf(String)} method.</p>
    211      *
    212      * <p>Calling this constructor is equivalent to calling:</p>
    213      * <pre>
    214      *     new Half(Float.parseFloat(value))
    215      * </pre>
    216      *
    217      * @param value A string to be converted to a {@code Half}
    218      * @throws NumberFormatException if the string does not contain a parsable number
    219      *
    220      * @see Float#valueOf(java.lang.String)
    221      * @see #toHalf(float)
    222      */
    223     public Half(@NonNull String value) throws NumberFormatException {
    224         mValue = toHalf(Float.parseFloat(value));
    225     }
    226 
    227     /**
    228      * Returns the half-precision value of this {@code Half} as a {@code short}
    229      * containing the bit representation described in {@link Half}.
    230      *
    231      * @return The half-precision float value represented by this object
    232      */
    233     public @HalfFloat short halfValue() {
    234         return mValue;
    235     }
    236 
    237     /**
    238      * Returns the value of this {@code Half} as a {@code byte} after
    239      * a narrowing primitive conversion.
    240      *
    241      * @return The half-precision float value represented by this object
    242      *         converted to type {@code byte}
    243      */
    244     @Override
    245     public byte byteValue() {
    246         return (byte) toFloat(mValue);
    247     }
    248 
    249     /**
    250      * Returns the value of this {@code Half} as a {@code short} after
    251      * a narrowing primitive conversion.
    252      *
    253      * @return The half-precision float value represented by this object
    254      *         converted to type {@code short}
    255      */
    256     @Override
    257     public short shortValue() {
    258         return (short) toFloat(mValue);
    259     }
    260 
    261     /**
    262      * Returns the value of this {@code Half} as a {@code int} after
    263      * a narrowing primitive conversion.
    264      *
    265      * @return The half-precision float value represented by this object
    266      *         converted to type {@code int}
    267      */
    268     @Override
    269     public int intValue() {
    270         return (int) toFloat(mValue);
    271     }
    272 
    273     /**
    274      * Returns the value of this {@code Half} as a {@code long} after
    275      * a narrowing primitive conversion.
    276      *
    277      * @return The half-precision float value represented by this object
    278      *         converted to type {@code long}
    279      */
    280     @Override
    281     public long longValue() {
    282         return (long) toFloat(mValue);
    283     }
    284 
    285     /**
    286      * Returns the value of this {@code Half} as a {@code float} after
    287      * a widening primitive conversion.
    288      *
    289      * @return The half-precision float value represented by this object
    290      *         converted to type {@code float}
    291      */
    292     @Override
    293     public float floatValue() {
    294         return toFloat(mValue);
    295     }
    296 
    297     /**
    298      * Returns the value of this {@code Half} as a {@code double} after
    299      * a widening primitive conversion.
    300      *
    301      * @return The half-precision float value represented by this object
    302      *         converted to type {@code double}
    303      */
    304     @Override
    305     public double doubleValue() {
    306         return toFloat(mValue);
    307     }
    308 
    309     /**
    310      * Returns true if this {@code Half} value represents a Not-a-Number,
    311      * false otherwise.
    312      *
    313      * @return True if the value is a NaN, false otherwise
    314      */
    315     public boolean isNaN() {
    316         return isNaN(mValue);
    317     }
    318 
    319     /**
    320      * Compares this object against the specified object. The result is {@code true}
    321      * if and only if the argument is not {@code null} and is a {@code Half} object
    322      * that represents the same half-precision value as the this object. Two
    323      * half-precision values are considered to be the same if and only if the method
    324      * {@link #halfToIntBits(short)} returns an identical {@code int} value for both.
    325      *
    326      * @param o The object to compare
    327      * @return True if the objects are the same, false otherwise
    328      *
    329      * @see #halfToIntBits(short)
    330      */
    331     @Override
    332     public boolean equals(@Nullable Object o) {
    333         return (o instanceof Half) &&
    334                 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue));
    335     }
    336 
    337     /**
    338      * Returns a hash code for this {@code Half} object. The result is the
    339      * integer bit representation, exactly as produced by the method
    340      * {@link #halfToIntBits(short)}, of the primitive half-precision float
    341      * value represented by this {@code Half} object.
    342      *
    343      * @return A hash code value for this object
    344      */
    345     @Override
    346     public int hashCode() {
    347         return hashCode(mValue);
    348     }
    349 
    350     /**
    351      * Returns a string representation of the specified half-precision
    352      * float value. See {@link #toString(short)} for more information.
    353      *
    354      * @return A string representation of this {@code Half} object
    355      */
    356     @NonNull
    357     @Override
    358     public String toString() {
    359         return toString(mValue);
    360     }
    361 
    362     /**
    363      * <p>Compares the two specified half-precision float values. The following
    364      * conditions apply during the comparison:</p>
    365      *
    366      * <ul>
    367      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
    368      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
    369      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
    370      * {@link #NEGATIVE_ZERO}.</li>
    371      * </ul>
    372      *
    373      * @param h The half-precision float value to compare to the half-precision value
    374      *          represented by this {@code Half} object
    375      *
    376      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}; a
    377      *          value less than {@code 0} if {@code x} is numerically less than {@code y};
    378      *          and a value greater than {@code 0} if {@code x} is numerically greater
    379      *          than {@code y}
    380      */
    381     @Override
    382     public int compareTo(@NonNull Half h) {
    383         return compare(mValue, h.mValue);
    384     }
    385 
    386     /**
    387      * Returns a hash code for a half-precision float value.
    388      *
    389      * @param h The value to hash
    390      *
    391      * @return A hash code value for a half-precision float value
    392      */
    393     public static int hashCode(@HalfFloat short h) {
    394         return halfToIntBits(h);
    395     }
    396 
    397     /**
    398      * <p>Compares the two specified half-precision float values. The following
    399      * conditions apply during the comparison:</p>
    400      *
    401      * <ul>
    402      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
    403      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
    404      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
    405      * {@link #NEGATIVE_ZERO}.</li>
    406      * </ul>
    407      *
    408      * @param x The first half-precision float value to compare.
    409      * @param y The second half-precision float value to compare
    410      *
    411      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
    412      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
    413      *          and a value greater than {@code 0} if {@code x} is numerically greater
    414      *          than {@code y}
    415      */
    416     public static int compare(@HalfFloat short x, @HalfFloat short y) {
    417         if (less(x, y)) return -1;
    418         if (greater(x, y)) return 1;
    419 
    420         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
    421         // (signed) short value types to preserve the ordering of -0.0
    422         // and +0.0
    423         short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x;
    424         short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y;
    425 
    426         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
    427     }
    428 
    429     /**
    430      * <p>Returns a representation of the specified half-precision float value
    431      * according to the bit layout described in {@link Half}.</p>
    432      *
    433      * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all
    434      * possible Not-a-Number values to a single canonical Not-a-Number value
    435      * defined by {@link #NaN}.</p>
    436      *
    437      * @param h A half-precision float value
    438      * @return The bits that represent the half-precision float value
    439      *
    440      * @see #halfToIntBits(short)
    441      */
    442     public static @HalfFloat short halfToShortBits(@HalfFloat short h) {
    443         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h;
    444     }
    445 
    446     /**
    447      * <p>Returns a representation of the specified half-precision float value
    448      * according to the bit layout described in {@link Half}.</p>
    449      *
    450      * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all
    451      * possible Not-a-Number values to a single canonical Not-a-Number value
    452      * defined by {@link #NaN}.</p>
    453      *
    454      * @param h A half-precision float value
    455      * @return The bits that represent the half-precision float value
    456      *
    457      * @see #halfToRawIntBits(short)
    458      * @see #halfToShortBits(short)
    459      * @see #intBitsToHalf(int)
    460      */
    461     public static int halfToIntBits(@HalfFloat short h) {
    462         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff;
    463     }
    464 
    465     /**
    466      * <p>Returns a representation of the specified half-precision float value
    467      * according to the bit layout described in {@link Half}.</p>
    468      *
    469      * <p>The argument is considered to be a representation of a half-precision
    470      * float value according to the bit layout described in {@link Half}. The 16
    471      * most significant bits of the returned value are set to 0.</p>
    472      *
    473      * @param h A half-precision float value
    474      * @return The bits that represent the half-precision float value
    475      *
    476      * @see #halfToIntBits(short)
    477      * @see #intBitsToHalf(int)
    478      */
    479     public static int halfToRawIntBits(@HalfFloat short h) {
    480         return h & 0xffff;
    481     }
    482 
    483     /**
    484      * <p>Returns the half-precision float value corresponding to a given
    485      * bit representation.</p>
    486      *
    487      * <p>The argument is considered to be a representation of a half-precision
    488      * float value according to the bit layout described in {@link Half}. The 16
    489      * most significant bits of the argument are ignored.</p>
    490      *
    491      * @param bits An integer
    492      * @return The half-precision float value with the same bit pattern
    493      */
    494     public static @HalfFloat short intBitsToHalf(int bits) {
    495         return (short) (bits & 0xffff);
    496     }
    497 
    498     /**
    499      * Returns the first parameter with the sign of the second parameter.
    500      * This method treats NaNs as having a sign.
    501      *
    502      * @param magnitude A half-precision float value providing the magnitude of the result
    503      * @param sign  A half-precision float value providing the sign of the result
    504      * @return A value with the magnitude of the first parameter and the sign
    505      *         of the second parameter
    506      */
    507     public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) {
    508         return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED));
    509     }
    510 
    511     /**
    512      * Returns the absolute value of the specified half-precision float.
    513      * Special values are handled in the following ways:
    514      * <ul>
    515      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    516      * <li>If the specified half-precision float is zero (negative or positive),
    517      * the result is positive zero (see {@link #POSITIVE_ZERO})</li>
    518      * <li>If the specified half-precision float is infinity (negative or positive),
    519      * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li>
    520      * </ul>
    521      *
    522      * @param h A half-precision float value
    523      * @return The absolute value of the specified half-precision float
    524      */
    525     public static @HalfFloat short abs(@HalfFloat short h) {
    526         return (short) (h & FP16_COMBINED);
    527     }
    528 
    529     /**
    530      * Returns the closest integral half-precision float value to the specified
    531      * half-precision float value. Special values are handled in the
    532      * following ways:
    533      * <ul>
    534      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    535      * <li>If the specified half-precision float is infinity (negative or positive),
    536      * the result is infinity (with the same sign)</li>
    537      * <li>If the specified half-precision float is zero (negative or positive),
    538      * the result is zero (with the same sign)</li>
    539      * </ul>
    540      *
    541      * @param h A half-precision float value
    542      * @return The value of the specified half-precision float rounded to the nearest
    543      *         half-precision float value
    544      */
    545     public static @HalfFloat short round(@HalfFloat short h) {
    546         int bits = h & 0xffff;
    547         int e = bits & 0x7fff;
    548         int result = bits;
    549 
    550         if (e < 0x3c00) {
    551             result &= FP16_SIGN_MASK;
    552             result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0));
    553         } else if (e < 0x6400) {
    554             e = 25 - (e >> 10);
    555             int mask = (1 << e) - 1;
    556             result += (1 << (e - 1));
    557             result &= ~mask;
    558         }
    559 
    560         return (short) result;
    561     }
    562 
    563     /**
    564      * Returns the smallest half-precision float value toward negative infinity
    565      * greater than or equal to the specified half-precision float value.
    566      * Special values are handled in the following ways:
    567      * <ul>
    568      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    569      * <li>If the specified half-precision float is infinity (negative or positive),
    570      * the result is infinity (with the same sign)</li>
    571      * <li>If the specified half-precision float is zero (negative or positive),
    572      * the result is zero (with the same sign)</li>
    573      * </ul>
    574      *
    575      * @param h A half-precision float value
    576      * @return The smallest half-precision float value toward negative infinity
    577      *         greater than or equal to the specified half-precision float value
    578      */
    579     public static @HalfFloat short ceil(@HalfFloat short h) {
    580         int bits = h & 0xffff;
    581         int e = bits & 0x7fff;
    582         int result = bits;
    583 
    584         if (e < 0x3c00) {
    585             result &= FP16_SIGN_MASK;
    586             result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0));
    587         } else if (e < 0x6400) {
    588             e = 25 - (e >> 10);
    589             int mask = (1 << e) - 1;
    590             result += mask & ((bits >> 15) - 1);
    591             result &= ~mask;
    592         }
    593 
    594         return (short) result;
    595     }
    596 
    597     /**
    598      * Returns the largest half-precision float value toward positive infinity
    599      * less than or equal to the specified half-precision float value.
    600      * Special values are handled in the following ways:
    601      * <ul>
    602      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    603      * <li>If the specified half-precision float is infinity (negative or positive),
    604      * the result is infinity (with the same sign)</li>
    605      * <li>If the specified half-precision float is zero (negative or positive),
    606      * the result is zero (with the same sign)</li>
    607      * </ul>
    608      *
    609      * @param h A half-precision float value
    610      * @return The largest half-precision float value toward positive infinity
    611      *         less than or equal to the specified half-precision float value
    612      */
    613     public static @HalfFloat short floor(@HalfFloat short h) {
    614         int bits = h & 0xffff;
    615         int e = bits & 0x7fff;
    616         int result = bits;
    617 
    618         if (e < 0x3c00) {
    619             result &= FP16_SIGN_MASK;
    620             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
    621         } else if (e < 0x6400) {
    622             e = 25 - (e >> 10);
    623             int mask = (1 << e) - 1;
    624             result += mask & -(bits >> 15);
    625             result &= ~mask;
    626         }
    627 
    628         return (short) result;
    629     }
    630 
    631     /**
    632      * Returns the truncated half-precision float value of the specified
    633      * half-precision float value. Special values are handled in the following ways:
    634      * <ul>
    635      * <li>If the specified half-precision float is NaN, the result is NaN</li>
    636      * <li>If the specified half-precision float is infinity (negative or positive),
    637      * the result is infinity (with the same sign)</li>
    638      * <li>If the specified half-precision float is zero (negative or positive),
    639      * the result is zero (with the same sign)</li>
    640      * </ul>
    641      *
    642      * @param h A half-precision float value
    643      * @return The truncated half-precision float value of the specified
    644      *         half-precision float value
    645      */
    646     public static @HalfFloat short trunc(@HalfFloat short h) {
    647         int bits = h & 0xffff;
    648         int e = bits & 0x7fff;
    649         int result = bits;
    650 
    651         if (e < 0x3c00) {
    652             result &= FP16_SIGN_MASK;
    653         } else if (e < 0x6400) {
    654             e = 25 - (e >> 10);
    655             int mask = (1 << e) - 1;
    656             result &= ~mask;
    657         }
    658 
    659         return (short) result;
    660     }
    661 
    662     /**
    663      * Returns the smaller of two half-precision float values (the value closest
    664      * to negative infinity). Special values are handled in the following ways:
    665      * <ul>
    666      * <li>If either value is NaN, the result is NaN</li>
    667      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
    668      * </ul>
    669      *
    670      * @param x The first half-precision value
    671      * @param y The second half-precision value
    672      * @return The smaller of the two specified half-precision values
    673      */
    674     public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) {
    675         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    676         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    677 
    678         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
    679             return (x & FP16_SIGN_MASK) != 0 ? x : y;
    680         }
    681 
    682         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
    683                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
    684     }
    685 
    686     /**
    687      * Returns the larger of two half-precision float values (the value closest
    688      * to positive infinity). Special values are handled in the following ways:
    689      * <ul>
    690      * <li>If either value is NaN, the result is NaN</li>
    691      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
    692      * </ul>
    693      *
    694      * @param x The first half-precision value
    695      * @param y The second half-precision value
    696      *
    697      * @return The larger of the two specified half-precision values
    698      */
    699     public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) {
    700         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    701         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
    702 
    703         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
    704             return (x & FP16_SIGN_MASK) != 0 ? y : x;
    705         }
    706 
    707         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
    708                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
    709     }
    710 
    711     /**
    712      * Returns true if the first half-precision float value is less (smaller
    713      * toward negative infinity) than the second half-precision float value.
    714      * If either of the values is NaN, the result is false.
    715      *
    716      * @param x The first half-precision value
    717      * @param y The second half-precision value
    718      *
    719      * @return True if x is less than y, false otherwise
    720      */
    721     public static boolean less(@HalfFloat short x, @HalfFloat short y) {
    722         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    723         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    724 
    725         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
    726                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    727     }
    728 
    729     /**
    730      * Returns true if the first half-precision float value is less (smaller
    731      * toward negative infinity) than or equal to the second half-precision
    732      * float value. If either of the values is NaN, the result is false.
    733      *
    734      * @param x The first half-precision value
    735      * @param y The second half-precision value
    736      *
    737      * @return True if x is less than or equal to y, false otherwise
    738      */
    739     public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) {
    740         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    741         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    742 
    743         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
    744                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    745     }
    746 
    747     /**
    748      * Returns true if the first half-precision float value is greater (larger
    749      * toward positive infinity) than the second half-precision float value.
    750      * If either of the values is NaN, the result is false.
    751      *
    752      * @param x The first half-precision value
    753      * @param y The second half-precision value
    754      *
    755      * @return True if x is greater than y, false otherwise
    756      */
    757     public static boolean greater(@HalfFloat short x, @HalfFloat short y) {
    758         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    759         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    760 
    761         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
    762                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    763     }
    764 
    765     /**
    766      * Returns true if the first half-precision float value is greater (larger
    767      * toward positive infinity) than or equal to the second half-precision float
    768      * value. If either of the values is NaN, the result is false.
    769      *
    770      * @param x The first half-precision value
    771      * @param y The second half-precision value
    772      *
    773      * @return True if x is greater than y, false otherwise
    774      */
    775     public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) {
    776         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    777         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    778 
    779         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
    780                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
    781     }
    782 
    783     /**
    784      * Returns true if the two half-precision float values are equal.
    785      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
    786      * and {@link #NEGATIVE_ZERO} are considered equal.
    787      *
    788      * @param x The first half-precision value
    789      * @param y The second half-precision value
    790      *
    791      * @return True if x is equal to y, false otherwise
    792      */
    793     public static boolean equals(@HalfFloat short x, @HalfFloat short y) {
    794         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    795         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
    796 
    797         return x == y || ((x | y) & FP16_COMBINED) == 0;
    798     }
    799 
    800     /**
    801      * Returns the sign of the specified half-precision float.
    802      *
    803      * @param h A half-precision float value
    804      * @return 1 if the value is positive, -1 if the value is negative
    805      */
    806     public static int getSign(@HalfFloat short h) {
    807         return (h & FP16_SIGN_MASK) == 0 ? 1 : -1;
    808     }
    809 
    810     /**
    811      * Returns the unbiased exponent used in the representation of
    812      * the specified  half-precision float value. if the value is NaN
    813      * or infinite, this* method returns {@link #MAX_EXPONENT} + 1.
    814      * If the argument is 0 or a subnormal representation, this method
    815      * returns {@link #MIN_EXPONENT} - 1.
    816      *
    817      * @param h A half-precision float value
    818      * @return The unbiased exponent of the specified value
    819      */
    820     public static int getExponent(@HalfFloat short h) {
    821         return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS;
    822     }
    823 
    824     /**
    825      * Returns the significand, or mantissa, used in the representation
    826      * of the specified half-precision float value.
    827      *
    828      * @param h A half-precision float value
    829      * @return The significand, or significand, of the specified vlaue
    830      */
    831     public static int getSignificand(@HalfFloat short h) {
    832         return h & FP16_SIGNIFICAND_MASK;
    833     }
    834 
    835     /**
    836      * Returns true if the specified half-precision float value represents
    837      * infinity, false otherwise.
    838      *
    839      * @param h A half-precision float value
    840      * @return True if the value is positive infinity or negative infinity,
    841      *         false otherwise
    842      */
    843     public static boolean isInfinite(@HalfFloat short h) {
    844         return (h & FP16_COMBINED) == FP16_EXPONENT_MAX;
    845     }
    846 
    847     /**
    848      * Returns true if the specified half-precision float value represents
    849      * a Not-a-Number, false otherwise.
    850      *
    851      * @param h A half-precision float value
    852      * @return True if the value is a NaN, false otherwise
    853      */
    854     public static boolean isNaN(@HalfFloat short h) {
    855         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX;
    856     }
    857 
    858     /**
    859      * Returns true if the specified half-precision float value is normalized
    860      * (does not have a subnormal representation). If the specified value is
    861      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
    862      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
    863      * number, this method returns false.
    864      *
    865      * @param h A half-precision float value
    866      * @return True if the value is normalized, false otherwise
    867      */
    868     public static boolean isNormalized(@HalfFloat short h) {
    869         return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX;
    870     }
    871 
    872     /**
    873      * <p>Converts the specified half-precision float value into a
    874      * single-precision float value. The following special cases are handled:</p>
    875      * <ul>
    876      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
    877      * <li>If the input is {@link #POSITIVE_INFINITY} or
    878      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
    879      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
    880      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
    881      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
    882      * </ul>
    883      *
    884      * @param h The half-precision float value to convert to single-precision
    885      * @return A normalized single-precision float value
    886      */
    887     public static float toFloat(@HalfFloat short h) {
    888         int bits = h & 0xffff;
    889         int s = bits & FP16_SIGN_MASK;
    890         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
    891         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
    892 
    893         int outE = 0;
    894         int outM = 0;
    895 
    896         if (e == 0) { // Denormal or 0
    897             if (m != 0) {
    898                 // Convert denorm fp16 into normalized fp32
    899                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
    900                 o -= FP32_DENORMAL_FLOAT;
    901                 return s == 0 ? o : -o;
    902             }
    903         } else {
    904             outM = m << 13;
    905             if (e == 0x1f) { // Infinite or NaN
    906                 outE = 0xff;
    907                 if (outM != 0) { // SNaNs are quieted
    908                     outM |= FP32_QNAN_MASK;
    909                 }
    910             } else {
    911                 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS;
    912             }
    913         }
    914 
    915         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
    916         return Float.intBitsToFloat(out);
    917     }
    918 
    919     /**
    920      * <p>Converts the specified single-precision float value into a
    921      * half-precision float value. The following special cases are handled:</p>
    922      * <ul>
    923      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
    924      * value is {@link #NaN}</li>
    925      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
    926      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
    927      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
    928      * <li>If the input is 0 (positive or negative), the returned value is
    929      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
    930      * <li>If the input is a less than {@link #MIN_VALUE}, the returned value
    931      * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
    932      * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value
    933      * is a denorm half-precision float</li>
    934      * <li>Otherwise, the returned value is rounded to the nearest
    935      * representable half-precision float value</li>
    936      * </ul>
    937      *
    938      * @param f The single-precision float value to convert to half-precision
    939      * @return A half-precision float value
    940      */
    941     @SuppressWarnings("StatementWithEmptyBody")
    942     public static @HalfFloat short toHalf(float f) {
    943         int bits = Float.floatToRawIntBits(f);
    944         int s = (bits >>> FP32_SIGN_SHIFT    );
    945         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK;
    946         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
    947 
    948         int outE = 0;
    949         int outM = 0;
    950 
    951         if (e == 0xff) { // Infinite or NaN
    952             outE = 0x1f;
    953             outM = m != 0 ? 0x200 : 0;
    954         } else {
    955             e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS;
    956             if (e >= 0x1f) { // Overflow
    957                 outE = 0x31;
    958             } else if (e <= 0) { // Underflow
    959                 if (e < -10) {
    960                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
    961                 } else {
    962                     // The fp32 value is a normalized float less than MIN_NORMAL,
    963                     // we convert to a denorm fp16
    964                     m = (m | 0x800000) >> (1 - e);
    965                     if ((m & 0x1000) != 0) m += 0x2000;
    966                     outM = m >> 13;
    967                 }
    968             } else {
    969                 outE = e;
    970                 outM = m >> 13;
    971                 if ((m & 0x1000) != 0) {
    972                     // Round to nearest "0.5" up
    973                     int out = (outE << FP16_EXPONENT_SHIFT) | outM;
    974                     out++;
    975                     return (short) (out | (s << FP16_SIGN_SHIFT));
    976                 }
    977             }
    978         }
    979 
    980         return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM);
    981     }
    982 
    983     /**
    984      * Returns a {@code Half} instance representing the specified
    985      * half-precision float value.
    986      *
    987      * @param h A half-precision float value
    988      * @return a {@code Half} instance representing {@code h}
    989      */
    990     public static @NonNull Half valueOf(@HalfFloat short h) {
    991         return new Half(h);
    992     }
    993 
    994     /**
    995      * Returns a {@code Half} instance representing the specified float value.
    996      *
    997      * @param f A float value
    998      * @return a {@code Half} instance representing {@code f}
    999      */
   1000     public static @NonNull Half valueOf(float f) {
   1001         return new Half(f);
   1002     }
   1003 
   1004     /**
   1005      * Returns a {@code Half} instance representing the specified string value.
   1006      * Calling this method is equivalent to calling
   1007      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
   1008      * for more information on the format of the string representation.
   1009      *
   1010      * @param s The string to be parsed
   1011      * @return a {@code Half} instance representing {@code h}
   1012      * @throws NumberFormatException if the string does not contain a parsable
   1013      *         half-precision float value
   1014      */
   1015     public static @NonNull Half valueOf(@NonNull String s) {
   1016         return new Half(s);
   1017     }
   1018 
   1019     /**
   1020      * Returns the half-precision float value represented by the specified string.
   1021      * Calling this method is equivalent to calling
   1022      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
   1023      * for more information on the format of the string representation.
   1024      *
   1025      * @param s The string to be parsed
   1026      * @return A half-precision float value represented by the string
   1027      * @throws NumberFormatException if the string does not contain a parsable
   1028      *         half-precision float value
   1029      */
   1030     public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException {
   1031         return toHalf(Float.parseFloat(s));
   1032     }
   1033 
   1034     /**
   1035      * Returns a string representation of the specified half-precision
   1036      * float value. Calling this method is equivalent to calling
   1037      * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
   1038      * for more information on the format of the string representation.
   1039      *
   1040      * @param h A half-precision float value
   1041      * @return A string representation of the specified value
   1042      */
   1043     @NonNull
   1044     public static String toString(@HalfFloat short h) {
   1045         return Float.toString(toFloat(h));
   1046     }
   1047 
   1048     /**
   1049      * <p>Returns a hexadecimal string representation of the specified half-precision
   1050      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
   1051      * otherwise the result follows this format:</p>
   1052      * <ul>
   1053      * <li>If the sign is positive, no sign character appears in the result</li>
   1054      * <li>If the sign is negative, the first character is <code>'-'</code></li>
   1055      * <li>If the value is inifinity, the string is <code>"Infinity"</code></li>
   1056      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
   1057      * <li>If the value has a normalized representation, the exponent and
   1058      * significand are represented in the string in two fields. The significand
   1059      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
   1060      * representation. Trailing zeroes are removed unless all digits are 0, then
   1061      * a single zero is used. The significand representation is followed by the
   1062      * exponent, represented by <code>"p"</code>, itself followed by a decimal
   1063      * string of the unbiased exponent</li>
   1064      * <li>If the value has a subnormal representation, the significand starts
   1065      * with <code>"0x0."</code> followed by its lowercase hexadecimal
   1066      * representation. Trailing zeroes are removed unless all digits are 0, then
   1067      * a single zero is used. The significand representation is followed by the
   1068      * exponent, represented by <code>"p-14"</code></li>
   1069      * </ul>
   1070      *
   1071      * @param h A half-precision float value
   1072      * @return A hexadecimal string representation of the specified value
   1073      */
   1074     @NonNull
   1075     public static String toHexString(@HalfFloat short h) {
   1076         StringBuilder o = new StringBuilder();
   1077 
   1078         int bits = h & 0xffff;
   1079         int s = (bits >>> FP16_SIGN_SHIFT    );
   1080         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
   1081         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
   1082 
   1083         if (e == 0x1f) { // Infinite or NaN
   1084             if (m == 0) {
   1085                 if (s != 0) o.append('-');
   1086                 o.append("Infinity");
   1087             } else {
   1088                 o.append("NaN");
   1089             }
   1090         } else {
   1091             if (s == 1) o.append('-');
   1092             if (e == 0) {
   1093                 if (m == 0) {
   1094                     o.append("0x0.0p0");
   1095                 } else {
   1096                     o.append("0x0.");
   1097                     String significand = Integer.toHexString(m);
   1098                     o.append(significand.replaceFirst("0{2,}$", ""));
   1099                     o.append("p-14");
   1100                 }
   1101             } else {
   1102                 o.append("0x1.");
   1103                 String significand = Integer.toHexString(m);
   1104                 o.append(significand.replaceFirst("0{2,}$", ""));
   1105                 o.append('p');
   1106                 o.append(Integer.toString(e - FP16_EXPONENT_BIAS));
   1107             }
   1108         }
   1109 
   1110         return o.toString();
   1111     }
   1112 }
   1113