/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
     16 
     17 package android.renderscript.cts;
     18 
     19 import android.renderscript.RSRuntimeException;
     20 import android.util.Log;
     21 
     22 import junit.framework.Assert;
     23 
     24 /** This class contains utility functions needed by RenderScript CTS tests to handle Float16
     25  * operations.
     26  */
     27 class Float16Utils {
     28     // 16-bit masks for extracting sign, exponent and mantissa bits
     29     private static short SIGN_MASK     = (short) 0x8000;
     30     private static short EXPONENT_MASK = (short) 0x7C00;
     31     private static short MANTISSA_MASK = (short) 0x03FF;
     32 
     33     private static long DOUBLE_SIGN_MASK = 0x8000000000000000L;
     34     private static long DOUBLE_EXPONENT_MASK = 0x7ff0000000000000L;
     35     private static long DOUBLE_MANTISSA_MASK = 0x000fffffffffffffL;
     36 
     37     static double MIN_NORMAL = Math.scalb(1.0, -14); // smallest Float16 normal is 2 ^ -14
     38     static double MIN_VALUE = Math.scalb(1.0, -24); // smallest Float16 value is 2 ^ -24
     39     static double MAX_VALUE = 65504; // largest Float16 value is 2^16 - 32
     40 
     41     // NaN has all exponent bits set to 1 and a non-zero mantissa
     42     static boolean isFloat16NaN(short val) {
     43         return (val & EXPONENT_MASK) == EXPONENT_MASK &&
     44                (val & MANTISSA_MASK) != 0;
     45     }
     46 
     47     // Infinity has all exponent bits set to 1 and zeroes in mantissa
     48     static boolean isFloat16Infinite(short val) {
     49         return (val & EXPONENT_MASK) == EXPONENT_MASK &&
     50                (val & MANTISSA_MASK) == 0;
     51     }
     52 
     53     // Subnormal numbers have exponent bits set to 0 and a non-zero mantissa
     54     static boolean isFloat16SubNormal(short val) {
     55         return (val & EXPONENT_MASK) == 0 && (val & MANTISSA_MASK) != 0;
     56     }
     57 
     58     // Zero has all but the sign bit set to zero
     59     static boolean isFloat16Zero(short val) {
     60         return (val & ~SIGN_MASK) == 0;
     61     }
     62 
     63     // Negativity test checks the sign bit
     64     static boolean isFloat16Negative(short val) {
     65         return (val & SIGN_MASK) != 0;
     66     }
     67 
     68     // Check if this is a finite, non-zero FP16 value
     69     static boolean isFloat16FiniteNonZero(short val) {
     70         return !isFloat16NaN(val) && !isFloat16Infinite(val) && !isFloat16Zero(val);
     71     }
     72 
     73     static float convertFloat16ToFloat(short val) {
     74         // Extract sign, exponent and mantissa
     75         int sign = val & SIGN_MASK;
     76         int exponent = (val & EXPONENT_MASK) >> 10;
     77         int mantissa = val & MANTISSA_MASK;
     78 
     79         // 0.<mantissa> = <mantissa> * 2^-10
     80         float mantissaAsFloat = Math.scalb(mantissa, -10);
     81 
     82         float result;
     83         if (isFloat16Zero(val))
     84             result = 0.0f;
     85         else if (isFloat16Infinite(val))
     86             result = java.lang.Float.POSITIVE_INFINITY;
     87         else if (isFloat16NaN(val))
     88             result = java.lang.Float.NaN;
     89         else if (isFloat16SubNormal(val)) {
     90             // value is 2^-14 * mantissaAsFloat
     91             result = Math.scalb(1, -14) * mantissaAsFloat;
     92         }
     93         else {
     94             // value is 2^(exponent - 15) * 1.<mantissa>
     95             result = Math.scalb(1, exponent - 15) * (1 + mantissaAsFloat);
     96         }
     97 
     98         if (sign != 0)
     99             result = -result;
    100         return result;
    101     }
    102 
    103     static double convertFloat16ToDouble(short val) {
    104         return (double) convertFloat16ToFloat(val);
    105     }
    106 
    107     /* This utility function accepts the mantissa, exponent and an isNegative flag and constructs a
    108      * double value.  The exponent should be biased, but not shifted left by 52-bits.
    109      */
    110     private static double constructDouble(long mantissa, long exponent, boolean isNegative) {
    111         exponent = exponent << 52;
    112         long bits = (exponent & DOUBLE_EXPONENT_MASK) | (mantissa & DOUBLE_MANTISSA_MASK);
    113         if (isNegative) bits |= DOUBLE_SIGN_MASK;
    114         return Double.longBitsToDouble(bits);
    115     }
    116 
    117     /* This function takes a double value and returns an array with the double representations of
    118      * the Float16 values immediately smaller and larger than the input.  If the input value is
    119      * precisely representable in Float16, it is copied into both the entries of the array.
    120      *
    121      * The returned values can be subnormal Float16 numbers.  Handling subnormals is delegated to
    122      * the caller.
    123      *
    124      * TODO Extend this function to handle rounding for both float16 and float32.
    125      */
    126     static double[] roundToFloat16(double value) {
    127         long valueBits = Double.doubleToLongBits(value);
    128         long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
    129         long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
    130         long unbiasedExponent = (exponent >> 52) - 1023;
    131         boolean isNegative = (valueBits & DOUBLE_SIGN_MASK) != 0;
    132 
    133         double[] result = new double[2];
    134         if (Double.isNaN(value) || Double.isInfinite(value)) {
    135             // Input is NaN or Infinity.  Return unchanged.
    136             result[0] = value;
    137             result[1] = value;
    138             return result; // Note that we skip the negation at the end of this function
    139         }
    140 
    141         if (unbiasedExponent == -1023 && mantissa == 0) {
    142             // Zero.  Assign 0 and adjust sign at the end of this function
    143             result[0] = 0.;
    144             result[1] = 0.;
    145         }
    146         else if (unbiasedExponent < -24) {
    147             // Absolute value is between 0 and MIN_VALUE.  Return 0 and MIN_VALUE
    148             result[0] = 0.;
    149             result[1] = MIN_VALUE;
    150         }
    151         else if (unbiasedExponent <= 15) {
    152             /*
    153              * Either subnormal or normal.  We compute a mask for the excess precision bits in the
    154              * mantissa.
    155              *
    156              * (a) If none of these bits are set, the current value's mantissa and exponent are used
    157              * for both the low and high values.
    158              * (b) If some of these bits are set, we zero-out the extra bits to get the mantissa and
    159              * exponent of the lower value.  For the higher value, we increment the masked mantissa
    160              * at the least-significant bit within the range of this Float16 value.  To handle
    161              * overflows during the the increment, we need to increment the exponent and round up to
    162              * infinity if needed.
    163              */
    164 
    165             // 'mask' is used to detect and zero-out excess bits set.  'mask + 1' is the value
    166             // added to zero-ed out mantissa to get the next higher Float16 value.
    167             long mask;
    168             long maxSigMantissaBits;
    169 
    170             if (unbiasedExponent < -14) {
    171                 // Subnormal Float16.  For Float16's MIN_VALUE, mantissa can have no bits set (after
    172                 // adjusting for the implied one bit.  For each higher exponent, an extra bit of
    173                 // precision is allowed in the mantissa.  This computes to "24 + unbiasedExponent".
    174                 maxSigMantissaBits = 24 + unbiasedExponent;
    175             } else {
    176                 // For normal Float16 values have 10 bits of precision in the mantissa.
    177                 maxSigMantissaBits = 10;
    178             }
    179             mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits;
    180 
    181             // zero-out the excess precision bits for the mantissa for both low and high values.
    182             long lowFloat16Mantissa = mantissa & ~mask;
    183             long highFloat16Mantissa = mantissa & ~mask;
    184 
    185             long lowFloat16Exponent = unbiasedExponent;
    186             long highFloat16Exponent = unbiasedExponent;
    187 
    188             if ((mantissa & mask) != 0) {
    189                 // If mantissa has extra bits set, increment the mantissa at the LSB (for this
    190                 // Float16 value)
    191                 highFloat16Mantissa += mask + 1;
    192 
    193                 // If this overflows the mantissa into the exponent, set mantissa to zero and
    194                 // increment the exponent.
    195                 if ((highFloat16Mantissa & DOUBLE_EXPONENT_MASK) != 0) {
    196                     highFloat16Mantissa = 0;
    197                     highFloat16Exponent += 1;
    198                 }
    199 
    200                 // If the exponent exceeds the range of Float16 exponents, set it to 1024, so the
    201                 // value gets rounded up to Double.POSITIVE_INFINITY.
    202                 if (highFloat16Exponent == 16) {
    203                     highFloat16Exponent = 1024;
    204                 }
    205             }
    206 
    207             result[0] = constructDouble(lowFloat16Mantissa, lowFloat16Exponent + 1023, false);
    208             result[1] = constructDouble(highFloat16Mantissa, highFloat16Exponent + 1023, false);
    209         } else {
    210             // Exponent is outside Float16's range.  Use POSITIVE_INFINITY for both bounds.
    211             result[0] = Double.POSITIVE_INFINITY;
    212             result[1] = Double.POSITIVE_INFINITY;
    213         }
    214 
    215         // Swap values in result and negate them if the input value is negative.
    216         if (isNegative) {
    217             double tmp = result[0];
    218             result[0] = -result[1];
    219             result[1] = -tmp;
    220         }
    221 
    222         return result;
    223     }
    224 
    225     // This function takes a double value and returns 1 ulp, in Float16 precision, of that value.
    226     // Both the parameter and return value have 'double' type but they should be exactly
    227     // representable in Float16.  If the parameter exceeds the precision of Float16, an exception is
    228     // thrown.
    229     static double float16Ulp(double value) {
    230         long valueBits = Double.doubleToLongBits(value);
    231         long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
    232         long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
    233         long unbiasedExponent = (exponent >> 52) - 1023;
    234 
    235         if (unbiasedExponent == 1024) { // i.e. NaN or infinity
    236             if (mantissa == 0) {
    237                 return Double.POSITIVE_INFINITY; // ulp of +/- infinity is +infinity
    238             } else {
    239                 return Double.NaN; // ulp for NaN is NaN
    240             }
    241         }
    242 
    243         if (unbiasedExponent == -1023) {
    244             // assert that mantissa is zero, i.e. value is zero and not a subnormal value.
    245             if (mantissa != 0) {
    246                 throw new RSRuntimeException("float16ulp: Double parameter is subnormal");
    247             }
    248             return MIN_VALUE;
    249         }
    250 
    251         if (unbiasedExponent < -24 || unbiasedExponent > 15) {
    252             throw new RSRuntimeException("float16Ulp: Double parameter's exponent out of range");
    253         }
    254 
    255         if (unbiasedExponent >= -24 && unbiasedExponent < -14) {
    256             // Exponent within the range of Float16 subnormals.
    257 
    258             // Ensure that mantissa doesn't have too much precision.  For example, the smallest
    259             // normal number has an unbiased exponent of -24 and has one bit in mantissa.  Each
    260             // higher exponent allows one extra bit of precision in the mantissa.  Combined with the
    261             // implied one bit, the mantissa can have "24 + unbiasedExponent" significant bits.  The
    262             // rest of the 52 bits in mantissa must be zero.
    263 
    264             long maxSigMantissaBits = 24 + unbiasedExponent;
    265             long mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits;
    266 
    267             if((mask & mantissa) != 0) {
    268                 throw new RSRuntimeException("float16ulp: Double parameter is too precise for subnormal Float16 values.");
    269             }
    270             return MIN_VALUE;
    271         }
    272         if (unbiasedExponent >= -14) {
    273             // Exponent within the range of Float16 normals.  Ensure that the mantissa has at most
    274             // 10 significant bits.
    275             long mask = DOUBLE_MANTISSA_MASK >> 10;
    276             if ((mantissa & mask) != 0) {
    277                 throw new RSRuntimeException("float16ulp: Double parameter is too precise for normal Float16 values.");
    278             }
    279             return Math.scalb(1.0, (int) (unbiasedExponent - 10));
    280         }
    281         throw new RSRuntimeException("float16Ulp: unreachable line executed");
    282     }
    283 
    284     // This function converts its double input value to its Float16 representation (represented as a
    285     // short).  It assumes, but does not check, that the input is precisely representable in Float16
    286     // precision.  No rounding is performed either.
    287     static short convertDoubleToFloat16(double value) {
    288         if (value == 0.) {
    289             if (Double.doubleToLongBits(value) == 0)
    290                 return (short) 0x0;
    291             else
    292                 return (short) 0x8000;
    293         } else if (Double.isNaN(value)) {
    294             // return Quiet NaN irrespective of what kind of NaN 'value' is.
    295             return (short) 0x7e00;
    296         } else if (value == Double.POSITIVE_INFINITY) {
    297             return (short) 0x7c00;
    298         } else if (value == Double.NEGATIVE_INFINITY) {
    299             return (short) 0xfc00;
    300         }
    301 
    302         double positiveValue = Math.abs(value);
    303         boolean isNegative = (value < 0.);
    304         if (positiveValue < MIN_NORMAL) {
    305             short quotient = (short) (positiveValue / MIN_VALUE);
    306             return (isNegative) ? (short) (0x8000 | quotient) : quotient;
    307         } else {
    308             long valueBits = Double.doubleToLongBits(value);
    309             long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
    310             long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
    311             long unbiasedExponent = (exponent >> 52) - 1023;
    312 
    313             short halfExponent = (short) ((unbiasedExponent + 15) << 10);
    314             short halfMantissa = (short) (mantissa >> 42);
    315             short halfValue = (short) (halfExponent | halfMantissa);
    316             return (isNegative) ? (short) (0x8000 | halfValue) : halfValue;
    317         }
    318     }
    319 
    320 }
    321