1 #ifndef _TCUFLOAT_HPP 2 #define _TCUFLOAT_HPP 3 /*------------------------------------------------------------------------- 4 * drawElements Quality Program Tester Core 5 * ---------------------------------------- 6 * 7 * Copyright 2014 The Android Open Source Project 8 * 9 * Licensed under the Apache License, Version 2.0 (the "License"); 10 * you may not use this file except in compliance with the License. 11 * You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 * 21 *//*! 22 * \file 23 * \brief Reconfigurable floating-point value template. 24 *//*--------------------------------------------------------------------*/ 25 26 #include "tcuDefs.hpp" 27 28 // For memcpy(). 29 #include <string.h> 30 31 namespace tcu 32 { 33 34 enum FloatFlags 35 { 36 FLOAT_HAS_SIGN = (1<<0), 37 FLOAT_SUPPORT_DENORM = (1<<1) 38 }; 39 40 /*--------------------------------------------------------------------*//*! 41 * \brief Floating-point format template 42 * 43 * This template implements arbitrary floating-point handling. Template 44 * can be used for conversion between different formats and checking 45 * various properties of floating-point values. 46 *//*--------------------------------------------------------------------*/ 47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 48 class Float 49 { 50 public: 51 typedef StorageType_ StorageType; 52 53 enum 54 { 55 EXPONENT_BITS = ExponentBits, 56 MANTISSA_BITS = MantissaBits, 57 EXPONENT_BIAS = ExponentBias, 58 FLAGS = Flags, 59 }; 60 61 Float (void); 62 explicit Float (StorageType value); 63 explicit Float (float v); 64 explicit Float (double v); 65 66 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags> 67 static Float convert (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src); 68 69 static inline Float convert (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; } 70 71 /*--------------------------------------------------------------------*//*! 72 * \brief Construct floating point value 73 * \param sign Sign. Must be +1/-1 74 * \param exponent Exponent in range [1-ExponentBias, ExponentBias+1] 75 * \param mantissa Mantissa bits with implicit leading bit explicitly set 76 * \return The specified float 77 * 78 * This function constructs a floating point value from its inputs. 79 * The normally implicit leading bit of the mantissa must be explicitly set. 80 * The exponent normally used for zero/subnormals is an invalid input. Such 81 * values are specified with the leading mantissa bit of zero and the lowest 82 * normal exponent (1-ExponentBias). Additionally having both exponent and 83 * mantissa set to zero is a shorthand notation for the correctly signed 84 * floating point zero. Inf and NaN must be specified directly with an 85 * exponent of ExponentBias+1 and the appropriate mantissa (with leading 86 * bit set) 87 *//*--------------------------------------------------------------------*/ 88 static inline Float construct (int sign, int exponent, StorageType mantissa); 89 90 /*--------------------------------------------------------------------*//*! 91 * \brief Construct floating point value. Explicit version 92 * \param sign Sign. Must be +1/-1 93 * \param exponent Exponent in range [-ExponentBias, ExponentBias+1] 94 * \param mantissa Mantissa bits 95 * \return The specified float 96 * 97 * This function constructs a floating point value from its inputs with 98 * minimal intervention. 99 * The sign is turned into a sign bit and the exponent bias is added. 100 * See IEEE-754 for additional information on the inputs and 101 * the encoding of special values. 102 *//*--------------------------------------------------------------------*/ 103 static Float constructBits (int sign, int exponent, StorageType mantissaBits); 104 105 StorageType bits (void) const { return m_value; } 106 float asFloat (void) const; 107 double asDouble (void) const; 108 109 inline int signBit (void) const { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1; } 110 inline StorageType exponentBits (void) const { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1); } 111 inline StorageType mantissaBits (void) const { return m_value & ((StorageType(1)<<MantissaBits)-1); } 112 113 inline int sign (void) const { return signBit() ? -1 : 1; } 114 inline int exponent (void) const { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias; } 115 inline StorageType mantissa (void) const { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits)); } 116 117 inline bool isInf (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() == 0; } 118 inline bool isNaN (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() != 0; } 119 inline bool isZero (void) const { return exponentBits() == 0 && mantissaBits() == 0; } 120 inline bool isDenorm (void) const { return exponentBits() == 0 && mantissaBits() != 0; } 121 122 static Float zero (int sign); 123 static Float inf (int sign); 124 static Float nan (void); 125 126 private: 127 StorageType m_value; 128 } DE_WARN_UNUSED_TYPE; 129 130 // Common floating-point types. 131 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float16; //!< IEEE 754-2008 16-bit floating-point value 132 typedef Float<deUint32, 8, 23, 127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float32; //!< IEEE 754 32-bit floating-point value 133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float64; //!< IEEE 754 64-bit floating-point value 134 135 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN> Float16Denormless; //!< IEEE 754-2008 16-bit floating-point value without denormalized support 136 137 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 138 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void) 139 : m_value(0) 140 { 141 } 142 143 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 144 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value) 145 : m_value(value) 146 { 147 } 148 149 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 150 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value) 151 : m_value(0) 152 { 153 deUint32 u32; 154 memcpy(&u32, &value, sizeof(deUint32)); 155 *this = convert(Float32(u32)); 156 } 157 158 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 159 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value) 160 : m_value(0) 161 { 162 deUint64 u64; 163 memcpy(&u64, &value, sizeof(deUint64)); 164 *this = convert(Float64(u64)); 165 } 166 167 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 168 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const 169 { 170 float v; 171 deUint32 u32 = Float32::convert(*this).bits(); 172 memcpy(&v, &u32, sizeof(deUint32)); 173 return v; 174 } 175 176 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 177 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const 178 { 179 double v; 180 deUint64 u64 = Float64::convert(*this).bits(); 181 memcpy(&v, &u64, sizeof(deUint64)); 182 return v; 183 } 184 185 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 186 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign) 187 { 188 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); 189 return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits))); 190 } 191 192 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 193 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign) 194 { 195 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1)); 196 return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits))); 197 } 198 199 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 200 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void) 201 { 202 return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1)); 203 } 204 205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 206 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> 207 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct 208 (int sign, int exponent, StorageType mantissa) 209 { 210 // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation) 211 const bool isShorthandZero = exponent == 0 && mantissa == 0; 212 213 // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used. 214 // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero. 215 const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0); 216 const StorageType s = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits))); 217 const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias); 218 219 DE_ASSERT(sign == +1 || sign == -1); 220 DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1); 221 DE_ASSERT(exp >> ExponentBits == 0); 222 223 return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1)))); 224 } 225 226 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 227 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> 228 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits 229 (int sign, int exponent, StorageType mantissaBits) 230 { 231 const StorageType signBit = static_cast<StorageType>(sign < 0 ? 1 : 0); 232 const StorageType exponentBits = static_cast<StorageType>(exponent + ExponentBias); 233 234 DE_ASSERT(sign == +1 || sign == -1 ); 235 DE_ASSERT(exponentBits >> ExponentBits == 0); 236 DE_ASSERT(mantissaBits >> MantissaBits == 0); 237 238 return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits))); 239 } 240 241 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags> 242 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags> 243 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> 244 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert 245 (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other) 246 { 247 if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0) 248 { 249 // Negative number, truncate to zero. 250 return zero(+1); 251 } 252 else if (other.isInf()) 253 { 254 return inf(other.sign()); 255 } 256 else if (other.isNaN()) 257 { 258 return nan(); 259 } 260 else if (other.isZero()) 261 { 262 return zero(other.sign()); 263 } 264 else 265 { 266 const int eMin = 1 - ExponentBias; 267 const int eMax = ((1<<ExponentBits)-2) - ExponentBias; 268 269 const StorageType s = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit. 270 int e = other.exponent(); 271 deUint64 m = other.mantissa(); 272 273 // Normalize denormalized values prior to conversion. 274 while (!(m & (1ull<<OtherMantissaBits))) 275 { 276 m <<= 1; 277 e -= 1; 278 } 279 280 if (e < eMin) 281 { 282 // Underflow. 283 if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits)) 284 { 285 // Shift and round (RTE). 286 int bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e); 287 deUint64 half = (1ull << (bitDiff - 1)) - 1; 288 deUint64 bias = (m >> bitDiff) & 1; 289 290 return Float(StorageType(s | (m + half + bias) >> bitDiff)); 291 } 292 else 293 return zero(other.sign()); 294 } 295 else 296 { 297 // Remove leading 1. 298 m = m & ~(1ull<<OtherMantissaBits); 299 300 if (MantissaBits < OtherMantissaBits) 301 { 302 // Round mantissa (round to nearest even). 303 int bitDiff = OtherMantissaBits-MantissaBits; 304 deUint64 half = (1ull << (bitDiff - 1)) - 1; 305 deUint64 bias = (m >> bitDiff) & 1; 306 307 m = (m + half + bias) >> bitDiff; 308 309 if (m & (1ull<<MantissaBits)) 310 { 311 // Overflow in mantissa. 312 m = 0; 313 e += 1; 314 } 315 } 316 else 317 { 318 int bitDiff = MantissaBits-OtherMantissaBits; 319 m = m << bitDiff; 320 } 321 322 if (e > eMax) 323 { 324 // Overflow. 325 return inf(other.sign()); 326 } 327 else 328 { 329 DE_ASSERT(de::inRange(e, eMin, eMax)); 330 DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0); 331 DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0); 332 333 return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m)); 334 } 335 } 336 } 337 } 338 339 } // tcu 340 341 #endif // _TCUFLOAT_HPP 342