1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program Tester Core 3 * ---------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Adjustable-precision floating point operations. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "tcuFloatFormat.hpp" 25 26 #include "deMath.h" 27 #include "deUniquePtr.hpp" 28 29 #include <sstream> 30 #include <iomanip> 31 #include <limits> 32 33 namespace tcu 34 { 35 namespace 36 { 37 38 Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes) 39 { 40 switch (choice) 41 { 42 case NO: return no; 43 case YES: return yes; 44 case MAYBE: return no | yes; 45 default: DE_FATAL("Impossible case"); 46 } 47 48 return Interval(); 49 } 50 51 double computeMaxValue (int maxExp, int fractionBits) 52 { 53 return (deLdExp(1.0, maxExp) + 54 deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits)); 55 } 56 57 } // anonymous 58 59 FloatFormat::FloatFormat (int minExp, 60 int maxExp, 61 int fractionBits, 62 bool exactPrecision, 63 YesNoMaybe hasSubnormal_, 64 YesNoMaybe hasInf_, 65 YesNoMaybe hasNaN_) 66 : m_minExp (minExp) 67 , m_maxExp (maxExp) 68 , m_fractionBits (fractionBits) 69 , m_hasSubnormal (hasSubnormal_) 70 , m_hasInf (hasInf_) 71 , m_hasNaN (hasNaN_) 72 , m_exactPrecision (exactPrecision) 73 , m_maxValue (computeMaxValue(maxExp, fractionBits)) 74 { 75 DE_ASSERT(minExp <= maxExp); 76 } 77 78 /*------------------------------------------------------------------------- 79 * On the definition of ULP 80 * 81 * The GLSL spec does not define ULP. However, it refers to IEEE 754, which 82 * (reportedly) uses Harrison's definition: 83 * 84 * ULP(x) is the distance between the closest floating point numbers 85 * a and be such that a <= x <= b and a != b 86 * 87 * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the 88 * distance to the next lowest float, not next highest. 89 * 90 * Furthermore, it is assumed that ULP is calculated relative to the exact 91 * value, not the approximation. This is because otherwise a less accurate 92 * approximation could be closer in ULPs, because its ULPs are bigger. 93 * 94 * For details, see "On the definition of ulp(x)" by Jean-Michel Muller 95 * 96 *-----------------------------------------------------------------------*/ 97 98 double FloatFormat::ulp (double x, double count) const 99 { 100 int exp = 0; 101 const double frac = deFractExp(deAbs(x), &exp); 102 103 if (deIsNaN(frac)) 104 return TCU_NAN; 105 else if (deIsInf(frac)) 106 return deLdExp(1.0, m_maxExp - m_fractionBits); 107 else if (frac == 1.0) 108 { 109 // Harrison's ULP: choose distance to closest (i.e. next lower) at binade 110 // boundary. 111 --exp; 112 } 113 else if (frac == 0.0) 114 exp = m_minExp; 115 116 // ULP cannot be lower than the smallest quantum. 117 exp = de::max(exp, m_minExp); 118 119 { 120 const double oneULP = deLdExp(1.0, exp - m_fractionBits); 121 ScopedRoundingMode ctx (DE_ROUNDINGMODE_TO_POSITIVE_INF); 122 123 return oneULP * count; 124 } 125 } 126 127 //! Return the difference between the given nominal exponent and 128 //! the exponent of the lowest significand bit of the 129 //! representation of a number with this format. 130 //! For normal numbers this is the number of significand bits, but 131 //! for subnormals it is less and for values of exp where 2^exp is too 132 //! small to represent it is <0 133 int FloatFormat::exponentShift (int exp) const 134 { 135 return m_fractionBits - de::max(m_minExp - exp, 0); 136 } 137 138 //! Return the number closest to `d` that is exactly representable with the 139 //! significand bits and minimum exponent of the floatformat. Round up if 140 //! `upward` is true, otherwise down. 141 double FloatFormat::round (double d, bool upward) const 142 { 143 int exp = 0; 144 const double frac = deFractExp(d, &exp); 145 const int shift = exponentShift(exp); 146 const double shiftFrac = deLdExp(frac, shift); 147 const double roundFrac = upward ? deCeil(shiftFrac) : deFloor(shiftFrac); 148 149 return deLdExp(roundFrac, exp - shift); 150 } 151 152 //! Return the range of numbers that `d` might be converted to in the 153 //! floatformat, given its limitations with infinities, subnormals and maximum 154 //! exponent. 155 Interval FloatFormat::clampValue (double d) const 156 { 157 const double rSign = deSign(d); 158 int rExp = 0; 159 160 DE_ASSERT(!deIsNaN(d)); 161 162 deFractExp(d, &rExp); 163 if (rExp < m_minExp) 164 return chooseInterval(m_hasSubnormal, rSign * 0.0, d); 165 else if (deIsInf(d) || rExp > m_maxExp) 166 return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY); 167 168 return Interval(d); 169 } 170 171 //! Return the range of numbers that might be used with this format to 172 //! represent a number within `x`. 173 Interval FloatFormat::convert (const Interval& x) const 174 { 175 Interval ret; 176 Interval tmp = x; 177 178 if (x.hasNaN()) 179 { 180 // If NaN might be supported, NaN is a legal return value 181 if (m_hasNaN != NO) 182 ret |= TCU_NAN; 183 184 // If NaN might not be supported, any (non-NaN) value is legal, 185 // _subject_ to clamping. Hence we modify tmp, not ret. 186 if (m_hasNaN != YES) 187 tmp = Interval::unbounded(); 188 } 189 190 // Round both bounds _inwards_ to closest representable values. 191 if (!tmp.empty()) 192 ret |= clampValue(round(tmp.lo(), true)) | clampValue(round(tmp.hi(), false)); 193 194 // If this format's precision is not exact, the (possibly out-of-bounds) 195 // original value is also a possible result. 196 if (!m_exactPrecision) 197 ret |= x; 198 199 return ret; 200 } 201 202 double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const 203 { 204 int exp = 0; 205 deFractExp(d, &exp); 206 207 if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0))) 208 return deSign(d) * getMaxValue(); 209 else 210 return round(d, upward); 211 } 212 213 //! Round output of an operation. 214 //! \param roundUnderOverflow Can +/-inf rounded to min/max representable; 215 //! should be false if any of operands was inf, true otherwise. 216 Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const 217 { 218 Interval ret = x.nan(); 219 220 if (!x.empty()) 221 ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow), 222 roundOut(x.hi(), true, roundUnderOverflow)); 223 224 return ret; 225 } 226 227 std::string FloatFormat::floatToHex (double x) const 228 { 229 if (deIsNaN(x)) 230 return "NaN"; 231 else if (deIsInf(x)) 232 return (x < 0.0 ? "-" : "+") + std::string("inf"); 233 else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero 234 return "0.0"; 235 236 int exp = 0; 237 const double frac = deFractExp(deAbs(x), &exp); 238 const int shift = exponentShift(exp); 239 const deUint64 bits = deUint64(deLdExp(frac, shift)); 240 const deUint64 whole = bits >> m_fractionBits; 241 const deUint64 fraction = bits & ((deUint64(1) << m_fractionBits) - 1); 242 const int exponent = exp + m_fractionBits - shift; 243 const int numDigits = (m_fractionBits + 3) / 4; 244 const deUint64 aligned = fraction << (numDigits * 4 - m_fractionBits); 245 std::ostringstream oss; 246 247 oss << (x < 0 ? "-" : "") 248 << "0x" << whole << "." 249 << std::hex << std::setw(numDigits) << std::setfill('0') << aligned 250 << "p" << std::dec << std::setw(0) << exponent; 251 252 return oss.str(); 253 } 254 255 std::string FloatFormat::intervalToHex (const Interval& interval) const 256 { 257 if (interval.empty()) 258 return interval.hasNaN() ? "{ NaN }" : "{}"; 259 260 else if (interval.lo() == interval.hi()) 261 return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") + 262 floatToHex(interval.lo()) + " }"); 263 else if (interval == Interval::unbounded(true)) 264 return "<any>"; 265 266 return (std::string(interval.hasNaN() ? "{ NaN } | " : "") + 267 "[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]"); 268 } 269 270 template <typename T> 271 static FloatFormat nativeFormat (void) 272 { 273 typedef std::numeric_limits<T> Limits; 274 275 DE_ASSERT(Limits::radix == 2); 276 277 return FloatFormat(Limits::min_exponent - 1, // These have a built-in offset of one 278 Limits::max_exponent - 1, 279 Limits::digits - 1, // don't count the hidden bit 280 Limits::has_denorm != std::denorm_absent, 281 Limits::has_infinity ? YES : NO, 282 Limits::has_quiet_NaN ? YES : NO, 283 ((Limits::has_denorm == std::denorm_present) ? YES : 284 (Limits::has_denorm == std::denorm_absent) ? NO : 285 MAYBE)); 286 } 287 288 FloatFormat FloatFormat::nativeFloat (void) 289 { 290 return nativeFormat<float>(); 291 } 292 293 FloatFormat FloatFormat::nativeDouble (void) 294 { 295 return nativeFormat<double>(); 296 } 297 298 namespace 299 { 300 301 using std::string; 302 using std::ostringstream; 303 using de::MovePtr; 304 using de::UniquePtr; 305 306 class Test 307 { 308 protected: 309 310 Test (MovePtr<FloatFormat> fmt) : m_fmt(fmt) {} 311 double p (int e) const { return deLdExp(1.0, e); } 312 void check (const string& expr, 313 double result, 314 double reference) const; 315 void testULP (double arg, double ref) const; 316 void testRound (double arg, double refDown, double refUp) const; 317 318 UniquePtr<FloatFormat> m_fmt; 319 }; 320 321 void Test::check (const string& expr, double result, double reference) const 322 { 323 if (result != reference) 324 { 325 ostringstream oss; 326 oss << expr << " returned " << result << ", expected " << reference; 327 TCU_FAIL(oss.str().c_str()); 328 } 329 } 330 331 void Test::testULP (double arg, double ref) const 332 { 333 ostringstream oss; 334 335 oss << "ulp(" << arg << ")"; 336 check(oss.str(), m_fmt->ulp(arg), ref); 337 } 338 339 void Test::testRound (double arg, double refDown, double refUp) const 340 { 341 { 342 ostringstream oss; 343 oss << "round(" << arg << ", false)"; 344 check(oss.str(), m_fmt->round(arg, false), refDown); 345 } 346 { 347 ostringstream oss; 348 oss << "round(" << arg << ", true)"; 349 check(oss.str(), m_fmt->round(arg, true), refUp); 350 } 351 } 352 353 class TestBinary32 : public Test 354 { 355 public: 356 TestBinary32 (void) 357 : Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {} 358 359 void runTest (void) const; 360 }; 361 362 void TestBinary32::runTest (void) const 363 { 364 testULP(p(0), p(-24)); 365 testULP(p(0) + p(-23), p(-23)); 366 testULP(p(-124), p(-148)); 367 testULP(p(-125), p(-149)); 368 testULP(p(-125) + p(-140), p(-148)); 369 testULP(p(-126), p(-149)); 370 testULP(p(-130), p(-149)); 371 372 testRound(p(0) + p(-20) + p(-40), p(0) + p(-20), p(0) + p(-20) + p(-23)); 373 testRound(p(-126) - p(-150), p(-126) - p(-149), p(-126)); 374 375 TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0"); 376 TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8"); 377 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126"); 378 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126"); 379 TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125"); 380 } 381 382 } // anonymous 383 384 void FloatFormat_selfTest (void) 385 { 386 TestBinary32 test32; 387 test32.runTest(); 388 } 389 390 } // tcu 391