Home | History | Annotate | Download | only in common
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Tester Core
      3  * ----------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Adjustable-precision floating point operations.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "tcuFloatFormat.hpp"
     25 
     26 #include "deMath.h"
     27 #include "deUniquePtr.hpp"
     28 
     29 #include <sstream>
     30 #include <iomanip>
     31 #include <limits>
     32 
     33 namespace tcu
     34 {
     35 namespace
     36 {
     37 
     38 Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes)
     39 {
     40 	switch (choice)
     41 	{
     42 		case NO:	return no;
     43 		case YES:	return yes;
     44 		case MAYBE:	return no | yes;
     45 		default:	DE_FATAL("Impossible case");
     46 	}
     47 
     48 	return Interval();
     49 }
     50 
     51 double computeMaxValue (int maxExp, int fractionBits)
     52 {
     53 	return (deLdExp(1.0, maxExp) +
     54 			deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
     55 }
     56 
     57 } // anonymous
     58 
     59 FloatFormat::FloatFormat (int			minExp,
     60 						  int			maxExp,
     61 						  int			fractionBits,
     62 						  bool			exactPrecision,
     63 						  YesNoMaybe	hasSubnormal_,
     64 						  YesNoMaybe	hasInf_,
     65 						  YesNoMaybe	hasNaN_)
     66 	: m_minExp			(minExp)
     67 	, m_maxExp			(maxExp)
     68 	, m_fractionBits	(fractionBits)
     69 	, m_hasSubnormal	(hasSubnormal_)
     70 	, m_hasInf			(hasInf_)
     71 	, m_hasNaN			(hasNaN_)
     72 	, m_exactPrecision	(exactPrecision)
     73 	, m_maxValue		(computeMaxValue(maxExp, fractionBits))
     74 {
     75 	DE_ASSERT(minExp <= maxExp);
     76 }
     77 
     78 /*-------------------------------------------------------------------------
     79  * On the definition of ULP
     80  *
     81  * The GLSL spec does not define ULP. However, it refers to IEEE 754, which
     82  * (reportedly) uses Harrison's definition:
     83  *
     84  * ULP(x) is the distance between the closest floating point numbers
     85  * a and be such that a <= x <= b and a != b
     86  *
     87  * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the
     88  * distance to the next lowest float, not next highest.
     89  *
     90  * Furthermore, it is assumed that ULP is calculated relative to the exact
     91  * value, not the approximation. This is because otherwise a less accurate
     92  * approximation could be closer in ULPs, because its ULPs are bigger.
     93  *
     94  * For details, see "On the definition of ulp(x)" by Jean-Michel Muller
     95  *
     96  *-----------------------------------------------------------------------*/
     97 
     98 double FloatFormat::ulp (double x, double count) const
     99 {
    100 	int				exp		= 0;
    101 	const double	frac	= deFractExp(deAbs(x), &exp);
    102 
    103 	if (deIsNaN(frac))
    104 		return TCU_NAN;
    105 	else if (deIsInf(frac))
    106 		return deLdExp(1.0, m_maxExp - m_fractionBits);
    107 	else if (frac == 1.0)
    108 	{
    109 		// Harrison's ULP: choose distance to closest (i.e. next lower) at binade
    110 		// boundary.
    111 		--exp;
    112 	}
    113 	else if (frac == 0.0)
    114 		exp = m_minExp;
    115 
    116 	// ULP cannot be lower than the smallest quantum.
    117 	exp = de::max(exp, m_minExp);
    118 
    119 	{
    120 		const double		oneULP	= deLdExp(1.0, exp - m_fractionBits);
    121 		ScopedRoundingMode	ctx		(DE_ROUNDINGMODE_TO_POSITIVE_INF);
    122 
    123 		return oneULP * count;
    124 	}
    125 }
    126 
    127 //! Return the difference between the given nominal exponent and
    128 //! the exponent of the lowest significand bit of the
    129 //! representation of a number with this format.
    130 //! For normal numbers this is the number of significand bits, but
    131 //! for subnormals it is less and for values of exp where 2^exp is too
    132 //! small to represent it is <0
    133 int FloatFormat::exponentShift (int exp) const
    134 {
    135 	return m_fractionBits - de::max(m_minExp - exp, 0);
    136 }
    137 
    138 //! Return the number closest to `d` that is exactly representable with the
    139 //! significand bits and minimum exponent of the floatformat. Round up if
    140 //! `upward` is true, otherwise down.
    141 double FloatFormat::round (double d, bool upward) const
    142 {
    143 	int				exp			= 0;
    144 	const double	frac		= deFractExp(d, &exp);
    145 	const int		shift		= exponentShift(exp);
    146 	const double	shiftFrac	= deLdExp(frac, shift);
    147 	const double	roundFrac	= upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
    148 
    149 	return deLdExp(roundFrac, exp - shift);
    150 }
    151 
    152 //! Return the range of numbers that `d` might be converted to in the
    153 //! floatformat, given its limitations with infinities, subnormals and maximum
    154 //! exponent.
    155 Interval FloatFormat::clampValue (double d) const
    156 {
    157 	const double	rSign		= deSign(d);
    158 	int				rExp		= 0;
    159 
    160 	DE_ASSERT(!deIsNaN(d));
    161 
    162 	deFractExp(d, &rExp);
    163 	if (rExp < m_minExp)
    164 		return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
    165 	else if (deIsInf(d) || rExp > m_maxExp)
    166 		return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
    167 
    168 	return Interval(d);
    169 }
    170 
    171 //! Return the range of numbers that might be used with this format to
    172 //! represent a number within `x`.
    173 Interval FloatFormat::convert (const Interval& x) const
    174 {
    175 	Interval ret;
    176 	Interval tmp = x;
    177 
    178 	if (x.hasNaN())
    179 	{
    180 		// If NaN might be supported, NaN is a legal return value
    181 		if (m_hasNaN != NO)
    182 			ret |= TCU_NAN;
    183 
    184 		// If NaN might not be supported, any (non-NaN) value is legal,
    185 		// _subject_ to clamping. Hence we modify tmp, not ret.
    186 		if (m_hasNaN != YES)
    187 			tmp = Interval::unbounded();
    188 	}
    189 
    190 	// Round both bounds _inwards_ to closest representable values.
    191 	if (!tmp.empty())
    192 		ret |= clampValue(round(tmp.lo(), true)) | clampValue(round(tmp.hi(), false));
    193 
    194 	// If this format's precision is not exact, the (possibly out-of-bounds)
    195 	// original value is also a possible result.
    196 	if (!m_exactPrecision)
    197 		ret |= x;
    198 
    199 	return ret;
    200 }
    201 
    202 double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
    203 {
    204 	int	exp	= 0;
    205 	deFractExp(d, &exp);
    206 
    207 	if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
    208 		return deSign(d) * getMaxValue();
    209 	else
    210 		return round(d, upward);
    211 }
    212 
    213 //! Round output of an operation.
    214 //! \param roundUnderOverflow Can +/-inf rounded to min/max representable;
    215 //!							  should be false if any of operands was inf, true otherwise.
    216 Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const
    217 {
    218 	Interval ret = x.nan();
    219 
    220 	if (!x.empty())
    221 		ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow),
    222 						roundOut(x.hi(), true, roundUnderOverflow));
    223 
    224 	return ret;
    225 }
    226 
    227 std::string	FloatFormat::floatToHex	(double x) const
    228 {
    229 	if (deIsNaN(x))
    230 		return "NaN";
    231 	else if (deIsInf(x))
    232 		return (x < 0.0 ? "-" : "+") + std::string("inf");
    233 	else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
    234 		return "0.0";
    235 
    236 	int					exp			= 0;
    237 	const double		frac		= deFractExp(deAbs(x), &exp);
    238 	const int			shift		= exponentShift(exp);
    239 	const deUint64		bits		= deUint64(deLdExp(frac, shift));
    240 	const deUint64		whole		= bits >> m_fractionBits;
    241 	const deUint64		fraction	= bits & ((deUint64(1) << m_fractionBits) - 1);
    242 	const int			exponent	= exp + m_fractionBits - shift;
    243 	const int			numDigits	= (m_fractionBits + 3) / 4;
    244 	const deUint64		aligned		= fraction << (numDigits * 4 - m_fractionBits);
    245 	std::ostringstream	oss;
    246 
    247 	oss << (x < 0 ? "-" : "")
    248 		<< "0x" << whole << "."
    249 		<< std::hex << std::setw(numDigits) << std::setfill('0') << aligned
    250 		<< "p" << std::dec << std::setw(0) << exponent;
    251 
    252 	return oss.str();
    253 }
    254 
    255 std::string FloatFormat::intervalToHex (const Interval& interval) const
    256 {
    257 	if (interval.empty())
    258 		return interval.hasNaN() ? "{ NaN }" : "{}";
    259 
    260 	else if (interval.lo() == interval.hi())
    261 		return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") +
    262 				floatToHex(interval.lo()) + " }");
    263 	else if (interval == Interval::unbounded(true))
    264 		return "<any>";
    265 
    266 	return (std::string(interval.hasNaN() ? "{ NaN } | " : "") +
    267 			"[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]");
    268 }
    269 
    270 template <typename T>
    271 static FloatFormat nativeFormat (void)
    272 {
    273 	typedef std::numeric_limits<T> Limits;
    274 
    275 	DE_ASSERT(Limits::radix == 2);
    276 
    277 	return FloatFormat(Limits::min_exponent - 1,	// These have a built-in offset of one
    278 					   Limits::max_exponent - 1,
    279 					   Limits::digits - 1,			// don't count the hidden bit
    280 					   Limits::has_denorm != std::denorm_absent,
    281 					   Limits::has_infinity ? YES : NO,
    282 					   Limits::has_quiet_NaN ? YES : NO,
    283 					   ((Limits::has_denorm == std::denorm_present) ? YES :
    284 						(Limits::has_denorm == std::denorm_absent) ? NO :
    285 						MAYBE));
    286 }
    287 
    288 FloatFormat	FloatFormat::nativeFloat (void)
    289 {
    290 	return nativeFormat<float>();
    291 }
    292 
    293 FloatFormat	FloatFormat::nativeDouble (void)
    294 {
    295 	return nativeFormat<double>();
    296 }
    297 
    298 namespace
    299 {
    300 
    301 using std::string;
    302 using std::ostringstream;
    303 using de::MovePtr;
    304 using de::UniquePtr;
    305 
    306 class Test
    307 {
    308 protected:
    309 
    310 							Test		(MovePtr<FloatFormat> fmt) : m_fmt(fmt) {}
    311 	double					p			(int e) const	 			{ return deLdExp(1.0, e); }
    312 	void					check		(const string&	expr,
    313 										 double			result,
    314 										 double			reference) const;
    315 	void					testULP		(double arg, double ref) const;
    316 	void					testRound	(double arg, double refDown, double refUp) const;
    317 
    318 	UniquePtr<FloatFormat>	m_fmt;
    319 };
    320 
    321 void Test::check (const string& expr, double result, double reference) const
    322 {
    323 	if (result != reference)
    324 	{
    325 		ostringstream oss;
    326 		oss << expr << " returned " << result << ", expected " << reference;
    327 		TCU_FAIL(oss.str().c_str());
    328 	}
    329 }
    330 
    331 void Test::testULP (double arg, double ref) const
    332 {
    333 	ostringstream	oss;
    334 
    335 	oss << "ulp(" << arg << ")";
    336 	check(oss.str(), m_fmt->ulp(arg), ref);
    337 }
    338 
    339 void Test::testRound (double arg, double refDown, double refUp) const
    340 {
    341 	{
    342 		ostringstream oss;
    343 		oss << "round(" << arg << ", false)";
    344 		check(oss.str(), m_fmt->round(arg, false), refDown);
    345 	}
    346 	{
    347 		ostringstream oss;
    348 		oss << "round(" << arg << ", true)";
    349 		check(oss.str(), m_fmt->round(arg, true), refUp);
    350 	}
    351 }
    352 
    353 class TestBinary32 : public Test
    354 {
    355 public:
    356 			TestBinary32 (void)
    357 				: Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {}
    358 
    359 	void	runTest	(void) const;
    360 };
    361 
    362 void TestBinary32::runTest (void) const
    363 {
    364 	testULP(p(0),				p(-24));
    365 	testULP(p(0) + p(-23),		p(-23));
    366 	testULP(p(-124),			p(-148));
    367 	testULP(p(-125),			p(-149));
    368 	testULP(p(-125) + p(-140),	p(-148));
    369 	testULP(p(-126),			p(-149));
    370 	testULP(p(-130),			p(-149));
    371 
    372 	testRound(p(0) + p(-20) + p(-40),	p(0) + p(-20),		p(0) + p(-20) + p(-23));
    373 	testRound(p(-126) - p(-150),		p(-126) - p(-149),	p(-126));
    374 
    375 	TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
    376 	TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
    377 	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
    378 	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
    379 	TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
    380 }
    381 
    382 } // anonymous
    383 
    384 void FloatFormat_selfTest (void)
    385 {
    386 	TestBinary32	test32;
    387 	test32.runTest();
    388 }
    389 
    390 } // tcu
    391