Home | History | Annotate | Download | only in common
      1 #ifndef _TCUFLOAT_HPP
      2 #define _TCUFLOAT_HPP
      3 /*-------------------------------------------------------------------------
      4  * drawElements Quality Program Tester Core
      5  * ----------------------------------------
      6  *
      7  * Copyright 2014 The Android Open Source Project
      8  *
      9  * Licensed under the Apache License, Version 2.0 (the "License");
     10  * you may not use this file except in compliance with the License.
     11  * You may obtain a copy of the License at
     12  *
     13  *      http://www.apache.org/licenses/LICENSE-2.0
     14  *
     15  * Unless required by applicable law or agreed to in writing, software
     16  * distributed under the License is distributed on an "AS IS" BASIS,
     17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     18  * See the License for the specific language governing permissions and
     19  * limitations under the License.
     20  *
     21  *//*!
     22  * \file
     23  * \brief Reconfigurable floating-point value template.
     24  *//*--------------------------------------------------------------------*/
     25 
     26 #include "tcuDefs.hpp"
     27 
     28 // For memcpy().
     29 #include <string.h>
     30 
     31 namespace tcu
     32 {
     33 
     34 enum FloatFlags
     35 {
     36 	FLOAT_HAS_SIGN			= (1<<0),
     37 	FLOAT_SUPPORT_DENORM	= (1<<1)
     38 };
     39 
     40 /*--------------------------------------------------------------------*//*!
     41  * \brief Floating-point format template
     42  *
     43  * This template implements arbitrary floating-point handling. Template
     44  * can be used for conversion between different formats and checking
     45  * various properties of floating-point values.
     46  *//*--------------------------------------------------------------------*/
     47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
     48 class Float
     49 {
     50 public:
     51 	typedef StorageType_ StorageType;
     52 
     53 	enum
     54 	{
     55 		EXPONENT_BITS	= ExponentBits,
     56 		MANTISSA_BITS	= MantissaBits,
     57 		EXPONENT_BIAS	= ExponentBias,
     58 		FLAGS			= Flags,
     59 	};
     60 
     61 							Float			(void);
     62 	explicit				Float			(StorageType value);
     63 	explicit				Float			(float v);
     64 	explicit				Float			(double v);
     65 
     66 	template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
     67 	static Float			convert			(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
     68 
     69 	static inline Float		convert			(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
     70 
     71 	/*--------------------------------------------------------------------*//*!
     72 	 * \brief Construct floating point value
     73 	 * \param sign		Sign. Must be +1/-1
     74 	 * \param exponent	Exponent in range [1-ExponentBias, ExponentBias+1]
     75 	 * \param mantissa	Mantissa bits with implicit leading bit explicitly set
     76 	 * \return The specified float
     77 	 *
     78 	 * This function constructs a floating point value from its inputs.
     79 	 * The normally implicit leading bit of the mantissa must be explicitly set.
     80 	 * The exponent normally used for zero/subnormals is an invalid input. Such
     81 	 * values are specified with the leading mantissa bit of zero and the lowest
     82 	 * normal exponent (1-ExponentBias). Additionally having both exponent and
     83 	 * mantissa set to zero is a shorthand notation for the correctly signed
     84 	 * floating point zero. Inf and NaN must be specified directly with an
     85 	 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
     86 	 * bit set)
     87 	 *//*--------------------------------------------------------------------*/
     88 	static inline Float		construct		(int sign, int exponent, StorageType mantissa);
     89 
     90 	/*--------------------------------------------------------------------*//*!
     91 	 * \brief Construct floating point value. Explicit version
     92 	 * \param sign		Sign. Must be +1/-1
     93 	 * \param exponent	Exponent in range [-ExponentBias, ExponentBias+1]
     94 	 * \param mantissa	Mantissa bits
     95 	 * \return The specified float
     96 	 *
     97 	 * This function constructs a floating point value from its inputs with
     98 	 * minimal intervention.
     99 	 * The sign is turned into a sign bit and the exponent bias is added.
    100 	 * See IEEE-754 for additional information on the inputs and
    101 	 * the encoding of special values.
    102 	 *//*--------------------------------------------------------------------*/
    103 	static Float			constructBits	(int sign, int exponent, StorageType mantissaBits);
    104 
    105 	StorageType				bits			(void) const	{ return m_value;															}
    106 	float					asFloat			(void) const;
    107 	double					asDouble		(void) const;
    108 
    109 	inline int				signBit			(void) const	{ return (int)(m_value >> (ExponentBits+MantissaBits)) & 1;					}
    110 	inline StorageType		exponentBits	(void) const	{ return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);	}
    111 	inline StorageType		mantissaBits	(void) const	{ return m_value & ((StorageType(1)<<MantissaBits)-1);						}
    112 
    113 	inline int				sign			(void) const	{ return signBit() ? -1 : 1;																			}
    114 	inline int				exponent		(void) const	{ return isDenorm() ? 1	- ExponentBias : (int)exponentBits() - ExponentBias;							}
    115 	inline StorageType		mantissa		(void) const	{ return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));	}
    116 
    117 	inline bool				isInf			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() == 0;	}
    118 	inline bool				isNaN			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() != 0;	}
    119 	inline bool				isZero			(void) const	{ return exponentBits() == 0						&& mantissaBits() == 0;	}
    120 	inline bool				isDenorm		(void) const	{ return exponentBits() == 0						&& mantissaBits() != 0;	}
    121 
    122 	static Float			zero			(int sign);
    123 	static Float			inf				(int sign);
    124 	static Float			nan				(void);
    125 
    126 private:
    127 	StorageType				m_value;
    128 } DE_WARN_UNUSED_TYPE;
    129 
// Common floating-point types.
// Template arguments, in order: storage type, exponent bits, mantissa bits, exponent bias, FloatFlags mask.
typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float16;	//!< IEEE 754-2008 16-bit floating-point value
typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float32;	//!< IEEE 754 32-bit floating-point value
typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float64;	//!< IEEE 754 64-bit floating-point value

// Same layout as Float16 but without FLOAT_SUPPORT_DENORM: values that underflow during conversion to this type become signed zero.
typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN>	Float16Denormless;	//!< IEEE 754-2008 16-bit floating-point value without denormalized support
    136 
//! Default constructor. The stored bit pattern is zero, i.e. isZero() holds and the sign bit is clear.
template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
	: m_value(0)
{
}
    142 
//! Construct from an already encoded bit pattern. The bits are stored as given; no conversion or validation is done.
template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
	: m_value(value)
{
}
    148 
    149 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    150 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
    151 	: m_value(0)
    152 {
    153 	deUint32 u32;
    154 	memcpy(&u32, &value, sizeof(deUint32));
    155 	*this = convert(Float32(u32));
    156 }
    157 
    158 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    159 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
    160 	: m_value(0)
    161 {
    162 	deUint64 u64;
    163 	memcpy(&u64, &value, sizeof(deUint64));
    164 	*this = convert(Float64(u64));
    165 }
    166 
    167 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    168 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
    169 {
    170 	float		v;
    171 	deUint32	u32		= Float32::convert(*this).bits();
    172 	memcpy(&v, &u32, sizeof(deUint32));
    173 	return v;
    174 }
    175 
    176 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    177 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
    178 {
    179 	double		v;
    180 	deUint64	u64		= Float64::convert(*this).bits();
    181 	memcpy(&v, &u64, sizeof(deUint64));
    182 	return v;
    183 }
    184 
    185 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    186 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
    187 {
    188 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
    189 	return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
    190 }
    191 
    192 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    193 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
    194 {
    195 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
    196 	return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
    197 }
    198 
    199 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    200 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
    201 {
    202 	return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
    203 }
    204 
    205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    206 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    207 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
    208 	(int sign, int exponent, StorageType mantissa)
    209 {
    210 	// Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
    211 	const bool			isShorthandZero	= exponent == 0 && mantissa == 0;
    212 
    213 	// Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
    214 	// Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
    215 	const bool			isDenormOrZero	= (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
    216 	const StorageType	s				= StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
    217 	const StorageType	exp				= (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
    218 
    219 	DE_ASSERT(sign == +1 || sign == -1);
    220 	DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
    221 	DE_ASSERT(exp >> ExponentBits == 0);
    222 
    223 	return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
    224 }
    225 
    226 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    227 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    228 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
    229 	(int sign, int exponent, StorageType mantissaBits)
    230 {
    231 	const StorageType signBit		= static_cast<StorageType>(sign < 0 ? 1 : 0);
    232 	const StorageType exponentBits	= static_cast<StorageType>(exponent + ExponentBias);
    233 
    234 	DE_ASSERT(sign == +1 || sign == -1 );
    235 	DE_ASSERT(exponentBits >> ExponentBits == 0);
    236 	DE_ASSERT(mantissaBits >> MantissaBits == 0);
    237 
    238 	return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
    239 }
    240 
    241 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    242 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
    243 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    244 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
    245 	(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
    246 {
    247 	if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
    248 	{
    249 		// Negative number, truncate to zero.
    250 		return zero(+1);
    251 	}
    252 	else if (other.isInf())
    253 	{
    254 		return inf(other.sign());
    255 	}
    256 	else if (other.isNaN())
    257 	{
    258 		return nan();
    259 	}
    260 	else if (other.isZero())
    261 	{
    262 		return zero(other.sign());
    263 	}
    264 	else
    265 	{
    266 		const int			eMin	= 1 - ExponentBias;
    267 		const int			eMax	= ((1<<ExponentBits)-2) - ExponentBias;
    268 
    269 		const StorageType	s		= StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
    270 		int					e		= other.exponent();
    271 		deUint64			m		= other.mantissa();
    272 
    273 		// Normalize denormalized values prior to conversion.
    274 		while (!(m & (1ull<<OtherMantissaBits)))
    275 		{
    276 			m <<= 1;
    277 			e  -= 1;
    278 		}
    279 
    280 		if (e < eMin)
    281 		{
    282 			// Underflow.
    283 			if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
    284 			{
    285 				// Shift and round (RTE).
    286 				int			bitDiff	= (OtherMantissaBits-MantissaBits) + (eMin-e);
    287 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
    288 				deUint64	bias	= (m >> bitDiff) & 1;
    289 
    290 				return Float(StorageType(s | (m + half + bias) >> bitDiff));
    291 			}
    292 			else
    293 				return zero(other.sign());
    294 		}
    295 		else
    296 		{
    297 			// Remove leading 1.
    298 			m = m & ~(1ull<<OtherMantissaBits);
    299 
    300 			if (MantissaBits < OtherMantissaBits)
    301 			{
    302 				// Round mantissa (round to nearest even).
    303 				int			bitDiff	= OtherMantissaBits-MantissaBits;
    304 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
    305 				deUint64	bias	= (m >> bitDiff) & 1;
    306 
    307 				m = (m + half + bias) >> bitDiff;
    308 
    309 				if (m & (1ull<<MantissaBits))
    310 				{
    311 					// Overflow in mantissa.
    312 					m  = 0;
    313 					e += 1;
    314 				}
    315 			}
    316 			else
    317 			{
    318 				int bitDiff = MantissaBits-OtherMantissaBits;
    319 				m = m << bitDiff;
    320 			}
    321 
    322 			if (e > eMax)
    323 			{
    324 				// Overflow.
    325 				return inf(other.sign());
    326 			}
    327 			else
    328 			{
    329 				DE_ASSERT(de::inRange(e, eMin, eMax));
    330 				DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
    331 				DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
    332 
    333 				return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
    334 			}
    335 		}
    336 	}
    337 }
    338 
    339 } // tcu
    340 
    341 #endif // _TCUFLOAT_HPP
    342