Home | History | Annotate | Download | only in common
      1 #ifndef _TCUFLOAT_HPP
      2 #define _TCUFLOAT_HPP
      3 /*-------------------------------------------------------------------------
      4  * drawElements Quality Program Tester Core
      5  * ----------------------------------------
      6  *
      7  * Copyright 2014 The Android Open Source Project
      8  *
      9  * Licensed under the Apache License, Version 2.0 (the "License");
     10  * you may not use this file except in compliance with the License.
     11  * You may obtain a copy of the License at
     12  *
     13  *      http://www.apache.org/licenses/LICENSE-2.0
     14  *
     15  * Unless required by applicable law or agreed to in writing, software
     16  * distributed under the License is distributed on an "AS IS" BASIS,
     17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     18  * See the License for the specific language governing permissions and
     19  * limitations under the License.
     20  *
     21  *//*!
     22  * \file
     23  * \brief Reconfigurable floating-point value template.
     24  *//*--------------------------------------------------------------------*/
     25 
     26 #include "tcuDefs.hpp"
     27 
     28 // For memcpy().
     29 #include <string.h>
     30 
     31 namespace tcu
     32 {
     33 
     34 enum FloatFlags
     35 {
     36 	FLOAT_HAS_SIGN			= (1<<0),
     37 	FLOAT_SUPPORT_DENORM	= (1<<1)
     38 };
     39 
     40 /*--------------------------------------------------------------------*//*!
     41  * \brief Floating-point format template
     42  *
     43  * This template implements arbitrary floating-point handling. Template
     44  * can be used for conversion between different formats and checking
     45  * various properties of floating-point values.
     46  *//*--------------------------------------------------------------------*/
     47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
     48 class Float
     49 {
     50 public:
     51 	typedef StorageType_ StorageType;
     52 
     53 	enum
     54 	{
     55 		EXPONENT_BITS	= ExponentBits,
     56 		MANTISSA_BITS	= MantissaBits,
     57 		EXPONENT_BIAS	= ExponentBias,
     58 		FLAGS			= Flags,
     59 	};
     60 
     61 							Float			(void);
     62 	explicit				Float			(StorageType value);
     63 	explicit				Float			(float v);
     64 	explicit				Float			(double v);
     65 
     66 	template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
     67 	static Float			convert			(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
     68 
     69 	static inline Float		convert			(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
     70 
     71 	/*--------------------------------------------------------------------*//*!
     72 	 * \brief Construct floating point value
     73 	 * \param sign		Sign. Must be +1/-1
     74 	 * \param exponent	Exponent in range [1-ExponentBias, ExponentBias+1]
     75 	 * \param mantissa	Mantissa bits with implicit leading bit explicitly set
     76 	 * \return The specified float
     77 	 *
     78 	 * This function constructs a floating point value from its inputs.
     79 	 * The normally implicit leading bit of the mantissa must be explicitly set.
     80 	 * The exponent normally used for zero/subnormals is an invalid input. Such
     81 	 * values are specified with the leading mantissa bit of zero and the lowest
     82 	 * normal exponent (1-ExponentBias). Additionally having both exponent and
     83 	 * mantissa set to zero is a shorthand notation for the correctly signed
     84 	 * floating point zero. Inf and NaN must be specified directly with an
     85 	 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
     86 	 * bit set)
     87 	 *//*--------------------------------------------------------------------*/
     88 	static inline Float		construct		(int sign, int exponent, StorageType mantissa);
     89 
     90 	/*--------------------------------------------------------------------*//*!
     91 	 * \brief Construct floating point value. Explicit version
     92 	 * \param sign		Sign. Must be +1/-1
     93 	 * \param exponent	Exponent in range [-ExponentBias, ExponentBias+1]
     94 	 * \param mantissa	Mantissa bits
     95 	 * \return The specified float
     96 	 *
     97 	 * This function constructs a floating point value from its inputs with
     98 	 * minimal intervention.
     99 	 * The sign is turned into a sign bit and the exponent bias is added.
    100 	 * See IEEE-754 for additional information on the inputs and
    101 	 * the encoding of special values.
    102 	 *//*--------------------------------------------------------------------*/
    103 	static Float			constructBits	(int sign, int exponent, StorageType mantissaBits);
    104 
    105 	StorageType				bits			(void) const	{ return m_value;															}
    106 	float					asFloat			(void) const;
    107 	double					asDouble		(void) const;
    108 
    109 	inline int				signBit			(void) const	{ return (int)(m_value >> (ExponentBits+MantissaBits)) & 1;					}
    110 	inline StorageType		exponentBits	(void) const	{ return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);	}
    111 	inline StorageType		mantissaBits	(void) const	{ return m_value & ((StorageType(1)<<MantissaBits)-1);						}
    112 
    113 	inline int				sign			(void) const	{ return signBit() ? -1 : 1;																			}
    114 	inline int				exponent		(void) const	{ return isDenorm() ? 1	- ExponentBias : (int)exponentBits() - ExponentBias;							}
    115 	inline StorageType		mantissa		(void) const	{ return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));	}
    116 
    117 	inline bool				isInf			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() == 0;	}
    118 	inline bool				isNaN			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() != 0;	}
    119 	inline bool				isZero			(void) const	{ return exponentBits() == 0						&& mantissaBits() == 0;	}
    120 	inline bool				isDenorm		(void) const	{ return exponentBits() == 0						&& mantissaBits() != 0;	}
    121 
    122 	static Float			zero			(int sign);
    123 	static Float			inf				(int sign);
    124 	static Float			nan				(void);
    125 
    126 private:
    127 	StorageType				m_value;
    128 } DE_WARN_UNUSED_TYPE;
    129 
    130 // Common floating-point types.
    131 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float16;	//!< IEEE 754-2008 16-bit floating-point value
    132 typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float32;	//!< IEEE 754 32-bit floating-point value
    133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float64;	//!< IEEE 754 64-bit floating-point value
    134 
    135 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    136 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
    137 	: m_value(0)
    138 {
    139 }
    140 
    141 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    142 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
    143 	: m_value(value)
    144 {
    145 }
    146 
    147 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    148 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
    149 	: m_value(0)
    150 {
    151 	deUint32 u32;
    152 	memcpy(&u32, &value, sizeof(deUint32));
    153 	*this = convert(Float32(u32));
    154 }
    155 
    156 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    157 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
    158 	: m_value(0)
    159 {
    160 	deUint64 u64;
    161 	memcpy(&u64, &value, sizeof(deUint64));
    162 	*this = convert(Float64(u64));
    163 }
    164 
    165 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    166 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
    167 {
    168 	float		v;
    169 	deUint32	u32		= Float32::convert(*this).bits();
    170 	memcpy(&v, &u32, sizeof(deUint32));
    171 	return v;
    172 }
    173 
    174 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    175 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
    176 {
    177 	double		v;
    178 	deUint64	u64		= Float64::convert(*this).bits();
    179 	memcpy(&v, &u64, sizeof(deUint64));
    180 	return v;
    181 }
    182 
    183 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    184 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
    185 {
    186 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
    187 	return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
    188 }
    189 
    190 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    191 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
    192 {
    193 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
    194 	return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
    195 }
    196 
    197 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    198 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
    199 {
    200 	return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
    201 }
    202 
    203 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    204 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    205 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
    206 	(int sign, int exponent, StorageType mantissa)
    207 {
    208 	// Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
    209 	const bool			isShorthandZero	= exponent == 0 && mantissa == 0;
    210 
    211 	// Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
    212 	// Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
    213 	const bool			isDenormOrZero	= (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
    214 	const StorageType	s				= StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
    215 	const StorageType	exp				= (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
    216 
    217 	DE_ASSERT(sign == +1 || sign == -1);
    218 	DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
    219 	DE_ASSERT(exp >> ExponentBits == 0);
    220 
    221 	return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
    222 }
    223 
    224 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    225 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    226 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
    227 	(int sign, int exponent, StorageType mantissaBits)
    228 {
    229 	const StorageType signBit		= sign < 0 ? 1 : 0;
    230 	const StorageType exponentBits	= exponent + ExponentBias;
    231 
    232 	DE_ASSERT(sign == +1 || sign == -1 );
    233 	DE_ASSERT(exponentBits >> ExponentBits == 0);
    234 	DE_ASSERT(mantissaBits >> MantissaBits == 0);
    235 
    236 	return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
    237 }
    238 
    239 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
    240 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
    241 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
    242 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
    243 	(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
    244 {
    245 	if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
    246 	{
    247 		// Negative number, truncate to zero.
    248 		return zero(+1);
    249 	}
    250 	else if (other.isInf())
    251 	{
    252 		return inf(other.sign());
    253 	}
    254 	else if (other.isNaN())
    255 	{
    256 		return nan();
    257 	}
    258 	else if (other.isZero())
    259 	{
    260 		return zero(other.sign());
    261 	}
    262 	else
    263 	{
    264 		const int			eMin	= 1 - ExponentBias;
    265 		const int			eMax	= ((1<<ExponentBits)-2) - ExponentBias;
    266 
    267 		const StorageType	s		= StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
    268 		int					e		= other.exponent();
    269 		deUint64			m		= other.mantissa();
    270 
    271 		// Normalize denormalized values prior to conversion.
    272 		while (!(m & (1ull<<OtherMantissaBits)))
    273 		{
    274 			m <<= 1;
    275 			e  -= 1;
    276 		}
    277 
    278 		if (e < eMin)
    279 		{
    280 			// Underflow.
    281 			if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
    282 			{
    283 				// Shift and round (RTE).
    284 				int			bitDiff	= (OtherMantissaBits-MantissaBits) + (eMin-e);
    285 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
    286 				deUint64	bias	= (m >> bitDiff) & 1;
    287 
    288 				return Float(StorageType(s | (m + half + bias) >> bitDiff));
    289 			}
    290 			else
    291 				return zero(other.sign());
    292 		}
    293 		else
    294 		{
    295 			// Remove leading 1.
    296 			m = m & ~(1ull<<OtherMantissaBits);
    297 
    298 			if (MantissaBits < OtherMantissaBits)
    299 			{
    300 				// Round mantissa (round to nearest even).
    301 				int			bitDiff	= OtherMantissaBits-MantissaBits;
    302 				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
    303 				deUint64	bias	= (m >> bitDiff) & 1;
    304 
    305 				m = (m + half + bias) >> bitDiff;
    306 
    307 				if (m & (1ull<<MantissaBits))
    308 				{
    309 					// Overflow in mantissa.
    310 					m  = 0;
    311 					e += 1;
    312 				}
    313 			}
    314 			else
    315 			{
    316 				int bitDiff = MantissaBits-OtherMantissaBits;
    317 				m = m << bitDiff;
    318 			}
    319 
    320 			if (e > eMax)
    321 			{
    322 				// Overflow.
    323 				return inf(other.sign());
    324 			}
    325 			else
    326 			{
    327 				DE_ASSERT(de::inRange(e, eMin, eMax));
    328 				DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
    329 				DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
    330 
    331 				return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
    332 			}
    333 		}
    334 	}
    335 }
    336 
    337 } // tcu
    338 
    339 #endif // _TCUFLOAT_HPP
    340