Home | History | Annotate | Download | only in debase
      1 /*-------------------------------------------------------------------------
      2  * drawElements Base Portability Library
      3  * -------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief 16-bit floating-point math.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "deFloat16.h"
     25 
     26 DE_BEGIN_EXTERN_C
     27 
     28 deFloat16 deFloat32To16 (float val32)
     29 {
     30 	deUint32	sign;
     31 	int			expotent;
     32 	deUint32	mantissa;
     33 	union
     34 	{
     35 		float		f;
     36 		deUint32	u;
     37 	} x;
     38 
     39 	x.f			= val32;
     40 	sign		= (x.u >> 16u) & 0x00008000u;
     41 	expotent	= (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15);
     42 	mantissa	= x.u & 0x007fffffu;
     43 
     44 	if (expotent <= 0)
     45 	{
     46 		if (expotent < -10)
     47 		{
     48 			/* Rounds to zero. */
     49 			return (deFloat16) sign;
     50 		}
     51 
     52 		/* Converted to denormalized half, add leading 1 to significand. */
     53 		mantissa = mantissa | 0x00800000u;
     54 
     55 		/* Round mantissa to nearest (10+e) */
     56 		{
     57 			deUint32 t = 14u - expotent;
     58 			deUint32 a = (1u << (t - 1u)) - 1u;
     59 			deUint32 b = (mantissa >> t) & 1u;
     60 
     61 			mantissa = (mantissa + a + b) >> t;
     62 		}
     63 
     64 		return (deFloat16) (sign | mantissa);
     65 	}
     66 	else if (expotent == 0xff - (127 - 15))
     67 	{
     68 		if (mantissa == 0u)
     69 		{
     70 			/* InF */
     71 			return (deFloat16) (sign | 0x7c00u);
     72 		}
     73 		else
     74 		{
     75 			/* NaN */
     76 			mantissa >>= 13u;
     77 			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
     78 		}
     79 	}
     80 	else
     81 	{
     82 		/* Normalized float. */
     83 		mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u);
     84 
     85 		if (mantissa & 0x00800000u)
     86 		{
     87 			/* Overflow in mantissa. */
     88 			mantissa  = 0u;
     89 			expotent += 1;
     90 		}
     91 
     92 		if (expotent > 30)
     93 		{
     94 			/* \todo [pyry] Cause hw fp overflow */
     95 			return (deFloat16) (sign | 0x7c00u);
     96 		}
     97 
     98 		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u));
     99 	}
    100 }
    101 
    102 /*--------------------------------------------------------------------*//*!
    103  * \brief Round the given number `val` to nearest even by discarding
    104  *        the last `numBitsToDiscard` bits.
    105  * \param val value to round
    106  * \param numBitsToDiscard number of (least significant) bits to discard
    107  * \return The rounded value with the last `numBitsToDiscard` removed
    108  *//*--------------------------------------------------------------------*/
    109 static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
    110 {
    111 	const deUint32	lastBits	= val & ((1 << numBitsToDiscard) - 1);
    112 	const deUint32	headBit		= val & (1 << (numBitsToDiscard - 1));
    113 
    114 	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32);	/* Make sure no overflow. */
    115 	val >>= numBitsToDiscard;
    116 
    117 	if (headBit == 0)
    118 	{
    119 		return val;
    120 	}
    121 	else if (headBit == lastBits)
    122 	{
    123 		if ((val & 0x1) == 0x1)
    124 		{
    125 			return val + 1;
    126 		}
    127 		else
    128 		{
    129 			return val;
    130 		}
    131 	}
    132 	else
    133 	{
    134 		return val + 1;
    135 	}
    136 }
    137 
    138 deFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
    139 {
    140 	union
    141 	{
    142 		float		f;		/* Interpret as 32-bit float */
    143 		deUint32	u;		/* Interpret as 32-bit unsigned integer */
    144 	} x;
    145 	deUint32	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
    146 	deUint32	exp32;		/* exp32: biased exponent for 32-bit floats */
    147 	int			exp16;		/* exp16: biased exponent for 16-bit floats */
    148 	deUint32	mantissa;
    149 
    150 	/* We only support these two rounding modes for now */
    151 	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
    152 
    153 	x.f			= val32;
    154 	sign		= (x.u >> 16u) & 0x00008000u;
    155 	exp32		= (x.u >> 23u) & 0x000000ffu;
    156 	exp16		= (int) (exp32) - 127 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
    157 	mantissa	= x.u & 0x007fffffu;
    158 
    159 	/* Case: zero and denormalized floats */
    160 	if (exp32 == 0)
    161 	{
    162 		/* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */
    163 		return (deFloat16) sign;
    164 	}
    165 	/* Case: Inf and NaN */
    166 	else if (exp32 == 0x000000ffu)
    167 	{
    168 		if (mantissa == 0u)
    169 		{
    170 			/* Inf */
    171 			return (deFloat16) (sign | 0x7c00u);
    172 		}
    173 		else
    174 		{
    175 			/* NaN */
    176 			mantissa >>= 13u;	/* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */
    177 			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
    178 			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
    179 		}
    180 	}
    181 	/* The following are cases for normalized floats.
    182 	 *
    183 	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
    184 	 *   we can only shift the mantissa further right.
    185 	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
    186 	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
    187 	 *   So, we just need to right shift the mantissa -exp16 bits.
    188 	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
    189 	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
    190 	 */
    191 	/* Case: normalized floats -> zero */
    192 	else if (exp16 < -10)
    193 	{
    194 		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
    195 		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
    196 		return (deFloat16) sign;
    197 	}
    198 	/* Case: normalized floats -> zero and denormalized halfs */
    199 	else if (exp16 <= 0)
    200 	{
    201 		/* Add the implicit leading 1 in mormalized float to mantissa. */
    202 		mantissa |= 0x00800000u;
    203 		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
    204 		 * Need to discard the last 14-bits considering rounding mode.
    205 		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
    206 		 */
    207 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
    208 		{
    209 			mantissa >>= (14 - exp16);
    210 		}
    211 		else
    212 		{
    213 			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
    214 			 * The overflowed bit is automatically carried to exponent then.
    215 			 */
    216 			mantissa = roundToNearestEven(mantissa, 14 - exp16);
    217 		}
    218 		return (deFloat16) (sign | mantissa);
    219 	}
    220 	/* Case: normalized floats -> normalized floats */
    221 	else if (exp16 <= 30)
    222 	{
    223 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
    224 		{
    225 			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u));
    226 		}
    227 		else
    228 		{
    229 			mantissa	= roundToNearestEven(mantissa, 13);
    230 			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
    231 			exp16		= (exp16 << 10u) + (mantissa & (1 << 10));
    232 			mantissa	&= (1u << 10) - 1;
    233 			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
    234 		}
    235 	}
    236 	/* Case: normalized floats (too large to be representable as 16-bit floats) */
    237 	else
    238 	{
    239 		/* According to IEEE Std 754-2008 Section 7.4,
    240 		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
    241 		 *   of the intermediate  result.
    242 		 * * roundTowardZero carries all overflows to the formats largest finite number
    243 		 *   with the sign of the intermediate result.
    244 		 */
    245 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
    246 		{
    247 			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
    248 		}
    249 		else
    250 		{
    251 			return (deFloat16) (sign | (0x1f << 10));
    252 		}
    253 	}
    254 
    255 	/* Make compiler happy */
    256 	return (deFloat16) 0;
    257 }
    258 
    259 float deFloat16To32 (deFloat16 val16)
    260 {
    261 	deUint32 sign;
    262 	deUint32 expotent;
    263 	deUint32 mantissa;
    264 	union
    265 	{
    266 		float		f;
    267 		deUint32	u;
    268 	} x;
    269 
    270 	x.u			= 0u;
    271 
    272 	sign		= ((deUint32)val16 >> 15u) & 0x00000001u;
    273 	expotent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
    274 	mantissa	= (deUint32)val16 & 0x000003ffu;
    275 
    276 	if (expotent == 0u)
    277 	{
    278 		if (mantissa == 0u)
    279 		{
    280 			/* +/- 0 */
    281 			x.u = sign << 31u;
    282 			return x.f;
    283 		}
    284 		else
    285 		{
    286 			/* Denormalized, normalize it. */
    287 
    288 			while (!(mantissa & 0x00000400u))
    289 			{
    290 				mantissa <<= 1u;
    291 				expotent -=  1u;
    292 			}
    293 
    294 			expotent += 1u;
    295 			mantissa &= ~0x00000400u;
    296 		}
    297 	}
    298 	else if (expotent == 31u)
    299 	{
    300 		if (mantissa == 0u)
    301 		{
    302 			/* +/- InF */
    303 			x.u = (sign << 31u) | 0x7f800000u;
    304 			return x.f;
    305 		}
    306 		else
    307 		{
    308 			/* +/- NaN */
    309 			x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u);
    310 			return x.f;
    311 		}
    312 	}
    313 
    314 	expotent = expotent + (127u - 15u);
    315 	mantissa = mantissa << 13u;
    316 
    317 	x.u = (sign << 31u) | (expotent << 23u) | mantissa;
    318 	return x.f;
    319 }
    320 
    321 DE_END_EXTERN_C
    322