1 /*------------------------------------------------------------------------- 2 * drawElements Base Portability Library 3 * ------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief 16-bit floating-point math. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "deFloat16.h" 25 26 DE_BEGIN_EXTERN_C 27 28 deFloat16 deFloat32To16 (float val32) 29 { 30 deUint32 sign; 31 int expotent; 32 deUint32 mantissa; 33 union 34 { 35 float f; 36 deUint32 u; 37 } x; 38 39 x.f = val32; 40 sign = (x.u >> 16u) & 0x00008000u; 41 expotent = (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15); 42 mantissa = x.u & 0x007fffffu; 43 44 if (expotent <= 0) 45 { 46 if (expotent < -10) 47 { 48 /* Rounds to zero. */ 49 return (deFloat16) sign; 50 } 51 52 /* Converted to denormalized half, add leading 1 to significand. */ 53 mantissa = mantissa | 0x00800000u; 54 55 /* Round mantissa to nearest (10+e) */ 56 { 57 deUint32 t = 14u - expotent; 58 deUint32 a = (1u << (t - 1u)) - 1u; 59 deUint32 b = (mantissa >> t) & 1u; 60 61 mantissa = (mantissa + a + b) >> t; 62 } 63 64 return (deFloat16) (sign | mantissa); 65 } 66 else if (expotent == 0xff - (127 - 15)) 67 { 68 if (mantissa == 0u) 69 { 70 /* InF */ 71 return (deFloat16) (sign | 0x7c00u); 72 } 73 else 74 { 75 /* NaN */ 76 mantissa >>= 13u; 77 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 78 } 79 } 80 else 81 { 82 /* Normalized float. */ 83 mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u); 84 85 if (mantissa & 0x00800000u) 86 { 87 /* Overflow in mantissa. */ 88 mantissa = 0u; 89 expotent += 1; 90 } 91 92 if (expotent > 30) 93 { 94 /* \todo [pyry] Cause hw fp overflow */ 95 return (deFloat16) (sign | 0x7c00u); 96 } 97 98 return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u)); 99 } 100 } 101 102 /*--------------------------------------------------------------------*//*! 103 * \brief Round the given number `val` to nearest even by discarding 104 * the last `numBitsToDiscard` bits. 105 * \param val value to round 106 * \param numBitsToDiscard number of (least significant) bits to discard 107 * \return The rounded value with the last `numBitsToDiscard` removed 108 *//*--------------------------------------------------------------------*/ 109 static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard) 110 { 111 const deUint32 lastBits = val & ((1 << numBitsToDiscard) - 1); 112 const deUint32 headBit = val & (1 << (numBitsToDiscard - 1)); 113 114 DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32); /* Make sure no overflow. */ 115 val >>= numBitsToDiscard; 116 117 if (headBit == 0) 118 { 119 return val; 120 } 121 else if (headBit == lastBits) 122 { 123 if ((val & 0x1) == 0x1) 124 { 125 return val + 1; 126 } 127 else 128 { 129 return val; 130 } 131 } 132 else 133 { 134 return val + 1; 135 } 136 } 137 138 deFloat16 deFloat32To16Round (float val32, deRoundingMode mode) 139 { 140 union 141 { 142 float f; /* Interpret as 32-bit float */ 143 deUint32 u; /* Interpret as 32-bit unsigned integer */ 144 } x; 145 deUint32 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */ 146 deUint32 exp32; /* exp32: biased exponent for 32-bit floats */ 147 int exp16; /* exp16: biased exponent for 16-bit floats */ 148 deUint32 mantissa; 149 150 /* We only support these two rounding modes for now */ 151 DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN); 152 153 x.f = val32; 154 sign = (x.u >> 16u) & 0x00008000u; 155 exp32 = (x.u >> 23u) & 0x000000ffu; 156 exp16 = (int) (exp32) - 127 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */ 157 mantissa = x.u & 0x007fffffu; 158 159 /* Case: zero and denormalized floats */ 160 if (exp32 == 0) 161 { 162 /* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */ 163 return (deFloat16) sign; 164 } 165 /* Case: Inf and NaN */ 166 else if (exp32 == 0x000000ffu) 167 { 168 if (mantissa == 0u) 169 { 170 /* Inf */ 171 return (deFloat16) (sign | 0x7c00u); 172 } 173 else 174 { 175 /* NaN */ 176 mantissa >>= 13u; /* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */ 177 /* Make sure we don't turn NaN into zero by | (mantissa == 0). */ 178 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 179 } 180 } 181 /* The following are cases for normalized floats. 182 * 183 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent, 184 * we can only shift the mantissa further right. 185 * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent. 186 * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent. 187 * So, we just need to right shift the mantissa -exp16 bits. 188 * * If exp16 is 0, mantissa shifting requirement is similar to the above. 189 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats. 190 */ 191 /* Case: normalized floats -> zero */ 192 else if (exp16 < -10) 193 { 194 /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */ 195 /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */ 196 return (deFloat16) sign; 197 } 198 /* Case: normalized floats -> zero and denormalized halfs */ 199 else if (exp16 <= 0) 200 { 201 /* Add the implicit leading 1 in mormalized float to mantissa. */ 202 mantissa |= 0x00800000u; 203 /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa. 204 * Need to discard the last 14-bits considering rounding mode. 205 * We also need to shift right -exp16 bits to encode the underflowed exponent. 206 */ 207 if (mode == DE_ROUNDINGMODE_TO_ZERO) 208 { 209 mantissa >>= (14 - exp16); 210 } 211 else 212 { 213 /* mantissa in the above may exceed 10-bits, in which case overflow happens. 214 * The overflowed bit is automatically carried to exponent then. 215 */ 216 mantissa = roundToNearestEven(mantissa, 14 - exp16); 217 } 218 return (deFloat16) (sign | mantissa); 219 } 220 /* Case: normalized floats -> normalized floats */ 221 else if (exp16 <= 30) 222 { 223 if (mode == DE_ROUNDINGMODE_TO_ZERO) 224 { 225 return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u)); 226 } 227 else 228 { 229 mantissa = roundToNearestEven(mantissa, 13); 230 /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */ 231 exp16 = (exp16 << 10u) + (mantissa & (1 << 10)); 232 mantissa &= (1u << 10) - 1; 233 return (deFloat16) (sign | ((deUint32) exp16) | mantissa); 234 } 235 } 236 /* Case: normalized floats (too large to be representable as 16-bit floats) */ 237 else 238 { 239 /* According to IEEE Std 754-2008 Section 7.4, 240 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign 241 * of the intermediate result. 242 * * roundTowardZero carries all overflows to the formats largest finite number 243 * with the sign of the intermediate result. 244 */ 245 if (mode == DE_ROUNDINGMODE_TO_ZERO) 246 { 247 return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */ 248 } 249 else 250 { 251 return (deFloat16) (sign | (0x1f << 10)); 252 } 253 } 254 255 /* Make compiler happy */ 256 return (deFloat16) 0; 257 } 258 259 float deFloat16To32 (deFloat16 val16) 260 { 261 deUint32 sign; 262 deUint32 expotent; 263 deUint32 mantissa; 264 union 265 { 266 float f; 267 deUint32 u; 268 } x; 269 270 x.u = 0u; 271 272 sign = ((deUint32)val16 >> 15u) & 0x00000001u; 273 expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; 274 mantissa = (deUint32)val16 & 0x000003ffu; 275 276 if (expotent == 0u) 277 { 278 if (mantissa == 0u) 279 { 280 /* +/- 0 */ 281 x.u = sign << 31u; 282 return x.f; 283 } 284 else 285 { 286 /* Denormalized, normalize it. */ 287 288 while (!(mantissa & 0x00000400u)) 289 { 290 mantissa <<= 1u; 291 expotent -= 1u; 292 } 293 294 expotent += 1u; 295 mantissa &= ~0x00000400u; 296 } 297 } 298 else if (expotent == 31u) 299 { 300 if (mantissa == 0u) 301 { 302 /* +/- InF */ 303 x.u = (sign << 31u) | 0x7f800000u; 304 return x.f; 305 } 306 else 307 { 308 /* +/- NaN */ 309 x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); 310 return x.f; 311 } 312 } 313 314 expotent = expotent + (127u - 15u); 315 mantissa = mantissa << 13u; 316 317 x.u = (sign << 31u) | (expotent << 23u) | mantissa; 318 return x.f; 319 } 320 321 DE_END_EXTERN_C 322