1 /////////////////////////////////////////////////////////////////////////// 2 // 3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas 4 // Digital Ltd. LLC 5 // 6 // All rights reserved. 7 // 8 // Redistribution and use in source and binary forms, with or without 9 // modification, are permitted provided that the following conditions are 10 // met: 11 // * Redistributions of source code must retain the above copyright 12 // notice, this list of conditions and the following disclaimer. 13 // * Redistributions in binary form must reproduce the above 14 // copyright notice, this list of conditions and the following disclaimer 15 // in the documentation and/or other materials provided with the 16 // distribution. 17 // * Neither the name of Industrial Light & Magic nor the names of 18 // its contributors may be used to endorse or promote products derived 19 // from this software without specific prior written permission. 20 // 21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 // 33 /////////////////////////////////////////////////////////////////////////// 34 35 // Primary authors: 36 // Florian Kainz <kainz (at) ilm.com> 37 // Rod Bogart <rgb (at) ilm.com> 38 39 40 //--------------------------------------------------------------------------- 41 // 42 // class half -- 43 // implementation of non-inline members 44 // 45 //--------------------------------------------------------------------------- 46 47 #include <assert.h> 48 #include "half.h" 49 50 using namespace std; 51 52 //------------------------------------------------------------- 53 // Lookup tables for half-to-float and float-to-half conversion 54 //------------------------------------------------------------- 55 56 HALF_EXPORT_CONST half::uif half::_toFloat[1 << 16] = 57 #include "toFloat.h" 58 HALF_EXPORT_CONST unsigned short half::_eLut[1 << 9] = 59 #include "eLut.h" 60 61 62 //----------------------------------------------- 63 // Overflow handler for float-to-half conversion; 64 // generates a hardware floating-point overflow, 65 // which may be trapped by the operating system. 66 //----------------------------------------------- 67 68 float 69 half::overflow () 70 { 71 volatile float f = 1e10; 72 73 for (int i = 0; i < 10; i++) 74 f *= f; // this will overflow before 75 // the forloop terminates 76 return f; 77 } 78 79 80 //----------------------------------------------------- 81 // Float-to-half conversion -- general case, including 82 // zeroes, denormalized numbers and exponent overflows. 83 //----------------------------------------------------- 84 85 short 86 half::convert (int i) 87 { 88 // 89 // Our floating point number, f, is represented by the bit 90 // pattern in integer i. Disassemble that bit pattern into 91 // the sign, s, the exponent, e, and the significand, m. 92 // Shift s into the position where it will go in in the 93 // resulting half number. 94 // Adjust e, accounting for the different exponent bias 95 // of float and half (127 versus 15). 96 // 97 98 register int s = (i >> 16) & 0x00008000; 99 register int e = ((i >> 23) & 0x000000ff) - (127 - 15); 100 register int m = i & 0x007fffff; 101 102 // 103 // Now reassemble s, e and m into a half: 104 // 105 106 if (e <= 0) 107 { 108 if (e < -10) 109 { 110 // 111 // E is less than -10. The absolute value of f is 112 // less than HALF_MIN (f may be a small normalized 113 // float, a denormalized float or a zero). 114 // 115 // We convert f to a half zero with the same sign as f. 116 // 117 118 return s; 119 } 120 121 // 122 // E is between -10 and 0. F is a normalized float 123 // whose magnitude is less than HALF_NRM_MIN. 124 // 125 // We convert f to a denormalized half. 126 // 127 128 // 129 // Add an explicit leading 1 to the significand. 130 // 131 132 m = m | 0x00800000; 133 134 // 135 // Round to m to the nearest (10+e)-bit value (with e between 136 // -10 and 0); in case of a tie, round to the nearest even value. 137 // 138 // Rounding may cause the significand to overflow and make 139 // our number normalized. Because of the way a half's bits 140 // are laid out, we don't have to treat this case separately; 141 // the code below will handle it correctly. 142 // 143 144 int t = 14 - e; 145 int a = (1 << (t - 1)) - 1; 146 int b = (m >> t) & 1; 147 148 m = (m + a + b) >> t; 149 150 // 151 // Assemble the half from s, e (zero) and m. 152 // 153 154 return s | m; 155 } 156 else if (e == 0xff - (127 - 15)) 157 { 158 if (m == 0) 159 { 160 // 161 // F is an infinity; convert f to a half 162 // infinity with the same sign as f. 163 // 164 165 return s | 0x7c00; 166 } 167 else 168 { 169 // 170 // F is a NAN; we produce a half NAN that preserves 171 // the sign bit and the 10 leftmost bits of the 172 // significand of f, with one exception: If the 10 173 // leftmost bits are all zero, the NAN would turn 174 // into an infinity, so we have to set at least one 175 // bit in the significand. 176 // 177 178 m >>= 13; 179 return s | 0x7c00 | m | (m == 0); 180 } 181 } 182 else 183 { 184 // 185 // E is greater than zero. F is a normalized float. 186 // We try to convert f to a normalized half. 187 // 188 189 // 190 // Round to m to the nearest 10-bit value. In case of 191 // a tie, round to the nearest even value. 192 // 193 194 m = m + 0x00000fff + ((m >> 13) & 1); 195 196 if (m & 0x00800000) 197 { 198 m = 0; // overflow in significand, 199 e += 1; // adjust exponent 200 } 201 202 // 203 // Handle exponent overflow 204 // 205 206 if (e > 30) 207 { 208 overflow (); // Cause a hardware floating point overflow; 209 return s | 0x7c00; // if this returns, the half becomes an 210 } // infinity with the same sign as f. 211 212 // 213 // Assemble the half from s, e and m. 214 // 215 216 return s | (e << 10) | (m >> 13); 217 } 218 } 219 220 221 //--------------------- 222 // Stream I/O operators 223 //--------------------- 224 225 ostream & 226 operator << (ostream &os, half h) 227 { 228 os << float (h); 229 return os; 230 } 231 232 233 istream & 234 operator >> (istream &is, half &h) 235 { 236 float f; 237 is >> f; 238 h = half (f); 239 return is; 240 } 241 242 243 //--------------------------------------- 244 // Functions to print the bit-layout of 245 // floats and halfs, mostly for debugging 246 //--------------------------------------- 247 248 void 249 printBits (ostream &os, half h) 250 { 251 unsigned short b = h.bits(); 252 253 for (int i = 15; i >= 0; i--) 254 { 255 os << (((b >> i) & 1)? '1': '0'); 256 257 if (i == 15 || i == 10) 258 os << ' '; 259 } 260 } 261 262 263 void 264 printBits (ostream &os, float f) 265 { 266 half::uif x; 267 x.f = f; 268 269 for (int i = 31; i >= 0; i--) 270 { 271 os << (((x.i >> i) & 1)? '1': '0'); 272 273 if (i == 31 || i == 23) 274 os << ' '; 275 } 276 } 277 278 279 void 280 printBits (char c[19], half h) 281 { 282 unsigned short b = h.bits(); 283 284 for (int i = 15, j = 0; i >= 0; i--, j++) 285 { 286 c[j] = (((b >> i) & 1)? '1': '0'); 287 288 if (i == 15 || i == 10) 289 c[++j] = ' '; 290 } 291 292 c[18] = 0; 293 } 294 295 296 void 297 printBits (char c[35], float f) 298 { 299 half::uif x; 300 x.f = f; 301 302 for (int i = 31, j = 0; i >= 0; i--, j++) 303 { 304 c[j] = (((x.i >> i) & 1)? '1': '0'); 305 306 if (i == 31 || i == 23) 307 c[++j] = ' '; 308 } 309 310 c[34] = 0; 311 } 312