Home | History | Annotate | Download | only in Half
      1 ///////////////////////////////////////////////////////////////////////////
      2 //
      3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
      4 // Digital Ltd. LLC
      5 //
      6 // All rights reserved.
      7 //
      8 // Redistribution and use in source and binary forms, with or without
      9 // modification, are permitted provided that the following conditions are
     10 // met:
     11 // *       Redistributions of source code must retain the above copyright
     12 // notice, this list of conditions and the following disclaimer.
     13 // *       Redistributions in binary form must reproduce the above
     14 // copyright notice, this list of conditions and the following disclaimer
     15 // in the documentation and/or other materials provided with the
     16 // distribution.
     17 // *       Neither the name of Industrial Light & Magic nor the names of
     18 // its contributors may be used to endorse or promote products derived
     19 // from this software without specific prior written permission.
     20 //
     21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 //
     33 ///////////////////////////////////////////////////////////////////////////
     34 
     35 // Primary authors:
     36 //     Florian Kainz <kainz (at) ilm.com>
     37 //     Rod Bogart <rgb (at) ilm.com>
     38 
     39 
     40 //---------------------------------------------------------------------------
     41 //
     42 //	class half --
     43 //	implementation of non-inline members
     44 //
     45 //---------------------------------------------------------------------------
     46 
     47 #include <assert.h>
     48 #include "half.h"
     49 
     50 using namespace std;
     51 
     52 //-------------------------------------------------------------
     53 // Lookup tables for half-to-float and float-to-half conversion
     54 //-------------------------------------------------------------
     55 
// Definition of the static table half::_toFloat, holding 1 << 16
// entries of type half::uif.  The declaration ends in '=', so the
// included header toFloat.h must supply the brace-enclosed
// initializer and the terminating semicolon.
// NOTE(review): presumably indexed by the 16-bit pattern of a half
// to obtain the corresponding float -- confirm against half.h.
HALF_EXPORT_CONST half::uif half::_toFloat[1 << 16] =
#include "toFloat.h"
// Definition of the static table half::_eLut, holding 1 << 9
// unsigned short entries; the initializer and terminating semicolon
// come from the included header eLut.h.
// NOTE(review): presumably indexed by the 9 sign-and-exponent bits
// of a float during float-to-half conversion -- confirm against half.h.
HALF_EXPORT_CONST unsigned short half::_eLut[1 << 9] =
#include "eLut.h"
     60 
     61 
     62 //-----------------------------------------------
     63 // Overflow handler for float-to-half conversion;
     64 // generates a hardware floating-point overflow,
     65 // which may be trapped by the operating system.
     66 //-----------------------------------------------
     67 
     68 float
     69 half::overflow ()
     70 {
     71     volatile float f = 1e10;
     72 
     73     for (int i = 0; i < 10; i++)
     74     f *= f;				// this will overflow before
     75                     // the forloop terminates
     76     return f;
     77 }
     78 
     79 
     80 //-----------------------------------------------------
     81 // Float-to-half conversion -- general case, including
     82 // zeroes, denormalized numbers and exponent overflows.
     83 //-----------------------------------------------------
     84 
     85 short
     86 half::convert (int i)
     87 {
     88     //
     89     // Our floating point number, f, is represented by the bit
     90     // pattern in integer i.  Disassemble that bit pattern into
     91     // the sign, s, the exponent, e, and the significand, m.
     92     // Shift s into the position where it will go in in the
     93     // resulting half number.
     94     // Adjust e, accounting for the different exponent bias
     95     // of float and half (127 versus 15).
     96     //
     97 
     98     register int s =  (i >> 16) & 0x00008000;
     99     register int e = ((i >> 23) & 0x000000ff) - (127 - 15);
    100     register int m =   i        & 0x007fffff;
    101 
    102     //
    103     // Now reassemble s, e and m into a half:
    104     //
    105 
    106     if (e <= 0)
    107     {
    108     if (e < -10)
    109     {
    110         //
    111         // E is less than -10.  The absolute value of f is
    112         // less than HALF_MIN (f may be a small normalized
    113         // float, a denormalized float or a zero).
    114         //
    115         // We convert f to a half zero with the same sign as f.
    116         //
    117 
    118         return s;
    119     }
    120 
    121     //
    122     // E is between -10 and 0.  F is a normalized float
    123     // whose magnitude is less than HALF_NRM_MIN.
    124     //
    125     // We convert f to a denormalized half.
    126     //
    127 
    128     //
    129     // Add an explicit leading 1 to the significand.
    130     //
    131 
    132     m = m | 0x00800000;
    133 
    134     //
    135     // Round to m to the nearest (10+e)-bit value (with e between
    136     // -10 and 0); in case of a tie, round to the nearest even value.
    137     //
    138     // Rounding may cause the significand to overflow and make
    139     // our number normalized.  Because of the way a half's bits
    140     // are laid out, we don't have to treat this case separately;
    141     // the code below will handle it correctly.
    142     //
    143 
    144     int t = 14 - e;
    145     int a = (1 << (t - 1)) - 1;
    146     int b = (m >> t) & 1;
    147 
    148     m = (m + a + b) >> t;
    149 
    150     //
    151     // Assemble the half from s, e (zero) and m.
    152     //
    153 
    154     return s | m;
    155     }
    156     else if (e == 0xff - (127 - 15))
    157     {
    158     if (m == 0)
    159     {
    160         //
    161         // F is an infinity; convert f to a half
    162         // infinity with the same sign as f.
    163         //
    164 
    165         return s | 0x7c00;
    166     }
    167     else
    168     {
    169         //
    170         // F is a NAN; we produce a half NAN that preserves
    171         // the sign bit and the 10 leftmost bits of the
    172         // significand of f, with one exception: If the 10
    173         // leftmost bits are all zero, the NAN would turn
    174         // into an infinity, so we have to set at least one
    175         // bit in the significand.
    176         //
    177 
    178         m >>= 13;
    179         return s | 0x7c00 | m | (m == 0);
    180     }
    181     }
    182     else
    183     {
    184     //
    185     // E is greater than zero.  F is a normalized float.
    186     // We try to convert f to a normalized half.
    187     //
    188 
    189     //
    190     // Round to m to the nearest 10-bit value.  In case of
    191     // a tie, round to the nearest even value.
    192     //
    193 
    194     m = m + 0x00000fff + ((m >> 13) & 1);
    195 
    196     if (m & 0x00800000)
    197     {
    198         m =  0;		// overflow in significand,
    199         e += 1;		// adjust exponent
    200     }
    201 
    202     //
    203     // Handle exponent overflow
    204     //
    205 
    206     if (e > 30)
    207     {
    208         overflow ();	// Cause a hardware floating point overflow;
    209         return s | 0x7c00;	// if this returns, the half becomes an
    210     }   			// infinity with the same sign as f.
    211 
    212     //
    213     // Assemble the half from s, e and m.
    214     //
    215 
    216     return s | (e << 10) | (m >> 13);
    217     }
    218 }
    219 
    220 
    221 //---------------------
    222 // Stream I/O operators
    223 //---------------------
    224 
    225 ostream &
    226 operator << (ostream &os, half h)
    227 {
    228     os << float (h);
    229     return os;
    230 }
    231 
    232 
    233 istream &
    234 operator >> (istream &is, half &h)
    235 {
    236     float f;
    237     is >> f;
    238     h = half (f);
    239     return is;
    240 }
    241 
    242 
    243 //---------------------------------------
    244 // Functions to print the bit-layout of
    245 // floats and halfs, mostly for debugging
    246 //---------------------------------------
    247 
    248 void
    249 printBits (ostream &os, half h)
    250 {
    251     unsigned short b = h.bits();
    252 
    253     for (int i = 15; i >= 0; i--)
    254     {
    255     os << (((b >> i) & 1)? '1': '0');
    256 
    257     if (i == 15 || i == 10)
    258         os << ' ';
    259     }
    260 }
    261 
    262 
    263 void
    264 printBits (ostream &os, float f)
    265 {
    266     half::uif x;
    267     x.f = f;
    268 
    269     for (int i = 31; i >= 0; i--)
    270     {
    271     os << (((x.i >> i) & 1)? '1': '0');
    272 
    273     if (i == 31 || i == 23)
    274         os << ' ';
    275     }
    276 }
    277 
    278 
    279 void
    280 printBits (char c[19], half h)
    281 {
    282     unsigned short b = h.bits();
    283 
    284     for (int i = 15, j = 0; i >= 0; i--, j++)
    285     {
    286     c[j] = (((b >> i) & 1)? '1': '0');
    287 
    288     if (i == 15 || i == 10)
    289         c[++j] = ' ';
    290     }
    291 
    292     c[18] = 0;
    293 }
    294 
    295 
    296 void
    297 printBits (char c[35], float f)
    298 {
    299     half::uif x;
    300     x.f = f;
    301 
    302     for (int i = 31, j = 0; i >= 0; i--, j++)
    303     {
    304     c[j] = (((x.i >> i) & 1)? '1': '0');
    305 
    306     if (i == 31 || i == 23)
    307         c[++j] = ' ';
    308     }
    309 
    310     c[34] = 0;
    311 }
    312