Home | History | Annotate | Download | only in Half
      1 ///////////////////////////////////////////////////////////////////////////
      2 //
      3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
      4 // Digital Ltd. LLC
      5 //
      6 // All rights reserved.
      7 //
      8 // Redistribution and use in source and binary forms, with or without
      9 // modification, are permitted provided that the following conditions are
     10 // met:
     11 // *       Redistributions of source code must retain the above copyright
     12 // notice, this list of conditions and the following disclaimer.
     13 // *       Redistributions in binary form must reproduce the above
     14 // copyright notice, this list of conditions and the following disclaimer
     15 // in the documentation and/or other materials provided with the
     16 // distribution.
     17 // *       Neither the name of Industrial Light & Magic nor the names of
     18 // its contributors may be used to endorse or promote products derived
     19 // from this software without specific prior written permission.
     20 //
     21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 //
     33 ///////////////////////////////////////////////////////////////////////////
     34 
     35 // Primary authors:
     36 //     Florian Kainz <kainz (at) ilm.com>
     37 //     Rod Bogart <rgb (at) ilm.com>
     38 
     39 //---------------------------------------------------------------------------
     40 //
     41 //	half -- a 16-bit floating point number class:
     42 //
     43 //	Type half can represent positive and negative numbers whose
     44 //	magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
     45 //	error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
     46 //	with an absolute error of 6.0e-8.  All integers from -2048 to
     47 //	+2048 can be represented exactly.
     48 //
     49 //	Type half behaves (almost) like the built-in C++ floating point
     50 //	types.  In arithmetic expressions, half, float and double can be
     51 //	mixed freely.  Here are a few examples:
     52 //
     53 //	    half a (3.5);
     54 //	    float b (a + sqrt (a));
     55 //	    a += b;
     56 //	    b += a;
     57 //	    b = a + 7;
     58 //
     59 //	Conversions from half to float are lossless; all half numbers
     60 //	are exactly representable as floats.
     61 //
     62 //	Conversions from float to half may not preserve a float's value
     63 //	exactly.  If a float is not representable as a half, then the
     64 //	float value is rounded to the nearest representable half.  If a
     65 //	float value is exactly in the middle between the two closest
     66 //	representable half values, then the float value is rounded to
     67 //	the closest half whose least significant bit is zero.
     68 //
     69 //	Overflows during float-to-half conversions cause arithmetic
     70 //	exceptions.  An overflow occurs when the float value to be
     71 //	converted is too large to be represented as a half, or if the
     72 //	float value is an infinity or a NAN.
     73 //
     74 //	The implementation of type half makes the following assumptions
     75 //	about the implementation of the built-in C++ types:
     76 //
     77 //	    float is an IEEE 754 single-precision number
     78 //	    sizeof (float) == 4
     79 //	    sizeof (unsigned int) == sizeof (float)
     80 //	    alignof (unsigned int) == alignof (float)
     81 //	    sizeof (unsigned short) == 2
     82 //
     83 //---------------------------------------------------------------------------
     84 
     85 #ifndef _HALF_H_
     86 #define _HALF_H_
     87 
     88 #include <iostream>
     89 
     90 #if defined(OPENEXR_DLL)
     91     #if defined(HALF_EXPORTS)
     92     #define HALF_EXPORT __declspec(dllexport)
     93     #else
     94     #define HALF_EXPORT __declspec(dllimport)
     95     #endif
     96     #define HALF_EXPORT_CONST
     97 #else
     98     #define HALF_EXPORT
     99     #define HALF_EXPORT_CONST const
    100 #endif
    101 
    102 class HALF_EXPORT half
    103 {
    104   public:
    105 
    106     //-------------
    107     // Constructors
    108     //-------------
    109 
    110     half ();			// no initialization
    111     half (float f);
    112 
    113 
    114     //--------------------
    115     // Conversion to float
    116     //--------------------
    117 
    118     operator		float () const;
    119 
    120 
    121     //------------
    122     // Unary minus
    123     //------------
    124 
    125     half		operator - () const;
    126 
    127 
    128     //-----------
    129     // Assignment
    130     //-----------
    131 
    132     half &		operator = (half  h);
    133     half &		operator = (float f);
    134 
    135     half &		operator += (half  h);
    136     half &		operator += (float f);
    137 
    138     half &		operator -= (half  h);
    139     half &		operator -= (float f);
    140 
    141     half &		operator *= (half  h);
    142     half &		operator *= (float f);
    143 
    144     half &		operator /= (half  h);
    145     half &		operator /= (float f);
    146 
    147 
    148     //---------------------------------------------------------
    149     // Round to n-bit precision (n should be between 0 and 10).
    150     // After rounding, the significand's 10-n least significant
    151     // bits will be zero.
    152     //---------------------------------------------------------
    153 
    154     half		round (unsigned int n) const;
    155 
    156 
    157     //--------------------------------------------------------------------
    158     // Classification:
    159     //
    160     //	h.isFinite()		returns true if h is a normalized number,
    161     //				a denormalized number or zero
    162     //
    163     //	h.isNormalized()	returns true if h is a normalized number
    164     //
    165     //	h.isDenormalized()	returns true if h is a denormalized number
    166     //
    167     //	h.isZero()		returns true if h is zero
    168     //
    169     //	h.isNan()		returns true if h is a NAN
    170     //
    171     //	h.isInfinity()		returns true if h is a positive
    172     //				or a negative infinity
    173     //
    174     //	h.isNegative()		returns true if the sign bit of h
    175     //				is set (negative)
    176     //--------------------------------------------------------------------
    177 
    178     bool		isFinite () const;
    179     bool		isNormalized () const;
    180     bool		isDenormalized () const;
    181     bool		isZero () const;
    182     bool		isNan () const;
    183     bool		isInfinity () const;
    184     bool		isNegative () const;
    185 
    186 
    187     //--------------------------------------------
    188     // Special values
    189     //
    190     //	posInf()	returns +infinity
    191     //
    192     //	negInf()	returns -infinity
    193     //
    194     //	qNan()		returns a NAN with the bit
    195     //			pattern 0111111111111111
    196     //
    197     //	sNan()		returns a NAN with the bit
    198     //			pattern 0111110111111111
    199     //--------------------------------------------
    200 
    201     static half		posInf ();
    202     static half		negInf ();
    203     static half		qNan ();
    204     static half		sNan ();
    205 
    206 
    207     //--------------------------------------
    208     // Access to the internal representation
    209     //--------------------------------------
    210 
    211     unsigned short	bits () const;
    212     void		setBits (unsigned short bits);
    213 
    214 
    215   public:
    216 
    217     union uif
    218     {
    219     unsigned int	i;
    220     float		f;
    221     };
    222 
    223   private:
    224 
    225     static short	convert (int i);
    226     static float	overflow ();
    227 
    228     unsigned short	_h;
    229 
    230     static HALF_EXPORT_CONST uif		_toFloat[1 << 16];
    231     static HALF_EXPORT_CONST unsigned short _eLut[1 << 9];
    232 };
    233 
    234 //-----------
    235 // Stream I/O
    236 //-----------
    237 
    238 HALF_EXPORT std::ostream &		operator << (std::ostream &os, half  h);
    239 HALF_EXPORT std::istream &		operator >> (std::istream &is, half &h);
    240 
    241 
    242 //----------
    243 // Debugging
    244 //----------
    245 
    246 HALF_EXPORT void			printBits   (std::ostream &os, half  h);
    247 HALF_EXPORT void			printBits   (std::ostream &os, float f);
    248 HALF_EXPORT void			printBits   (char  c[19], half  h);
    249 HALF_EXPORT void			printBits   (char  c[35], float f);
    250 
    251 
    252 //-------------------------------------------------------------------------
    253 // Limits
    254 //
    255 // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
    256 // constants, but at least one other compiler (gcc 2.96) produces incorrect
    257 // results if they are.
    258 //-------------------------------------------------------------------------
    259 
    260 #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
    261 
    262   #define HALF_MIN	5.96046448e-08f	// Smallest positive half
    263 
    264   #define HALF_NRM_MIN	6.10351562e-05f	// Smallest positive normalized half
    265 
    266   #define HALF_MAX	65504.0f	// Largest positive half
    267 
    268   #define HALF_EPSILON	0.00097656f	// Smallest positive e for which
    269                     // half (1.0 + e) != half (1.0)
    270 #else
    271 
    272   #define HALF_MIN	5.96046448e-08	// Smallest positive half
    273 
    274   #define HALF_NRM_MIN	6.10351562e-05	// Smallest positive normalized half
    275 
    276   #define HALF_MAX	65504.0		// Largest positive half
    277 
    278   #define HALF_EPSILON	0.00097656	// Smallest positive e for which
    279                     // half (1.0 + e) != half (1.0)
    280 #endif
    281 
    282 
    283 #define HALF_MANT_DIG	11		// Number of digits in mantissa
    284                     // (significand + hidden leading 1)
    285 
    286 #define HALF_DIG	2		// Number of base 10 digits that
    287                     // can be represented without change
    288 
    289 #define HALF_RADIX	2		// Base of the exponent
    290 
    291 #define HALF_MIN_EXP	-13		// Minimum negative integer such that
    292                     // HALF_RADIX raised to the power of
    293                     // one less than that integer is a
    294                     // normalized half
    295 
    296 #define HALF_MAX_EXP	16		// Maximum positive integer such that
    297                     // HALF_RADIX raised to the power of
    298                     // one less than that integer is a
    299                     // normalized half
    300 
    301 #define HALF_MIN_10_EXP	-4		// Minimum positive integer such
    302                     // that 10 raised to that power is
    303                     // a normalized half
    304 
    305 #define HALF_MAX_10_EXP	4		// Maximum positive integer such
    306                     // that 10 raised to that power is
    307                     // a normalized half
    308 
    309 
    310 //---------------------------------------------------------------------------
    311 //
    312 // Implementation --
    313 //
    314 // Representation of a float:
    315 //
    316 //	We assume that a float, f, is an IEEE 754 single-precision
    317 //	floating point number, whose bits are arranged as follows:
    318 //
    319 //	    31 (msb)
    320 //	    |
    321 //	    | 30     23
    322 //	    | |      |
    323 //	    | |      | 22                    0 (lsb)
    324 //	    | |      | |                     |
    325 //	    X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
    326 //
    327 //	    s e        m
    328 //
//	s is the sign-bit, e is the exponent and m is the significand.
    330 //
    331 //	If e is between 1 and 254, f is a normalized number:
    332 //
    333 //	            s    e-127
    334 //	    f = (-1)  * 2      * 1.m
    335 //
    336 //	If e is 0, and m is not zero, f is a denormalized number:
    337 //
    338 //	            s    -126
    339 //	    f = (-1)  * 2      * 0.m
    340 //
    341 //	If e and m are both zero, f is zero:
    342 //
    343 //	    f = 0.0
    344 //
    345 //	If e is 255, f is an "infinity" or "not a number" (NAN),
    346 //	depending on whether m is zero or not.
    347 //
    348 //	Examples:
    349 //
    350 //	    0 00000000 00000000000000000000000 = 0.0
    351 //	    0 01111110 00000000000000000000000 = 0.5
    352 //	    0 01111111 00000000000000000000000 = 1.0
    353 //	    0 10000000 00000000000000000000000 = 2.0
    354 //	    0 10000000 10000000000000000000000 = 3.0
    355 //	    1 10000101 11110000010000000000000 = -124.0625
    356 //	    0 11111111 00000000000000000000000 = +infinity
    357 //	    1 11111111 00000000000000000000000 = -infinity
    358 //	    0 11111111 10000000000000000000000 = NAN
    359 //	    1 11111111 11111111111111111111111 = NAN
    360 //
    361 // Representation of a half:
    362 //
    363 //	Here is the bit-layout for a half number, h:
    364 //
    365 //	    15 (msb)
    366 //	    |
    367 //	    | 14  10
    368 //	    | |   |
    369 //	    | |   | 9        0 (lsb)
    370 //	    | |   | |        |
    371 //	    X XXXXX XXXXXXXXXX
    372 //
    373 //	    s e     m
    374 //
//	s is the sign-bit, e is the exponent and m is the significand.
    376 //
    377 //	If e is between 1 and 30, h is a normalized number:
    378 //
    379 //	            s    e-15
    380 //	    h = (-1)  * 2     * 1.m
    381 //
    382 //	If e is 0, and m is not zero, h is a denormalized number:
    383 //
//	            s    -14
    385 //	    h = (-1)  * 2     * 0.m
    386 //
    387 //	If e and m are both zero, h is zero:
    388 //
    389 //	    h = 0.0
    390 //
    391 //	If e is 31, h is an "infinity" or "not a number" (NAN),
    392 //	depending on whether m is zero or not.
    393 //
    394 //	Examples:
    395 //
    396 //	    0 00000 0000000000 = 0.0
    397 //	    0 01110 0000000000 = 0.5
    398 //	    0 01111 0000000000 = 1.0
    399 //	    0 10000 0000000000 = 2.0
    400 //	    0 10000 1000000000 = 3.0
    401 //	    1 10101 1111000001 = -124.0625
    402 //	    0 11111 0000000000 = +infinity
    403 //	    1 11111 0000000000 = -infinity
    404 //	    0 11111 1000000000 = NAN
    405 //	    1 11111 1111111111 = NAN
    406 //
    407 // Conversion:
    408 //
    409 //	Converting from a float to a half requires some non-trivial bit
    410 //	manipulations.  In some cases, this makes conversion relatively
    411 //	slow, but the most common case is accelerated via table lookups.
    412 //
    413 //	Converting back from a half to a float is easier because we don't
    414 //	have to do any rounding.  In addition, there are only 65536
    415 //	different half numbers; we can convert each of those numbers once
    416 //	and store the results in a table.  Later, all conversions can be
    417 //	done using only simple table lookups.
    418 //
    419 //---------------------------------------------------------------------------
    420 
    421 
    422 //--------------------
    423 // Simple constructors
    424 //--------------------
    425 
    426 inline
    427 half::half ()
    428 {
    429     // no initialization
    430 }
    431 
    432 
    433 //----------------------------
    434 // Half-from-float constructor
    435 //----------------------------
    436 
    437 inline
    438 half::half (float f)
    439 {
    440     uif x;
    441 
    442     x.f = f;
    443 
    444     if (f == 0)
    445     {
    446     //
    447     // Common special case - zero.
    448     // Preserve the zero's sign bit.
    449     //
    450 
    451     _h = (x.i >> 16);
    452     }
    453     else
    454     {
    455     //
    456     // We extract the combined sign and exponent, e, from our
    457     // floating-point number, f.  Then we convert e to the sign
    458     // and exponent of the half number via a table lookup.
    459     //
    460     // For the most common case, where a normalized half is produced,
    461     // the table lookup returns a non-zero value; in this case, all
    462     // we have to do is round f's significand to 10 bits and combine
    463     // the result with e.
    464     //
    465     // For all other cases (overflow, zeroes, denormalized numbers
    466     // resulting from underflow, infinities and NANs), the table
    467     // lookup returns zero, and we call a longer, non-inline function
    468     // to do the float-to-half conversion.
    469     //
    470 
    471     register int e = (x.i >> 23) & 0x000001ff;
    472 
    473     e = _eLut[e];
    474 
    475     if (e)
    476     {
    477         //
    478         // Simple case - round the significand, m, to 10
    479         // bits and combine it with the sign and exponent.
    480         //
    481 
    482         register int m = x.i & 0x007fffff;
    483         _h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);
    484     }
    485     else
    486     {
    487         //
    488         // Difficult case - call a function.
    489         //
    490 
    491         _h = convert (x.i);
    492     }
    493     }
    494 }
    495 
    496 
    497 //------------------------------------------
    498 // Half-to-float conversion via table lookup
    499 //------------------------------------------
    500 
    501 inline
    502 half::operator float () const
    503 {
    504     return _toFloat[_h].f;
    505 }
    506 
    507 
    508 //-------------------------
    509 // Round to n-bit precision
    510 //-------------------------
    511 
    512 inline half
    513 half::round (unsigned int n) const
    514 {
    515     //
    516     // Parameter check.
    517     //
    518 
    519     if (n >= 10)
    520     return *this;
    521 
    522     //
    523     // Disassemble h into the sign, s,
    524     // and the combined exponent and significand, e.
    525     //
    526 
    527     unsigned short s = _h & 0x8000;
    528     unsigned short e = _h & 0x7fff;
    529 
    530     //
    531     // Round the exponent and significand to the nearest value
    532     // where ones occur only in the (10-n) most significant bits.
    533     // Note that the exponent adjusts automatically if rounding
    534     // up causes the significand to overflow.
    535     //
    536 
    537     e >>= 9 - n;
    538     e  += e & 1;
    539     e <<= 9 - n;
    540 
    541     //
    542     // Check for exponent overflow.
    543     //
    544 
    545     if (e >= 0x7c00)
    546     {
    547     //
    548     // Overflow occurred -- truncate instead of rounding.
    549     //
    550 
    551     e = _h;
    552     e >>= 10 - n;
    553     e <<= 10 - n;
    554     }
    555 
    556     //
    557     // Put the original sign bit back.
    558     //
    559 
    560     half h;
    561     h._h = s | e;
    562 
    563     return h;
    564 }
    565 
    566 
    567 //-----------------------
    568 // Other inline functions
    569 //-----------------------
    570 
    571 inline half
    572 half::operator - () const
    573 {
    574     half h;
    575     h._h = _h ^ 0x8000;
    576     return h;
    577 }
    578 
    579 
    580 inline half &
    581 half::operator = (half h)
    582 {
    583     _h = h._h;
    584     return *this;
    585 }
    586 
    587 
    588 inline half &
    589 half::operator = (float f)
    590 {
    591     *this = half (f);
    592     return *this;
    593 }
    594 
    595 
    596 inline half &
    597 half::operator += (half h)
    598 {
    599     *this = half (float (*this) + float (h));
    600     return *this;
    601 }
    602 
    603 
    604 inline half &
    605 half::operator += (float f)
    606 {
    607     *this = half (float (*this) + f);
    608     return *this;
    609 }
    610 
    611 
    612 inline half &
    613 half::operator -= (half h)
    614 {
    615     *this = half (float (*this) - float (h));
    616     return *this;
    617 }
    618 
    619 
    620 inline half &
    621 half::operator -= (float f)
    622 {
    623     *this = half (float (*this) - f);
    624     return *this;
    625 }
    626 
    627 
    628 inline half &
    629 half::operator *= (half h)
    630 {
    631     *this = half (float (*this) * float (h));
    632     return *this;
    633 }
    634 
    635 
    636 inline half &
    637 half::operator *= (float f)
    638 {
    639     *this = half (float (*this) * f);
    640     return *this;
    641 }
    642 
    643 
    644 inline half &
    645 half::operator /= (half h)
    646 {
    647     *this = half (float (*this) / float (h));
    648     return *this;
    649 }
    650 
    651 
    652 inline half &
    653 half::operator /= (float f)
    654 {
    655     *this = half (float (*this) / f);
    656     return *this;
    657 }
    658 
    659 
    660 inline bool
    661 half::isFinite () const
    662 {
    663     unsigned short e = (_h >> 10) & 0x001f;
    664     return e < 31;
    665 }
    666 
    667 
    668 inline bool
    669 half::isNormalized () const
    670 {
    671     unsigned short e = (_h >> 10) & 0x001f;
    672     return e > 0 && e < 31;
    673 }
    674 
    675 
    676 inline bool
    677 half::isDenormalized () const
    678 {
    679     unsigned short e = (_h >> 10) & 0x001f;
    680     unsigned short m =  _h & 0x3ff;
    681     return e == 0 && m != 0;
    682 }
    683 
    684 
    685 inline bool
    686 half::isZero () const
    687 {
    688     return (_h & 0x7fff) == 0;
    689 }
    690 
    691 
    692 inline bool
    693 half::isNan () const
    694 {
    695     unsigned short e = (_h >> 10) & 0x001f;
    696     unsigned short m =  _h & 0x3ff;
    697     return e == 31 && m != 0;
    698 }
    699 
    700 
    701 inline bool
    702 half::isInfinity () const
    703 {
    704     unsigned short e = (_h >> 10) & 0x001f;
    705     unsigned short m =  _h & 0x3ff;
    706     return e == 31 && m == 0;
    707 }
    708 
    709 
    710 inline bool
    711 half::isNegative () const
    712 {
    713     return (_h & 0x8000) != 0;
    714 }
    715 
    716 
    717 inline half
    718 half::posInf ()
    719 {
    720     half h;
    721     h._h = 0x7c00;
    722     return h;
    723 }
    724 
    725 
    726 inline half
    727 half::negInf ()
    728 {
    729     half h;
    730     h._h = 0xfc00;
    731     return h;
    732 }
    733 
    734 
    735 inline half
    736 half::qNan ()
    737 {
    738     half h;
    739     h._h = 0x7fff;
    740     return h;
    741 }
    742 
    743 
    744 inline half
    745 half::sNan ()
    746 {
    747     half h;
    748     h._h = 0x7dff;
    749     return h;
    750 }
    751 
    752 
    753 inline unsigned short
    754 half::bits () const
    755 {
    756     return _h;
    757 }
    758 
    759 
    760 inline void
    761 half::setBits (unsigned short bits)
    762 {
    763     _h = bits;
    764 }
    765 
    766 #endif
    767