Home | History | Annotate | Download | only in bits32
      1 /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
      2 
      3 /*
      4  * This version hacked for use with gcc -msoft-float by bjh21.
      5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
      6  *  itself).
      7  */
      8 
      9 /*
     10  * Things you may want to define:
     11  *
     12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
     13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
     14  *   properly renamed.
     15  */
     16 
     17 /*
     18  * This differs from the standard bits32/softfloat.c in that float64
     19  * is defined to be a 64-bit integer rather than a structure.  The
     20  * structure is float64s, with translation between the two going via
     21  * float64u.
     22  */
     23 
     24 /*
     25 ===============================================================================
     26 
     27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
     28 Arithmetic Package, Release 2a.
     29 
     30 Written by John R. Hauser.  This work was made possible in part by the
     31 International Computer Science Institute, located at Suite 600, 1947 Center
     32 Street, Berkeley, California 94704.  Funding was partially provided by the
     33 National Science Foundation under grant MIP-9311980.  The original version
     34 of this code was written as part of a project to build a fixed-point vector
     35 processor in collaboration with the University of California at Berkeley,
     36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     38 arithmetic/SoftFloat.html'.
     39 
     40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     45 
     46 Derivative works are acceptable, even for commercial purposes, so long as
     47 (1) they include prominent notice that the work is derivative, and (2) they
     48 include prominent notice akin to these four paragraphs for those parts of
     49 this code that are retained.
     50 
     51 ===============================================================================
     52 */
     53 
     54 #include <sys/cdefs.h>
     55 #if defined(LIBC_SCCS) && !defined(lint)
     56 __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
     57 #endif /* LIBC_SCCS and not lint */
     58 
     59 #ifdef SOFTFLOAT_FOR_GCC
     60 #include "softfloat-for-gcc.h"
     61 #endif
     62 
     63 #include "milieu.h"
     64 #include "softfloat.h"
     65 
     66 /*
     67  * Conversions between floats as stored in memory and floats as
     68  * SoftFloat uses them
     69  */
     70 #ifndef FLOAT64_DEMANGLE
     71 #define FLOAT64_DEMANGLE(a) (a)
     72 #endif
     73 #ifndef FLOAT64_MANGLE
     74 #define FLOAT64_MANGLE(a)   (a)
     75 #endif
     76 
     77 /*
     78 -------------------------------------------------------------------------------
     79 Floating-point rounding mode and exception flags.
     80 -------------------------------------------------------------------------------
     81 */
     82 #ifndef set_float_rounding_mode
     83 fp_rnd float_rounding_mode = float_round_nearest_even;
     84 fp_except float_exception_flags = 0;
     85 #endif
     86 #ifndef set_float_exception_inexact_flag
     87 #define set_float_exception_inexact_flag() \
     88     ((void)(float_exception_flags |= float_flag_inexact))
     89 #endif
     90 
     91 /*
     92 -------------------------------------------------------------------------------
     93 Primitive arithmetic functions, including multi-word arithmetic, and
     94 division and square root approximations.  (Can be specialized to target if
     95 desired.)
     96 -------------------------------------------------------------------------------
     97 */
     98 #include "softfloat-macros"
     99 
    100 /*
    101 -------------------------------------------------------------------------------
    102 Functions and definitions to determine:  (1) whether tininess for underflow
    103 is detected before or after rounding by default, (2) what (if anything)
    104 happens when exceptions are raised, (3) how signaling NaNs are distinguished
    105 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
    106 are propagated from function inputs to output.  These details are target-
    107 specific.
    108 -------------------------------------------------------------------------------
    109 */
    110 #include "softfloat-specialize"
    111 
    112 /*
    113 -------------------------------------------------------------------------------
    114 Returns the fraction bits of the single-precision floating-point value `a'.
    115 -------------------------------------------------------------------------------
    116 */
    117 INLINE bits32 extractFloat32Frac( float32 a )
    118 {
    119 
    120     return a & 0x007FFFFF;
    121 
    122 }
    123 
    124 /*
    125 -------------------------------------------------------------------------------
    126 Returns the exponent bits of the single-precision floating-point value `a'.
    127 -------------------------------------------------------------------------------
    128 */
    129 INLINE int16 extractFloat32Exp( float32 a )
    130 {
    131 
    132     return ( a>>23 ) & 0xFF;
    133 
    134 }
    135 
    136 /*
    137 -------------------------------------------------------------------------------
    138 Returns the sign bit of the single-precision floating-point value `a'.
    139 -------------------------------------------------------------------------------
    140 */
    141 INLINE flag extractFloat32Sign( float32 a )
    142 {
    143 
    144     return a>>31;
    145 
    146 }
    147 
    148 /*
    149 -------------------------------------------------------------------------------
    150 Normalizes the subnormal single-precision floating-point value represented
    151 by the denormalized significand `aSig'.  The normalized exponent and
    152 significand are stored at the locations pointed to by `zExpPtr' and
    153 `zSigPtr', respectively.
    154 -------------------------------------------------------------------------------
    155 */
    156 static void
    157  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
    158 {
    159     int8 shiftCount;
    160 
    161     shiftCount = countLeadingZeros32( aSig ) - 8;
    162     *zSigPtr = aSig<<shiftCount;
    163     *zExpPtr = 1 - shiftCount;
    164 
    165 }
    166 
    167 /*
    168 -------------------------------------------------------------------------------
    169 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
    170 single-precision floating-point value, returning the result.  After being
    171 shifted into the proper positions, the three fields are simply added
    172 together to form the result.  This means that any integer portion of `zSig'
    173 will be added into the exponent.  Since a properly normalized significand
    174 will have an integer portion equal to 1, the `zExp' input should be 1 less
    175 than the desired result exponent whenever `zSig' is a complete, normalized
    176 significand.
    177 -------------------------------------------------------------------------------
    178 */
    179 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
    180 {
    181 
    182     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
    183 
    184 }
    185 
    186 /*
    187 -------------------------------------------------------------------------------
    188 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    189 and significand `zSig', and returns the proper single-precision floating-
    190 point value corresponding to the abstract input.  Ordinarily, the abstract
    191 value is simply rounded and packed into the single-precision format, with
    192 the inexact exception raised if the abstract input cannot be represented
    193 exactly.  However, if the abstract value is too large, the overflow and
    194 inexact exceptions are raised and an infinity or maximal finite value is
    195 returned.  If the abstract value is too small, the input value is rounded to
    196 a subnormal number, and the underflow and inexact exceptions are raised if
    197 the abstract input cannot be represented exactly as a subnormal single-
    198 precision floating-point number.
    199     The input significand `zSig' has its binary point between bits 30
    200 and 29, which is 7 bits to the left of the usual location.  This shifted
    201 significand must be normalized or smaller.  If `zSig' is not normalized,
    202 `zExp' must be 0; in that case, the result returned is a subnormal number,
    203 and it must not require rounding.  In the usual case that `zSig' is
    204 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
    205 The handling of underflow and overflow follows the IEC/IEEE Standard for
    206 Binary Floating-Point Arithmetic.
    207 -------------------------------------------------------------------------------
    208 */
    209 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    210 {
    211     int8 roundingMode;
    212     flag roundNearestEven;
    213     int8 roundIncrement, roundBits;
    214     flag isTiny;
    215 
    216     roundingMode = float_rounding_mode;
    217     roundNearestEven = roundingMode == float_round_nearest_even;
    218     roundIncrement = 0x40;
    219     if ( ! roundNearestEven ) {
    220         if ( roundingMode == float_round_to_zero ) {
    221             roundIncrement = 0;
    222         }
    223         else {
    224             roundIncrement = 0x7F;
    225             if ( zSign ) {
    226                 if ( roundingMode == float_round_up ) roundIncrement = 0;
    227             }
    228             else {
    229                 if ( roundingMode == float_round_down ) roundIncrement = 0;
    230             }
    231         }
    232     }
    233     roundBits = zSig & 0x7F;
    234     if ( 0xFD <= (bits16) zExp ) {
    235         if (    ( 0xFD < zExp )
    236              || (    ( zExp == 0xFD )
    237                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
    238            ) {
    239             float_raise( float_flag_overflow | float_flag_inexact );
    240             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
    241         }
    242         if ( zExp < 0 ) {
    243             isTiny =
    244                    ( float_detect_tininess == float_tininess_before_rounding )
    245                 || ( zExp < -1 )
    246                 || ( zSig + roundIncrement < (uint32)0x80000000 );
    247             shift32RightJamming( zSig, - zExp, &zSig );
    248             zExp = 0;
    249             roundBits = zSig & 0x7F;
    250             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
    251         }
    252     }
    253     if ( roundBits ) set_float_exception_inexact_flag();
    254     zSig = ( zSig + roundIncrement )>>7;
    255     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    256     if ( zSig == 0 ) zExp = 0;
    257     return packFloat32( zSign, zExp, zSig );
    258 
    259 }
    260 
    261 /*
    262 -------------------------------------------------------------------------------
    263 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    264 and significand `zSig', and returns the proper single-precision floating-
    265 point value corresponding to the abstract input.  This routine is just like
    266 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
    267 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
    268 floating-point exponent.
    269 -------------------------------------------------------------------------------
    270 */
    271 static float32
    272  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
    273 {
    274     int8 shiftCount;
    275 
    276     shiftCount = countLeadingZeros32( zSig ) - 1;
    277     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
    278 
    279 }
    280 
    281 /*
    282 -------------------------------------------------------------------------------
    283 Returns the least-significant 32 fraction bits of the double-precision
    284 floating-point value `a'.
    285 -------------------------------------------------------------------------------
    286 */
    287 INLINE bits32 extractFloat64Frac1( float64 a )
    288 {
    289 
    290     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
    291 
    292 }
    293 
    294 /*
    295 -------------------------------------------------------------------------------
    296 Returns the most-significant 20 fraction bits of the double-precision
    297 floating-point value `a'.
    298 -------------------------------------------------------------------------------
    299 */
    300 INLINE bits32 extractFloat64Frac0( float64 a )
    301 {
    302 
    303     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
    304 
    305 }
    306 
    307 /*
    308 -------------------------------------------------------------------------------
    309 Returns the exponent bits of the double-precision floating-point value `a'.
    310 -------------------------------------------------------------------------------
    311 */
    312 INLINE int16 extractFloat64Exp( float64 a )
    313 {
    314 
    315     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
    316 
    317 }
    318 
    319 /*
    320 -------------------------------------------------------------------------------
    321 Returns the sign bit of the double-precision floating-point value `a'.
    322 -------------------------------------------------------------------------------
    323 */
    324 INLINE flag extractFloat64Sign( float64 a )
    325 {
    326 
    327     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
    328 
    329 }
    330 
    331 /*
    332 -------------------------------------------------------------------------------
    333 Normalizes the subnormal double-precision floating-point value represented
    334 by the denormalized significand formed by the concatenation of `aSig0' and
    335 `aSig1'.  The normalized exponent is stored at the location pointed to by
    336 `zExpPtr'.  The most significant 21 bits of the normalized significand are
    337 stored at the location pointed to by `zSig0Ptr', and the least significant
    338 32 bits of the normalized significand are stored at the location pointed to
    339 by `zSig1Ptr'.
    340 -------------------------------------------------------------------------------
    341 */
    342 static void
    343  normalizeFloat64Subnormal(
    344      bits32 aSig0,
    345      bits32 aSig1,
    346      int16 *zExpPtr,
    347      bits32 *zSig0Ptr,
    348      bits32 *zSig1Ptr
    349  )
    350 {
    351     int8 shiftCount;
    352 
    353     if ( aSig0 == 0 ) {
    354         shiftCount = countLeadingZeros32( aSig1 ) - 11;
    355         if ( shiftCount < 0 ) {
    356             *zSig0Ptr = aSig1>>( - shiftCount );
    357             *zSig1Ptr = aSig1<<( shiftCount & 31 );
    358         }
    359         else {
    360             *zSig0Ptr = aSig1<<shiftCount;
    361             *zSig1Ptr = 0;
    362         }
    363         *zExpPtr = - shiftCount - 31;
    364     }
    365     else {
    366         shiftCount = countLeadingZeros32( aSig0 ) - 11;
    367         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
    368         *zExpPtr = 1 - shiftCount;
    369     }
    370 
    371 }
    372 
    373 /*
    374 -------------------------------------------------------------------------------
    375 Packs the sign `zSign', the exponent `zExp', and the significand formed by
    376 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
    377 point value, returning the result.  After being shifted into the proper
    378 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
    379 together to form the most significant 32 bits of the result.  This means
    380 that any integer portion of `zSig0' will be added into the exponent.  Since
    381 a properly normalized significand will have an integer portion equal to 1,
    382 the `zExp' input should be 1 less than the desired result exponent whenever
    383 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
    384 -------------------------------------------------------------------------------
    385 */
    386 INLINE float64
    387  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
    388 {
    389 
    390     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
    391                            ( ( (bits64) zExp )<<52 ) +
    392                            ( ( (bits64) zSig0 )<<32 ) + zSig1 );
    393 
    394 
    395 }
    396 
    397 /*
    398 -------------------------------------------------------------------------------
    399 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    400 and extended significand formed by the concatenation of `zSig0', `zSig1',
    401 and `zSig2', and returns the proper double-precision floating-point value
    402 corresponding to the abstract input.  Ordinarily, the abstract value is
    403 simply rounded and packed into the double-precision format, with the inexact
    404 exception raised if the abstract input cannot be represented exactly.
    405 However, if the abstract value is too large, the overflow and inexact
    406 exceptions are raised and an infinity or maximal finite value is returned.
    407 If the abstract value is too small, the input value is rounded to a
    408 subnormal number, and the underflow and inexact exceptions are raised if the
    409 abstract input cannot be represented exactly as a subnormal double-precision
    410 floating-point number.
    411     The input significand must be normalized or smaller.  If the input
    412 significand is not normalized, `zExp' must be 0; in that case, the result
    413 returned is a subnormal number, and it must not require rounding.  In the
    414 usual case that the input significand is normalized, `zExp' must be 1 less
    415 than the ``true'' floating-point exponent.  The handling of underflow and
    416 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    417 -------------------------------------------------------------------------------
    418 */
    419 static float64
    420  roundAndPackFloat64(
    421      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
    422 {
    423     int8 roundingMode;
    424     flag roundNearestEven, increment, isTiny;
    425 
    426     roundingMode = float_rounding_mode;
    427     roundNearestEven = ( roundingMode == float_round_nearest_even );
    428     increment = ( (sbits32) zSig2 < 0 );
    429     if ( ! roundNearestEven ) {
    430         if ( roundingMode == float_round_to_zero ) {
    431             increment = 0;
    432         }
    433         else {
    434             if ( zSign ) {
    435                 increment = ( roundingMode == float_round_down ) && zSig2;
    436             }
    437             else {
    438                 increment = ( roundingMode == float_round_up ) && zSig2;
    439             }
    440         }
    441     }
    442     if ( 0x7FD <= (bits16) zExp ) {
    443         if (    ( 0x7FD < zExp )
    444              || (    ( zExp == 0x7FD )
    445                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
    446                   && increment
    447                 )
    448            ) {
    449             float_raise( float_flag_overflow | float_flag_inexact );
    450             if (    ( roundingMode == float_round_to_zero )
    451                  || ( zSign && ( roundingMode == float_round_up ) )
    452                  || ( ! zSign && ( roundingMode == float_round_down ) )
    453                ) {
    454                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
    455             }
    456             return packFloat64( zSign, 0x7FF, 0, 0 );
    457         }
    458         if ( zExp < 0 ) {
    459             isTiny =
    460                    ( float_detect_tininess == float_tininess_before_rounding )
    461                 || ( zExp < -1 )
    462                 || ! increment
    463                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
    464             shift64ExtraRightJamming(
    465                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
    466             zExp = 0;
    467             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
    468             if ( roundNearestEven ) {
    469                 increment = ( (sbits32) zSig2 < 0 );
    470             }
    471             else {
    472                 if ( zSign ) {
    473                     increment = ( roundingMode == float_round_down ) && zSig2;
    474                 }
    475                 else {
    476                     increment = ( roundingMode == float_round_up ) && zSig2;
    477                 }
    478             }
    479         }
    480     }
    481     if ( zSig2 ) set_float_exception_inexact_flag();
    482     if ( increment ) {
    483         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
    484         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
    485     }
    486     else {
    487         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    488     }
    489     return packFloat64( zSign, zExp, zSig0, zSig1 );
    490 
    491 }
    492 
    493 /*
    494 -------------------------------------------------------------------------------
    495 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
    496 and significand formed by the concatenation of `zSig0' and `zSig1', and
    497 returns the proper double-precision floating-point value corresponding
    498 to the abstract input.  This routine is just like `roundAndPackFloat64'
    499 except that the input significand has fewer bits and does not have to be
    500 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
    501 point exponent.
    502 -------------------------------------------------------------------------------
    503 */
    504 static float64
    505  normalizeRoundAndPackFloat64(
    506      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
    507 {
    508     int8 shiftCount;
    509     bits32 zSig2;
    510 
    511     if ( zSig0 == 0 ) {
    512         zSig0 = zSig1;
    513         zSig1 = 0;
    514         zExp -= 32;
    515     }
    516     shiftCount = countLeadingZeros32( zSig0 ) - 11;
    517     if ( 0 <= shiftCount ) {
    518         zSig2 = 0;
    519         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    520     }
    521     else {
    522         shift64ExtraRightJamming(
    523             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    524     }
    525     zExp -= shiftCount;
    526     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
    527 
    528 }
    529 
    530 /*
    531 -------------------------------------------------------------------------------
    532 Returns the result of converting the 32-bit two's complement integer `a' to
    533 the single-precision floating-point format.  The conversion is performed
    534 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    535 -------------------------------------------------------------------------------
    536 */
    537 float32 int32_to_float32( int32 a )
    538 {
    539     flag zSign;
    540 
    541     if ( a == 0 ) return 0;
    542     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
    543     zSign = ( a < 0 );
    544     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
    545 
    546 }
    547 
    548 /*
    549 -------------------------------------------------------------------------------
    550 Returns the result of converting the 32-bit two's complement integer `a' to
    551 the double-precision floating-point format.  The conversion is performed
    552 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    553 -------------------------------------------------------------------------------
    554 */
    555 float64 int32_to_float64( int32 a )
    556 {
    557     flag zSign;
    558     bits32 absA;
    559     int8 shiftCount;
    560     bits32 zSig0, zSig1;
    561 
    562     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
    563     zSign = ( a < 0 );
    564     absA = zSign ? - a : a;
    565     shiftCount = countLeadingZeros32( absA ) - 11;
    566     if ( 0 <= shiftCount ) {
    567         zSig0 = absA<<shiftCount;
    568         zSig1 = 0;
    569     }
    570     else {
    571         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
    572     }
    573     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
    574 
    575 }
    576 
    577 #ifndef SOFTFLOAT_FOR_GCC
    578 /*
    579 -------------------------------------------------------------------------------
    580 Returns the result of converting the single-precision floating-point value
    581 `a' to the 32-bit two's complement integer format.  The conversion is
    582 performed according to the IEC/IEEE Standard for Binary Floating-Point
    583 Arithmetic---which means in particular that the conversion is rounded
    584 according to the current rounding mode.  If `a' is a NaN, the largest
    585 positive integer is returned.  Otherwise, if the conversion overflows, the
    586 largest integer with the same sign as `a' is returned.
    587 -------------------------------------------------------------------------------
    588 */
    589 int32 float32_to_int32( float32 a )
    590 {
    591     flag aSign;
    592     int16 aExp, shiftCount;
    593     bits32 aSig, aSigExtra;
    594     int32 z;
    595     int8 roundingMode;
    596 
    597     aSig = extractFloat32Frac( a );
    598     aExp = extractFloat32Exp( a );
    599     aSign = extractFloat32Sign( a );
    600     shiftCount = aExp - 0x96;
    601     if ( 0 <= shiftCount ) {
    602         if ( 0x9E <= aExp ) {
    603             if ( a != 0xCF000000 ) {
    604                 float_raise( float_flag_invalid );
    605                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
    606                     return 0x7FFFFFFF;
    607                 }
    608             }
    609             return (sbits32) 0x80000000;
    610         }
    611         z = ( aSig | 0x00800000 )<<shiftCount;
    612         if ( aSign ) z = - z;
    613     }
    614     else {
    615         if ( aExp < 0x7E ) {
    616             aSigExtra = aExp | aSig;
    617             z = 0;
    618         }
    619         else {
    620             aSig |= 0x00800000;
    621             aSigExtra = aSig<<( shiftCount & 31 );
    622             z = aSig>>( - shiftCount );
    623         }
    624         if ( aSigExtra ) set_float_exception_inexact_flag();
    625         roundingMode = float_rounding_mode;
    626         if ( roundingMode == float_round_nearest_even ) {
    627             if ( (sbits32) aSigExtra < 0 ) {
    628                 ++z;
    629                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
    630             }
    631             if ( aSign ) z = - z;
    632         }
    633         else {
    634             aSigExtra = ( aSigExtra != 0 );
    635             if ( aSign ) {
    636                 z += ( roundingMode == float_round_down ) & aSigExtra;
    637                 z = - z;
    638             }
    639             else {
    640                 z += ( roundingMode == float_round_up ) & aSigExtra;
    641             }
    642         }
    643     }
    644     return z;
    645 
    646 }
    647 #endif
    648 
    649 /*
    650 -------------------------------------------------------------------------------
    651 Returns the result of converting the single-precision floating-point value
    652 `a' to the 32-bit two's complement integer format.  The conversion is
    653 performed according to the IEC/IEEE Standard for Binary Floating-Point
    654 Arithmetic, except that the conversion is always rounded toward zero.
    655 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
    656 the conversion overflows, the largest integer with the same sign as `a' is
    657 returned.
    658 -------------------------------------------------------------------------------
    659 */
    660 int32 float32_to_int32_round_to_zero( float32 a )
    661 {
    662     flag aSign;
    663     int16 aExp, shiftCount;
    664     bits32 aSig;
    665     int32 z;
    666 
    667     aSig = extractFloat32Frac( a );
    668     aExp = extractFloat32Exp( a );
    669     aSign = extractFloat32Sign( a );
    670     shiftCount = aExp - 0x9E;
    671     if ( 0 <= shiftCount ) {
    672         if ( a != 0xCF000000 ) {
    673             float_raise( float_flag_invalid );
    674             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
    675         }
    676         return (sbits32) 0x80000000;
    677     }
    678     else if ( aExp <= 0x7E ) {
    679         if ( aExp | aSig ) set_float_exception_inexact_flag();
    680         return 0;
    681     }
    682     aSig = ( aSig | 0x00800000 )<<8;
    683     z = aSig>>( - shiftCount );
    684     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
    685         set_float_exception_inexact_flag();
    686     }
    687     if ( aSign ) z = - z;
    688     return z;
    689 
    690 }
    691 
    692 /*
    693 -------------------------------------------------------------------------------
    694 Returns the result of converting the single-precision floating-point value
    695 `a' to the double-precision floating-point format.  The conversion is
    696 performed according to the IEC/IEEE Standard for Binary Floating-Point
    697 Arithmetic.
    698 -------------------------------------------------------------------------------
    699 */
    700 float64 float32_to_float64( float32 a )
    701 {
    702     flag aSign;
    703     int16 aExp;
    704     bits32 aSig, zSig0, zSig1;
    705 
    706     aSig = extractFloat32Frac( a );
    707     aExp = extractFloat32Exp( a );
    708     aSign = extractFloat32Sign( a );
    709     if ( aExp == 0xFF ) {
    710         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
    711         return packFloat64( aSign, 0x7FF, 0, 0 );
    712     }
    713     if ( aExp == 0 ) {
    714         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
    715         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    716         --aExp;
    717     }
    718     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
    719     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
    720 
    721 }
    722 
    723 #ifndef SOFTFLOAT_FOR_GCC
    724 /*
    725 -------------------------------------------------------------------------------
    726 Rounds the single-precision floating-point value `a' to an integer,
    727 and returns the result as a single-precision floating-point value.  The
    728 operation is performed according to the IEC/IEEE Standard for Binary
    729 Floating-Point Arithmetic.
    730 -------------------------------------------------------------------------------
    731 */
    732 float32 float32_round_to_int( float32 a )
    733 {
    734     flag aSign;
    735     int16 aExp;
    736     bits32 lastBitMask, roundBitsMask;
    737     int8 roundingMode;
    738     float32 z;
    739 
    740     aExp = extractFloat32Exp( a );
    741     if ( 0x96 <= aExp ) {
    742         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
    743             return propagateFloat32NaN( a, a );
    744         }
    745         return a;
    746     }
    747     if ( aExp <= 0x7E ) {
    748         if ( (bits32) ( a<<1 ) == 0 ) return a;
    749         set_float_exception_inexact_flag();
    750         aSign = extractFloat32Sign( a );
    751         switch ( float_rounding_mode ) {
    752          case float_round_nearest_even:
    753             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
    754                 return packFloat32( aSign, 0x7F, 0 );
    755             }
    756             break;
    757          case float_round_to_zero:
    758             break;
    759          case float_round_down:
    760             return aSign ? 0xBF800000 : 0;
    761          case float_round_up:
    762             return aSign ? 0x80000000 : 0x3F800000;
    763         }
    764         return packFloat32( aSign, 0, 0 );
    765     }
    766     lastBitMask = 1;
    767     lastBitMask <<= 0x96 - aExp;
    768     roundBitsMask = lastBitMask - 1;
    769     z = a;
    770     roundingMode = float_rounding_mode;
    771     if ( roundingMode == float_round_nearest_even ) {
    772         z += lastBitMask>>1;
    773         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
    774     }
    775     else if ( roundingMode != float_round_to_zero ) {
    776         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
    777             z += roundBitsMask;
    778         }
    779     }
    780     z &= ~ roundBitsMask;
    781     if ( z != a ) set_float_exception_inexact_flag();
    782     return z;
    783 
    784 }
    785 #endif
    786 
    787 /*
    788 -------------------------------------------------------------------------------
    789 Returns the result of adding the absolute values of the single-precision
    790 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
    791 before being returned.  `zSign' is ignored if the result is a NaN.
    792 The addition is performed according to the IEC/IEEE Standard for Binary
    793 Floating-Point Arithmetic.
    794 -------------------------------------------------------------------------------
    795 */
    796 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
    797 {
    798     int16 aExp, bExp, zExp;
    799     bits32 aSig, bSig, zSig;
    800     int16 expDiff;
    801 
    802     aSig = extractFloat32Frac( a );
    803     aExp = extractFloat32Exp( a );
    804     bSig = extractFloat32Frac( b );
    805     bExp = extractFloat32Exp( b );
    806     expDiff = aExp - bExp;
    807     aSig <<= 6;
    808     bSig <<= 6;
    809     if ( 0 < expDiff ) {
    810         if ( aExp == 0xFF ) {
    811             if ( aSig ) return propagateFloat32NaN( a, b );
    812             return a;
    813         }
    814         if ( bExp == 0 ) {
    815             --expDiff;
    816         }
    817         else {
    818             bSig |= 0x20000000;
    819         }
    820         shift32RightJamming( bSig, expDiff, &bSig );
    821         zExp = aExp;
    822     }
    823     else if ( expDiff < 0 ) {
    824         if ( bExp == 0xFF ) {
    825             if ( bSig ) return propagateFloat32NaN( a, b );
    826             return packFloat32( zSign, 0xFF, 0 );
    827         }
    828         if ( aExp == 0 ) {
    829             ++expDiff;
    830         }
    831         else {
    832             aSig |= 0x20000000;
    833         }
    834         shift32RightJamming( aSig, - expDiff, &aSig );
    835         zExp = bExp;
    836     }
    837     else {
    838         if ( aExp == 0xFF ) {
    839             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
    840             return a;
    841         }
    842         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
    843         zSig = 0x40000000 + aSig + bSig;
    844         zExp = aExp;
    845         goto roundAndPack;
    846     }
    847     aSig |= 0x20000000;
    848     zSig = ( aSig + bSig )<<1;
    849     --zExp;
    850     if ( (sbits32) zSig < 0 ) {
    851         zSig = aSig + bSig;
    852         ++zExp;
    853     }
    854  roundAndPack:
    855     return roundAndPackFloat32( zSign, zExp, zSig );
    856 
    857 }
    858 
    859 /*
    860 -------------------------------------------------------------------------------
    861 Returns the result of subtracting the absolute values of the single-
    862 precision floating-point values `a' and `b'.  If `zSign' is 1, the
    863 difference is negated before being returned.  `zSign' is ignored if the
    864 result is a NaN.  The subtraction is performed according to the IEC/IEEE
    865 Standard for Binary Floating-Point Arithmetic.
    866 -------------------------------------------------------------------------------
    867 */
    868 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
    869 {
    870     int16 aExp, bExp, zExp;
    871     bits32 aSig, bSig, zSig;
    872     int16 expDiff;
    873 
    874     aSig = extractFloat32Frac( a );
    875     aExp = extractFloat32Exp( a );
    876     bSig = extractFloat32Frac( b );
    877     bExp = extractFloat32Exp( b );
    878     expDiff = aExp - bExp;
    879     aSig <<= 7;
    880     bSig <<= 7;
    881     if ( 0 < expDiff ) goto aExpBigger;
    882     if ( expDiff < 0 ) goto bExpBigger;
    883     if ( aExp == 0xFF ) {
    884         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
    885         float_raise( float_flag_invalid );
    886         return float32_default_nan;
    887     }
    888     if ( aExp == 0 ) {
    889         aExp = 1;
    890         bExp = 1;
    891     }
    892     if ( bSig < aSig ) goto aBigger;
    893     if ( aSig < bSig ) goto bBigger;
    894     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
    895  bExpBigger:
    896     if ( bExp == 0xFF ) {
    897         if ( bSig ) return propagateFloat32NaN( a, b );
    898         return packFloat32( zSign ^ 1, 0xFF, 0 );
    899     }
    900     if ( aExp == 0 ) {
    901         ++expDiff;
    902     }
    903     else {
    904         aSig |= 0x40000000;
    905     }
    906     shift32RightJamming( aSig, - expDiff, &aSig );
    907     bSig |= 0x40000000;
    908  bBigger:
    909     zSig = bSig - aSig;
    910     zExp = bExp;
    911     zSign ^= 1;
    912     goto normalizeRoundAndPack;
    913  aExpBigger:
    914     if ( aExp == 0xFF ) {
    915         if ( aSig ) return propagateFloat32NaN( a, b );
    916         return a;
    917     }
    918     if ( bExp == 0 ) {
    919         --expDiff;
    920     }
    921     else {
    922         bSig |= 0x40000000;
    923     }
    924     shift32RightJamming( bSig, expDiff, &bSig );
    925     aSig |= 0x40000000;
    926  aBigger:
    927     zSig = aSig - bSig;
    928     zExp = aExp;
    929  normalizeRoundAndPack:
    930     --zExp;
    931     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
    932 
    933 }
    934 
    935 /*
    936 -------------------------------------------------------------------------------
    937 Returns the result of adding the single-precision floating-point values `a'
    938 and `b'.  The operation is performed according to the IEC/IEEE Standard for
    939 Binary Floating-Point Arithmetic.
    940 -------------------------------------------------------------------------------
    941 */
    942 float32 float32_add( float32 a, float32 b )
    943 {
    944     flag aSign, bSign;
    945 
    946     aSign = extractFloat32Sign( a );
    947     bSign = extractFloat32Sign( b );
    948     if ( aSign == bSign ) {
    949         return addFloat32Sigs( a, b, aSign );
    950     }
    951     else {
    952         return subFloat32Sigs( a, b, aSign );
    953     }
    954 
    955 }
    956 
    957 /*
    958 -------------------------------------------------------------------------------
    959 Returns the result of subtracting the single-precision floating-point values
    960 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
    961 for Binary Floating-Point Arithmetic.
    962 -------------------------------------------------------------------------------
    963 */
    964 float32 float32_sub( float32 a, float32 b )
    965 {
    966     flag aSign, bSign;
    967 
    968     aSign = extractFloat32Sign( a );
    969     bSign = extractFloat32Sign( b );
    970     if ( aSign == bSign ) {
    971         return subFloat32Sigs( a, b, aSign );
    972     }
    973     else {
    974         return addFloat32Sigs( a, b, aSign );
    975     }
    976 
    977 }
    978 
    979 /*
    980 -------------------------------------------------------------------------------
    981 Returns the result of multiplying the single-precision floating-point values
    982 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
    983 for Binary Floating-Point Arithmetic.
    984 -------------------------------------------------------------------------------
    985 */
    986 float32 float32_mul( float32 a, float32 b )
    987 {
    988     flag aSign, bSign, zSign;
    989     int16 aExp, bExp, zExp;
    990     bits32 aSig, bSig, zSig0, zSig1;
    991 
    992     aSig = extractFloat32Frac( a );
    993     aExp = extractFloat32Exp( a );
    994     aSign = extractFloat32Sign( a );
    995     bSig = extractFloat32Frac( b );
    996     bExp = extractFloat32Exp( b );
    997     bSign = extractFloat32Sign( b );
    998     zSign = aSign ^ bSign;
    999     if ( aExp == 0xFF ) {
   1000         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1001             return propagateFloat32NaN( a, b );
   1002         }
   1003         if ( ( bExp | bSig ) == 0 ) {
   1004             float_raise( float_flag_invalid );
   1005             return float32_default_nan;
   1006         }
   1007         return packFloat32( zSign, 0xFF, 0 );
   1008     }
   1009     if ( bExp == 0xFF ) {
   1010         if ( bSig ) return propagateFloat32NaN( a, b );
   1011         if ( ( aExp | aSig ) == 0 ) {
   1012             float_raise( float_flag_invalid );
   1013             return float32_default_nan;
   1014         }
   1015         return packFloat32( zSign, 0xFF, 0 );
   1016     }
   1017     if ( aExp == 0 ) {
   1018         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1019         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1020     }
   1021     if ( bExp == 0 ) {
   1022         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
   1023         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1024     }
   1025     zExp = aExp + bExp - 0x7F;
   1026     aSig = ( aSig | 0x00800000 )<<7;
   1027     bSig = ( bSig | 0x00800000 )<<8;
   1028     mul32To64( aSig, bSig, &zSig0, &zSig1 );
   1029     zSig0 |= ( zSig1 != 0 );
   1030     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
   1031         zSig0 <<= 1;
   1032         --zExp;
   1033     }
   1034     return roundAndPackFloat32( zSign, zExp, zSig0 );
   1035 
   1036 }
   1037 
   1038 /*
   1039 -------------------------------------------------------------------------------
   1040 Returns the result of dividing the single-precision floating-point value `a'
   1041 by the corresponding value `b'.  The operation is performed according to the
   1042 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1043 -------------------------------------------------------------------------------
   1044 */
   1045 float32 float32_div( float32 a, float32 b )
   1046 {
   1047     flag aSign, bSign, zSign;
   1048     int16 aExp, bExp, zExp;
   1049     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
   1050 
   1051     aSig = extractFloat32Frac( a );
   1052     aExp = extractFloat32Exp( a );
   1053     aSign = extractFloat32Sign( a );
   1054     bSig = extractFloat32Frac( b );
   1055     bExp = extractFloat32Exp( b );
   1056     bSign = extractFloat32Sign( b );
   1057     zSign = aSign ^ bSign;
   1058     if ( aExp == 0xFF ) {
   1059         if ( aSig ) return propagateFloat32NaN( a, b );
   1060         if ( bExp == 0xFF ) {
   1061             if ( bSig ) return propagateFloat32NaN( a, b );
   1062             float_raise( float_flag_invalid );
   1063             return float32_default_nan;
   1064         }
   1065         return packFloat32( zSign, 0xFF, 0 );
   1066     }
   1067     if ( bExp == 0xFF ) {
   1068         if ( bSig ) return propagateFloat32NaN( a, b );
   1069         return packFloat32( zSign, 0, 0 );
   1070     }
   1071     if ( bExp == 0 ) {
   1072         if ( bSig == 0 ) {
   1073             if ( ( aExp | aSig ) == 0 ) {
   1074                 float_raise( float_flag_invalid );
   1075                 return float32_default_nan;
   1076             }
   1077             float_raise( float_flag_divbyzero );
   1078             return packFloat32( zSign, 0xFF, 0 );
   1079         }
   1080         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1081     }
   1082     if ( aExp == 0 ) {
   1083         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
   1084         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1085     }
   1086     zExp = aExp - bExp + 0x7D;
   1087     aSig = ( aSig | 0x00800000 )<<7;
   1088     bSig = ( bSig | 0x00800000 )<<8;
   1089     if ( bSig <= ( aSig + aSig ) ) {
   1090         aSig >>= 1;
   1091         ++zExp;
   1092     }
   1093     zSig = estimateDiv64To32( aSig, 0, bSig );
   1094     if ( ( zSig & 0x3F ) <= 2 ) {
   1095         mul32To64( bSig, zSig, &term0, &term1 );
   1096         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
   1097         while ( (sbits32) rem0 < 0 ) {
   1098             --zSig;
   1099             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
   1100         }
   1101         zSig |= ( rem1 != 0 );
   1102     }
   1103     return roundAndPackFloat32( zSign, zExp, zSig );
   1104 
   1105 }
   1106 
   1107 #ifndef SOFTFLOAT_FOR_GCC
   1108 /*
   1109 -------------------------------------------------------------------------------
   1110 Returns the remainder of the single-precision floating-point value `a'
   1111 with respect to the corresponding value `b'.  The operation is performed
   1112 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1113 -------------------------------------------------------------------------------
   1114 */
   1115 float32 float32_rem( float32 a, float32 b )
   1116 {
   1117     flag aSign, bSign, zSign;
   1118     int16 aExp, bExp, expDiff;
   1119     bits32 aSig, bSig, q, allZero, alternateASig;
   1120     sbits32 sigMean;
   1121 
   1122     aSig = extractFloat32Frac( a );
   1123     aExp = extractFloat32Exp( a );
   1124     aSign = extractFloat32Sign( a );
   1125     bSig = extractFloat32Frac( b );
   1126     bExp = extractFloat32Exp( b );
   1127     bSign = extractFloat32Sign( b );
   1128     if ( aExp == 0xFF ) {
   1129         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
   1130             return propagateFloat32NaN( a, b );
   1131         }
   1132         float_raise( float_flag_invalid );
   1133         return float32_default_nan;
   1134     }
   1135     if ( bExp == 0xFF ) {
   1136         if ( bSig ) return propagateFloat32NaN( a, b );
   1137         return a;
   1138     }
   1139     if ( bExp == 0 ) {
   1140         if ( bSig == 0 ) {
   1141             float_raise( float_flag_invalid );
   1142             return float32_default_nan;
   1143         }
   1144         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
   1145     }
   1146     if ( aExp == 0 ) {
   1147         if ( aSig == 0 ) return a;
   1148         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1149     }
   1150     expDiff = aExp - bExp;
   1151     aSig = ( aSig | 0x00800000 )<<8;
   1152     bSig = ( bSig | 0x00800000 )<<8;
   1153     if ( expDiff < 0 ) {
   1154         if ( expDiff < -1 ) return a;
   1155         aSig >>= 1;
   1156     }
   1157     q = ( bSig <= aSig );
   1158     if ( q ) aSig -= bSig;
   1159     expDiff -= 32;
   1160     while ( 0 < expDiff ) {
   1161         q = estimateDiv64To32( aSig, 0, bSig );
   1162         q = ( 2 < q ) ? q - 2 : 0;
   1163         aSig = - ( ( bSig>>2 ) * q );
   1164         expDiff -= 30;
   1165     }
   1166     expDiff += 32;
   1167     if ( 0 < expDiff ) {
   1168         q = estimateDiv64To32( aSig, 0, bSig );
   1169         q = ( 2 < q ) ? q - 2 : 0;
   1170         q >>= 32 - expDiff;
   1171         bSig >>= 2;
   1172         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
   1173     }
   1174     else {
   1175         aSig >>= 2;
   1176         bSig >>= 2;
   1177     }
   1178     do {
   1179         alternateASig = aSig;
   1180         ++q;
   1181         aSig -= bSig;
   1182     } while ( 0 <= (sbits32) aSig );
   1183     sigMean = aSig + alternateASig;
   1184     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
   1185         aSig = alternateASig;
   1186     }
   1187     zSign = ( (sbits32) aSig < 0 );
   1188     if ( zSign ) aSig = - aSig;
   1189     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
   1190 
   1191 }
   1192 #endif
   1193 
   1194 #ifndef SOFTFLOAT_FOR_GCC
   1195 /*
   1196 -------------------------------------------------------------------------------
   1197 Returns the square root of the single-precision floating-point value `a'.
   1198 The operation is performed according to the IEC/IEEE Standard for Binary
   1199 Floating-Point Arithmetic.
   1200 -------------------------------------------------------------------------------
   1201 */
   1202 float32 float32_sqrt( float32 a )
   1203 {
   1204     flag aSign;
   1205     int16 aExp, zExp;
   1206     bits32 aSig, zSig, rem0, rem1, term0, term1;
   1207 
   1208     aSig = extractFloat32Frac( a );
   1209     aExp = extractFloat32Exp( a );
   1210     aSign = extractFloat32Sign( a );
   1211     if ( aExp == 0xFF ) {
   1212         if ( aSig ) return propagateFloat32NaN( a, 0 );
   1213         if ( ! aSign ) return a;
   1214         float_raise( float_flag_invalid );
   1215         return float32_default_nan;
   1216     }
   1217     if ( aSign ) {
   1218         if ( ( aExp | aSig ) == 0 ) return a;
   1219         float_raise( float_flag_invalid );
   1220         return float32_default_nan;
   1221     }
   1222     if ( aExp == 0 ) {
   1223         if ( aSig == 0 ) return 0;
   1224         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
   1225     }
   1226     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
   1227     aSig = ( aSig | 0x00800000 )<<8;
   1228     zSig = estimateSqrt32( aExp, aSig ) + 2;
   1229     if ( ( zSig & 0x7F ) <= 5 ) {
   1230         if ( zSig < 2 ) {
   1231             zSig = 0x7FFFFFFF;
   1232             goto roundAndPack;
   1233         }
   1234         else {
   1235             aSig >>= aExp & 1;
   1236             mul32To64( zSig, zSig, &term0, &term1 );
   1237             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
   1238             while ( (sbits32) rem0 < 0 ) {
   1239                 --zSig;
   1240                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
   1241                 term1 |= 1;
   1242                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
   1243             }
   1244             zSig |= ( ( rem0 | rem1 ) != 0 );
   1245         }
   1246     }
   1247     shift32RightJamming( zSig, 1, &zSig );
   1248  roundAndPack:
   1249     return roundAndPackFloat32( 0, zExp, zSig );
   1250 
   1251 }
   1252 #endif
   1253 
   1254 /*
   1255 -------------------------------------------------------------------------------
   1256 Returns 1 if the single-precision floating-point value `a' is equal to
   1257 the corresponding value `b', and 0 otherwise.  The comparison is performed
   1258 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1259 -------------------------------------------------------------------------------
   1260 */
   1261 flag float32_eq( float32 a, float32 b )
   1262 {
   1263 
   1264     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1265          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1266        ) {
   1267         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1268             float_raise( float_flag_invalid );
   1269         }
   1270         return 0;
   1271     }
   1272     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1273 
   1274 }
   1275 
   1276 /*
   1277 -------------------------------------------------------------------------------
   1278 Returns 1 if the single-precision floating-point value `a' is less than
   1279 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   1280 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   1281 Arithmetic.
   1282 -------------------------------------------------------------------------------
   1283 */
   1284 flag float32_le( float32 a, float32 b )
   1285 {
   1286     flag aSign, bSign;
   1287 
   1288     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1289          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1290        ) {
   1291         float_raise( float_flag_invalid );
   1292         return 0;
   1293     }
   1294     aSign = extractFloat32Sign( a );
   1295     bSign = extractFloat32Sign( b );
   1296     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1297     return ( a == b ) || ( aSign ^ ( a < b ) );
   1298 
   1299 }
   1300 
   1301 /*
   1302 -------------------------------------------------------------------------------
   1303 Returns 1 if the single-precision floating-point value `a' is less than
   1304 the corresponding value `b', and 0 otherwise.  The comparison is performed
   1305 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1306 -------------------------------------------------------------------------------
   1307 */
   1308 flag float32_lt( float32 a, float32 b )
   1309 {
   1310     flag aSign, bSign;
   1311 
   1312     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1313          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1314        ) {
   1315         float_raise( float_flag_invalid );
   1316         return 0;
   1317     }
   1318     aSign = extractFloat32Sign( a );
   1319     bSign = extractFloat32Sign( b );
   1320     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   1321     return ( a != b ) && ( aSign ^ ( a < b ) );
   1322 
   1323 }
   1324 
   1325 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1326 /*
   1327 -------------------------------------------------------------------------------
   1328 Returns 1 if the single-precision floating-point value `a' is equal to
   1329 the corresponding value `b', and 0 otherwise.  The invalid exception is
   1330 raised if either operand is a NaN.  Otherwise, the comparison is performed
   1331 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1332 -------------------------------------------------------------------------------
   1333 */
   1334 flag float32_eq_signaling( float32 a, float32 b )
   1335 {
   1336 
   1337     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1338          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1339        ) {
   1340         float_raise( float_flag_invalid );
   1341         return 0;
   1342     }
   1343     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1344 
   1345 }
   1346 
   1347 /*
   1348 -------------------------------------------------------------------------------
   1349 Returns 1 if the single-precision floating-point value `a' is less than or
   1350 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   1351 cause an exception.  Otherwise, the comparison is performed according to the
   1352 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1353 -------------------------------------------------------------------------------
   1354 */
   1355 flag float32_le_quiet( float32 a, float32 b )
   1356 {
   1357     flag aSign, bSign;
   1358     int16 aExp, bExp;
   1359 
   1360     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1361          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1362        ) {
   1363         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1364             float_raise( float_flag_invalid );
   1365         }
   1366         return 0;
   1367     }
   1368     aSign = extractFloat32Sign( a );
   1369     bSign = extractFloat32Sign( b );
   1370     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
   1371     return ( a == b ) || ( aSign ^ ( a < b ) );
   1372 
   1373 }
   1374 
   1375 /*
   1376 -------------------------------------------------------------------------------
   1377 Returns 1 if the single-precision floating-point value `a' is less than
   1378 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   1379 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   1380 Standard for Binary Floating-Point Arithmetic.
   1381 -------------------------------------------------------------------------------
   1382 */
   1383 flag float32_lt_quiet( float32 a, float32 b )
   1384 {
   1385     flag aSign, bSign;
   1386 
   1387     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
   1388          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
   1389        ) {
   1390         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
   1391             float_raise( float_flag_invalid );
   1392         }
   1393         return 0;
   1394     }
   1395     aSign = extractFloat32Sign( a );
   1396     bSign = extractFloat32Sign( b );
   1397     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
   1398     return ( a != b ) && ( aSign ^ ( a < b ) );
   1399 
   1400 }
   1401 #endif /* !SOFTFLOAT_FOR_GCC */
   1402 
   1403 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
   1404 /*
   1405 -------------------------------------------------------------------------------
   1406 Returns the result of converting the double-precision floating-point value
   1407 `a' to the 32-bit two's complement integer format.  The conversion is
   1408 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1409 Arithmetic---which means in particular that the conversion is rounded
   1410 according to the current rounding mode.  If `a' is a NaN, the largest
   1411 positive integer is returned.  Otherwise, if the conversion overflows, the
   1412 largest integer with the same sign as `a' is returned.
   1413 -------------------------------------------------------------------------------
   1414 */
   1415 int32 float64_to_int32( float64 a )
   1416 {
   1417     flag aSign;
   1418     int16 aExp, shiftCount;
   1419     bits32 aSig0, aSig1, absZ, aSigExtra;
   1420     int32 z;
   1421     int8 roundingMode;
   1422 
   1423     aSig1 = extractFloat64Frac1( a );
   1424     aSig0 = extractFloat64Frac0( a );
   1425     aExp = extractFloat64Exp( a );
   1426     aSign = extractFloat64Sign( a );
   1427     shiftCount = aExp - 0x413;
   1428     if ( 0 <= shiftCount ) {
   1429         if ( 0x41E < aExp ) {
   1430             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   1431             goto invalid;
   1432         }
   1433         shortShift64Left(
   1434             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
   1435         if ( 0x80000000 < absZ ) goto invalid;
   1436     }
   1437     else {
   1438         aSig1 = ( aSig1 != 0 );
   1439         if ( aExp < 0x3FE ) {
   1440             aSigExtra = aExp | aSig0 | aSig1;
   1441             absZ = 0;
   1442         }
   1443         else {
   1444             aSig0 |= 0x00100000;
   1445             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
   1446             absZ = aSig0>>( - shiftCount );
   1447         }
   1448     }
   1449     roundingMode = float_rounding_mode;
   1450     if ( roundingMode == float_round_nearest_even ) {
   1451         if ( (sbits32) aSigExtra < 0 ) {
   1452             ++absZ;
   1453             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
   1454         }
   1455         z = aSign ? - absZ : absZ;
   1456     }
   1457     else {
   1458         aSigExtra = ( aSigExtra != 0 );
   1459         if ( aSign ) {
   1460             z = - (   absZ
   1461                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
   1462         }
   1463         else {
   1464             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
   1465         }
   1466     }
   1467     if ( ( aSign ^ ( z < 0 ) ) && z ) {
   1468  invalid:
   1469         float_raise( float_flag_invalid );
   1470         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   1471     }
   1472     if ( aSigExtra ) set_float_exception_inexact_flag();
   1473     return z;
   1474 
   1475 }
   1476 #endif /* !SOFTFLOAT_FOR_GCC */
   1477 
   1478 /*
   1479 -------------------------------------------------------------------------------
   1480 Returns the result of converting the double-precision floating-point value
   1481 `a' to the 32-bit two's complement integer format.  The conversion is
   1482 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1483 Arithmetic, except that the conversion is always rounded toward zero.
   1484 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
   1485 the conversion overflows, the largest integer with the same sign as `a' is
   1486 returned.
   1487 -------------------------------------------------------------------------------
   1488 */
   1489 int32 float64_to_int32_round_to_zero( float64 a )
   1490 {
   1491     flag aSign;
   1492     int16 aExp, shiftCount;
   1493     bits32 aSig0, aSig1, absZ, aSigExtra;
   1494     int32 z;
   1495 
   1496     aSig1 = extractFloat64Frac1( a );
   1497     aSig0 = extractFloat64Frac0( a );
   1498     aExp = extractFloat64Exp( a );
   1499     aSign = extractFloat64Sign( a );
   1500     shiftCount = aExp - 0x413;
   1501     if ( 0 <= shiftCount ) {
   1502         if ( 0x41E < aExp ) {
   1503             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
   1504             goto invalid;
   1505         }
   1506         shortShift64Left(
   1507             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
   1508     }
   1509     else {
   1510         if ( aExp < 0x3FF ) {
   1511             if ( aExp | aSig0 | aSig1 ) {
   1512                 set_float_exception_inexact_flag();
   1513             }
   1514             return 0;
   1515         }
   1516         aSig0 |= 0x00100000;
   1517         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
   1518         absZ = aSig0>>( - shiftCount );
   1519     }
   1520     z = aSign ? - absZ : absZ;
   1521     if ( ( aSign ^ ( z < 0 ) ) && z ) {
   1522  invalid:
   1523         float_raise( float_flag_invalid );
   1524         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
   1525     }
   1526     if ( aSigExtra ) set_float_exception_inexact_flag();
   1527     return z;
   1528 
   1529 }
   1530 
   1531 /*
   1532 -------------------------------------------------------------------------------
   1533 Returns the result of converting the double-precision floating-point value
   1534 `a' to the single-precision floating-point format.  The conversion is
   1535 performed according to the IEC/IEEE Standard for Binary Floating-Point
   1536 Arithmetic.
   1537 -------------------------------------------------------------------------------
   1538 */
   1539 float32 float64_to_float32( float64 a )
   1540 {
   1541     flag aSign;
   1542     int16 aExp;
   1543     bits32 aSig0, aSig1, zSig;
   1544     bits32 allZero;
   1545 
   1546     aSig1 = extractFloat64Frac1( a );
   1547     aSig0 = extractFloat64Frac0( a );
   1548     aExp = extractFloat64Exp( a );
   1549     aSign = extractFloat64Sign( a );
   1550     if ( aExp == 0x7FF ) {
   1551         if ( aSig0 | aSig1 ) {
   1552             return commonNaNToFloat32( float64ToCommonNaN( a ) );
   1553         }
   1554         return packFloat32( aSign, 0xFF, 0 );
   1555     }
   1556     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
   1557     if ( aExp ) zSig |= 0x40000000;
   1558     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
   1559 
   1560 }
   1561 
   1562 #ifndef SOFTFLOAT_FOR_GCC
   1563 /*
   1564 -------------------------------------------------------------------------------
   1565 Rounds the double-precision floating-point value `a' to an integer,
   1566 and returns the result as a double-precision floating-point value.  The
   1567 operation is performed according to the IEC/IEEE Standard for Binary
   1568 Floating-Point Arithmetic.
   1569 -------------------------------------------------------------------------------
   1570 */
   1571 float64 float64_round_to_int( float64 a )
   1572 {
   1573     flag aSign;
   1574     int16 aExp;
   1575     bits32 lastBitMask, roundBitsMask;
   1576     int8 roundingMode;
   1577     float64 z;
   1578 
   1579     aExp = extractFloat64Exp( a );
   1580     if ( 0x413 <= aExp ) {
   1581         if ( 0x433 <= aExp ) {
   1582             if (    ( aExp == 0x7FF )
   1583                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
   1584                 return propagateFloat64NaN( a, a );
   1585             }
   1586             return a;
   1587         }
   1588         lastBitMask = 1;
   1589         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
   1590         roundBitsMask = lastBitMask - 1;
   1591         z = a;
   1592         roundingMode = float_rounding_mode;
   1593         if ( roundingMode == float_round_nearest_even ) {
   1594             if ( lastBitMask ) {
   1595                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
   1596                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
   1597             }
   1598             else {
   1599                 if ( (sbits32) z.low < 0 ) {
   1600                     ++z.high;
   1601                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
   1602                 }
   1603             }
   1604         }
   1605         else if ( roundingMode != float_round_to_zero ) {
   1606             if (   extractFloat64Sign( z )
   1607                  ^ ( roundingMode == float_round_up ) ) {
   1608                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
   1609             }
   1610         }
   1611         z.low &= ~ roundBitsMask;
   1612     }
   1613     else {
   1614         if ( aExp <= 0x3FE ) {
   1615             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
   1616             set_float_exception_inexact_flag();
   1617             aSign = extractFloat64Sign( a );
   1618             switch ( float_rounding_mode ) {
   1619              case float_round_nearest_even:
   1620                 if (    ( aExp == 0x3FE )
   1621                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
   1622                    ) {
   1623                     return packFloat64( aSign, 0x3FF, 0, 0 );
   1624                 }
   1625                 break;
   1626              case float_round_down:
   1627                 return
   1628                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
   1629                     : packFloat64( 0, 0, 0, 0 );
   1630              case float_round_up:
   1631                 return
   1632                       aSign ? packFloat64( 1, 0, 0, 0 )
   1633                     : packFloat64( 0, 0x3FF, 0, 0 );
   1634             }
   1635             return packFloat64( aSign, 0, 0, 0 );
   1636         }
   1637         lastBitMask = 1;
   1638         lastBitMask <<= 0x413 - aExp;
   1639         roundBitsMask = lastBitMask - 1;
   1640         z.low = 0;
   1641         z.high = a.high;
   1642         roundingMode = float_rounding_mode;
   1643         if ( roundingMode == float_round_nearest_even ) {
   1644             z.high += lastBitMask>>1;
   1645             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
   1646                 z.high &= ~ lastBitMask;
   1647             }
   1648         }
   1649         else if ( roundingMode != float_round_to_zero ) {
   1650             if (   extractFloat64Sign( z )
   1651                  ^ ( roundingMode == float_round_up ) ) {
   1652                 z.high |= ( a.low != 0 );
   1653                 z.high += roundBitsMask;
   1654             }
   1655         }
   1656         z.high &= ~ roundBitsMask;
   1657     }
   1658     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
   1659         set_float_exception_inexact_flag();
   1660     }
   1661     return z;
   1662 
   1663 }
   1664 #endif
   1665 
   1666 /*
   1667 -------------------------------------------------------------------------------
   1668 Returns the result of adding the absolute values of the double-precision
   1669 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
   1670 before being returned.  `zSign' is ignored if the result is a NaN.
   1671 The addition is performed according to the IEC/IEEE Standard for Binary
   1672 Floating-Point Arithmetic.
   1673 -------------------------------------------------------------------------------
   1674 */
   1675 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
   1676 {
   1677     int16 aExp, bExp, zExp;
   1678     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   1679     int16 expDiff;
   1680 
   1681     aSig1 = extractFloat64Frac1( a );
   1682     aSig0 = extractFloat64Frac0( a );
   1683     aExp = extractFloat64Exp( a );
   1684     bSig1 = extractFloat64Frac1( b );
   1685     bSig0 = extractFloat64Frac0( b );
   1686     bExp = extractFloat64Exp( b );
   1687     expDiff = aExp - bExp;
   1688     if ( 0 < expDiff ) {
   1689         if ( aExp == 0x7FF ) {
   1690             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1691             return a;
   1692         }
   1693         if ( bExp == 0 ) {
   1694             --expDiff;
   1695         }
   1696         else {
   1697             bSig0 |= 0x00100000;
   1698         }
   1699         shift64ExtraRightJamming(
   1700             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
   1701         zExp = aExp;
   1702     }
   1703     else if ( expDiff < 0 ) {
   1704         if ( bExp == 0x7FF ) {
   1705             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1706             return packFloat64( zSign, 0x7FF, 0, 0 );
   1707         }
   1708         if ( aExp == 0 ) {
   1709             ++expDiff;
   1710         }
   1711         else {
   1712             aSig0 |= 0x00100000;
   1713         }
   1714         shift64ExtraRightJamming(
   1715             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
   1716         zExp = bExp;
   1717     }
   1718     else {
   1719         if ( aExp == 0x7FF ) {
   1720             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   1721                 return propagateFloat64NaN( a, b );
   1722             }
   1723             return a;
   1724         }
   1725         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1726         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
   1727         zSig2 = 0;
   1728         zSig0 |= 0x00200000;
   1729         zExp = aExp;
   1730         goto shiftRight1;
   1731     }
   1732     aSig0 |= 0x00100000;
   1733     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1734     --zExp;
   1735     if ( zSig0 < 0x00200000 ) goto roundAndPack;
   1736     ++zExp;
   1737  shiftRight1:
   1738     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   1739  roundAndPack:
   1740     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   1741 
   1742 }
   1743 
   1744 /*
   1745 -------------------------------------------------------------------------------
   1746 Returns the result of subtracting the absolute values of the double-
   1747 precision floating-point values `a' and `b'.  If `zSign' is 1, the
   1748 difference is negated before being returned.  `zSign' is ignored if the
   1749 result is a NaN.  The subtraction is performed according to the IEC/IEEE
   1750 Standard for Binary Floating-Point Arithmetic.
   1751 -------------------------------------------------------------------------------
   1752 */
   1753 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
   1754 {
   1755     int16 aExp, bExp, zExp;
   1756     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
   1757     int16 expDiff;
   1758 
   1759     aSig1 = extractFloat64Frac1( a );
   1760     aSig0 = extractFloat64Frac0( a );
   1761     aExp = extractFloat64Exp( a );
   1762     bSig1 = extractFloat64Frac1( b );
   1763     bSig0 = extractFloat64Frac0( b );
   1764     bExp = extractFloat64Exp( b );
   1765     expDiff = aExp - bExp;
   1766     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
   1767     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
   1768     if ( 0 < expDiff ) goto aExpBigger;
   1769     if ( expDiff < 0 ) goto bExpBigger;
   1770     if ( aExp == 0x7FF ) {
   1771         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
   1772             return propagateFloat64NaN( a, b );
   1773         }
   1774         float_raise( float_flag_invalid );
   1775         return float64_default_nan;
   1776     }
   1777     if ( aExp == 0 ) {
   1778         aExp = 1;
   1779         bExp = 1;
   1780     }
   1781     if ( bSig0 < aSig0 ) goto aBigger;
   1782     if ( aSig0 < bSig0 ) goto bBigger;
   1783     if ( bSig1 < aSig1 ) goto aBigger;
   1784     if ( aSig1 < bSig1 ) goto bBigger;
   1785     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
   1786  bExpBigger:
   1787     if ( bExp == 0x7FF ) {
   1788         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1789         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
   1790     }
   1791     if ( aExp == 0 ) {
   1792         ++expDiff;
   1793     }
   1794     else {
   1795         aSig0 |= 0x40000000;
   1796     }
   1797     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   1798     bSig0 |= 0x40000000;
   1799  bBigger:
   1800     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
   1801     zExp = bExp;
   1802     zSign ^= 1;
   1803     goto normalizeRoundAndPack;
   1804  aExpBigger:
   1805     if ( aExp == 0x7FF ) {
   1806         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1807         return a;
   1808     }
   1809     if ( bExp == 0 ) {
   1810         --expDiff;
   1811     }
   1812     else {
   1813         bSig0 |= 0x40000000;
   1814     }
   1815     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
   1816     aSig0 |= 0x40000000;
   1817  aBigger:
   1818     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
   1819     zExp = aExp;
   1820  normalizeRoundAndPack:
   1821     --zExp;
   1822     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
   1823 
   1824 }
   1825 
   1826 /*
   1827 -------------------------------------------------------------------------------
   1828 Returns the result of adding the double-precision floating-point values `a'
   1829 and `b'.  The operation is performed according to the IEC/IEEE Standard for
   1830 Binary Floating-Point Arithmetic.
   1831 -------------------------------------------------------------------------------
   1832 */
   1833 float64 float64_add( float64 a, float64 b )
   1834 {
   1835     flag aSign, bSign;
   1836 
   1837     aSign = extractFloat64Sign( a );
   1838     bSign = extractFloat64Sign( b );
   1839     if ( aSign == bSign ) {
   1840         return addFloat64Sigs( a, b, aSign );
   1841     }
   1842     else {
   1843         return subFloat64Sigs( a, b, aSign );
   1844     }
   1845 
   1846 }
   1847 
   1848 /*
   1849 -------------------------------------------------------------------------------
   1850 Returns the result of subtracting the double-precision floating-point values
   1851 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1852 for Binary Floating-Point Arithmetic.
   1853 -------------------------------------------------------------------------------
   1854 */
   1855 float64 float64_sub( float64 a, float64 b )
   1856 {
   1857     flag aSign, bSign;
   1858 
   1859     aSign = extractFloat64Sign( a );
   1860     bSign = extractFloat64Sign( b );
   1861     if ( aSign == bSign ) {
   1862         return subFloat64Sigs( a, b, aSign );
   1863     }
   1864     else {
   1865         return addFloat64Sigs( a, b, aSign );
   1866     }
   1867 
   1868 }
   1869 
   1870 /*
   1871 -------------------------------------------------------------------------------
   1872 Returns the result of multiplying the double-precision floating-point values
   1873 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
   1874 for Binary Floating-Point Arithmetic.
   1875 -------------------------------------------------------------------------------
   1876 */
   1877 float64 float64_mul( float64 a, float64 b )
   1878 {
   1879     flag aSign, bSign, zSign;
   1880     int16 aExp, bExp, zExp;
   1881     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
   1882 
   1883     aSig1 = extractFloat64Frac1( a );
   1884     aSig0 = extractFloat64Frac0( a );
   1885     aExp = extractFloat64Exp( a );
   1886     aSign = extractFloat64Sign( a );
   1887     bSig1 = extractFloat64Frac1( b );
   1888     bSig0 = extractFloat64Frac0( b );
   1889     bExp = extractFloat64Exp( b );
   1890     bSign = extractFloat64Sign( b );
   1891     zSign = aSign ^ bSign;
   1892     if ( aExp == 0x7FF ) {
   1893         if (    ( aSig0 | aSig1 )
   1894              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
   1895             return propagateFloat64NaN( a, b );
   1896         }
   1897         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
   1898         return packFloat64( zSign, 0x7FF, 0, 0 );
   1899     }
   1900     if ( bExp == 0x7FF ) {
   1901         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1902         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   1903  invalid:
   1904             float_raise( float_flag_invalid );
   1905             return float64_default_nan;
   1906         }
   1907         return packFloat64( zSign, 0x7FF, 0, 0 );
   1908     }
   1909     if ( aExp == 0 ) {
   1910         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1911         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   1912     }
   1913     if ( bExp == 0 ) {
   1914         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1915         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   1916     }
   1917     zExp = aExp + bExp - 0x400;
   1918     aSig0 |= 0x00100000;
   1919     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
   1920     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
   1921     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
   1922     zSig2 |= ( zSig3 != 0 );
   1923     if ( 0x00200000 <= zSig0 ) {
   1924         shift64ExtraRightJamming(
   1925             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
   1926         ++zExp;
   1927     }
   1928     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   1929 
   1930 }
   1931 
   1932 /*
   1933 -------------------------------------------------------------------------------
   1934 Returns the result of dividing the double-precision floating-point value `a'
   1935 by the corresponding value `b'.  The operation is performed according to the
   1936 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1937 -------------------------------------------------------------------------------
   1938 */
   1939 float64 float64_div( float64 a, float64 b )
   1940 {
   1941     flag aSign, bSign, zSign;
   1942     int16 aExp, bExp, zExp;
   1943     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
   1944     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   1945 
   1946     aSig1 = extractFloat64Frac1( a );
   1947     aSig0 = extractFloat64Frac0( a );
   1948     aExp = extractFloat64Exp( a );
   1949     aSign = extractFloat64Sign( a );
   1950     bSig1 = extractFloat64Frac1( b );
   1951     bSig0 = extractFloat64Frac0( b );
   1952     bExp = extractFloat64Exp( b );
   1953     bSign = extractFloat64Sign( b );
   1954     zSign = aSign ^ bSign;
   1955     if ( aExp == 0x7FF ) {
   1956         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
   1957         if ( bExp == 0x7FF ) {
   1958             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1959             goto invalid;
   1960         }
   1961         return packFloat64( zSign, 0x7FF, 0, 0 );
   1962     }
   1963     if ( bExp == 0x7FF ) {
   1964         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   1965         return packFloat64( zSign, 0, 0, 0 );
   1966     }
   1967     if ( bExp == 0 ) {
   1968         if ( ( bSig0 | bSig1 ) == 0 ) {
   1969             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
   1970  invalid:
   1971                 float_raise( float_flag_invalid );
   1972                 return float64_default_nan;
   1973             }
   1974             float_raise( float_flag_divbyzero );
   1975             return packFloat64( zSign, 0x7FF, 0, 0 );
   1976         }
   1977         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   1978     }
   1979     if ( aExp == 0 ) {
   1980         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
   1981         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   1982     }
   1983     zExp = aExp - bExp + 0x3FD;
   1984     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
   1985     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
   1986     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
   1987         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
   1988         ++zExp;
   1989     }
   1990     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
   1991     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
   1992     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
   1993     while ( (sbits32) rem0 < 0 ) {
   1994         --zSig0;
   1995         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
   1996     }
   1997     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
   1998     if ( ( zSig1 & 0x3FF ) <= 4 ) {
   1999         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
   2000         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
   2001         while ( (sbits32) rem1 < 0 ) {
   2002             --zSig1;
   2003             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
   2004         }
   2005         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   2006     }
   2007     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
   2008     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
   2009 
   2010 }
   2011 
   2012 #ifndef SOFTFLOAT_FOR_GCC
   2013 /*
   2014 -------------------------------------------------------------------------------
   2015 Returns the remainder of the double-precision floating-point value `a'
   2016 with respect to the corresponding value `b'.  The operation is performed
   2017 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2018 -------------------------------------------------------------------------------
   2019 */
   2020 float64 float64_rem( float64 a, float64 b )
   2021 {
   2022     flag aSign, bSign, zSign;
   2023     int16 aExp, bExp, expDiff;
   2024     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
   2025     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
   2026     sbits32 sigMean0;
   2027     float64 z;
   2028 
   2029     aSig1 = extractFloat64Frac1( a );
   2030     aSig0 = extractFloat64Frac0( a );
   2031     aExp = extractFloat64Exp( a );
   2032     aSign = extractFloat64Sign( a );
   2033     bSig1 = extractFloat64Frac1( b );
   2034     bSig0 = extractFloat64Frac0( b );
   2035     bExp = extractFloat64Exp( b );
   2036     bSign = extractFloat64Sign( b );
   2037     if ( aExp == 0x7FF ) {
   2038         if (    ( aSig0 | aSig1 )
   2039              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
   2040             return propagateFloat64NaN( a, b );
   2041         }
   2042         goto invalid;
   2043     }
   2044     if ( bExp == 0x7FF ) {
   2045         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
   2046         return a;
   2047     }
   2048     if ( bExp == 0 ) {
   2049         if ( ( bSig0 | bSig1 ) == 0 ) {
   2050  invalid:
   2051             float_raise( float_flag_invalid );
   2052             return float64_default_nan;
   2053         }
   2054         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
   2055     }
   2056     if ( aExp == 0 ) {
   2057         if ( ( aSig0 | aSig1 ) == 0 ) return a;
   2058         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   2059     }
   2060     expDiff = aExp - bExp;
   2061     if ( expDiff < -1 ) return a;
   2062     shortShift64Left(
   2063         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
   2064     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
   2065     q = le64( bSig0, bSig1, aSig0, aSig1 );
   2066     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   2067     expDiff -= 32;
   2068     while ( 0 < expDiff ) {
   2069         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
   2070         q = ( 4 < q ) ? q - 4 : 0;
   2071         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
   2072         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
   2073         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
   2074         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
   2075         expDiff -= 29;
   2076     }
   2077     if ( -32 < expDiff ) {
   2078         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
   2079         q = ( 4 < q ) ? q - 4 : 0;
   2080         q >>= - expDiff;
   2081         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
   2082         expDiff += 24;
   2083         if ( expDiff < 0 ) {
   2084             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
   2085         }
   2086         else {
   2087             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
   2088         }
   2089         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
   2090         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
   2091     }
   2092     else {
   2093         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
   2094         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
   2095     }
   2096     do {
   2097         alternateASig0 = aSig0;
   2098         alternateASig1 = aSig1;
   2099         ++q;
   2100         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
   2101     } while ( 0 <= (sbits32) aSig0 );
   2102     add64(
   2103         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
   2104     if (    ( sigMean0 < 0 )
   2105          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
   2106         aSig0 = alternateASig0;
   2107         aSig1 = alternateASig1;
   2108     }
   2109     zSign = ( (sbits32) aSig0 < 0 );
   2110     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
   2111     return
   2112         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
   2113 
   2114 }
   2115 #endif
   2116 
   2117 #ifndef SOFTFLOAT_FOR_GCC
   2118 /*
   2119 -------------------------------------------------------------------------------
   2120 Returns the square root of the double-precision floating-point value `a'.
   2121 The operation is performed according to the IEC/IEEE Standard for Binary
   2122 Floating-Point Arithmetic.
   2123 -------------------------------------------------------------------------------
   2124 */
   2125 float64 float64_sqrt( float64 a )
   2126 {
   2127     flag aSign;
   2128     int16 aExp, zExp;
   2129     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
   2130     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
   2131     float64 z;
   2132 
   2133     aSig1 = extractFloat64Frac1( a );
   2134     aSig0 = extractFloat64Frac0( a );
   2135     aExp = extractFloat64Exp( a );
   2136     aSign = extractFloat64Sign( a );
   2137     if ( aExp == 0x7FF ) {
   2138         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
   2139         if ( ! aSign ) return a;
   2140         goto invalid;
   2141     }
   2142     if ( aSign ) {
   2143         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
   2144  invalid:
   2145         float_raise( float_flag_invalid );
   2146         return float64_default_nan;
   2147     }
   2148     if ( aExp == 0 ) {
   2149         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
   2150         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
   2151     }
   2152     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
   2153     aSig0 |= 0x00100000;
   2154     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
   2155     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
   2156     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
   2157     doubleZSig0 = zSig0 + zSig0;
   2158     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
   2159     mul32To64( zSig0, zSig0, &term0, &term1 );
   2160     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
   2161     while ( (sbits32) rem0 < 0 ) {
   2162         --zSig0;
   2163         doubleZSig0 -= 2;
   2164         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
   2165     }
   2166     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
   2167     if ( ( zSig1 & 0x1FF ) <= 5 ) {
   2168         if ( zSig1 == 0 ) zSig1 = 1;
   2169         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
   2170         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
   2171         mul32To64( zSig1, zSig1, &term2, &term3 );
   2172         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
   2173         while ( (sbits32) rem1 < 0 ) {
   2174             --zSig1;
   2175             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
   2176             term3 |= 1;
   2177             term2 |= doubleZSig0;
   2178             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
   2179         }
   2180         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
   2181     }
   2182     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
   2183     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
   2184 
   2185 }
   2186 #endif
   2187 
   2188 /*
   2189 -------------------------------------------------------------------------------
   2190 Returns 1 if the double-precision floating-point value `a' is equal to
   2191 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2192 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2193 -------------------------------------------------------------------------------
   2194 */
   2195 flag float64_eq( float64 a, float64 b )
   2196 {
   2197 
   2198     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2199               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2200          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2201               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2202        ) {
   2203         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2204             float_raise( float_flag_invalid );
   2205         }
   2206         return 0;
   2207     }
   2208     return ( a == b ) ||
   2209         ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
   2210 
   2211 }
   2212 
   2213 /*
   2214 -------------------------------------------------------------------------------
   2215 Returns 1 if the double-precision floating-point value `a' is less than
   2216 or equal to the corresponding value `b', and 0 otherwise.  The comparison
   2217 is performed according to the IEC/IEEE Standard for Binary Floating-Point
   2218 Arithmetic.
   2219 -------------------------------------------------------------------------------
   2220 */
   2221 flag float64_le( float64 a, float64 b )
   2222 {
   2223     flag aSign, bSign;
   2224 
   2225     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2226               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2227          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2228               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2229        ) {
   2230         float_raise( float_flag_invalid );
   2231         return 0;
   2232     }
   2233     aSign = extractFloat64Sign( a );
   2234     bSign = extractFloat64Sign( b );
   2235     if ( aSign != bSign )
   2236         return aSign ||
   2237             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
   2238                 0 );
   2239     return ( a == b ) ||
   2240         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   2241 }
   2242 
   2243 /*
   2244 -------------------------------------------------------------------------------
   2245 Returns 1 if the double-precision floating-point value `a' is less than
   2246 the corresponding value `b', and 0 otherwise.  The comparison is performed
   2247 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2248 -------------------------------------------------------------------------------
   2249 */
   2250 flag float64_lt( float64 a, float64 b )
   2251 {
   2252     flag aSign, bSign;
   2253 
   2254     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2255               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2256          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2257               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2258        ) {
   2259         float_raise( float_flag_invalid );
   2260         return 0;
   2261     }
   2262     aSign = extractFloat64Sign( a );
   2263     bSign = extractFloat64Sign( b );
   2264     if ( aSign != bSign )
   2265         return aSign &&
   2266             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
   2267               0 );
   2268     return ( a != b ) &&
   2269            ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
   2270 
   2271 }
   2272 
   2273 #ifndef SOFTFLOAT_FOR_GCC
   2274 /*
   2275 -------------------------------------------------------------------------------
   2276 Returns 1 if the double-precision floating-point value `a' is equal to
   2277 the corresponding value `b', and 0 otherwise.  The invalid exception is
   2278 raised if either operand is a NaN.  Otherwise, the comparison is performed
   2279 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2280 -------------------------------------------------------------------------------
   2281 */
   2282 flag float64_eq_signaling( float64 a, float64 b )
   2283 {
   2284 
   2285     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2286               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2287          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2288               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2289        ) {
   2290         float_raise( float_flag_invalid );
   2291         return 0;
   2292     }
   2293     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
   2294 
   2295 }
   2296 
   2297 /*
   2298 -------------------------------------------------------------------------------
   2299 Returns 1 if the double-precision floating-point value `a' is less than or
   2300 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
   2301 cause an exception.  Otherwise, the comparison is performed according to the
   2302 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   2303 -------------------------------------------------------------------------------
   2304 */
   2305 flag float64_le_quiet( float64 a, float64 b )
   2306 {
   2307     flag aSign, bSign;
   2308 
   2309     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2310               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2311          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2312               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2313        ) {
   2314         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2315             float_raise( float_flag_invalid );
   2316         }
   2317         return 0;
   2318     }
   2319     aSign = extractFloat64Sign( a );
   2320     bSign = extractFloat64Sign( b );
   2321     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
   2322     return ( a == b ) || ( aSign ^ ( a < b ) );
   2323 
   2324 }
   2325 
   2326 /*
   2327 -------------------------------------------------------------------------------
   2328 Returns 1 if the double-precision floating-point value `a' is less than
   2329 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
   2330 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
   2331 Standard for Binary Floating-Point Arithmetic.
   2332 -------------------------------------------------------------------------------
   2333 */
   2334 flag float64_lt_quiet( float64 a, float64 b )
   2335 {
   2336     flag aSign, bSign;
   2337 
   2338     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
   2339               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
   2340          || (    ( extractFloat64Exp( b ) == 0x7FF )
   2341               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
   2342        ) {
   2343         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
   2344             float_raise( float_flag_invalid );
   2345         }
   2346         return 0;
   2347     }
   2348     aSign = extractFloat64Sign( a );
   2349     bSign = extractFloat64Sign( b );
   2350     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
   2351     return ( a != b ) && ( aSign ^ ( a < b ) );
   2352 
   2353 }
   2354 
   2355 #endif
   2356