/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/
/*----------------------------------------------------------------------------
| This macro tests for minimum version of the GNU C compiler.
*----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
/* Pack (major, minor) into a single integer (major*2^16 + minor) so a
   version pair can be tested with one ordinary integer comparison. */
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
/* Not GCC (version macros unavailable): no prerequisite is ever satisfied. */
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif
     47 
     48 
     49 /*----------------------------------------------------------------------------
     50 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
     51 | bits are shifted off, they are ``jammed'' into the least significant bit of
     52 | the result by setting the least significant bit to 1.  The value of `count'
     53 | can be arbitrarily large; in particular, if `count' is greater than 32, the
     54 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     55 | The result is stored in the location pointed to by `zPtr'.
     56 *----------------------------------------------------------------------------*/
     57 
     58 INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
     59 {
     60     uint32_t z;
     61 
     62     if ( count == 0 ) {
     63         z = a;
     64     }
     65     else if ( count < 32 ) {
     66         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     67     }
     68     else {
     69         z = ( a != 0 );
     70     }
     71     *zPtr = z;
     72 
     73 }
     74 
     75 /*----------------------------------------------------------------------------
     76 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
     77 | bits are shifted off, they are ``jammed'' into the least significant bit of
     78 | the result by setting the least significant bit to 1.  The value of `count'
     79 | can be arbitrarily large; in particular, if `count' is greater than 64, the
     80 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     81 | The result is stored in the location pointed to by `zPtr'.
     82 *----------------------------------------------------------------------------*/
     83 
     84 INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
     85 {
     86     uint64_t z;
     87 
     88     if ( count == 0 ) {
     89         z = a;
     90     }
     91     else if ( count < 64 ) {
     92         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     93     }
     94     else {
     95         z = ( a != 0 );
     96     }
     97     *zPtr = z;
     98 
     99 }
    100 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
| _plus_ the number of bits given in `count'.  The shifted result is at most
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
| shifted off is the most-significant bit of the extra result, and the other
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
| bits shifted off were all zero.  This extra result is stored in the location
| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0' and `a1' are considered to form
| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
| point value is shifted right by the number of bits given in `count', and
| the integer part of the result is returned at the location pointed to by
| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
| described above, and is returned at the location pointed to by `z1Ptr'.)
*----------------------------------------------------------------------------*/

INLINE void
 shift64ExtraRightJamming(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;   /* complementary shift within a word */

    if ( count == 0 ) {
        /* No shift: integer and fraction parts pass through unchanged. */
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        /* Low `count' bits of a0 become the top of the fraction word; any
         * nonzero bit of the old fraction is jammed into its LSB. */
        z1 = ( a0<<negCount ) | ( a1 != 0 );
        z0 = a0>>count;
    }
    else {
        if ( count == 64 ) {
            /* a0 becomes the fraction word exactly; old a1 is sticky. */
            z1 = a0 | ( a1 != 0 );
        }
        else {
            /* count > 64: everything shifts out; only a sticky bit noting
             * "some nonzero bits were lost" remains. */
            z1 = ( ( a0 | a1 ) != 0 );
        }
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    146 
    147 /*----------------------------------------------------------------------------
    148 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    149 | number of bits given in `count'.  Any bits shifted off are lost.  The value
    150 | of `count' can be arbitrarily large; in particular, if `count' is greater
    151 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
    152 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    153 *----------------------------------------------------------------------------*/
    154 
    155 INLINE void
    156  shift128Right(
    157      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
    158 {
    159     uint64_t z0, z1;
    160     int8 negCount = ( - count ) & 63;
    161 
    162     if ( count == 0 ) {
    163         z1 = a1;
    164         z0 = a0;
    165     }
    166     else if ( count < 64 ) {
    167         z1 = ( a0<<negCount ) | ( a1>>count );
    168         z0 = a0>>count;
    169     }
    170     else {
    171         z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
    172         z0 = 0;
    173     }
    174     *z1Ptr = z1;
    175     *z0Ptr = z0;
    176 
    177 }
    178 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  If any nonzero bits are shifted off, they
| are ``jammed'' into the least significant bit of the result by setting the
| least significant bit to 1.  The value of `count' can be arbitrarily large;
| in particular, if `count' is greater than 128, the result will be either
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
| nonzero.  The result is broken into two 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 shift128RightJamming(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    int8 negCount = ( - count ) & 63;   /* complementary shift within a word */

    if ( count == 0 ) {
        z1 = a1;
        z0 = a0;
    }
    else if ( count < 64 ) {
        /* Normal case: bits shifted out of a1 become the sticky bit. */
        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
        z0 = a0>>count;
    }
    else {
        if ( count == 64 ) {
            /* a0 lands exactly in the low word; all of a1 is sticky. */
            z1 = a0 | ( a1 != 0 );
        }
        else if ( count < 128 ) {
            /* Surviving bits of a0, plus a sticky bit covering everything
             * shifted out (low bits of a0 and all of a1). */
            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
        }
        else {
            /* Entire value shifted out: result is just the sticky bit. */
            z1 = ( ( a0 | a1 ) != 0 );
        }
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    221 
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero.  This extra result is stored in the location pointed to by
| `z2Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'.  This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/

INLINE void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int_fast16_t count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 negCount = ( - count ) & 63;   /* complementary shift within a word */

    if ( count == 0 ) {
        /* No shift: all three words pass through unchanged. */
        z2 = a2;
        z1 = a1;
        z0 = a0;
    }
    else {
        if ( count < 64 ) {
            /* Bits leaving each word enter the top of the word below. */
            z2 = a1<<negCount;
            z1 = ( a0<<negCount ) | ( a1>>count );
            z0 = a0>>count;
        }
        else {
            if ( count == 64 ) {
                /* Whole-word shift: a0:a1 move down exactly one word. */
                z2 = a1;
                z1 = a0;
            }
            else {
                /* count > 64: a1 is entirely shifted out; merge it into a2
                 * so the sticky accumulation below accounts for it. */
                a2 |= a1;
                if ( count < 128 ) {
                    z2 = a0<<negCount;
                    z1 = a0>>( count & 63 );
                }
                else {
                    /* count >= 128: even a0 is shifted out (at exactly 128
                     * it becomes the extra word; beyond that only a sticky
                     * bit remains). */
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
                    z1 = 0;
                }
            }
            z0 = 0;
        }
        /* Jam any bits lost from the old fraction word into bit 0. */
        z2 |= ( a2 != 0 );
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    291 
    292 /*----------------------------------------------------------------------------
    293 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    294 | number of bits given in `count'.  Any bits shifted off are lost.  The value
    295 | of `count' must be less than 64.  The result is broken into two 64-bit
    296 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    297 *----------------------------------------------------------------------------*/
    298 
    299 INLINE void
    300  shortShift128Left(
    301      uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
    302 {
    303 
    304     *z1Ptr = a1<<count;
    305     *z0Ptr =
    306         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    307 
    308 }
    309 
    310 /*----------------------------------------------------------------------------
    311 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    312 | by the number of bits given in `count'.  Any bits shifted off are lost.
    313 | The value of `count' must be less than 64.  The result is broken into three
    314 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    315 | `z1Ptr', and `z2Ptr'.
    316 *----------------------------------------------------------------------------*/
    317 
    318 INLINE void
    319  shortShift192Left(
    320      uint64_t a0,
    321      uint64_t a1,
    322      uint64_t a2,
    323      int_fast16_t count,
    324      uint64_t *z0Ptr,
    325      uint64_t *z1Ptr,
    326      uint64_t *z2Ptr
    327  )
    328 {
    329     uint64_t z0, z1, z2;
    330     int8 negCount;
    331 
    332     z2 = a2<<count;
    333     z1 = a1<<count;
    334     z0 = a0<<count;
    335     if ( 0 < count ) {
    336         negCount = ( ( - count ) & 63 );
    337         z1 |= a2>>negCount;
    338         z0 |= a1>>negCount;
    339     }
    340     *z2Ptr = z2;
    341     *z1Ptr = z1;
    342     *z0Ptr = z0;
    343 
    344 }
    345 
    346 /*----------------------------------------------------------------------------
    347 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    348 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    349 | any carry out is lost.  The result is broken into two 64-bit pieces which
    350 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    351 *----------------------------------------------------------------------------*/
    352 
    353 INLINE void
    354  add128(
    355      uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
    356 {
    357     uint64_t z1;
    358 
    359     z1 = a1 + b1;
    360     *z1Ptr = z1;
    361     *z0Ptr = a0 + b0 + ( z1 < a1 );
    362 
    363 }
    364 
/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
| modulo 2^192, so any carry out is lost.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 add192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 carry0, carry1;

    z2 = a2 + b2;
    carry1 = ( z2 < a2 );   /* carry out of the low word (unsigned wrap) */
    z1 = a1 + b1;
    carry0 = ( z1 < a1 );   /* carry out of the middle word */
    z0 = a0 + b0;
    z1 += carry1;
    /* Adding carry1 can itself wrap the middle word; detect by comparing
     * against the (0 or 1) carry just added. */
    z0 += ( z1 < carry1 );
    z0 += carry0;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    402 
    403 /*----------------------------------------------------------------------------
    404 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    405 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    406 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
    407 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    408 | `z1Ptr'.
    409 *----------------------------------------------------------------------------*/
    410 
    411 INLINE void
    412  sub128(
    413      uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
    414 {
    415 
    416     *z1Ptr = a1 - b1;
    417     *z0Ptr = a0 - b0 - ( a1 < b1 );
    418 
    419 }
    420 
/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 sub192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8 borrow0, borrow1;

    z2 = a2 - b2;
    borrow1 = ( a2 < b2 );   /* borrow out of the low word */
    z1 = a1 - b1;
    borrow0 = ( a1 < b1 );   /* borrow out of the middle word */
    z0 = a0 - b0;
    /* Test for the borrow caused by subtracting borrow1 from z1 BEFORE
     * actually modifying z1 -- the order of these two statements matters. */
    z0 -= ( z1 < borrow1 );
    z1 -= borrow1;
    z0 -= borrow0;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    458 
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint32_t aHigh, aLow, bHigh, bLow;
    uint64_t z0, zMiddleA, zMiddleB, z1;

    /* Split each operand into 32-bit halves. */
    aLow = a;
    aHigh = a>>32;
    bLow = b;
    bHigh = b>>32;
    /* Schoolbook multiplication: four 32x32 -> 64-bit partial products. */
    z1 = ( (uint64_t) aLow ) * bLow;
    zMiddleA = ( (uint64_t) aLow ) * bHigh;
    zMiddleB = ( (uint64_t) aHigh ) * bLow;
    z0 = ( (uint64_t) aHigh ) * bHigh;
    /* Sum the two middle products; unsigned wraparound means a carry of
     * 2^64, which lands at bit 32 of the high word. */
    zMiddleA += zMiddleB;
    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    /* Fold the low half of the middle sum into the low word, propagating
     * any carry into the high word. */
    zMiddleA <<= 32;
    z1 += zMiddleA;
    z0 += ( z1 < zMiddleA );
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    487 
    488 /*----------------------------------------------------------------------------
    489 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    490 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    491 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    492 | `z2Ptr'.
    493 *----------------------------------------------------------------------------*/
    494 
    495 INLINE void
    496  mul128By64To192(
    497      uint64_t a0,
    498      uint64_t a1,
    499      uint64_t b,
    500      uint64_t *z0Ptr,
    501      uint64_t *z1Ptr,
    502      uint64_t *z2Ptr
    503  )
    504 {
    505     uint64_t z0, z1, z2, more1;
    506 
    507     mul64To128( a1, b, &z1, &z2 );
    508     mul64To128( a0, b, &z0, &more1 );
    509     add128( z0, more1, 0, z1, &z0, &z1 );
    510     *z2Ptr = z2;
    511     *z1Ptr = z1;
    512     *z0Ptr = z0;
    513 
    514 }
    515 
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
| product.  The product is broken into four 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
*----------------------------------------------------------------------------*/

INLINE void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
     uint64_t b0,
     uint64_t b1,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr,
     uint64_t *z3Ptr
 )
{
    uint64_t z0, z1, z2, z3;
    uint64_t more1, more2;

    /* Schoolbook multiplication on 64-bit digits:
     *   (a0:a1)*(b0:b1) = a1*b1 + ((a1*b0 + a0*b1) << 64) + (a0*b0 << 128)
     */
    mul64To128( a1, b1, &z2, &z3 );
    mul64To128( a1, b0, &z1, &more2 );
    add128( z1, more2, 0, z2, &z1, &z2 );        /* fold a1*b0 into z1:z2 */
    mul64To128( a0, b0, &z0, &more1 );
    add128( z0, more1, 0, z1, &z0, &z1 );        /* fold a0*b0 into z0:z1 */
    mul64To128( a0, b1, &more1, &more2 );
    add128( more1, more2, 0, z2, &more1, &z2 );  /* a0*b1 low half into z2 */
    add128( z0, z1, 0, more1, &z0, &z1 );        /* a0*b1 carry into z0:z1 */
    *z3Ptr = z3;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}
    552 
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/

static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    /* Quotient would not fit in 64 bits: saturate. */
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    b0 = b>>32;
    /* First approximation of the upper 32 quotient bits, using only the
     * top 32 bits of the divisor (may overestimate slightly). */
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    mul64To128( b, z, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    /* Step the estimate down while the partial remainder is negative. */
    while ( ( (int64_t) rem0 ) < 0 ) {
        z -= LIT64( 0x100000000 );
        b1 = b<<32;
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    }
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    /* Estimate the lower 32 quotient bits the same way, saturating. */
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    return z;

}
    583 
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/

static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a)
{
    /* Piecewise correction terms indexed by the 4 bits just below the
     * leading one of `a'; one table for each exponent parity. */
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    int8 index;
    uint32_t z;

    index = ( a>>27 ) & 15;   /* a >= 2^31, so bit 31 is set; use bits 30:27 */
    if ( aExp & 1 ) {
        /* Odd exponent: table-corrected initial guess, then a refinement
         * step before the final averaging below. */
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
        z = ( ( a / z )<<14 ) + ( z<<15 );
        a >>= 1;
    }
    else {
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
        z = a / z + z;
        /* Clamp the estimate; if it still does not exceed `a', fall back
         * to a/2 (avoids a too-small divisor in the final step). */
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
    }
    /* Final Newton-style averaging step: roughly (a/z + z)/2, computed with
     * 31 extra fraction bits of precision. */
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );

}
    622 
    623 /*----------------------------------------------------------------------------
    624 | Returns the number of leading 0 bits before the most-significant 1 bit of
    625 | `a'.  If `a' is zero, 32 is returned.
    626 *----------------------------------------------------------------------------*/
    627 
    628 static int8 countLeadingZeros32( uint32_t a )
    629 {
    630 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
    631     if (a) {
    632         return __builtin_clz(a);
    633     } else {
    634         return 32;
    635     }
    636 #else
    637     static const int8 countLeadingZerosHigh[] = {
    638         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    639         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    640         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    641         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    642         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    643         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    644         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    645         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    646         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    647         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    648         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    649         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    650         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    651         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    652         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    653         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    654     };
    655     int8 shiftCount;
    656 
    657     shiftCount = 0;
    658     if ( a < 0x10000 ) {
    659         shiftCount += 16;
    660         a <<= 16;
    661     }
    662     if ( a < 0x1000000 ) {
    663         shiftCount += 8;
    664         a <<= 8;
    665     }
    666     shiftCount += countLeadingZerosHigh[ a>>24 ];
    667     return shiftCount;
    668 #endif
    669 }
    670 
    671 /*----------------------------------------------------------------------------
    672 | Returns the number of leading 0 bits before the most-significant 1 bit of
    673 | `a'.  If `a' is zero, 64 is returned.
    674 *----------------------------------------------------------------------------*/
    675 
    676 static int8 countLeadingZeros64( uint64_t a )
    677 {
    678 #if SOFTFLOAT_GNUC_PREREQ(3, 4)
    679     if (a) {
    680         return __builtin_clzll(a);
    681     } else {
    682         return 64;
    683     }
    684 #else
    685     int8 shiftCount;
    686 
    687     shiftCount = 0;
    688     if ( a < ( (uint64_t) 1 )<<32 ) {
    689         shiftCount += 32;
    690     }
    691     else {
    692         a >>= 32;
    693     }
    694     shiftCount += countLeadingZeros32( a );
    695     return shiftCount;
    696 #endif
    697 }
    698 
    699 /*----------------------------------------------------------------------------
    700 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    701 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    702 | Otherwise, returns 0.
    703 *----------------------------------------------------------------------------*/
    704 
    705 INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
    706 {
    707 
    708     return ( a0 == b0 ) && ( a1 == b1 );
    709 
    710 }
    711 
    712 /*----------------------------------------------------------------------------
    713 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    714 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    715 | Otherwise, returns 0.
    716 *----------------------------------------------------------------------------*/
    717 
    718 INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
    719 {
    720 
    721     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    722 
    723 }
    724 
    725 /*----------------------------------------------------------------------------
    726 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    727 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    728 | returns 0.
    729 *----------------------------------------------------------------------------*/
    730 
    731 INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
    732 {
    733 
    734     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    735 
    736 }
    737 
    738 /*----------------------------------------------------------------------------
    739 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    740 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    741 | Otherwise, returns 0.
    742 *----------------------------------------------------------------------------*/
    743 
    744 INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
    745 {
    746 
    747     return ( a0 != b0 ) || ( a1 != b1 );
    748 
    749 }
    750