Home | History | Annotate | Download | only in bits64
      1 /* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */
      2 
      3 /*
      4 ===============================================================================
      5 
      6 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      7 Arithmetic Package, Release 2a.
      8 
      9 Written by John R. Hauser.  This work was made possible in part by the
     10 International Computer Science Institute, located at Suite 600, 1947 Center
     11 Street, Berkeley, California 94704.  Funding was partially provided by the
     12 National Science Foundation under grant MIP-9311980.  The original version
     13 of this code was written as part of a project to build a fixed-point vector
     14 processor in collaboration with the University of California at Berkeley,
     15 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     16 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     17 arithmetic/SoftFloat.html'.
     18 
     19 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     20 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     22 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     23 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     24 
     25 Derivative works are acceptable, even for commercial purposes, so long as
     26 (1) they include prominent notice that the work is derivative, and (2) they
     27 include prominent notice akin to these four paragraphs for those parts of
     28 this code that are retained.
     29 
     30 ===============================================================================
     31 */
     32 
     33 /*
     34 -------------------------------------------------------------------------------
     35 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     36 bits are shifted off, they are ``jammed'' into the least significant bit of
     37 the result by setting the least significant bit to 1.  The value of `count'
     38 can be arbitrarily large; in particular, if `count' is greater than 32, the
     39 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     40 The result is stored in the location pointed to by `zPtr'.
     41 -------------------------------------------------------------------------------
     42 */
     43 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     44 {
     45     bits32 z;
     46 
     47     if ( count == 0 ) {
     48         z = a;
     49     }
     50     else if ( count < 32 ) {
     51         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     52     }
     53     else {
     54         z = ( a != 0 );
     55     }
     56     *zPtr = z;
     57 
     58 }
     59 
     60 /*
     61 -------------------------------------------------------------------------------
     62 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     63 bits are shifted off, they are ``jammed'' into the least significant bit of
     64 the result by setting the least significant bit to 1.  The value of `count'
     65 can be arbitrarily large; in particular, if `count' is greater than 64, the
     66 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     67 The result is stored in the location pointed to by `zPtr'.
     68 -------------------------------------------------------------------------------
     69 */
     70 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     71 {
     72     bits64 z;
     73 
     74     if ( count == 0 ) {
     75         z = a;
     76     }
     77     else if ( count < 64 ) {
     78         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     79     }
     80     else {
     81         z = ( a != 0 );
     82     }
     83     *zPtr = z;
     84 
     85 }
     86 
     87 /*
     88 -------------------------------------------------------------------------------
     89 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     90 _plus_ the number of bits given in `count'.  The shifted result is at most
     91 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     92 bits shifted off form a second 64-bit result as follows:  The _last_ bit
     93 shifted off is the most-significant bit of the extra result, and the other
     94 63 bits of the extra result are all zero if and only if _all_but_the_last_
     95 bits shifted off were all zero.  This extra result is stored in the location
     96 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     97     (This routine makes more sense if `a0' and `a1' are considered to form a
     98 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
     99 value is shifted right by the number of bits given in `count', and the
    100 integer part of the result is returned at the location pointed to by
    101 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
    102 described above, and is returned at the location pointed to by `z1Ptr'.)
    103 -------------------------------------------------------------------------------
    104 */
    105 INLINE void
    106  shift64ExtraRightJamming(
    107      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    108 {
    109     bits64 z0, z1;
    110     int8 negCount = ( - count ) & 63;
    111 
    112     if ( count == 0 ) {
    113         z1 = a1;
    114         z0 = a0;
    115     }
    116     else if ( count < 64 ) {
    117         z1 = ( a0<<negCount ) | ( a1 != 0 );
    118         z0 = a0>>count;
    119     }
    120     else {
    121         if ( count == 64 ) {
    122             z1 = a0 | ( a1 != 0 );
    123         }
    124         else {
    125             z1 = ( ( a0 | a1 ) != 0 );
    126         }
    127         z0 = 0;
    128     }
    129     *z1Ptr = z1;
    130     *z0Ptr = z0;
    131 
    132 }
    133 
    134 /*
    135 -------------------------------------------------------------------------------
    136 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    137 number of bits given in `count'.  Any bits shifted off are lost.  The value
    138 of `count' can be arbitrarily large; in particular, if `count' is greater
    139 than 128, the result will be 0.  The result is broken into two 64-bit pieces
    140 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    141 -------------------------------------------------------------------------------
    142 */
    143 INLINE void
    144  shift128Right(
    145      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    146 {
    147     bits64 z0, z1;
    148     int8 negCount = ( - count ) & 63;
    149 
    150     if ( count == 0 ) {
    151         z1 = a1;
    152         z0 = a0;
    153     }
    154     else if ( count < 64 ) {
    155         z1 = ( a0<<negCount ) | ( a1>>count );
    156         z0 = a0>>count;
    157     }
    158     else {
    159         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
    160         z0 = 0;
    161     }
    162     *z1Ptr = z1;
    163     *z0Ptr = z0;
    164 
    165 }
    166 
    167 /*
    168 -------------------------------------------------------------------------------
    169 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    170 number of bits given in `count'.  If any nonzero bits are shifted off, they
    171 are ``jammed'' into the least significant bit of the result by setting the
    172 least significant bit to 1.  The value of `count' can be arbitrarily large;
    173 in particular, if `count' is greater than 128, the result will be either
    174 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    175 nonzero.  The result is broken into two 64-bit pieces which are stored at
    176 the locations pointed to by `z0Ptr' and `z1Ptr'.
    177 -------------------------------------------------------------------------------
    178 */
    179 INLINE void
    180  shift128RightJamming(
    181      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    182 {
    183     bits64 z0, z1;
    184     int8 negCount = ( - count ) & 63;
    185 
    186     if ( count == 0 ) {
    187         z1 = a1;
    188         z0 = a0;
    189     }
    190     else if ( count < 64 ) {
    191         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    192         z0 = a0>>count;
    193     }
    194     else {
    195         if ( count == 64 ) {
    196             z1 = a0 | ( a1 != 0 );
    197         }
    198         else if ( count < 128 ) {
    199             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    200         }
    201         else {
    202             z1 = ( ( a0 | a1 ) != 0 );
    203         }
    204         z0 = 0;
    205     }
    206     *z1Ptr = z1;
    207     *z0Ptr = z0;
    208 
    209 }
    210 
    211 /*
    212 -------------------------------------------------------------------------------
    213 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    214 by 64 _plus_ the number of bits given in `count'.  The shifted result is
    215 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    216 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    217 off form a third 64-bit result as follows:  The _last_ bit shifted off is
    218 the most-significant bit of the extra result, and the other 63 bits of the
    219 extra result are all zero if and only if _all_but_the_last_ bits shifted off
    220 were all zero.  This extra result is stored in the location pointed to by
    221 `z2Ptr'.  The value of `count' can be arbitrarily large.
    222     (This routine makes more sense if `a0', `a1', and `a2' are considered
    223 to form a fixed-point value with binary point between `a1' and `a2'.  This
    224 fixed-point value is shifted right by the number of bits given in `count',
    225 and the integer part of the result is returned at the locations pointed to
    226 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    227 corrupted as described above, and is returned at the location pointed to by
    228 `z2Ptr'.)
    229 -------------------------------------------------------------------------------
    230 */
    231 INLINE void
    232  shift128ExtraRightJamming(
    233      bits64 a0,
    234      bits64 a1,
    235      bits64 a2,
    236      int16 count,
    237      bits64 *z0Ptr,
    238      bits64 *z1Ptr,
    239      bits64 *z2Ptr
    240  )
    241 {
    242     bits64 z0, z1, z2;
    243     int8 negCount = ( - count ) & 63;
    244 
    245     if ( count == 0 ) {
    246         z2 = a2;
    247         z1 = a1;
    248         z0 = a0;
    249     }
    250     else {
    251         if ( count < 64 ) {
    252             z2 = a1<<negCount;
    253             z1 = ( a0<<negCount ) | ( a1>>count );
    254             z0 = a0>>count;
    255         }
    256         else {
    257             if ( count == 64 ) {
    258                 z2 = a1;
    259                 z1 = a0;
    260             }
    261             else {
    262                 a2 |= a1;
    263                 if ( count < 128 ) {
    264                     z2 = a0<<negCount;
    265                     z1 = a0>>( count & 63 );
    266                 }
    267                 else {
    268                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    269                     z1 = 0;
    270                 }
    271             }
    272             z0 = 0;
    273         }
    274         z2 |= ( a2 != 0 );
    275     }
    276     *z2Ptr = z2;
    277     *z1Ptr = z1;
    278     *z0Ptr = z0;
    279 
    280 }
    281 
    282 /*
    283 -------------------------------------------------------------------------------
    284 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    285 number of bits given in `count'.  Any bits shifted off are lost.  The value
    286 of `count' must be less than 64.  The result is broken into two 64-bit
    287 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    288 -------------------------------------------------------------------------------
    289 */
    290 INLINE void
    291  shortShift128Left(
    292      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    293 {
    294 
    295     *z1Ptr = a1<<count;
    296     *z0Ptr =
    297         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    298 
    299 }
    300 
    301 /*
    302 -------------------------------------------------------------------------------
    303 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    304 by the number of bits given in `count'.  Any bits shifted off are lost.
    305 The value of `count' must be less than 64.  The result is broken into three
    306 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    307 `z1Ptr', and `z2Ptr'.
    308 -------------------------------------------------------------------------------
    309 */
    310 INLINE void
    311  shortShift192Left(
    312      bits64 a0,
    313      bits64 a1,
    314      bits64 a2,
    315      int16 count,
    316      bits64 *z0Ptr,
    317      bits64 *z1Ptr,
    318      bits64 *z2Ptr
    319  )
    320 {
    321     bits64 z0, z1, z2;
    322     int8 negCount;
    323 
    324     z2 = a2<<count;
    325     z1 = a1<<count;
    326     z0 = a0<<count;
    327     if ( 0 < count ) {
    328         negCount = ( ( - count ) & 63 );
    329         z1 |= a2>>negCount;
    330         z0 |= a1>>negCount;
    331     }
    332     *z2Ptr = z2;
    333     *z1Ptr = z1;
    334     *z0Ptr = z0;
    335 
    336 }
    337 
    338 /*
    339 -------------------------------------------------------------------------------
    340 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    341 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    342 any carry out is lost.  The result is broken into two 64-bit pieces which
    343 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    344 -------------------------------------------------------------------------------
    345 */
    346 INLINE void
    347  add128(
    348      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    349 {
    350     bits64 z1;
    351 
    352     z1 = a1 + b1;
    353     *z1Ptr = z1;
    354     *z0Ptr = a0 + b0 + ( z1 < a1 );
    355 
    356 }
    357 
    358 /*
    359 -------------------------------------------------------------------------------
    360 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    361 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    362 modulo 2^192, so any carry out is lost.  The result is broken into three
    363 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
    364 `z1Ptr', and `z2Ptr'.
    365 -------------------------------------------------------------------------------
    366 */
    367 INLINE void
    368  add192(
    369      bits64 a0,
    370      bits64 a1,
    371      bits64 a2,
    372      bits64 b0,
    373      bits64 b1,
    374      bits64 b2,
    375      bits64 *z0Ptr,
    376      bits64 *z1Ptr,
    377      bits64 *z2Ptr
    378  )
    379 {
    380     bits64 z0, z1, z2;
    381     int8 carry0, carry1;
    382 
    383     z2 = a2 + b2;
    384     carry1 = ( z2 < a2 );
    385     z1 = a1 + b1;
    386     carry0 = ( z1 < a1 );
    387     z0 = a0 + b0;
    388     z1 += carry1;
    389     z0 += ( z1 < (bits64)carry1 );
    390     z0 += carry0;
    391     *z2Ptr = z2;
    392     *z1Ptr = z1;
    393     *z0Ptr = z0;
    394 
    395 }
    396 
    397 /*
    398 -------------------------------------------------------------------------------
    399 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    400 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    401 2^128, so any borrow out (carry out) is lost.  The result is broken into two
    402 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    403 `z1Ptr'.
    404 -------------------------------------------------------------------------------
    405 */
    406 INLINE void
    407  sub128(
    408      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    409 {
    410 
    411     *z1Ptr = a1 - b1;
    412     *z0Ptr = a0 - b0 - ( a1 < b1 );
    413 
    414 }
    415 
    416 /*
    417 -------------------------------------------------------------------------------
    418 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    419 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    420 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    421 result is broken into three 64-bit pieces which are stored at the locations
    422 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    423 -------------------------------------------------------------------------------
    424 */
    425 INLINE void
    426  sub192(
    427      bits64 a0,
    428      bits64 a1,
    429      bits64 a2,
    430      bits64 b0,
    431      bits64 b1,
    432      bits64 b2,
    433      bits64 *z0Ptr,
    434      bits64 *z1Ptr,
    435      bits64 *z2Ptr
    436  )
    437 {
    438     bits64 z0, z1, z2;
    439     int8 borrow0, borrow1;
    440 
    441     z2 = a2 - b2;
    442     borrow1 = ( a2 < b2 );
    443     z1 = a1 - b1;
    444     borrow0 = ( a1 < b1 );
    445     z0 = a0 - b0;
    446     z0 -= ( z1 < (bits64)borrow1 );
    447     z1 -= borrow1;
    448     z0 -= borrow0;
    449     *z2Ptr = z2;
    450     *z1Ptr = z1;
    451     *z0Ptr = z0;
    452 
    453 }
    454 
    455 /*
    456 -------------------------------------------------------------------------------
    457 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    458 into two 64-bit pieces which are stored at the locations pointed to by
    459 `z0Ptr' and `z1Ptr'.
    460 -------------------------------------------------------------------------------
    461 */
    462 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    463 {
    464     bits32 aHigh, aLow, bHigh, bLow;
    465     bits64 z0, zMiddleA, zMiddleB, z1;
    466 
    467     aLow = (bits32)a;
    468     aHigh = (bits32)(a>>32);
    469     bLow = (bits32)b;
    470     bHigh = (bits32)(b>>32);
    471     z1 = ( (bits64) aLow ) * bLow;
    472     zMiddleA = ( (bits64) aLow ) * bHigh;
    473     zMiddleB = ( (bits64) aHigh ) * bLow;
    474     z0 = ( (bits64) aHigh ) * bHigh;
    475     zMiddleA += zMiddleB;
    476     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    477     zMiddleA <<= 32;
    478     z1 += zMiddleA;
    479     z0 += ( z1 < zMiddleA );
    480     *z1Ptr = z1;
    481     *z0Ptr = z0;
    482 
    483 }
    484 
    485 /*
    486 -------------------------------------------------------------------------------
    487 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
    488 `b' to obtain a 192-bit product.  The product is broken into three 64-bit
    489 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    490 `z2Ptr'.
    491 -------------------------------------------------------------------------------
    492 */
    493 INLINE void
    494  mul128By64To192(
    495      bits64 a0,
    496      bits64 a1,
    497      bits64 b,
    498      bits64 *z0Ptr,
    499      bits64 *z1Ptr,
    500      bits64 *z2Ptr
    501  )
    502 {
    503     bits64 z0, z1, z2, more1;
    504 
    505     mul64To128( a1, b, &z1, &z2 );
    506     mul64To128( a0, b, &z0, &more1 );
    507     add128( z0, more1, 0, z1, &z0, &z1 );
    508     *z2Ptr = z2;
    509     *z1Ptr = z1;
    510     *z0Ptr = z0;
    511 
    512 }
    513 
    514 /*
    515 -------------------------------------------------------------------------------
    516 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    517 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    518 product.  The product is broken into four 64-bit pieces which are stored at
    519 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    520 -------------------------------------------------------------------------------
    521 */
    522 INLINE void
    523  mul128To256(
    524      bits64 a0,
    525      bits64 a1,
    526      bits64 b0,
    527      bits64 b1,
    528      bits64 *z0Ptr,
    529      bits64 *z1Ptr,
    530      bits64 *z2Ptr,
    531      bits64 *z3Ptr
    532  )
    533 {
    534     bits64 z0, z1, z2, z3;
    535     bits64 more1, more2;
    536 
    537     mul64To128( a1, b1, &z2, &z3 );
    538     mul64To128( a1, b0, &z1, &more2 );
    539     add128( z1, more2, 0, z2, &z1, &z2 );
    540     mul64To128( a0, b0, &z0, &more1 );
    541     add128( z0, more1, 0, z1, &z0, &z1 );
    542     mul64To128( a0, b1, &more1, &more2 );
    543     add128( more1, more2, 0, z2, &more1, &z2 );
    544     add128( z0, z1, 0, more1, &z0, &z1 );
    545     *z3Ptr = z3;
    546     *z2Ptr = z2;
    547     *z1Ptr = z1;
    548     *z0Ptr = z0;
    549 
    550 }
    551 
    552 /*
    553 -------------------------------------------------------------------------------
    554 Returns an approximation to the 64-bit integer quotient obtained by dividing
    555 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    556 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    557 toward zero, the approximation returned lies between q and q + 2 inclusive.
    558 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    559 unsigned integer is returned.
    560 -------------------------------------------------------------------------------
    561 */
    562 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    563 {
    564     bits64 b0, b1;
    565     bits64 rem0, rem1, term0, term1;
    566     bits64 z;
    567 
    568     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    569     b0 = b>>32;
    570     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    571     mul64To128( b, z, &term0, &term1 );
    572     sub128( a0, a1, term0, term1, &rem0, &rem1 );
    573     while ( ( (sbits64) rem0 ) < 0 ) {
    574         z -= LIT64( 0x100000000 );
    575         b1 = b<<32;
    576         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    577     }
    578     rem0 = ( rem0<<32 ) | ( rem1>>32 );
    579     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    580     return z;
    581 
    582 }
    583 
    584 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
    585 /*
    586 -------------------------------------------------------------------------------
    587 Returns an approximation to the square root of the 32-bit significand given
    588 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    589 `aExp' (the least significant bit) is 1, the integer returned approximates
    590 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    591 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    592 case, the approximation returned lies strictly within +/-2 of the exact
    593 value.
    594 -------------------------------------------------------------------------------
    595 */
    596 static bits32 estimateSqrt32( int16 aExp, bits32 a )
    597 {
    598     static const bits16 sqrtOddAdjustments[] = {
    599         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    600         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    601     };
    602     static const bits16 sqrtEvenAdjustments[] = {
    603         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    604         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    605     };
    606     int8 idx;
    607     bits32 z;
    608 
    609     idx = ( a>>27 ) & 15;
    610     if ( aExp & 1 ) {
    611         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
    612         z = ( ( a / z )<<14 ) + ( z<<15 );
    613         a >>= 1;
    614     }
    615     else {
    616         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
    617         z = a / z + z;
    618         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    619         if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 );
    620     }
    621     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
    622 
    623 }
    624 #endif
    625 
    626 /*
    627 -------------------------------------------------------------------------------
    628 Returns the number of leading 0 bits before the most-significant 1 bit of
    629 `a'.  If `a' is zero, 32 is returned.
    630 -------------------------------------------------------------------------------
    631 */
    632 static int8 countLeadingZeros32( bits32 a )
    633 {
    634     static const int8 countLeadingZerosHigh[] = {
    635         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    636         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    637         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    638         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    639         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    640         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    641         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    642         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    643         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    644         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    645         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    646         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    647         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    648         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    649         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    650         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    651     };
    652     int8 shiftCount;
    653 
    654     shiftCount = 0;
    655     if ( a < 0x10000 ) {
    656         shiftCount += 16;
    657         a <<= 16;
    658     }
    659     if ( a < 0x1000000 ) {
    660         shiftCount += 8;
    661         a <<= 8;
    662     }
    663     shiftCount += countLeadingZerosHigh[ a>>24 ];
    664     return shiftCount;
    665 
    666 }
    667 
    668 /*
    669 -------------------------------------------------------------------------------
    670 Returns the number of leading 0 bits before the most-significant 1 bit of
    671 `a'.  If `a' is zero, 64 is returned.
    672 -------------------------------------------------------------------------------
    673 */
    674 static int8 countLeadingZeros64( bits64 a )
    675 {
    676     int8 shiftCount;
    677 
    678     shiftCount = 0;
    679     if ( a < ( (bits64) 1 )<<32 ) {
    680         shiftCount += 32;
    681     }
    682     else {
    683         a >>= 32;
    684     }
    685     shiftCount += (int8)countLeadingZeros32( (bits32)a );
    686     return shiftCount;
    687 
    688 }
    689 
    690 /*
    691 -------------------------------------------------------------------------------
    692 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    693 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    694 Otherwise, returns 0.
    695 -------------------------------------------------------------------------------
    696 */
    697 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    698 {
    699 
    700     return ( a0 == b0 ) && ( a1 == b1 );
    701 
    702 }
    703 
    704 /*
    705 -------------------------------------------------------------------------------
    706 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    707 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    708 Otherwise, returns 0.
    709 -------------------------------------------------------------------------------
    710 */
    711 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    712 {
    713 
    714     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    715 
    716 }
    717 
    718 /*
    719 -------------------------------------------------------------------------------
    720 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    721 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    722 returns 0.
    723 -------------------------------------------------------------------------------
    724 */
    725 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    726 {
    727 
    728     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    729 
    730 }
    731 
    732 /*
    733 -------------------------------------------------------------------------------
    734 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    735 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    736 Otherwise, returns 0.
    737 -------------------------------------------------------------------------------
    738 */
    739 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    740 {
    741 
    742     return ( a0 != b0 ) || ( a1 != b1 );
    743 
    744 }
    745 
    746