Home | History | Annotate | Download | only in bits32
      1 
      2 /*
      3 ===============================================================================
      4 
      5 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      6 Arithmetic Package, Release 2a.
      7 
      8 Written by John R. Hauser.  This work was made possible in part by the
      9 International Computer Science Institute, located at Suite 600, 1947 Center
     10 Street, Berkeley, California 94704.  Funding was partially provided by the
     11 National Science Foundation under grant MIP-9311980.  The original version
     12 of this code was written as part of a project to build a fixed-point vector
     13 processor in collaboration with the University of California at Berkeley,
     14 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     15 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     16 arithmetic/SoftFloat.html'.
     17 
     18 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     19 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     20 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     22 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     23 
     24 Derivative works are acceptable, even for commercial purposes, so long as
     25 (1) they include prominent notice that the work is derivative, and (2) they
     26 include prominent notice akin to these four paragraphs for those parts of
     27 this code that are retained.
     28 
     29 ===============================================================================
     30 */
     31 
     32 /*
     33 -------------------------------------------------------------------------------
     34 Shifts `a' right by the number of bits given in `count'.  If any nonzero
     35 bits are shifted off, they are ``jammed'' into the least significant bit of
     36 the result by setting the least significant bit to 1.  The value of `count'
     37 can be arbitrarily large; in particular, if `count' is greater than 32, the
     38 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     39 The result is stored in the location pointed to by `zPtr'.
     40 -------------------------------------------------------------------------------
     41 */
     42 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     43 {
     44     bits32 z;
     45 
     46     if ( count == 0 ) {
     47         z = a;
     48     }
     49     else if ( count < 32 ) {
     50         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     51     }
     52     else {
     53         z = ( a != 0 );
     54     }
     55     *zPtr = z;
     56 
     57 }
     58 
     59 /*
     60 -------------------------------------------------------------------------------
     61 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
     62 number of bits given in `count'.  Any bits shifted off are lost.  The value
     63 of `count' can be arbitrarily large; in particular, if `count' is greater
     64 than 64, the result will be 0.  The result is broken into two 32-bit pieces
     65 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
     66 -------------------------------------------------------------------------------
     67 */
     68 INLINE void
     69  shift64Right(
     70      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
     71 {
     72     bits32 z0, z1;
     73     int8 negCount = ( - count ) & 31;
     74 
     75     if ( count == 0 ) {
     76         z1 = a1;
     77         z0 = a0;
     78     }
     79     else if ( count < 32 ) {
     80         z1 = ( a0<<negCount ) | ( a1>>count );
     81         z0 = a0>>count;
     82     }
     83     else {
     84         z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;
     85         z0 = 0;
     86     }
     87     *z1Ptr = z1;
     88     *z0Ptr = z0;
     89 
     90 }
     91 
     92 /*
     93 -------------------------------------------------------------------------------
     94 Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
     95 number of bits given in `count'.  If any nonzero bits are shifted off, they
     96 are ``jammed'' into the least significant bit of the result by setting the
     97 least significant bit to 1.  The value of `count' can be arbitrarily large;
     98 in particular, if `count' is greater than 64, the result will be either 0
     99 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    100 nonzero.  The result is broken into two 32-bit pieces which are stored at
    101 the locations pointed to by `z0Ptr' and `z1Ptr'.
    102 -------------------------------------------------------------------------------
    103 */
    104 INLINE void
    105  shift64RightJamming(
    106      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
    107 {
    108     bits32 z0, z1;
    109     int8 negCount = ( - count ) & 31;
    110 
    111     if ( count == 0 ) {
    112         z1 = a1;
    113         z0 = a0;
    114     }
    115     else if ( count < 32 ) {
    116         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    117         z0 = a0>>count;
    118     }
    119     else {
    120         if ( count == 32 ) {
    121             z1 = a0 | ( a1 != 0 );
    122         }
    123         else if ( count < 64 ) {
    124             z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    125         }
    126         else {
    127             z1 = ( ( a0 | a1 ) != 0 );
    128         }
    129         z0 = 0;
    130     }
    131     *z1Ptr = z1;
    132     *z0Ptr = z0;
    133 
    134 }
    135 
    136 /*
    137 -------------------------------------------------------------------------------
    138 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
    139 by 32 _plus_ the number of bits given in `count'.  The shifted result is
    140 at most 64 nonzero bits; these are broken into two 32-bit pieces which are
    141 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    142 off form a third 32-bit result as follows:  The _last_ bit shifted off is
    143 the most-significant bit of the extra result, and the other 31 bits of the
    144 extra result are all zero if and only if _all_but_the_last_ bits shifted off
    145 were all zero.  This extra result is stored in the location pointed to by
    146 `z2Ptr'.  The value of `count' can be arbitrarily large.
    147     (This routine makes more sense if `a0', `a1', and `a2' are considered
    148 to form a fixed-point value with binary point between `a1' and `a2'.  This
    149 fixed-point value is shifted right by the number of bits given in `count',
    150 and the integer part of the result is returned at the locations pointed to
    151 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    152 corrupted as described above, and is returned at the location pointed to by
    153 `z2Ptr'.)
    154 -------------------------------------------------------------------------------
    155 */
    156 INLINE void
    157  shift64ExtraRightJamming(
    158      bits32 a0,
    159      bits32 a1,
    160      bits32 a2,
    161      int16 count,
    162      bits32 *z0Ptr,
    163      bits32 *z1Ptr,
    164      bits32 *z2Ptr
    165  )
    166 {
    167     bits32 z0, z1, z2;
    168     int8 negCount = ( - count ) & 31;
    169 
    170     if ( count == 0 ) {
    171         z2 = a2;
    172         z1 = a1;
    173         z0 = a0;
    174     }
    175     else {
    176         if ( count < 32 ) {
    177             z2 = a1<<negCount;
    178             z1 = ( a0<<negCount ) | ( a1>>count );
    179             z0 = a0>>count;
    180         }
    181         else {
    182             if ( count == 32 ) {
    183                 z2 = a1;
    184                 z1 = a0;
    185             }
    186             else {
    187                 a2 |= a1;
    188                 if ( count < 64 ) {
    189                     z2 = a0<<negCount;
    190                     z1 = a0>>( count & 31 );
    191                 }
    192                 else {
    193                     z2 = ( count == 64 ) ? a0 : ( a0 != 0 );
    194                     z1 = 0;
    195                 }
    196             }
    197             z0 = 0;
    198         }
    199         z2 |= ( a2 != 0 );
    200     }
    201     *z2Ptr = z2;
    202     *z1Ptr = z1;
    203     *z0Ptr = z0;
    204 
    205 }
    206 
    207 /*
    208 -------------------------------------------------------------------------------
    209 Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
    210 number of bits given in `count'.  Any bits shifted off are lost.  The value
    211 of `count' must be less than 32.  The result is broken into two 32-bit
    212 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    213 -------------------------------------------------------------------------------
    214 */
    215 INLINE void
    216  shortShift64Left(
    217      bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )
    218 {
    219 
    220     *z1Ptr = a1<<count;
    221     *z0Ptr =
    222         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
    223 
    224 }
    225 
    226 /*
    227 -------------------------------------------------------------------------------
    228 Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
    229 by the number of bits given in `count'.  Any bits shifted off are lost.
    230 The value of `count' must be less than 32.  The result is broken into three
    231 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
    232 `z1Ptr', and `z2Ptr'.
    233 -------------------------------------------------------------------------------
    234 */
    235 INLINE void
    236  shortShift96Left(
    237      bits32 a0,
    238      bits32 a1,
    239      bits32 a2,
    240      int16 count,
    241      bits32 *z0Ptr,
    242      bits32 *z1Ptr,
    243      bits32 *z2Ptr
    244  )
    245 {
    246     bits32 z0, z1, z2;
    247     int8 negCount;
    248 
    249     z2 = a2<<count;
    250     z1 = a1<<count;
    251     z0 = a0<<count;
    252     if ( 0 < count ) {
    253         negCount = ( ( - count ) & 31 );
    254         z1 |= a2>>negCount;
    255         z0 |= a1>>negCount;
    256     }
    257     *z2Ptr = z2;
    258     *z1Ptr = z1;
    259     *z0Ptr = z0;
    260 
    261 }
    262 
    263 /*
    264 -------------------------------------------------------------------------------
    265 Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
    266 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
    267 any carry out is lost.  The result is broken into two 32-bit pieces which
    268 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    269 -------------------------------------------------------------------------------
    270 */
    271 INLINE void
    272  add64(
    273      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
    274 {
    275     bits32 z1;
    276 
    277     z1 = a1 + b1;
    278     *z1Ptr = z1;
    279     *z0Ptr = a0 + b0 + ( z1 < a1 );
    280 
    281 }
    282 
    283 /*
    284 -------------------------------------------------------------------------------
    285 Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
    286 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    287 modulo 2^96, so any carry out is lost.  The result is broken into three
    288 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
    289 `z1Ptr', and `z2Ptr'.
    290 -------------------------------------------------------------------------------
    291 */
    292 INLINE void
    293  add96(
    294      bits32 a0,
    295      bits32 a1,
    296      bits32 a2,
    297      bits32 b0,
    298      bits32 b1,
    299      bits32 b2,
    300      bits32 *z0Ptr,
    301      bits32 *z1Ptr,
    302      bits32 *z2Ptr
    303  )
    304 {
    305     bits32 z0, z1, z2;
    306     int8 carry0, carry1;
    307 
    308     z2 = a2 + b2;
    309     carry1 = ( z2 < a2 );
    310     z1 = a1 + b1;
    311     carry0 = ( z1 < a1 );
    312     z0 = a0 + b0;
    313     z1 += carry1;
    314     z0 += ( z1 < (bits32)carry1 );
    315     z0 += carry0;
    316     *z2Ptr = z2;
    317     *z1Ptr = z1;
    318     *z0Ptr = z0;
    319 
    320 }
    321 
    322 /*
    323 -------------------------------------------------------------------------------
    324 Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
    325 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    326 2^64, so any borrow out (carry out) is lost.  The result is broken into two
    327 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    328 `z1Ptr'.
    329 -------------------------------------------------------------------------------
    330 */
    331 INLINE void
    332  sub64(
    333      bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )
    334 {
    335 
    336     *z1Ptr = a1 - b1;
    337     *z0Ptr = a0 - b0 - ( a1 < b1 );
    338 
    339 }
    340 
    341 /*
    342 -------------------------------------------------------------------------------
    343 Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
    344 the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
    345 is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
    346 into three 32-bit pieces which are stored at the locations pointed to by
    347 `z0Ptr', `z1Ptr', and `z2Ptr'.
    348 -------------------------------------------------------------------------------
    349 */
    350 INLINE void
    351  sub96(
    352      bits32 a0,
    353      bits32 a1,
    354      bits32 a2,
    355      bits32 b0,
    356      bits32 b1,
    357      bits32 b2,
    358      bits32 *z0Ptr,
    359      bits32 *z1Ptr,
    360      bits32 *z2Ptr
    361  )
    362 {
    363     bits32 z0, z1, z2;
    364     int8 borrow0, borrow1;
    365 
    366     z2 = a2 - b2;
    367     borrow1 = ( a2 < b2 );
    368     z1 = a1 - b1;
    369     borrow0 = ( a1 < b1 );
    370     z0 = a0 - b0;
    371     z0 -= ( z1 < (bits32)borrow1 );
    372     z1 -= borrow1;
    373     z0 -= borrow0;
    374     *z2Ptr = z2;
    375     *z1Ptr = z1;
    376     *z0Ptr = z0;
    377 
    378 }
    379 
    380 /*
    381 -------------------------------------------------------------------------------
    382 Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
    383 into two 32-bit pieces which are stored at the locations pointed to by
    384 `z0Ptr' and `z1Ptr'.
    385 -------------------------------------------------------------------------------
    386 */
    387 INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )
    388 {
    389     bits16 aHigh, aLow, bHigh, bLow;
    390     bits32 z0, zMiddleA, zMiddleB, z1;
    391 
    392     aLow = a;
    393     aHigh = a>>16;
    394     bLow = b;
    395     bHigh = b>>16;
    396     z1 = ( (bits32) aLow ) * bLow;
    397     zMiddleA = ( (bits32) aLow ) * bHigh;
    398     zMiddleB = ( (bits32) aHigh ) * bLow;
    399     z0 = ( (bits32) aHigh ) * bHigh;
    400     zMiddleA += zMiddleB;
    401     z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
    402     zMiddleA <<= 16;
    403     z1 += zMiddleA;
    404     z0 += ( z1 < zMiddleA );
    405     *z1Ptr = z1;
    406     *z0Ptr = z0;
    407 
    408 }
    409 
    410 /*
    411 -------------------------------------------------------------------------------
    412 Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
    413 to obtain a 96-bit product.  The product is broken into three 32-bit pieces
    414 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    415 `z2Ptr'.
    416 -------------------------------------------------------------------------------
    417 */
    418 INLINE void
    419  mul64By32To96(
    420      bits32 a0,
    421      bits32 a1,
    422      bits32 b,
    423      bits32 *z0Ptr,
    424      bits32 *z1Ptr,
    425      bits32 *z2Ptr
    426  )
    427 {
    428     bits32 z0, z1, z2, more1;
    429 
    430     mul32To64( a1, b, &z1, &z2 );
    431     mul32To64( a0, b, &z0, &more1 );
    432     add64( z0, more1, 0, z1, &z0, &z1 );
    433     *z2Ptr = z2;
    434     *z1Ptr = z1;
    435     *z0Ptr = z0;
    436 
    437 }
    438 
    439 /*
    440 -------------------------------------------------------------------------------
    441 Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
    442 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
    443 product.  The product is broken into four 32-bit pieces which are stored at
    444 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    445 -------------------------------------------------------------------------------
    446 */
    447 INLINE void
    448  mul64To128(
    449      bits32 a0,
    450      bits32 a1,
    451      bits32 b0,
    452      bits32 b1,
    453      bits32 *z0Ptr,
    454      bits32 *z1Ptr,
    455      bits32 *z2Ptr,
    456      bits32 *z3Ptr
    457  )
    458 {
    459     bits32 z0, z1, z2, z3;
    460     bits32 more1, more2;
    461 
    462     mul32To64( a1, b1, &z2, &z3 );
    463     mul32To64( a1, b0, &z1, &more2 );
    464     add64( z1, more2, 0, z2, &z1, &z2 );
    465     mul32To64( a0, b0, &z0, &more1 );
    466     add64( z0, more1, 0, z1, &z0, &z1 );
    467     mul32To64( a0, b1, &more1, &more2 );
    468     add64( more1, more2, 0, z2, &more1, &z2 );
    469     add64( z0, z1, 0, more1, &z0, &z1 );
    470     *z3Ptr = z3;
    471     *z2Ptr = z2;
    472     *z1Ptr = z1;
    473     *z0Ptr = z0;
    474 
    475 }
    476 
    477 /*
    478 -------------------------------------------------------------------------------
    479 Returns an approximation to the 32-bit integer quotient obtained by dividing
    480 `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
    481 divisor `b' must be at least 2^31.  If q is the exact quotient truncated
    482 toward zero, the approximation returned lies between q and q + 2 inclusive.
    483 If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
    484 unsigned integer is returned.
    485 -------------------------------------------------------------------------------
    486 */
    487 static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )
    488 {
    489     bits32 b0, b1;
    490     bits32 rem0, rem1, term0, term1;
    491     bits32 z;
    492 
    493     if ( b <= a0 ) return 0xFFFFFFFF;
    494     b0 = b>>16;
    495     z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;
    496     mul32To64( b, z, &term0, &term1 );
    497     sub64( a0, a1, term0, term1, &rem0, &rem1 );
    498     while ( ( (sbits32) rem0 ) < 0 ) {
    499         z -= 0x10000;
    500         b1 = b<<16;
    501         add64( rem0, rem1, b0, b1, &rem0, &rem1 );
    502     }
    503     rem0 = ( rem0<<16 ) | ( rem1>>16 );
    504     z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;
    505     return z;
    506 
    507 }
    508 
    509 #ifndef SOFTFLOAT_FOR_GCC
    510 /*
    511 -------------------------------------------------------------------------------
    512 Returns an approximation to the square root of the 32-bit significand given
    513 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    514 `aExp' (the least significant bit) is 1, the integer returned approximates
    515 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    516 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    517 case, the approximation returned lies strictly within +/-2 of the exact
    518 value.
    519 -------------------------------------------------------------------------------
    520 */
    521 static bits32 estimateSqrt32( int16 aExp, bits32 a )
    522 {
    523     static const bits16 sqrtOddAdjustments[] = {
    524         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    525         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    526     };
    527     static const bits16 sqrtEvenAdjustments[] = {
    528         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    529         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    530     };
    531     int8 index;
    532     bits32 z;
    533 
    534     index = ( a>>27 ) & 15;
    535     if ( aExp & 1 ) {
    536         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
    537         z = ( ( a / z )<<14 ) + ( z<<15 );
    538         a >>= 1;
    539     }
    540     else {
    541         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
    542         z = a / z + z;
    543         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    544         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    545     }
    546     return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );
    547 
    548 }
    549 #endif
    550 
    551 /*
    552 -------------------------------------------------------------------------------
    553 Returns the number of leading 0 bits before the most-significant 1 bit of
    554 `a'.  If `a' is zero, 32 is returned.
    555 -------------------------------------------------------------------------------
    556 */
    557 static int8 countLeadingZeros32( bits32 a )
    558 {
    559     static const int8 countLeadingZerosHigh[] = {
    560         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    561         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    562         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    563         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    564         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    565         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    566         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    567         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    568         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    569         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    570         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    571         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    572         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    573         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    574         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    575         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    576     };
    577     int8 shiftCount;
    578 
    579     shiftCount = 0;
    580     if ( a < 0x10000 ) {
    581         shiftCount += 16;
    582         a <<= 16;
    583     }
    584     if ( a < 0x1000000 ) {
    585         shiftCount += 8;
    586         a <<= 8;
    587     }
    588     shiftCount += countLeadingZerosHigh[ a>>24 ];
    589     return shiftCount;
    590 
    591 }
    592 
    593 /*
    594 -------------------------------------------------------------------------------
    595 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
    596 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    597 returns 0.
    598 -------------------------------------------------------------------------------
    599 */
    600 INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    601 {
    602 
    603     return ( a0 == b0 ) && ( a1 == b1 );
    604 
    605 }
    606 
    607 /*
    608 -------------------------------------------------------------------------------
    609 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
    610 than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
    611 Otherwise, returns 0.
    612 -------------------------------------------------------------------------------
    613 */
    614 INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    615 {
    616 
    617     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    618 
    619 }
    620 
    621 /*
    622 -------------------------------------------------------------------------------
    623 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
    624 than the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    625 returns 0.
    626 -------------------------------------------------------------------------------
    627 */
    628 INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    629 {
    630 
    631     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    632 
    633 }
    634 
    635 /*
    636 -------------------------------------------------------------------------------
    637 Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
    638 equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    639 returns 0.
    640 -------------------------------------------------------------------------------
    641 */
    642 INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )
    643 {
    644 
    645     return ( a0 != b0 ) || ( a1 != b1 );
    646 
    647 }
    648 
    649