1 2 /*============================================================================ 3 4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 5 Arithmetic Package, Release 2b. 6 7 Written by John R. Hauser. This work was made possible in part by the 8 International Computer Science Institute, located at Suite 600, 1947 Center 9 Street, Berkeley, California 94704. Funding was partially provided by the 10 National Science Foundation under grant MIP-9311980. The original version 11 of this code was written as part of a project to build a fixed-point vector 12 processor in collaboration with the University of California at Berkeley, 13 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ 15 arithmetic/SoftFloat.html'. 16 17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has 18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES 19 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS 20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, 21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE 22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE 23 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR 24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 25 26 Derivative works are acceptable, even for commercial purposes, so long as 27 (1) the source code for the derivative work includes prominent notice that 28 the work is derivative, and (2) the source code includes prominent notice with 29 these four paragraphs for those parts of this code that are retained. 30 31 =============================================================================*/ 32 33 /*---------------------------------------------------------------------------- 34 | Shifts `a' right by the number of bits given in `count'. If any nonzero 35 | bits are shifted off, they are ``jammed'' into the least significant bit of 36 | the result by setting the least significant bit to 1. The value of `count' 37 | can be arbitrarily large; in particular, if `count' is greater than 32, the 38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 39 | The result is stored in the location pointed to by `zPtr'. 40 *----------------------------------------------------------------------------*/ 41 42 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 43 { 44 bits32 z; 45 46 if ( count == 0 ) { 47 z = a; 48 } 49 else if ( count < 32 ) { 50 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 51 } 52 else { 53 z = ( a != 0 ); 54 } 55 *zPtr = z; 56 57 } 58 59 /*---------------------------------------------------------------------------- 60 | Shifts `a' right by the number of bits given in `count'. If any nonzero 61 | bits are shifted off, they are ``jammed'' into the least significant bit of 62 | the result by setting the least significant bit to 1. The value of `count' 63 | can be arbitrarily large; in particular, if `count' is greater than 64, the 64 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 65 | The result is stored in the location pointed to by `zPtr'. 66 *----------------------------------------------------------------------------*/ 67 68 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 69 { 70 bits64 z; 71 72 if ( count == 0 ) { 73 z = a; 74 } 75 else if ( count < 64 ) { 76 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 77 } 78 else { 79 z = ( a != 0 ); 80 } 81 *zPtr = z; 82 83 } 84 85 /*---------------------------------------------------------------------------- 86 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 87 | _plus_ the number of bits given in `count'. The shifted result is at most 88 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 89 | bits shifted off form a second 64-bit result as follows: The _last_ bit 90 | shifted off is the most-significant bit of the extra result, and the other 91 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 92 | bits shifted off were all zero. This extra result is stored in the location 93 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 94 | (This routine makes more sense if `a0' and `a1' are considered to form 95 | a fixed-point value with binary point between `a0' and `a1'. This fixed- 96 | point value is shifted right by the number of bits given in `count', and 97 | the integer part of the result is returned at the location pointed to by 98 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 99 | described above, and is returned at the location pointed to by `z1Ptr'.) 100 *----------------------------------------------------------------------------*/ 101 102 INLINE void 103 shift64ExtraRightJamming( 104 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 105 { 106 bits64 z0, z1; 107 int8 negCount = ( - count ) & 63; 108 109 if ( count == 0 ) { 110 z1 = a1; 111 z0 = a0; 112 } 113 else if ( count < 64 ) { 114 z1 = ( a0<<negCount ) | ( a1 != 0 ); 115 z0 = a0>>count; 116 } 117 else { 118 if ( count == 64 ) { 119 z1 = a0 | ( a1 != 0 ); 120 } 121 else { 122 z1 = ( ( a0 | a1 ) != 0 ); 123 } 124 z0 = 0; 125 } 126 *z1Ptr = z1; 127 *z0Ptr = z0; 128 129 } 130 131 /*---------------------------------------------------------------------------- 132 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 133 | number of bits given in `count'. Any bits shifted off are lost. The value 134 | of `count' can be arbitrarily large; in particular, if `count' is greater 135 | than 128, the result will be 0. The result is broken into two 64-bit pieces 136 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 137 *----------------------------------------------------------------------------*/ 138 139 INLINE void 140 shift128Right( 141 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 142 { 143 bits64 z0, z1; 144 int8 negCount = ( - count ) & 63; 145 146 if ( count == 0 ) { 147 z1 = a1; 148 z0 = a0; 149 } 150 else if ( count < 64 ) { 151 z1 = ( a0<<negCount ) | ( a1>>count ); 152 z0 = a0>>count; 153 } 154 else { 155 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 156 z0 = 0; 157 } 158 *z1Ptr = z1; 159 *z0Ptr = z0; 160 161 } 162 163 /*---------------------------------------------------------------------------- 164 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 165 | number of bits given in `count'. If any nonzero bits are shifted off, they 166 | are ``jammed'' into the least significant bit of the result by setting the 167 | least significant bit to 1. The value of `count' can be arbitrarily large; 168 | in particular, if `count' is greater than 128, the result will be either 169 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 170 | nonzero. The result is broken into two 64-bit pieces which are stored at 171 | the locations pointed to by `z0Ptr' and `z1Ptr'. 172 *----------------------------------------------------------------------------*/ 173 174 INLINE void 175 shift128RightJamming( 176 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 177 { 178 bits64 z0, z1; 179 int8 negCount = ( - count ) & 63; 180 181 if ( count == 0 ) { 182 z1 = a1; 183 z0 = a0; 184 } 185 else if ( count < 64 ) { 186 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 187 z0 = a0>>count; 188 } 189 else { 190 if ( count == 64 ) { 191 z1 = a0 | ( a1 != 0 ); 192 } 193 else if ( count < 128 ) { 194 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 195 } 196 else { 197 z1 = ( ( a0 | a1 ) != 0 ); 198 } 199 z0 = 0; 200 } 201 *z1Ptr = z1; 202 *z0Ptr = z0; 203 204 } 205 206 /*---------------------------------------------------------------------------- 207 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 208 | by 64 _plus_ the number of bits given in `count'. The shifted result is 209 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 210 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 211 | off form a third 64-bit result as follows: The _last_ bit shifted off is 212 | the most-significant bit of the extra result, and the other 63 bits of the 213 | extra result are all zero if and only if _all_but_the_last_ bits shifted off 214 | were all zero. This extra result is stored in the location pointed to by 215 | `z2Ptr'. The value of `count' can be arbitrarily large. 216 | (This routine makes more sense if `a0', `a1', and `a2' are considered 217 | to form a fixed-point value with binary point between `a1' and `a2'. This 218 | fixed-point value is shifted right by the number of bits given in `count', 219 | and the integer part of the result is returned at the locations pointed to 220 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 221 | corrupted as described above, and is returned at the location pointed to by 222 | `z2Ptr'.) 223 *----------------------------------------------------------------------------*/ 224 225 INLINE void 226 shift128ExtraRightJamming( 227 bits64 a0, 228 bits64 a1, 229 bits64 a2, 230 int16 count, 231 bits64 *z0Ptr, 232 bits64 *z1Ptr, 233 bits64 *z2Ptr 234 ) 235 { 236 bits64 z0, z1, z2; 237 int8 negCount = ( - count ) & 63; 238 239 if ( count == 0 ) { 240 z2 = a2; 241 z1 = a1; 242 z0 = a0; 243 } 244 else { 245 if ( count < 64 ) { 246 z2 = a1<<negCount; 247 z1 = ( a0<<negCount ) | ( a1>>count ); 248 z0 = a0>>count; 249 } 250 else { 251 if ( count == 64 ) { 252 z2 = a1; 253 z1 = a0; 254 } 255 else { 256 a2 |= a1; 257 if ( count < 128 ) { 258 z2 = a0<<negCount; 259 z1 = a0>>( count & 63 ); 260 } 261 else { 262 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 263 z1 = 0; 264 } 265 } 266 z0 = 0; 267 } 268 z2 |= ( a2 != 0 ); 269 } 270 *z2Ptr = z2; 271 *z1Ptr = z1; 272 *z0Ptr = z0; 273 274 } 275 276 /*---------------------------------------------------------------------------- 277 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 278 | number of bits given in `count'. Any bits shifted off are lost. The value 279 | of `count' must be less than 64. The result is broken into two 64-bit 280 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 281 *----------------------------------------------------------------------------*/ 282 283 INLINE void 284 shortShift128Left( 285 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 286 { 287 288 *z1Ptr = a1<<count; 289 *z0Ptr = 290 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 291 292 } 293 294 /*---------------------------------------------------------------------------- 295 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 296 | by the number of bits given in `count'. Any bits shifted off are lost. 297 | The value of `count' must be less than 64. The result is broken into three 298 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 299 | `z1Ptr', and `z2Ptr'. 300 *----------------------------------------------------------------------------*/ 301 302 INLINE void 303 shortShift192Left( 304 bits64 a0, 305 bits64 a1, 306 bits64 a2, 307 int16 count, 308 bits64 *z0Ptr, 309 bits64 *z1Ptr, 310 bits64 *z2Ptr 311 ) 312 { 313 bits64 z0, z1, z2; 314 int8 negCount; 315 316 z2 = a2<<count; 317 z1 = a1<<count; 318 z0 = a0<<count; 319 if ( 0 < count ) { 320 negCount = ( ( - count ) & 63 ); 321 z1 |= a2>>negCount; 322 z0 |= a1>>negCount; 323 } 324 *z2Ptr = z2; 325 *z1Ptr = z1; 326 *z0Ptr = z0; 327 328 } 329 330 /*---------------------------------------------------------------------------- 331 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 332 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 333 | any carry out is lost. The result is broken into two 64-bit pieces which 334 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 335 *----------------------------------------------------------------------------*/ 336 337 INLINE void 338 add128( 339 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 340 { 341 bits64 z1; 342 343 z1 = a1 + b1; 344 *z1Ptr = z1; 345 *z0Ptr = a0 + b0 + ( z1 < a1 ); 346 347 } 348 349 /*---------------------------------------------------------------------------- 350 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 351 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 352 | modulo 2^192, so any carry out is lost. The result is broken into three 353 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 354 | `z1Ptr', and `z2Ptr'. 355 *----------------------------------------------------------------------------*/ 356 357 INLINE void 358 add192( 359 bits64 a0, 360 bits64 a1, 361 bits64 a2, 362 bits64 b0, 363 bits64 b1, 364 bits64 b2, 365 bits64 *z0Ptr, 366 bits64 *z1Ptr, 367 bits64 *z2Ptr 368 ) 369 { 370 bits64 z0, z1, z2; 371 int8 carry0, carry1; 372 373 z2 = a2 + b2; 374 carry1 = ( z2 < a2 ); 375 z1 = a1 + b1; 376 carry0 = ( z1 < a1 ); 377 z0 = a0 + b0; 378 z1 += carry1; 379 z0 += ( z1 < carry1 ); 380 z0 += carry0; 381 *z2Ptr = z2; 382 *z1Ptr = z1; 383 *z0Ptr = z0; 384 385 } 386 387 /*---------------------------------------------------------------------------- 388 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 389 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 390 | 2^128, so any borrow out (carry out) is lost. The result is broken into two 391 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 392 | `z1Ptr'. 393 *----------------------------------------------------------------------------*/ 394 395 INLINE void 396 sub128( 397 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 398 { 399 400 *z1Ptr = a1 - b1; 401 *z0Ptr = a0 - b0 - ( a1 < b1 ); 402 403 } 404 405 /*---------------------------------------------------------------------------- 406 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 407 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 408 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 409 | result is broken into three 64-bit pieces which are stored at the locations 410 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 411 *----------------------------------------------------------------------------*/ 412 413 INLINE void 414 sub192( 415 bits64 a0, 416 bits64 a1, 417 bits64 a2, 418 bits64 b0, 419 bits64 b1, 420 bits64 b2, 421 bits64 *z0Ptr, 422 bits64 *z1Ptr, 423 bits64 *z2Ptr 424 ) 425 { 426 bits64 z0, z1, z2; 427 int8 borrow0, borrow1; 428 429 z2 = a2 - b2; 430 borrow1 = ( a2 < b2 ); 431 z1 = a1 - b1; 432 borrow0 = ( a1 < b1 ); 433 z0 = a0 - b0; 434 z0 -= ( z1 < borrow1 ); 435 z1 -= borrow1; 436 z0 -= borrow0; 437 *z2Ptr = z2; 438 *z1Ptr = z1; 439 *z0Ptr = z0; 440 441 } 442 443 /*---------------------------------------------------------------------------- 444 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 445 | into two 64-bit pieces which are stored at the locations pointed to by 446 | `z0Ptr' and `z1Ptr'. 447 *----------------------------------------------------------------------------*/ 448 449 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 450 { 451 bits32 aHigh, aLow, bHigh, bLow; 452 bits64 z0, zMiddleA, zMiddleB, z1; 453 454 aLow = a; 455 aHigh = a>>32; 456 bLow = b; 457 bHigh = b>>32; 458 z1 = ( (bits64) aLow ) * bLow; 459 zMiddleA = ( (bits64) aLow ) * bHigh; 460 zMiddleB = ( (bits64) aHigh ) * bLow; 461 z0 = ( (bits64) aHigh ) * bHigh; 462 zMiddleA += zMiddleB; 463 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 464 zMiddleA <<= 32; 465 z1 += zMiddleA; 466 z0 += ( z1 < zMiddleA ); 467 *z1Ptr = z1; 468 *z0Ptr = z0; 469 470 } 471 472 /*---------------------------------------------------------------------------- 473 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 474 | `b' to obtain a 192-bit product. The product is broken into three 64-bit 475 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 476 | `z2Ptr'. 477 *----------------------------------------------------------------------------*/ 478 479 INLINE void 480 mul128By64To192( 481 bits64 a0, 482 bits64 a1, 483 bits64 b, 484 bits64 *z0Ptr, 485 bits64 *z1Ptr, 486 bits64 *z2Ptr 487 ) 488 { 489 bits64 z0, z1, z2, more1; 490 491 mul64To128( a1, b, &z1, &z2 ); 492 mul64To128( a0, b, &z0, &more1 ); 493 add128( z0, more1, 0, z1, &z0, &z1 ); 494 *z2Ptr = z2; 495 *z1Ptr = z1; 496 *z0Ptr = z0; 497 498 } 499 500 /*---------------------------------------------------------------------------- 501 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 502 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 503 | product. The product is broken into four 64-bit pieces which are stored at 504 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 505 *----------------------------------------------------------------------------*/ 506 507 INLINE void 508 mul128To256( 509 bits64 a0, 510 bits64 a1, 511 bits64 b0, 512 bits64 b1, 513 bits64 *z0Ptr, 514 bits64 *z1Ptr, 515 bits64 *z2Ptr, 516 bits64 *z3Ptr 517 ) 518 { 519 bits64 z0, z1, z2, z3; 520 bits64 more1, more2; 521 522 mul64To128( a1, b1, &z2, &z3 ); 523 mul64To128( a1, b0, &z1, &more2 ); 524 add128( z1, more2, 0, z2, &z1, &z2 ); 525 mul64To128( a0, b0, &z0, &more1 ); 526 add128( z0, more1, 0, z1, &z0, &z1 ); 527 mul64To128( a0, b1, &more1, &more2 ); 528 add128( more1, more2, 0, z2, &more1, &z2 ); 529 add128( z0, z1, 0, more1, &z0, &z1 ); 530 *z3Ptr = z3; 531 *z2Ptr = z2; 532 *z1Ptr = z1; 533 *z0Ptr = z0; 534 535 } 536 537 /*---------------------------------------------------------------------------- 538 | Returns an approximation to the 64-bit integer quotient obtained by dividing 539 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 540 | divisor `b' must be at least 2^63. If q is the exact quotient truncated 541 | toward zero, the approximation returned lies between q and q + 2 inclusive. 542 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 543 | unsigned integer is returned. 544 *----------------------------------------------------------------------------*/ 545 546 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 547 { 548 bits64 b0, b1; 549 bits64 rem0, rem1, term0, term1; 550 bits64 z; 551 552 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 553 b0 = b>>32; 554 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 555 mul64To128( b, z, &term0, &term1 ); 556 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 557 while ( ( (sbits64) rem0 ) < 0 ) { 558 z -= LIT64( 0x100000000 ); 559 b1 = b<<32; 560 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 561 } 562 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 563 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 564 return z; 565 566 } 567 568 /*---------------------------------------------------------------------------- 569 | Returns an approximation to the square root of the 32-bit significand given 570 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 571 | `aExp' (the least significant bit) is 1, the integer returned approximates 572 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 573 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 574 | case, the approximation returned lies strictly within +/-2 of the exact 575 | value. 576 *----------------------------------------------------------------------------*/ 577 578 static bits32 estimateSqrt32( int16 aExp, bits32 a ) 579 { 580 static const bits16 sqrtOddAdjustments[] = { 581 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 582 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 583 }; 584 static const bits16 sqrtEvenAdjustments[] = { 585 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 586 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 587 }; 588 int8 index; 589 bits32 z; 590 591 index = ( a>>27 ) & 15; 592 if ( aExp & 1 ) { 593 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; 594 z = ( ( a / z )<<14 ) + ( z<<15 ); 595 a >>= 1; 596 } 597 else { 598 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; 599 z = a / z + z; 600 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 601 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 602 } 603 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 604 605 } 606 607 /*---------------------------------------------------------------------------- 608 | Returns the number of leading 0 bits before the most-significant 1 bit of 609 | `a'. If `a' is zero, 32 is returned. 610 *----------------------------------------------------------------------------*/ 611 612 static int8 countLeadingZeros32( bits32 a ) 613 { 614 static const int8 countLeadingZerosHigh[] = { 615 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 616 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 617 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 618 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 619 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 620 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 621 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 624 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 625 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 631 }; 632 int8 shiftCount; 633 634 shiftCount = 0; 635 if ( a < 0x10000 ) { 636 shiftCount += 16; 637 a <<= 16; 638 } 639 if ( a < 0x1000000 ) { 640 shiftCount += 8; 641 a <<= 8; 642 } 643 shiftCount += countLeadingZerosHigh[ a>>24 ]; 644 return shiftCount; 645 646 } 647 648 /*---------------------------------------------------------------------------- 649 | Returns the number of leading 0 bits before the most-significant 1 bit of 650 | `a'. If `a' is zero, 64 is returned. 651 *----------------------------------------------------------------------------*/ 652 653 static int8 countLeadingZeros64( bits64 a ) 654 { 655 int8 shiftCount; 656 657 shiftCount = 0; 658 if ( a < ( (bits64) 1 )<<32 ) { 659 shiftCount += 32; 660 } 661 else { 662 a >>= 32; 663 } 664 shiftCount += countLeadingZeros32( a ); 665 return shiftCount; 666 667 } 668 669 /*---------------------------------------------------------------------------- 670 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 671 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 672 | Otherwise, returns 0. 673 *----------------------------------------------------------------------------*/ 674 675 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 676 { 677 678 return ( a0 == b0 ) && ( a1 == b1 ); 679 680 } 681 682 /*---------------------------------------------------------------------------- 683 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 684 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 685 | Otherwise, returns 0. 686 *----------------------------------------------------------------------------*/ 687 688 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 689 { 690 691 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 692 693 } 694 695 /*---------------------------------------------------------------------------- 696 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 697 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 698 | returns 0. 699 *----------------------------------------------------------------------------*/ 700 701 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 702 { 703 704 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 705 706 } 707 708 /*---------------------------------------------------------------------------- 709 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 710 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 711 | Otherwise, returns 0. 712 *----------------------------------------------------------------------------*/ 713 714 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 715 { 716 717 return ( a0 != b0 ) || ( a1 != b1 ); 718 719 } 720