/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
35 36 =============================================================================*/ 37 38 /*---------------------------------------------------------------------------- 39 | This macro tests for minimum version of the GNU C compiler. 40 *----------------------------------------------------------------------------*/ 41 #if defined(__GNUC__) && defined(__GNUC_MINOR__) 42 # define SOFTFLOAT_GNUC_PREREQ(maj, min) \ 43 ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) 44 #else 45 # define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 46 #endif 47 48 49 /*---------------------------------------------------------------------------- 50 | Shifts `a' right by the number of bits given in `count'. If any nonzero 51 | bits are shifted off, they are ``jammed'' into the least significant bit of 52 | the result by setting the least significant bit to 1. The value of `count' 53 | can be arbitrarily large; in particular, if `count' is greater than 32, the 54 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 55 | The result is stored in the location pointed to by `zPtr'. 56 *----------------------------------------------------------------------------*/ 57 58 INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr) 59 { 60 uint32_t z; 61 62 if ( count == 0 ) { 63 z = a; 64 } 65 else if ( count < 32 ) { 66 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 67 } 68 else { 69 z = ( a != 0 ); 70 } 71 *zPtr = z; 72 73 } 74 75 /*---------------------------------------------------------------------------- 76 | Shifts `a' right by the number of bits given in `count'. If any nonzero 77 | bits are shifted off, they are ``jammed'' into the least significant bit of 78 | the result by setting the least significant bit to 1. The value of `count' 79 | can be arbitrarily large; in particular, if `count' is greater than 64, the 80 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 
81 | The result is stored in the location pointed to by `zPtr'. 82 *----------------------------------------------------------------------------*/ 83 84 INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr) 85 { 86 uint64_t z; 87 88 if ( count == 0 ) { 89 z = a; 90 } 91 else if ( count < 64 ) { 92 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 93 } 94 else { 95 z = ( a != 0 ); 96 } 97 *zPtr = z; 98 99 } 100 101 /*---------------------------------------------------------------------------- 102 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 103 | _plus_ the number of bits given in `count'. The shifted result is at most 104 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 105 | bits shifted off form a second 64-bit result as follows: The _last_ bit 106 | shifted off is the most-significant bit of the extra result, and the other 107 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 108 | bits shifted off were all zero. This extra result is stored in the location 109 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 110 | (This routine makes more sense if `a0' and `a1' are considered to form 111 | a fixed-point value with binary point between `a0' and `a1'. This fixed- 112 | point value is shifted right by the number of bits given in `count', and 113 | the integer part of the result is returned at the location pointed to by 114 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 115 | described above, and is returned at the location pointed to by `z1Ptr'.) 
116 *----------------------------------------------------------------------------*/ 117 118 INLINE void 119 shift64ExtraRightJamming( 120 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr) 121 { 122 uint64_t z0, z1; 123 int8 negCount = ( - count ) & 63; 124 125 if ( count == 0 ) { 126 z1 = a1; 127 z0 = a0; 128 } 129 else if ( count < 64 ) { 130 z1 = ( a0<<negCount ) | ( a1 != 0 ); 131 z0 = a0>>count; 132 } 133 else { 134 if ( count == 64 ) { 135 z1 = a0 | ( a1 != 0 ); 136 } 137 else { 138 z1 = ( ( a0 | a1 ) != 0 ); 139 } 140 z0 = 0; 141 } 142 *z1Ptr = z1; 143 *z0Ptr = z0; 144 145 } 146 147 /*---------------------------------------------------------------------------- 148 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 149 | number of bits given in `count'. Any bits shifted off are lost. The value 150 | of `count' can be arbitrarily large; in particular, if `count' is greater 151 | than 128, the result will be 0. The result is broken into two 64-bit pieces 152 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 153 *----------------------------------------------------------------------------*/ 154 155 INLINE void 156 shift128Right( 157 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr) 158 { 159 uint64_t z0, z1; 160 int8 negCount = ( - count ) & 63; 161 162 if ( count == 0 ) { 163 z1 = a1; 164 z0 = a0; 165 } 166 else if ( count < 64 ) { 167 z1 = ( a0<<negCount ) | ( a1>>count ); 168 z0 = a0>>count; 169 } 170 else { 171 z1 = (count < 128) ? (a0 >> (count & 63)) : 0; 172 z0 = 0; 173 } 174 *z1Ptr = z1; 175 *z0Ptr = z0; 176 177 } 178 179 /*---------------------------------------------------------------------------- 180 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 181 | number of bits given in `count'. 
If any nonzero bits are shifted off, they 182 | are ``jammed'' into the least significant bit of the result by setting the 183 | least significant bit to 1. The value of `count' can be arbitrarily large; 184 | in particular, if `count' is greater than 128, the result will be either 185 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 186 | nonzero. The result is broken into two 64-bit pieces which are stored at 187 | the locations pointed to by `z0Ptr' and `z1Ptr'. 188 *----------------------------------------------------------------------------*/ 189 190 INLINE void 191 shift128RightJamming( 192 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr) 193 { 194 uint64_t z0, z1; 195 int8 negCount = ( - count ) & 63; 196 197 if ( count == 0 ) { 198 z1 = a1; 199 z0 = a0; 200 } 201 else if ( count < 64 ) { 202 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 203 z0 = a0>>count; 204 } 205 else { 206 if ( count == 64 ) { 207 z1 = a0 | ( a1 != 0 ); 208 } 209 else if ( count < 128 ) { 210 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 211 } 212 else { 213 z1 = ( ( a0 | a1 ) != 0 ); 214 } 215 z0 = 0; 216 } 217 *z1Ptr = z1; 218 *z0Ptr = z0; 219 220 } 221 222 /*---------------------------------------------------------------------------- 223 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 224 | by 64 _plus_ the number of bits given in `count'. The shifted result is 225 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 226 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 227 | off form a third 64-bit result as follows: The _last_ bit shifted off is 228 | the most-significant bit of the extra result, and the other 63 bits of the 229 | extra result are all zero if and only if _all_but_the_last_ bits shifted off 230 | were all zero. This extra result is stored in the location pointed to by 231 | `z2Ptr'. 
The value of `count' can be arbitrarily large. 232 | (This routine makes more sense if `a0', `a1', and `a2' are considered 233 | to form a fixed-point value with binary point between `a1' and `a2'. This 234 | fixed-point value is shifted right by the number of bits given in `count', 235 | and the integer part of the result is returned at the locations pointed to 236 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 237 | corrupted as described above, and is returned at the location pointed to by 238 | `z2Ptr'.) 239 *----------------------------------------------------------------------------*/ 240 241 INLINE void 242 shift128ExtraRightJamming( 243 uint64_t a0, 244 uint64_t a1, 245 uint64_t a2, 246 int_fast16_t count, 247 uint64_t *z0Ptr, 248 uint64_t *z1Ptr, 249 uint64_t *z2Ptr 250 ) 251 { 252 uint64_t z0, z1, z2; 253 int8 negCount = ( - count ) & 63; 254 255 if ( count == 0 ) { 256 z2 = a2; 257 z1 = a1; 258 z0 = a0; 259 } 260 else { 261 if ( count < 64 ) { 262 z2 = a1<<negCount; 263 z1 = ( a0<<negCount ) | ( a1>>count ); 264 z0 = a0>>count; 265 } 266 else { 267 if ( count == 64 ) { 268 z2 = a1; 269 z1 = a0; 270 } 271 else { 272 a2 |= a1; 273 if ( count < 128 ) { 274 z2 = a0<<negCount; 275 z1 = a0>>( count & 63 ); 276 } 277 else { 278 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 279 z1 = 0; 280 } 281 } 282 z0 = 0; 283 } 284 z2 |= ( a2 != 0 ); 285 } 286 *z2Ptr = z2; 287 *z1Ptr = z1; 288 *z0Ptr = z0; 289 290 } 291 292 /*---------------------------------------------------------------------------- 293 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 294 | number of bits given in `count'. Any bits shifted off are lost. The value 295 | of `count' must be less than 64. The result is broken into two 64-bit 296 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
297 *----------------------------------------------------------------------------*/ 298 299 INLINE void 300 shortShift128Left( 301 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr) 302 { 303 304 *z1Ptr = a1<<count; 305 *z0Ptr = 306 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 307 308 } 309 310 /*---------------------------------------------------------------------------- 311 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 312 | by the number of bits given in `count'. Any bits shifted off are lost. 313 | The value of `count' must be less than 64. The result is broken into three 314 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 315 | `z1Ptr', and `z2Ptr'. 316 *----------------------------------------------------------------------------*/ 317 318 INLINE void 319 shortShift192Left( 320 uint64_t a0, 321 uint64_t a1, 322 uint64_t a2, 323 int_fast16_t count, 324 uint64_t *z0Ptr, 325 uint64_t *z1Ptr, 326 uint64_t *z2Ptr 327 ) 328 { 329 uint64_t z0, z1, z2; 330 int8 negCount; 331 332 z2 = a2<<count; 333 z1 = a1<<count; 334 z0 = a0<<count; 335 if ( 0 < count ) { 336 negCount = ( ( - count ) & 63 ); 337 z1 |= a2>>negCount; 338 z0 |= a1>>negCount; 339 } 340 *z2Ptr = z2; 341 *z1Ptr = z1; 342 *z0Ptr = z0; 343 344 } 345 346 /*---------------------------------------------------------------------------- 347 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 348 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 349 | any carry out is lost. The result is broken into two 64-bit pieces which 350 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
351 *----------------------------------------------------------------------------*/ 352 353 INLINE void 354 add128( 355 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) 356 { 357 uint64_t z1; 358 359 z1 = a1 + b1; 360 *z1Ptr = z1; 361 *z0Ptr = a0 + b0 + ( z1 < a1 ); 362 363 } 364 365 /*---------------------------------------------------------------------------- 366 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 367 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 368 | modulo 2^192, so any carry out is lost. The result is broken into three 369 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 370 | `z1Ptr', and `z2Ptr'. 371 *----------------------------------------------------------------------------*/ 372 373 INLINE void 374 add192( 375 uint64_t a0, 376 uint64_t a1, 377 uint64_t a2, 378 uint64_t b0, 379 uint64_t b1, 380 uint64_t b2, 381 uint64_t *z0Ptr, 382 uint64_t *z1Ptr, 383 uint64_t *z2Ptr 384 ) 385 { 386 uint64_t z0, z1, z2; 387 int8 carry0, carry1; 388 389 z2 = a2 + b2; 390 carry1 = ( z2 < a2 ); 391 z1 = a1 + b1; 392 carry0 = ( z1 < a1 ); 393 z0 = a0 + b0; 394 z1 += carry1; 395 z0 += ( z1 < carry1 ); 396 z0 += carry0; 397 *z2Ptr = z2; 398 *z1Ptr = z1; 399 *z0Ptr = z0; 400 401 } 402 403 /*---------------------------------------------------------------------------- 404 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 405 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 406 | 2^128, so any borrow out (carry out) is lost. The result is broken into two 407 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 408 | `z1Ptr'. 
409 *----------------------------------------------------------------------------*/ 410 411 INLINE void 412 sub128( 413 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) 414 { 415 416 *z1Ptr = a1 - b1; 417 *z0Ptr = a0 - b0 - ( a1 < b1 ); 418 419 } 420 421 /*---------------------------------------------------------------------------- 422 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 423 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 424 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 425 | result is broken into three 64-bit pieces which are stored at the locations 426 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 427 *----------------------------------------------------------------------------*/ 428 429 INLINE void 430 sub192( 431 uint64_t a0, 432 uint64_t a1, 433 uint64_t a2, 434 uint64_t b0, 435 uint64_t b1, 436 uint64_t b2, 437 uint64_t *z0Ptr, 438 uint64_t *z1Ptr, 439 uint64_t *z2Ptr 440 ) 441 { 442 uint64_t z0, z1, z2; 443 int8 borrow0, borrow1; 444 445 z2 = a2 - b2; 446 borrow1 = ( a2 < b2 ); 447 z1 = a1 - b1; 448 borrow0 = ( a1 < b1 ); 449 z0 = a0 - b0; 450 z0 -= ( z1 < borrow1 ); 451 z1 -= borrow1; 452 z0 -= borrow0; 453 *z2Ptr = z2; 454 *z1Ptr = z1; 455 *z0Ptr = z0; 456 457 } 458 459 /*---------------------------------------------------------------------------- 460 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 461 | into two 64-bit pieces which are stored at the locations pointed to by 462 | `z0Ptr' and `z1Ptr'. 
463 *----------------------------------------------------------------------------*/ 464 465 INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr ) 466 { 467 uint32_t aHigh, aLow, bHigh, bLow; 468 uint64_t z0, zMiddleA, zMiddleB, z1; 469 470 aLow = a; 471 aHigh = a>>32; 472 bLow = b; 473 bHigh = b>>32; 474 z1 = ( (uint64_t) aLow ) * bLow; 475 zMiddleA = ( (uint64_t) aLow ) * bHigh; 476 zMiddleB = ( (uint64_t) aHigh ) * bLow; 477 z0 = ( (uint64_t) aHigh ) * bHigh; 478 zMiddleA += zMiddleB; 479 z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 480 zMiddleA <<= 32; 481 z1 += zMiddleA; 482 z0 += ( z1 < zMiddleA ); 483 *z1Ptr = z1; 484 *z0Ptr = z0; 485 486 } 487 488 /*---------------------------------------------------------------------------- 489 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 490 | `b' to obtain a 192-bit product. The product is broken into three 64-bit 491 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 492 | `z2Ptr'. 493 *----------------------------------------------------------------------------*/ 494 495 INLINE void 496 mul128By64To192( 497 uint64_t a0, 498 uint64_t a1, 499 uint64_t b, 500 uint64_t *z0Ptr, 501 uint64_t *z1Ptr, 502 uint64_t *z2Ptr 503 ) 504 { 505 uint64_t z0, z1, z2, more1; 506 507 mul64To128( a1, b, &z1, &z2 ); 508 mul64To128( a0, b, &z0, &more1 ); 509 add128( z0, more1, 0, z1, &z0, &z1 ); 510 *z2Ptr = z2; 511 *z1Ptr = z1; 512 *z0Ptr = z0; 513 514 } 515 516 /*---------------------------------------------------------------------------- 517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 518 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 519 | product. The product is broken into four 64-bit pieces which are stored at 520 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
521 *----------------------------------------------------------------------------*/ 522 523 INLINE void 524 mul128To256( 525 uint64_t a0, 526 uint64_t a1, 527 uint64_t b0, 528 uint64_t b1, 529 uint64_t *z0Ptr, 530 uint64_t *z1Ptr, 531 uint64_t *z2Ptr, 532 uint64_t *z3Ptr 533 ) 534 { 535 uint64_t z0, z1, z2, z3; 536 uint64_t more1, more2; 537 538 mul64To128( a1, b1, &z2, &z3 ); 539 mul64To128( a1, b0, &z1, &more2 ); 540 add128( z1, more2, 0, z2, &z1, &z2 ); 541 mul64To128( a0, b0, &z0, &more1 ); 542 add128( z0, more1, 0, z1, &z0, &z1 ); 543 mul64To128( a0, b1, &more1, &more2 ); 544 add128( more1, more2, 0, z2, &more1, &z2 ); 545 add128( z0, z1, 0, more1, &z0, &z1 ); 546 *z3Ptr = z3; 547 *z2Ptr = z2; 548 *z1Ptr = z1; 549 *z0Ptr = z0; 550 551 } 552 553 /*---------------------------------------------------------------------------- 554 | Returns an approximation to the 64-bit integer quotient obtained by dividing 555 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 556 | divisor `b' must be at least 2^63. If q is the exact quotient truncated 557 | toward zero, the approximation returned lies between q and q + 2 inclusive. 558 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 559 | unsigned integer is returned. 560 *----------------------------------------------------------------------------*/ 561 562 static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b ) 563 { 564 uint64_t b0, b1; 565 uint64_t rem0, rem1, term0, term1; 566 uint64_t z; 567 568 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 569 b0 = b>>32; 570 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 571 mul64To128( b, z, &term0, &term1 ); 572 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 573 while ( ( (int64_t) rem0 ) < 0 ) { 574 z -= LIT64( 0x100000000 ); 575 b1 = b<<32; 576 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 577 } 578 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 579 z |= ( b0<<32 <= rem0 ) ? 
0xFFFFFFFF : rem0 / b0; 580 return z; 581 582 } 583 584 /*---------------------------------------------------------------------------- 585 | Returns an approximation to the square root of the 32-bit significand given 586 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 587 | `aExp' (the least significant bit) is 1, the integer returned approximates 588 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 589 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 590 | case, the approximation returned lies strictly within +/-2 of the exact 591 | value. 592 *----------------------------------------------------------------------------*/ 593 594 static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a) 595 { 596 static const uint16_t sqrtOddAdjustments[] = { 597 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 598 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 599 }; 600 static const uint16_t sqrtEvenAdjustments[] = { 601 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 602 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 603 }; 604 int8 index; 605 uint32_t z; 606 607 index = ( a>>27 ) & 15; 608 if ( aExp & 1 ) { 609 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; 610 z = ( ( a / z )<<14 ) + ( z<<15 ); 611 a >>= 1; 612 } 613 else { 614 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; 615 z = a / z + z; 616 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 617 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); 618 } 619 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); 620 621 } 622 623 /*---------------------------------------------------------------------------- 624 | Returns the number of leading 0 bits before the most-significant 1 bit of 625 | `a'. If `a' is zero, 32 is returned. 
626 *----------------------------------------------------------------------------*/ 627 628 static int8 countLeadingZeros32( uint32_t a ) 629 { 630 #if SOFTFLOAT_GNUC_PREREQ(3, 4) 631 if (a) { 632 return __builtin_clz(a); 633 } else { 634 return 32; 635 } 636 #else 637 static const int8 countLeadingZerosHigh[] = { 638 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 639 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 640 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 641 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 642 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 643 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 644 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 645 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 654 }; 655 int8 shiftCount; 656 657 shiftCount = 0; 658 if ( a < 0x10000 ) { 659 shiftCount += 16; 660 a <<= 16; 661 } 662 if ( a < 0x1000000 ) { 663 shiftCount += 8; 664 a <<= 8; 665 } 666 shiftCount += countLeadingZerosHigh[ a>>24 ]; 667 return shiftCount; 668 #endif 669 } 670 671 /*---------------------------------------------------------------------------- 672 | Returns the number of leading 0 bits before the most-significant 1 bit of 673 | `a'. If `a' is zero, 64 is returned. 
674 *----------------------------------------------------------------------------*/ 675 676 static int8 countLeadingZeros64( uint64_t a ) 677 { 678 #if SOFTFLOAT_GNUC_PREREQ(3, 4) 679 if (a) { 680 return __builtin_clzll(a); 681 } else { 682 return 64; 683 } 684 #else 685 int8 shiftCount; 686 687 shiftCount = 0; 688 if ( a < ( (uint64_t) 1 )<<32 ) { 689 shiftCount += 32; 690 } 691 else { 692 a >>= 32; 693 } 694 shiftCount += countLeadingZeros32( a ); 695 return shiftCount; 696 #endif 697 } 698 699 /*---------------------------------------------------------------------------- 700 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 701 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 702 | Otherwise, returns 0. 703 *----------------------------------------------------------------------------*/ 704 705 INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 706 { 707 708 return ( a0 == b0 ) && ( a1 == b1 ); 709 710 } 711 712 /*---------------------------------------------------------------------------- 713 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 714 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 715 | Otherwise, returns 0. 716 *----------------------------------------------------------------------------*/ 717 718 INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 719 { 720 721 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 722 723 } 724 725 /*---------------------------------------------------------------------------- 726 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 727 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 728 | returns 0. 
729 *----------------------------------------------------------------------------*/ 730 731 INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 732 { 733 734 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 735 736 } 737 738 /*---------------------------------------------------------------------------- 739 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 740 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 741 | Otherwise, returns 0. 742 *----------------------------------------------------------------------------*/ 743 744 INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 745 { 746 747 return ( a0 != b0 ) || ( a1 != b1 ); 748 749 } 750