1 2 #ifndef USED_AS_INCLUDE 3 4 #include "../pub/libvex_basictypes.h" 5 #include <stdio.h> 6 #include <malloc.h> 7 #include <stdlib.h> 8 #include <string.h> 9 #include <assert.h> 10 11 12 /* Test program for developing code for conversions between 13 x87 64-bit and 80-bit floats. 14 15 80-bit format exists only for x86/x86-64, and so the routines 16 hardwire it as little-endian. The 64-bit format (IEEE double) 17 could exist on any platform, little or big-endian and so we 18 have to take that into account. IOW, these routines have to 19 work correctly when compiled on both big- and little-endian 20 targets, but the 80-bit images only ever have to exist in 21 little-endian format. 22 */ 23 static void show_f80 ( UChar* ); 24 static void show_f64 ( UChar* ); 25 26 static inline 27 UInt read_bit_array ( UChar* arr, UInt n ) 28 { 29 UChar c = arr[n >> 3]; 30 c >>= (n&7); 31 return c & 1; 32 } 33 34 static inline 35 void write_bit_array ( UChar* arr, UInt n, UInt b ) 36 { 37 UChar c = arr[n >> 3]; 38 c &= ~(1 << (n&7)); 39 c |= ((b&1) << (n&7)); 40 arr[n >> 3] = c; 41 } 42 43 44 static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ) 45 { 46 asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)" 47 : 48 : "r" (&f80[0]), "r" (&f64[0]) 49 : "memory" ); 50 } 51 52 static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ) 53 { 54 asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)" 55 : 56 : "r" (&f64[0]), "r" (&f80[0]) 57 : "memory" ); 58 } 59 60 #endif /* ndef USED_AS_INCLUDE */ 61 62 63 64 /* 80 and 64-bit floating point formats: 65 66 80-bit: 67 68 S 0 0-------0 zero 69 S 0 0X------X denormals 70 S 1-7FFE 1X------X normals (all normals have leading 1) 71 S 7FFF 10------0 infinity 72 S 7FFF 10X-----X snan 73 S 7FFF 11X-----X qnan 74 75 S is the sign bit. For runs X----X, at least one of the Xs must be 76 nonzero. Exponent is 15 bits, fractional part is 63 bits, and 77 there is an explicitly represented leading 1, and a sign bit, 78 giving 80 in total. 79 80 64-bit avoids the confusion of an explicitly represented leading 1 81 and so is simpler: 82 83 S 0 0------0 zero 84 S 0 X------X denormals 85 S 1-7FE any normals 86 S 7FF 0------0 infinity 87 S 7FF 0X-----X snan 88 S 7FF 1X-----X qnan 89 90 Exponent is 11 bits, fractional part is 52 bits, and there is a 91 sign bit, giving 64 in total. 92 */ 93 94 /* Convert a IEEE754 double (64-bit) into an x87 extended double 95 (80-bit), mimicing the hardware fairly closely. Both numbers are 96 stored little-endian. Limitations, all of which could be fixed, 97 given some level of hassle: 98 99 * Identity of NaNs is not preserved. 100 101 See comments in the code for more details. 102 */ 103 static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ) 104 { 105 Bool mantissaIsZero; 106 Int bexp, i, j, shift; 107 UChar sign; 108 109 sign = toUChar( (f64[7] >> 7) & 1 ); 110 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); 111 bexp &= 0x7FF; 112 113 mantissaIsZero = False; 114 if (bexp == 0 || bexp == 0x7FF) { 115 /* We'll need to know whether or not the mantissa (bits 51:0) is 116 all zeroes in order to handle these cases. So figure it 117 out. */ 118 mantissaIsZero 119 = toBool( 120 (f64[6] & 0x0F) == 0 121 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0 122 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0 123 ); 124 } 125 126 /* If the exponent is zero, either we have a zero or a denormal. 127 Produce a zero. This is a hack in that it forces denormals to 128 zero. Could do better. */ 129 if (bexp == 0) { 130 f80[9] = toUChar( sign << 7 ); 131 f80[8] = f80[7] = f80[6] = f80[5] = f80[4] 132 = f80[3] = f80[2] = f80[1] = f80[0] = 0; 133 134 if (mantissaIsZero) 135 /* It really is zero, so that's all we can do. */ 136 return; 137 138 /* There is at least one 1-bit in the mantissa. So it's a 139 potentially denormalised double -- but we can produce a 140 normalised long double. Count the leading zeroes in the 141 mantissa so as to decide how much to bump the exponent down 142 by. Note, this is SLOW. */ 143 shift = 0; 144 for (i = 51; i >= 0; i--) { 145 if (read_bit_array(f64, i)) 146 break; 147 shift++; 148 } 149 150 /* and copy into place as many bits as we can get our hands on. */ 151 j = 63; 152 for (i = 51 - shift; i >= 0; i--) { 153 write_bit_array( f80, j, 154 read_bit_array( f64, i ) ); 155 j--; 156 } 157 158 /* Set the exponent appropriately, and we're done. */ 159 bexp -= shift; 160 bexp += (16383 - 1023); 161 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); 162 f80[8] = toUChar( bexp & 0xFF ); 163 return; 164 } 165 166 /* If the exponent is 7FF, this is either an Infinity, a SNaN or 167 QNaN, as determined by examining bits 51:0, thus: 168 0 ... 0 Inf 169 0X ... X SNaN 170 1X ... X QNaN 171 where at least one of the Xs is not zero. 172 */ 173 if (bexp == 0x7FF) { 174 if (mantissaIsZero) { 175 /* Produce an appropriately signed infinity: 176 S 1--1 (15) 1 0--0 (63) 177 */ 178 f80[9] = toUChar( (sign << 7) | 0x7F ); 179 f80[8] = 0xFF; 180 f80[7] = 0x80; 181 f80[6] = f80[5] = f80[4] = f80[3] 182 = f80[2] = f80[1] = f80[0] = 0; 183 return; 184 } 185 /* So it's either a QNaN or SNaN. Distinguish by considering 186 bit 51. Note, this destroys all the trailing bits 187 (identity?) of the NaN. IEEE754 doesn't require preserving 188 these (it only requires that there be one QNaN value and one 189 SNaN value), but x87 does seem to have some ability to 190 preserve them. Anyway, here, the NaN's identity is 191 destroyed. Could be improved. */ 192 if (f64[6] & 8) { 193 /* QNaN. Make a QNaN: 194 S 1--1 (15) 1 1--1 (63) 195 */ 196 f80[9] = toUChar( (sign << 7) | 0x7F ); 197 f80[8] = 0xFF; 198 f80[7] = 0xFF; 199 f80[6] = f80[5] = f80[4] = f80[3] 200 = f80[2] = f80[1] = f80[0] = 0xFF; 201 } else { 202 /* SNaN. Make a SNaN: 203 S 1--1 (15) 0 1--1 (63) 204 */ 205 f80[9] = toUChar( (sign << 7) | 0x7F ); 206 f80[8] = 0xFF; 207 f80[7] = 0x7F; 208 f80[6] = f80[5] = f80[4] = f80[3] 209 = f80[2] = f80[1] = f80[0] = 0xFF; 210 } 211 return; 212 } 213 214 /* It's not a zero, denormal, infinity or nan. So it must be a 215 normalised number. Rebias the exponent and build the new 216 number. */ 217 bexp += (16383 - 1023); 218 219 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); 220 f80[8] = toUChar( bexp & 0xFF ); 221 f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78) 222 | ((f64[5] >> 5) & 7) ); 223 f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) ); 224 f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) ); 225 f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) ); 226 f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) ); 227 f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) ); 228 f80[1] = toUChar( ((f64[0] << 3) & 0xF8) ); 229 f80[0] = toUChar( 0 ); 230 } 231 232 233 /* Convert a x87 extended double (80-bit) into an IEEE 754 double 234 (64-bit), mimicking the hardware fairly closely. Both numbers are 235 stored little-endian. Limitations, both of which could be fixed, 236 given some level of hassle: 237 238 * Rounding following truncation could be a bit better. 239 240 * Identity of NaNs is not preserved. 241 242 See comments in the code for more details. 243 */ 244 static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ) 245 { 246 Bool isInf; 247 Int bexp, i, j; 248 UChar sign; 249 250 sign = toUChar((f80[9] >> 7) & 1); 251 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8]; 252 bexp &= 0x7FFF; 253 254 /* If the exponent is zero, either we have a zero or a denormal. 255 But an extended precision denormal becomes a double precision 256 zero, so in either case, just produce the appropriately signed 257 zero. */ 258 if (bexp == 0) { 259 f64[7] = toUChar(sign << 7); 260 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 261 return; 262 } 263 264 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or 265 QNaN, as determined by examining bits 62:0, thus: 266 0 ... 0 Inf 267 0X ... X SNaN 268 1X ... X QNaN 269 where at least one of the Xs is not zero. 270 */ 271 if (bexp == 0x7FFF) { 272 isInf = toBool( 273 (f80[7] & 0x7F) == 0 274 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0 275 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 276 && f80[0] == 0 277 ); 278 if (isInf) { 279 if (0 == (f80[7] & 0x80)) 280 goto wierd_NaN; 281 /* Produce an appropriately signed infinity: 282 S 1--1 (11) 0--0 (52) 283 */ 284 f64[7] = toUChar((sign << 7) | 0x7F); 285 f64[6] = 0xF0; 286 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 287 return; 288 } 289 /* So it's either a QNaN or SNaN. Distinguish by considering 290 bit 62. Note, this destroys all the trailing bits 291 (identity?) of the NaN. IEEE754 doesn't require preserving 292 these (it only requires that there be one QNaN value and one 293 SNaN value), but x87 does seem to have some ability to 294 preserve them. Anyway, here, the NaN's identity is 295 destroyed. Could be improved. */ 296 if (f80[8] & 0x40) { 297 /* QNaN. Make a QNaN: 298 S 1--1 (11) 1 1--1 (51) 299 */ 300 f64[7] = toUChar((sign << 7) | 0x7F); 301 f64[6] = 0xFF; 302 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; 303 } else { 304 /* SNaN. Make a SNaN: 305 S 1--1 (11) 0 1--1 (51) 306 */ 307 f64[7] = toUChar((sign << 7) | 0x7F); 308 f64[6] = 0xF7; 309 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; 310 } 311 return; 312 } 313 314 /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is 315 zero, the x87 FPU appears to consider the number denormalised 316 and converts it to a QNaN. */ 317 if (0 == (f80[7] & 0x80)) { 318 wierd_NaN: 319 /* Strange hardware QNaN: 320 S 1--1 (11) 1 0--0 (51) 321 */ 322 /* On a PIII, these QNaNs always appear with sign==1. I have 323 no idea why. */ 324 f64[7] = (1 /*sign*/ << 7) | 0x7F; 325 f64[6] = 0xF8; 326 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 327 return; 328 } 329 330 /* It's not a zero, denormal, infinity or nan. So it must be a 331 normalised number. Rebias the exponent and consider. */ 332 bexp -= (16383 - 1023); 333 if (bexp >= 0x7FF) { 334 /* It's too big for a double. Construct an infinity. */ 335 f64[7] = toUChar((sign << 7) | 0x7F); 336 f64[6] = 0xF0; 337 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 338 return; 339 } 340 341 if (bexp <= 0) { 342 /* It's too small for a normalised double. First construct a 343 zero and then see if it can be improved into a denormal. */ 344 f64[7] = toUChar(sign << 7); 345 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 346 347 if (bexp < -52) 348 /* Too small even for a denormal. */ 349 return; 350 351 /* Ok, let's make a denormal. Note, this is SLOW. */ 352 /* Copy bits 63, 62, 61, etc of the src mantissa into the dst, 353 indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */ 354 /* bexp is in range -52 .. 0 inclusive */ 355 for (i = 63; i >= 0; i--) { 356 j = i - 12 + bexp; 357 if (j < 0) break; 358 /* We shouldn't really call vassert from generated code. */ 359 assert(j >= 0 && j < 52); 360 write_bit_array ( f64, 361 j, 362 read_bit_array ( f80, i ) ); 363 } 364 /* and now we might have to round ... */ 365 if (read_bit_array(f80, 10+1 - bexp) == 1) 366 goto do_rounding; 367 368 return; 369 } 370 371 /* Ok, it's a normalised number which is representable as a double. 372 Copy the exponent and mantissa into place. */ 373 /* 374 for (i = 0; i < 52; i++) 375 write_bit_array ( f64, 376 i, 377 read_bit_array ( f80, i+11 ) ); 378 */ 379 f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) ); 380 f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) ); 381 f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) ); 382 f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) ); 383 f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) ); 384 f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) ); 385 386 f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) ); 387 388 f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) ); 389 390 /* Now consider any rounding that needs to happen as a result of 391 truncating the mantissa. */ 392 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ { 393 394 /* If the bottom bits of f80 are "100 0000 0000", then the 395 infinitely precise value is deemed to be mid-way between the 396 two closest representable values. Since we're doing 397 round-to-nearest (the default mode), in that case it is the 398 bit immediately above which indicates whether we should round 399 upwards or not -- if 0, we don't. All that is encapsulated 400 in the following simple test. */ 401 if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0) 402 return; 403 404 do_rounding: 405 /* Round upwards. This is a kludge. Once in every 2^24 406 roundings (statistically) the bottom three bytes are all 0xFF 407 and so we don't round at all. Could be improved. */ 408 if (f64[0] != 0xFF) { 409 f64[0]++; 410 } 411 else 412 if (f64[0] == 0xFF && f64[1] != 0xFF) { 413 f64[0] = 0; 414 f64[1]++; 415 } 416 else 417 if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) { 418 f64[0] = 0; 419 f64[1] = 0; 420 f64[2]++; 421 } 422 /* else we don't round, but we should. */ 423 } 424 } 425 426 427 #ifndef USED_AS_INCLUDE 428 429 ////////////// 430 431 static void show_f80 ( UChar* f80 ) 432 { 433 Int i; 434 printf("%d ", read_bit_array(f80, 79)); 435 436 for (i = 78; i >= 64; i--) 437 printf("%d", read_bit_array(f80, i)); 438 439 printf(" %d ", read_bit_array(f80, 63)); 440 441 for (i = 62; i >= 0; i--) 442 printf("%d", read_bit_array(f80, i)); 443 } 444 445 static void show_f64le ( UChar* f64 ) 446 { 447 Int i; 448 printf("%d ", read_bit_array(f64, 63)); 449 450 for (i = 62; i >= 52; i--) 451 printf("%d", read_bit_array(f64, i)); 452 453 printf(" "); 454 for (i = 51; i >= 0; i--) 455 printf("%d", read_bit_array(f64, i)); 456 } 457 458 ////////////// 459 460 461 /* Convert f80 to a 64-bit IEEE double using both the hardware and the 462 soft version, and compare the results. If they differ, print 463 details and return 1. If they are identical, return 0. 464 */ 465 int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s) 466 { 467 Char buf64s[100], buf64h[100]; 468 Bool same; 469 Int k; 470 convert_f80le_to_f64le_HW(f80, f64h); 471 convert_f80le_to_f64le(f80, f64s); 472 same = True; 473 for (k = 0; k < 8; k++) { 474 if (f64s[k] != f64h[k]) { 475 same = False; break; 476 } 477 } 478 /* bitwise identical */ 479 if (same) 480 return 0; 481 482 sprintf(buf64s, "%.16e", *(double*)f64s); 483 sprintf(buf64h, "%.16e", *(double*)f64h); 484 485 /* Not bitwise identical, but pretty darn close */ 486 if (0 == strcmp(buf64s, buf64h)) 487 return 0; 488 489 printf("\n"); 490 printf("f80: "); show_f80(f80); printf("\n"); 491 printf("f64h: "); show_f64le(f64h); printf("\n"); 492 printf("f64s: "); show_f64le(f64s); printf("\n"); 493 494 printf("[test %d] %.16Le -> (hw %s, sw %s)\n", 495 test_no, *(long double*)f80, 496 buf64h, buf64s ); 497 498 return 1; 499 } 500 501 502 /* Convert an IEEE 64-bit double to a x87 extended double (80 bit) 503 using both the hardware and the soft version, and compare the 504 results. If they differ, print details and return 1. If they are 505 identical, return 0. 506 */ 507 int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s) 508 { 509 Char buf80s[100], buf80h[100]; 510 Bool same; 511 Int k; 512 convert_f64le_to_f80le_HW(f64, f80h); 513 convert_f64le_to_f80le(f64, f80s); 514 same = True; 515 for (k = 0; k < 10; k++) { 516 if (f80s[k] != f80h[k]) { 517 same = False; break; 518 } 519 } 520 /* bitwise identical */ 521 if (same) 522 return 0; 523 524 sprintf(buf80s, "%.20Le", *(long double*)f80s); 525 sprintf(buf80h, "%.20Le", *(long double*)f80h); 526 527 /* Not bitwise identical, but pretty darn close */ 528 if (0 == strcmp(buf80s, buf80h)) 529 return 0; 530 531 printf("\n"); 532 printf("f64: "); show_f64le(f64); printf("\n"); 533 printf("f80h: "); show_f80(f80h); printf("\n"); 534 printf("f80s: "); show_f80(f80s); printf("\n"); 535 536 printf("[test %d] %.16e -> (hw %s, sw %s)\n", 537 test_no, *(double*)f64, 538 buf80h, buf80s ); 539 540 return 1; 541 } 542 543 544 545 void do_80_to_64_tests ( void ) 546 { 547 UInt b9,b8,b7,i, j; 548 Int fails=0, tests=0; 549 UChar* f64h = malloc(8); 550 UChar* f64s = malloc(8); 551 UChar* f80 = malloc(10); 552 int STEP = 1; 553 554 srandom(4343); 555 556 /* Ten million random bit patterns */ 557 for (i = 0; i < 10000000; i++) { 558 tests++; 559 for (j = 0; j < 10; j++) 560 f80[j] = (random() >> 7) & 255; 561 562 fails += do_80_to_64_test(tests, f80, f64h, f64s); 563 } 564 565 /* 2^24 numbers in which the first 24 bits are tested exhaustively 566 -- this covers the sign, exponent and leading part of the 567 mantissa. */ 568 for (b9 = 0; b9 < 256; b9 += STEP) { 569 for (b8 = 0; b8 < 256; b8 += STEP) { 570 for (b7 = 0; b7 < 256; b7 += STEP) { 571 tests++; 572 for (i = 0; i < 10; i++) 573 f80[i] = 0; 574 for (i = 0; i < 8; i++) 575 f64h[i] = f64s[i] = 0; 576 f80[9] = b9; 577 f80[8] = b8; 578 f80[7] = b7; 579 580 fails += do_80_to_64_test(tests, f80, f64h, f64s); 581 }}} 582 583 printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails); 584 } 585 586 587 void do_64_to_80_tests ( void ) 588 { 589 UInt b7,b6,b5,i, j; 590 Int fails=0, tests=0; 591 UChar* f80h = malloc(10); 592 UChar* f80s = malloc(10); 593 UChar* f64 = malloc(8); 594 int STEP = 1; 595 596 srandom(2323); 597 598 /* Ten million random bit patterns */ 599 for (i = 0; i < 10000000; i++) { 600 tests++; 601 for (j = 0; j < 8; j++) 602 f64[j] = (random() >> 13) & 255; 603 604 fails += do_64_to_80_test(tests, f64, f80h, f80s); 605 } 606 607 /* 2^24 numbers in which the first 24 bits are tested exhaustively 608 -- this covers the sign, exponent and leading part of the 609 mantissa. */ 610 for (b7 = 0; b7 < 256; b7 += STEP) { 611 for (b6 = 0; b6 < 256; b6 += STEP) { 612 for (b5 = 0; b5 < 256; b5 += STEP) { 613 tests++; 614 for (i = 0; i < 8; i++) 615 f64[i] = 0; 616 for (i = 0; i < 10; i++) 617 f80h[i] = f80s[i] = 0; 618 f64[7] = b7; 619 f64[6] = b6; 620 f64[5] = b5; 621 622 fails += do_64_to_80_test(tests, f64, f80h, f80s); 623 }}} 624 625 printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails); 626 } 627 628 629 int main ( void ) 630 { 631 do_80_to_64_tests(); 632 do_64_to_80_tests(); 633 return 0; 634 } 635 636 #endif /* ndef USED_AS_INCLUDE */ 637