1 2 /* Copyright (C) 2006 Dave Nomura 3 dcnltc (at) us.ibm.com 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <limits.h> 26 27 typedef enum { FALSE=0, TRUE } bool_t; 28 29 typedef enum { 30 FADDS, FSUBS, FMULS, FDIVS, 31 FMADDS, FMSUBS, FNMADDS, FNMSUBS, 32 FADD, FSUB, FMUL, FDIV, FMADD, 33 FMSUB, FNMADD, FNMSUB, FSQRT 34 } flt_op_t; 35 36 typedef enum { 37 TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t; 38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" }; 39 40 const char *flt_op_names[] = { 41 "fadds", "fsubs", "fmuls", "fdivs", 42 "fmadds", "fmsubs", "fnmadds", "fnmsubs", 43 "fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd", 44 "fnmsub", "fsqrt" 45 }; 46 47 typedef unsigned int fpscr_t; 48 49 typedef union { 50 float flt; 51 struct { 52 #if defined(VGP_ppc64le_linux) 53 unsigned int frac:23; 54 unsigned int exp:8; 55 unsigned int sign:1; 56 #else 57 unsigned int sign:1; 58 unsigned int exp:8; 59 unsigned int frac:23; 60 #endif 61 } layout; 62 } flt_overlay; 63 64 typedef union { 65 double dbl; 66 struct { 67 #if defined(VGP_ppc64le_linux) 68 unsigned int frac_lo:32; 69 unsigned int frac_hi:20; 70 unsigned int exp:11; 71 unsigned int sign:1; 72 #else 73 unsigned int sign:1; 74 unsigned int exp:11; 75 unsigned int frac_hi:20; 76 unsigned int frac_lo:32; 77 #endif 78 } layout; 79 struct { 80 unsigned int hi; 81 unsigned int lo; 82 } dbl_pair; 83 } dbl_overlay; 84 85 void assert_fail(const char *msg, 86 const char* expr, const char* file, int line, const char*fn); 87 88 #define STRING(__str) #__str 89 #define assert(msg, expr) \ 90 ((void) ((expr) ? 0 : \ 91 (assert_fail (msg, STRING(expr), \ 92 __FILE__, __LINE__, \ 93 __PRETTY_FUNCTION__), 0))) 94 float denorm_small; 95 double dbl_denorm_small; 96 float norm_small; 97 bool_t debug = FALSE; 98 bool_t long_is_64_bits = sizeof(long) == 8; 99 100 void assert_fail (msg, expr, file, line, fn) 101 const char* msg; 102 const char* expr; 103 const char* file; 104 int line; 105 const char*fn; 106 { 107 printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n", 108 msg, file, line, fn, expr ); 109 exit( 1 ); 110 } 111 void set_rounding_mode(round_mode_t mode) 112 { 113 switch(mode) { 114 case TO_NEAREST: 115 asm volatile("mtfsfi 7, 0"); 116 break; 117 case TO_ZERO: 118 asm volatile("mtfsfi 7, 1"); 119 break; 120 case TO_PLUS_INFINITY: 121 asm volatile("mtfsfi 7, 2"); 122 break; 123 case TO_MINUS_INFINITY: 124 asm volatile("mtfsfi 7, 3"); 125 break; 126 } 127 } 128 129 void print_double(char *msg, double dbl) 130 { 131 dbl_overlay D; 132 D.dbl = dbl; 133 134 printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n", 135 msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'), 136 D.layout.exp, D.layout.frac_hi, D.layout.frac_lo); 137 } 138 139 void print_single(char *msg, float *flt) 140 { 141 flt_overlay F; 142 F.flt = *flt; 143 144 /* NOTE: for the purposes of comparing the fraction of a single with 145 ** a double left shift the .frac so that hex digits are grouped 146 ** from left to right. this is necessary because the size of a 147 ** single mantissa (23) bits is not a multiple of 4 148 */ 149 printf("%15s : flt %-20a = %c(%4d, %06x)\n", 150 msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1); 151 } 152 153 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected) 154 { 155 int status = 0; 156 flt_overlay R, E; 157 char *result; 158 159 set_rounding_mode(mode); 160 161 E.flt = *expected; 162 R.flt = (float)dbl; 163 164 if ((R.layout.sign != E.layout.sign) || 165 (R.layout.exp != E.layout.exp) || 166 (R.layout.frac != E.layout.frac)) { 167 result = "FAILED"; 168 status = 1; 169 } else { 170 result = "PASSED"; 171 status = 0; 172 } 173 printf("%s:%s:(double)(%-20a) = %20a", 174 round_mode_name[mode], result, R.flt, dbl); 175 if (status) { 176 print_single("\n\texpected", &E.flt); 177 print_single("\n\trounded ", &R.flt); 178 } 179 putchar('\n'); 180 return status; 181 } 182 183 int test_dbl_to_float_convert(char *msg, float *base) 184 { 185 int status = 0; 186 double half = (double)denorm_small/2; 187 double qtr = half/2; 188 double D_hi = (double)*base + half + qtr; 189 double D_lo = (double)*base + half - qtr; 190 float F_lo = *base; 191 float F_hi = F_lo + denorm_small; 192 193 194 /* 195 ** .....+-----+-----+-----+-----+---.... 196 ** ^F_lo ^ ^ ^ 197 ** D_lo 198 ** D_hi 199 ** F_hi 200 ** F_lo and F_hi are two consecutive single float model numbers 201 ** denorm_small distance apart. D_lo and D_hi are two numbers 202 ** within that range that are not representable as single floats 203 ** and will be rounded to either F_lo or F_hi. 204 */ 205 printf("-------------------------- %s --------------------------\n", msg); 206 if (debug) { 207 print_double("D_lo", D_lo); 208 print_double("D_hi", D_hi); 209 print_single("F_lo", &F_lo); 210 print_single("F_hi", &F_hi); 211 } 212 213 /* round to nearest */ 214 status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi); 215 status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo); 216 217 /* round to zero */ 218 status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi)); 219 status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi)); 220 221 /* round to +inf */ 222 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi); 223 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi); 224 225 /* round to -inf */ 226 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo); 227 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo); 228 return status; 229 } 230 231 void 232 init() 233 { 234 flt_overlay F; 235 dbl_overlay D; 236 237 /* small is the smallest denormalized single float number */ 238 F.layout.sign = 0; 239 F.layout.exp = 0; 240 F.layout.frac = 1; 241 denorm_small = F.flt; /* == 2^(-149) */ 242 if (debug) { 243 print_single("float small", &F.flt); 244 } 245 246 D.layout.sign = 0; 247 D.layout.exp = 0; 248 D.layout.frac_hi = 0; 249 D.layout.frac_lo = 1; 250 dbl_denorm_small = D.dbl; /* == 2^(-1022) */ 251 if (debug) { 252 print_double("double small", D.dbl); 253 } 254 255 /* n_small is the smallest normalized single precision float */ 256 F.layout.exp = 1; 257 norm_small = F.flt; 258 } 259 260 int check_int_to_flt_round(round_mode_t mode, long L, float *expected) 261 { 262 int status = 0; 263 int I = L; 264 char *int_name = "int"; 265 flt_overlay R, E; 266 char *result; 267 int iter; 268 269 set_rounding_mode(mode); 270 E.flt = *expected; 271 272 for (iter = 0; iter < 2; iter++) { 273 int stat = 0; 274 R.flt = (iter == 0 ? (float)I : (float)L); 275 276 if ((R.layout.sign != E.layout.sign) || 277 (R.layout.exp != E.layout.exp) || 278 (R.layout.frac != E.layout.frac)) { 279 result = "FAILED"; 280 stat = 1; 281 } else { 282 result = "PASSED"; 283 stat = 0; 284 } 285 printf("%s:%s:(float)(%4s)%9d = %11.1f", 286 round_mode_name[mode], result, int_name, I, R.flt); 287 if (stat) { 288 print_single("\n\texpected: %.1f ", &E.flt); 289 print_single("\n\trounded ", &R.flt); 290 } 291 putchar('\n'); 292 status |= stat; 293 294 if (!long_is_64_bits) break; 295 int_name = "long"; 296 } 297 return status; 298 } 299 300 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected) 301 { 302 int status = 0; 303 dbl_overlay R, E; 304 char *result; 305 306 set_rounding_mode(mode); 307 E.dbl = *expected; 308 309 R.dbl = (double)L; 310 311 if ((R.layout.sign != E.layout.sign) || 312 (R.layout.exp != E.layout.exp) || 313 (R.layout.frac_lo != E.layout.frac_lo) || 314 (R.layout.frac_hi != E.layout.frac_hi)) { 315 result = "FAILED"; 316 status = 1; 317 } else { 318 result = "PASSED"; 319 status = 0; 320 } 321 printf("%s:%s:(double)(%18ld) = %20.1f", 322 round_mode_name[mode], result, L, R.dbl); 323 if (status) { 324 printf("\n\texpected %.1f : ", E.dbl); 325 } 326 putchar('\n'); 327 return status; 328 } 329 330 int test_int_to_float_convert(char *msg) 331 { 332 int status = 0; 333 int int24_hi = 0x03ff0fff; 334 int int24_lo = 0x03ff0ffd; 335 float pos_flt_lo = 67047420.0; 336 float pos_flt_hi = 67047424.0; 337 float neg_flt_lo = -67047420.0; 338 float neg_flt_hi = -67047424.0; 339 340 printf("-------------------------- %s --------------------------\n", msg); 341 status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo); 342 status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi); 343 status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo); 344 status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo); 345 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi); 346 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi); 347 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo); 348 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo); 349 350 status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo); 351 status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi); 352 status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo); 353 status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo); 354 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo); 355 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo); 356 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi); 357 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi); 358 return status; 359 } 360 361 #ifdef __powerpc64__ 362 int test_long_to_double_convert(char *msg) 363 { 364 int status = 0; 365 long long55_hi = 0x07ff0ffffffffff; 366 long long55_lo = 0x07ff0fffffffffd; 367 double pos_dbl_lo = 36012304344547324.0; 368 double pos_dbl_hi = 36012304344547328.0; 369 double neg_dbl_lo = -36012304344547324.0; 370 double neg_dbl_hi = -36012304344547328.0; 371 372 printf("-------------------------- %s --------------------------\n", msg); 373 status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo); 374 status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi); 375 status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo); 376 status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo); 377 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi); 378 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi); 379 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo); 380 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo); 381 382 status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo); 383 status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi); 384 status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo); 385 status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo); 386 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo); 387 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo); 388 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi); 389 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi); 390 return status; 391 } 392 #endif 393 394 int check_single_arithmetic_op(flt_op_t op) 395 { 396 char *result; 397 int status = 0; 398 dbl_overlay R, E; 399 double qtr, half, fA, fB, fD; 400 round_mode_t mode; 401 int q, s; 402 bool_t two_args = TRUE; 403 float whole = denorm_small; 404 405 #define BINOP(op) \ 406 __asm__ volatile( \ 407 op" %0, %1, %2\n\t" \ 408 : "=f"(fD) : "f"(fA) , "f"(fB)); 409 #define UNOP(op) \ 410 __asm__ volatile( \ 411 op" %0, %1\n\t" \ 412 : "=f"(fD) : "f"(fA)); 413 414 half = (double)whole/2; 415 qtr = half/2; 416 417 if (debug) { 418 print_double("qtr", qtr); 419 print_double("whole", whole); 420 print_double("2*whole", 2*whole); 421 } 422 423 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) 424 for (s = -1; s < 2; s += 2) 425 for (q = 1; q < 4; q += 2) { 426 double expected; 427 double lo = s*whole; 428 double hi = s*2*whole; 429 430 switch(op) { 431 case FADDS: 432 fA = s*whole; 433 fB = s*q*qtr; 434 break; 435 case FSUBS: 436 fA = s*2*whole; 437 fB = s*(q == 1 ? 3 : 1)*qtr; 438 break; 439 case FMULS: 440 fA = 0.5; 441 fB = s*(4+q)*half; 442 break; 443 case FDIVS: 444 fA = s*(4+q)*half; 445 fB = 2.0; 446 break; 447 default: 448 assert("check_single_arithmetic_op: unexpected op", 449 FALSE); 450 break; 451 } 452 453 switch(mode) { 454 case TO_NEAREST: 455 expected = (q == 1 ? lo : hi); 456 break; 457 case TO_ZERO: 458 expected = lo; 459 break; 460 case TO_PLUS_INFINITY: 461 expected = (s == 1 ? hi : lo); 462 break; 463 case TO_MINUS_INFINITY: 464 expected = (s == 1 ? lo : hi); 465 break; 466 } 467 468 set_rounding_mode(mode); 469 470 /* 471 ** do the double precision dual operation just for comparison 472 ** when debugging 473 */ 474 switch(op) { 475 case FADDS: 476 BINOP("fadds"); 477 R.dbl = fD; 478 BINOP("fadd"); 479 break; 480 case FSUBS: 481 BINOP("fsubs"); 482 R.dbl = fD; 483 BINOP("fsub"); 484 break; 485 case FMULS: 486 BINOP("fmuls"); 487 R.dbl = fD; 488 BINOP("fmul"); 489 break; 490 case FDIVS: 491 BINOP("fdivs"); 492 R.dbl = fD; 493 BINOP("fdiv"); 494 break; 495 default: 496 assert("check_single_arithmetic_op: unexpected op", 497 FALSE); 498 break; 499 } 500 #undef UNOP 501 #undef BINOP 502 503 E.dbl = expected; 504 505 if ((R.layout.sign != E.layout.sign) || 506 (R.layout.exp != E.layout.exp) || 507 (R.layout.frac_lo != E.layout.frac_lo) || 508 (R.layout.frac_hi != E.layout.frac_hi)) { 509 result = "FAILED"; 510 status = 1; 511 } else { 512 result = "PASSED"; 513 status = 0; 514 } 515 516 printf("%s:%s:%s(%-13a", 517 round_mode_name[mode], result, flt_op_names[op], fA); 518 if (two_args) printf(", %-13a", fB); 519 printf(") = %-13a", R.dbl); 520 if (status) printf("\n\texpected %a", E.dbl); 521 putchar('\n'); 522 523 if (debug) { 524 print_double("hi", hi); 525 print_double("lo", lo); 526 print_double("expected", expected); 527 print_double("got", R.dbl); 528 print_double("double result", fD); 529 } 530 } 531 532 return status; 533 } 534 535 int check_single_guarded_arithmetic_op(flt_op_t op) 536 { 537 typedef struct { 538 int num, den, frac; 539 } fdivs_t; 540 541 char *result; 542 int status = 0; 543 flt_overlay A, B, Z; 544 dbl_overlay Res, Exp; 545 double fA, fB, fC, fD; 546 round_mode_t mode; 547 int g, s; 548 int arg_count; 549 550 fdivs_t divs_guard_cases[16] = { 551 { 105, 56, 0x700000 }, /* : 0 */ 552 { 100, 57, 0x608FB8 }, /* : 1 */ 553 { 000, 00, 0x000000 }, /* : X */ 554 { 100, 52, 0x762762 }, /* : 3 */ 555 { 000, 00, 0x000000 }, /* : X */ 556 { 100, 55, 0x68BA2E }, /* : 5 */ 557 { 000, 00, 0x000000 }, /* : X */ 558 { 100, 51, 0x7AFAFA }, /* : 7 */ 559 { 000, 00, 0x000000 }, /* : X */ 560 { 100, 56, 0x649249 }, /* : 9 */ 561 { 000, 00, 0x000000 }, /* : X */ 562 { 100, 54, 0x6D097B }, /* : B */ 563 { 000, 00, 0x000000 }, /* : X */ 564 { 100, 59, 0x58F2FB }, /* : D */ 565 { 000, 00, 0x000000 }, /* : X */ 566 { 101, 52, 0x789D89 } /* : F */ 567 }; 568 569 /* 0x1.00000 00000000p-3 */ 570 /* set up the invariant fields of B, the arg to cause rounding */ 571 B.flt = 0.0; 572 B.layout.exp = 124; /* -3 */ 573 574 /* set up args so result is always Z = 1.200000000000<g>p+0 */ 575 Z.flt = 1.0; 576 Z.layout.sign = 0; 577 578 #define TERNOP(op) \ 579 arg_count = 3; \ 580 __asm__ volatile( \ 581 op" %0, %1, %2, %3\n\t" \ 582 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC)); 583 #define BINOP(op) \ 584 arg_count = 2; \ 585 __asm__ volatile( \ 586 op" %0, %1, %2\n\t" \ 587 : "=f"(fD) : "f"(fA) , "f"(fB)); 588 #define UNOP(op) \ 589 arg_count = 1; \ 590 __asm__ volatile( \ 591 op" %0, %1\n\t" \ 592 : "=f"(fD) : "f"(fA)); 593 594 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) 595 for (s = -1; s < 2; s += 2) 596 for (g = 0; g < 16; g += 1) { 597 double lo, hi, expected; 598 int LSB; 599 int guard = 0; 600 int z_sign = s; 601 602 /* 603 ** one argument will have exponent = 0 as will the result (by 604 ** design) so choose the other argument with exponent -3 to 605 ** force a 3 bit shift for scaling leaving us with 3 guard bits 606 ** and the LSB bit at the bottom of the manitssa. 607 */ 608 switch(op) { 609 case FADDS: 610 /* 1p+0 + 1.00000<g>p-3 */ 611 B.layout.frac = g; 612 613 fB = s*B.flt; 614 fA = s*1.0; 615 616 /* set up Z to be truncated result */ 617 618 /* mask off LSB from resulting guard bits */ 619 guard = g & 7; 620 621 Z.layout.frac = 0x100000 | (g >> 3); 622 break; 623 case FSUBS: 624 /* 1.200002p+0 - 1.000000000000<g>p-3 */ 625 A.flt = 1.125; 626 /* add enough to avoid scaling of the result */ 627 A.layout.frac |= 0x2; 628 fA = s*A.flt; 629 630 B.layout.frac = g; 631 fB = s*B.flt; 632 633 /* set up Z to be truncated result */ 634 guard = (0x10-g); 635 Z.layout.frac = guard>>3; 636 637 /* mask off LSB from resulting guard bits */ 638 guard &= 7; 639 break; 640 case FMULS: 641 /* 1 + g*2^-23 */ 642 A.flt = 1.0; 643 A.layout.frac = g; 644 fA = s*A.flt; 645 fB = 1.125; 646 647 /* set up Z to be truncated result */ 648 Z.flt = 1.0; 649 Z.layout.frac = 0x100000; 650 Z.layout.frac |= g + (g>>3); 651 guard = g & 7; 652 break; 653 case FDIVS: 654 /* g >> 3 == LSB, g & 7 == guard bits */ 655 guard = g & 7; 656 if ((guard & 1) == 0) { 657 /* special case: guard bit X = 0 */ 658 A.flt = denorm_small; 659 A.layout.frac = g; 660 fA = A.flt; 661 fB = s*8.0; 662 Z.flt = 0.0; 663 Z.layout.frac |= (g >> 3); 664 } else { 665 fA = s*divs_guard_cases[g].num; 666 fB = divs_guard_cases[g].den; 667 668 Z.flt = 1.0; 669 Z.layout.frac = divs_guard_cases[g].frac; 670 } 671 break; 672 case FMADDS: 673 case FMSUBS: 674 case FNMADDS: 675 case FNMSUBS: 676 /* 1 + g*2^-23 */ 677 A.flt = 1.0; 678 A.layout.frac = g; 679 fA = s*A.flt; 680 fB = 1.125; 681 682 /* 1.000001p-1 */ 683 A.flt = 0.5; 684 A.layout.frac = 1; 685 fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt; 686 687 /* set up Z to be truncated result */ 688 z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s); 689 guard = ((g & 7) + 0x4) & 7; 690 Z.flt = 1.0; 691 Z.layout.frac = 0x500000; 692 Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0); 693 break; 694 default: 695 assert("check_single_arithmetic_op: unexpected op", 696 FALSE); 697 break; 698 } 699 700 /* get LSB for tie breaking */ 701 LSB = Z.layout.frac & 1; 702 703 /* set up hi and lo */ 704 lo = z_sign*Z.flt; 705 Z.layout.frac += 1; 706 hi = z_sign*Z.flt; 707 708 switch(mode) { 709 case TO_NEAREST: 710 /* look at 3 guard bits to determine expected rounding */ 711 switch(guard) { 712 case 0: 713 case 1: case 2: case 3: 714 expected = lo; 715 break; 716 case 4: /* tie: round to even */ 717 if (debug) printf("tie: LSB = %d\n", LSB); 718 expected = (LSB == 0 ? lo : hi); 719 break; 720 case 5: case 6: case 7: 721 expected = hi; 722 break; 723 default: 724 assert("check_single_guarded_arithmetic_op: unexpected guard", 725 FALSE); 726 } 727 break; 728 case TO_ZERO: 729 expected = lo; 730 break; 731 case TO_PLUS_INFINITY: 732 if (guard == 0) { 733 /* no rounding */ 734 expected = lo; 735 } else { 736 expected = (s == 1 ? hi : lo); 737 } 738 break; 739 case TO_MINUS_INFINITY: 740 if (guard == 0) { 741 /* no rounding */ 742 expected = lo; 743 } else { 744 expected = (s == 1 ? lo : hi); 745 } 746 break; 747 } 748 749 set_rounding_mode(mode); 750 751 /* 752 ** do the double precision dual operation just for comparison 753 ** when debugging 754 */ 755 switch(op) { 756 case FADDS: 757 BINOP("fadds"); 758 Res.dbl = fD; 759 break; 760 case FSUBS: 761 BINOP("fsubs"); 762 Res.dbl = fD; 763 break; 764 case FMULS: 765 BINOP("fmuls"); 766 Res.dbl = fD; 767 break; 768 case FDIVS: 769 BINOP("fdivs"); 770 Res.dbl = fD; 771 break; 772 case FMADDS: 773 TERNOP("fmadds"); 774 Res.dbl = fD; 775 break; 776 case FMSUBS: 777 TERNOP("fmsubs"); 778 Res.dbl = fD; 779 break; 780 case FNMADDS: 781 TERNOP("fnmadds"); 782 Res.dbl = fD; 783 break; 784 case FNMSUBS: 785 TERNOP("fnmsubs"); 786 Res.dbl = fD; 787 break; 788 default: 789 assert("check_single_guarded_arithmetic_op: unexpected op", 790 FALSE); 791 break; 792 } 793 #undef UNOP 794 #undef BINOP 795 #undef TERNOP 796 797 Exp.dbl = expected; 798 799 if ((Res.layout.sign != Exp.layout.sign) || 800 (Res.layout.exp != Exp.layout.exp) || 801 (Res.layout.frac_lo != Exp.layout.frac_lo) || 802 (Res.layout.frac_hi != Exp.layout.frac_hi)) { 803 result = "FAILED"; 804 status = 1; 805 } else { 806 result = "PASSED"; 807 status = 0; 808 } 809 810 /* There seems to be some noise in the lower bits. The value 811 * on the least significant digit seems to vary when printing 812 * based on the rounding mode of the compiler. Just trying 813 * to get rid of the noise in the least significant bits when 814 * printing the operand. 815 */ 816 817 fA = ((long int)(fA*10000))/10000.0; 818 /* Change -0.0 to a positive 0.0. Some compilers print -0.0 819 * others do not. Make it consistent. 820 */ 821 if (fA == -0.0) 822 fA = 0.0; 823 824 printf("%s:%s:%s(%-13.6f", 825 round_mode_name[mode], result, flt_op_names[op], fA); 826 if (arg_count > 1) printf(", %-13a", fB); 827 if (arg_count > 2) printf(", %-13a", fC); 828 printf(") = %-13a", Res.dbl); 829 if (status) printf("\n\texpected %a", Exp.dbl); 830 putchar('\n'); 831 832 if (debug) { 833 print_double("hi", hi); 834 print_double("lo", lo); 835 print_double("expected", expected); 836 print_double("got", Res.dbl); 837 } 838 } 839 840 return status; 841 } 842 843 int check_double_guarded_arithmetic_op(flt_op_t op) 844 { 845 typedef struct { 846 int num, den, hi, lo; 847 } fdiv_t; 848 typedef struct { 849 double arg; 850 int exp, hi, lo; 851 } fsqrt_t; 852 853 char *result; 854 int status = 0; 855 dbl_overlay A, B, Z; 856 dbl_overlay Res, Exp; 857 double fA, fB, fC, fD; 858 round_mode_t mode; 859 int g, s; 860 int arg_count; 861 fdiv_t div_guard_cases[16] = { 862 { 62, 62, 0x00000, 0x00000000 }, /* 0 */ 863 { 64, 62, 0x08421, 0x08421084 }, /* 1 */ 864 { 66, 62, 0x10842, 0x10842108 }, /* 2 */ 865 { 100, 62, 0x9ce73, 0x9ce739ce }, /* 3 */ 866 { 100, 62, 0x9ce73, 0x9ce739ce }, /* X */ 867 { 102, 62, 0xa5294, 0xa5294a52 }, /* 5 */ 868 { 106, 62, 0xb5ad6, 0xb5ad6b5a }, /* 6 */ 869 { 108, 62, 0xbdef7, 0xbdef7bde }, /* 7 */ 870 { 108, 108, 0x00000, 0x00000000 }, /* 8 */ 871 { 112, 62, 0xce739, 0xce739ce7 }, /* 9 */ 872 { 114, 62, 0xd6b5a, 0xd6b5ad6b }, /* A */ 873 { 116, 62, 0xdef7b, 0xdef7bdef }, /* B */ 874 { 84, 62, 0x5ad6b, 0x5ad6b5ad }, /* X */ 875 { 118, 62, 0xe739c, 0xe739ce73 }, /* D */ 876 { 90, 62, 0x739ce, 0x739ce739 }, /* E */ 877 { 92, 62, 0x7bdef, 0x7bdef7bd } /* F */ 878 }; 879 880 881 fsqrt_t sqrt_guard_cases[16] = { 882 { 0x1.08800p0, 0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440 */ 883 { 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910 */ 884 { 0x1.A8220p0, 0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411 */ 885 { 0x1.05A20p0, 0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1 */ 886 { 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541 */ 887 { 0x1.DCA20p0, 0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51 */ 888 { 0x1.02C80p0, 0, 0x01630, 0x9cde7483}, /* :6 B6.8164 */ 889 { 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40 */ 890 { 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9 */ 891 { 0x1.1D020p0, 0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81 */ 892 { 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4 */ 893 { 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400 */ 894 { 0x1.48520p0, 0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429 */ 895 { 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61 */ 896 { 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684 */ 897 { 0x1.76B20p0, 0, 0x35b67, 0x81aed827} /* :F DB.BB59 */ 898 }; 899 900 /* 0x1.00000 00000000p-3 */ 901 /* set up the invariant fields of B, the arg to cause rounding */ 902 B.dbl = 0.0; 903 B.layout.exp = 1020; 904 905 /* set up args so result is always Z = 1.200000000000<g>p+0 */ 906 Z.dbl = 1.0; 907 Z.layout.sign = 0; 908 909 #define TERNOP(op) \ 910 arg_count = 3; \ 911 __asm__ volatile( \ 912 op" %0, %1, %2, %3\n\t" \ 913 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC)); 914 #define BINOP(op) \ 915 arg_count = 2; \ 916 __asm__ volatile( \ 917 op" %0, %1, %2\n\t" \ 918 : "=f"(fD) : "f"(fA) , "f"(fB)); 919 #define UNOP(op) \ 920 arg_count = 1; \ 921 __asm__ volatile( \ 922 op" %0, %1\n\t" \ 923 : "=f"(fD) : "f"(fA)); 924 925 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) 926 for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2) 927 for (g = 0; g < 16; g += 1) { 928 double lo, hi, expected; 929 int LSB; 930 int guard; 931 int z_sign = s; 932 933 /* 934 ** one argument will have exponent = 0 as will the result (by 935 ** design) so choose the other argument with exponent -3 to 936 ** force a 3 bit shift for scaling leaving us with 3 guard bits 937 ** and the LSB bit at the bottom of the manitssa. 938 */ 939 switch(op) { 940 case FADD: 941 /* 1p+0 + 1.000000000000<g>p-3 */ 942 B.layout.frac_lo = g; 943 944 fB = s*B.dbl; 945 fA = s*1.0; 946 947 /* set up Z to be truncated result */ 948 949 /* mask off LSB from resulting guard bits */ 950 guard = g & 7; 951 952 Z.layout.frac_hi = 0x20000; 953 Z.layout.frac_lo = g >> 3; 954 955 break; 956 case FSUB: 957 /* 1.2000000000002p+0 - 1.000000000000<g>p-3 */ 958 A.dbl = 1.125; 959 /* add enough to avoid scaling of the result */ 960 A.layout.frac_lo = 0x2; 961 fA = s*A.dbl; 962 963 B.layout.frac_lo = g; 964 fB = s*B.dbl; 965 966 /* set up Z to be truncated result */ 967 guard = (0x10-g); 968 Z.layout.frac_hi = 0x0; 969 Z.layout.frac_lo = guard>>3; 970 971 /* mask off LSB from resulting guard bits */ 972 guard &= 7; 973 break; 974 case FMUL: 975 /* 1 + g*2^-52 */ 976 A.dbl = 1.0; 977 A.layout.frac_lo = g; 978 fA = s*A.dbl; 979 fB = 1.125; 980 981 /* set up Z to be truncated result */ 982 Z.dbl = 1.0; 983 Z.layout.frac_hi = 0x20000; 984 Z.layout.frac_lo = g + (g>>3); 985 guard = g & 7; 986 break; 987 case FMADD: 988 case FMSUB: 989 case FNMADD: 990 case FNMSUB: 991 /* 1 + g*2^-52 */ 992 A.dbl = 1.0; 993 A.layout.frac_lo = g; 994 fA = s*A.dbl; 995 fB = 1.125; 996 997 /* 1.0000000000001p-1 */ 998 A.dbl = 0.5; 999 A.layout.frac_lo = 1; 1000 fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl; 1001 1002 /* set up Z to be truncated result */ 1003 z_sign = (op == FNMADD || op == FNMSUB ? -s : s); 1004 guard = ((g & 7) + 0x4) & 7; 1005 Z.dbl = 1.0; 1006 Z.layout.frac_hi = 0xa0000; 1007 Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0); 1008 break; 1009 case FDIV: 1010 /* g >> 3 == LSB, g & 7 == guard bits */ 1011 guard = g & 7; 1012 if (guard == 0x4) { 1013 /* special case guard bits == 4, inexact tie */ 1014 fB = s*2.0; 1015 Z.dbl = 0.0; 1016 if (g >> 3) { 1017 fA = dbl_denorm_small + 2*dbl_denorm_small; 1018 Z.layout.frac_lo = 0x1; 1019 } else { 1020 fA = dbl_denorm_small; 1021 } 1022 } else { 1023 fA = s*div_guard_cases[g].num; 1024 fB = div_guard_cases[g].den; 1025 1026 printf("%d/%d\n", 1027 s*div_guard_cases[g].num, 1028 div_guard_cases[g].den); 1029 Z.dbl = 1.0; 1030 Z.layout.frac_hi = div_guard_cases[g].hi; 1031 Z.layout.frac_lo = div_guard_cases[g].lo; 1032 } 1033 break; 1034 case FSQRT: 1035 fA = s*sqrt_guard_cases[g].arg; 1036 Z.dbl = 1.0; 1037 Z.layout.exp = sqrt_guard_cases[g].exp + 1023; 1038 Z.layout.frac_hi = sqrt_guard_cases[g].hi; 1039 Z.layout.frac_lo = sqrt_guard_cases[g].lo; 1040 guard = g >> 1; 1041 if (g & 1) guard |= 1; 1042 /* don't have test cases for when X bit = 0 */ 1043 if (guard == 0 || guard == 4) continue; 1044 break; 1045 default: 1046 assert("check_double_guarded_arithmetic_op: unexpected op", 1047 FALSE); 1048 break; 1049 } 1050 1051 /* get LSB for tie breaking */ 1052 LSB = Z.layout.frac_lo & 1; 1053 1054 /* set up hi and lo */ 1055 lo = z_sign*Z.dbl; 1056 Z.layout.frac_lo += 1; 1057 hi = z_sign*Z.dbl; 1058 1059 switch(mode) { 1060 case TO_NEAREST: 1061 /* look at 3 guard bits to determine expected rounding */ 1062 switch(guard) { 1063 case 0: 1064 case 1: case 2: case 3: 1065 expected = lo; 1066 break; 1067 case 4: /* tie: round to even */ 1068 if (debug) printf("tie: LSB = %d\n", LSB); 1069 expected = (LSB == 0 ? lo : hi); 1070 break; 1071 case 5: case 6: case 7: 1072 expected = hi; 1073 break; 1074 default: 1075 assert("check_double_guarded_arithmetic_op: unexpected guard", 1076 FALSE); 1077 } 1078 break; 1079 case TO_ZERO: 1080 expected = lo; 1081 break; 1082 case TO_PLUS_INFINITY: 1083 if (guard == 0) { 1084 /* no rounding */ 1085 expected = lo; 1086 } else { 1087 expected = (s == 1 ? hi : lo); 1088 } 1089 break; 1090 case TO_MINUS_INFINITY: 1091 if (guard == 0) { 1092 /* no rounding */ 1093 expected = lo; 1094 } else { 1095 expected = (s == 1 ? lo : hi); 1096 } 1097 break; 1098 } 1099 1100 set_rounding_mode(mode); 1101 1102 /* 1103 ** do the double precision dual operation just for comparison 1104 ** when debugging 1105 */ 1106 switch(op) { 1107 case FADD: 1108 BINOP("fadd"); 1109 Res.dbl = fD; 1110 break; 1111 case FSUB: 1112 BINOP("fsub"); 1113 Res.dbl = fD; 1114 break; 1115 case FMUL: 1116 BINOP("fmul"); 1117 Res.dbl = fD; 1118 break; 1119 case FMADD: 1120 TERNOP("fmadd"); 1121 Res.dbl = fD; 1122 break; 1123 case FMSUB: 1124 TERNOP("fmsub"); 1125 Res.dbl = fD; 1126 break; 1127 case FNMADD: 1128 TERNOP("fnmadd"); 1129 Res.dbl = fD; 1130 break; 1131 case FNMSUB: 1132 TERNOP("fnmsub"); 1133 Res.dbl = fD; 1134 break; 1135 case FDIV: 1136 BINOP("fdiv"); 1137 Res.dbl = fD; 1138 break; 1139 case FSQRT: 1140 UNOP("fsqrt"); 1141 Res.dbl = fD; 1142 break; 1143 default: 1144 assert("check_double_guarded_arithmetic_op: unexpected op", 1145 FALSE); 1146 break; 1147 } 1148 #undef UNOP 1149 #undef BINOP 1150 #undef TERNOP 1151 1152 Exp.dbl = expected; 1153 1154 if ((Res.layout.sign != Exp.layout.sign) || 1155 (Res.layout.exp != Exp.layout.exp) || 1156 (Res.layout.frac_lo != Exp.layout.frac_lo) || 1157 (Res.layout.frac_hi != Exp.layout.frac_hi)) { 1158 result = "FAILED"; 1159 status = 1; 1160 } else { 1161 result = "PASSED"; 1162 status = 0; 1163 } 1164 1165 printf("%s:%s:%s(%-13a", 1166 round_mode_name[mode], result, flt_op_names[op], fA); 1167 if (arg_count > 1) printf(", %-13a", fB); 1168 if (arg_count > 2) printf(", %-13a", fC); 1169 printf(") = %-13a", Res.dbl); 1170 if (status) printf("\n\texpected %a", Exp.dbl); 1171 putchar('\n'); 1172 1173 if (debug) { 1174 print_double("hi", hi); 1175 print_double("lo", lo); 1176 print_double("expected", expected); 1177 print_double("got", Res.dbl); 1178 } 1179 } 1180 1181 return status; 1182 } 1183 1184 int test_float_arithmetic_ops() 1185 { 1186 int status = 0; 1187 flt_op_t op; 1188 1189 /* 1190 ** choose FP operands whose result should be rounded to either 1191 ** lo or hi. 1192 */ 1193 1194 printf("-------------------------- %s --------------------------\n", 1195 "test rounding of float operators without guard bits"); 1196 for (op = FADDS; op <= FDIVS; op++) { 1197 status |= check_single_arithmetic_op(op); 1198 } 1199 1200 printf("-------------------------- %s --------------------------\n", 1201 "test rounding of float operators with guard bits"); 1202 for (op = FADDS; op <= FNMSUBS; op++) { 1203 status |= check_single_guarded_arithmetic_op(op); 1204 } 1205 1206 printf("-------------------------- %s --------------------------\n", 1207 "test rounding of double operators with guard bits"); 1208 for (op = FADD; op <= FSQRT; op++) { 1209 status |= check_double_guarded_arithmetic_op(op); 1210 } 1211 return status; 1212 } 1213 1214 1215 int 1216 main() 1217 { 1218 int status = 0; 1219 1220 init(); 1221 1222 status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small); 1223 status |= test_dbl_to_float_convert("test normalized convert", &norm_small); 1224 status |= test_int_to_float_convert("test (float)int convert"); 1225 status |= test_int_to_float_convert("test (float)int convert"); 1226 1227 #ifdef __powerpc64__ 1228 status |= test_long_to_double_convert("test (double)long convert"); 1229 #endif 1230 status |= test_float_arithmetic_ops(); 1231 return status; 1232 } 1233