/*  Copyright (C) 2012 IBM

 Author: Maynard Johnson <maynardj (at) us.ibm.com>
         Carl Love <carll (at) us.ibm.com>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; either version 2 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 02111-1307, USA.

 The GNU General Public License is contained in the file COPYING.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <elf.h>
#include <link.h>

#define PPC_FEATURE_HAS_VSX 0x00000080 /* Vector Scalar Extension. */

#if defined(HAS_DFP)

/* Global register variables pinning the FPRs the tests work with.
 * Every instruction wrapper below reads its source operand from f14 and
 * writes its result to f18.  The drivers additionally load f15 with the
 * second doubleword of a 128-bit operand and read f19 as the second
 * doubleword of a 128-bit result (the quad instruction forms operate on
 * register pairs; see the Power ISA).  f16 and f17 are reserved here but
 * not referenced by the code in this file.
 */
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
register double f18 __asm__ ("fr18");
register double f19 __asm__ ("fr19");

typedef unsigned char Bool;
#define True 1
#define False 0

/* Write an all-zero image into every FPSCR field. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

/* Copy the FPSCR into the double lvalue _arg. */
#define GET_FPSCR(_arg) \
   __asm__ __volatile__ ("mffs %0" : "=f"(_arg) )

/* Move the FPSCR image held in f14 into the FPSCR field selected by the
 * "1, ..., 0, 1" operands; going by the macro name this is the field that
 * holds the decimal rounding mode (DRN).
 */
#define SET_FPSCR_DRN \
   __asm__ __volatile__ ("mtfsf 1, %0, 0, 1" : : "f"(f14) )

/* Shift amounts exercised by the shift tests.  They have to be compile-time
 * constants because they are passed to the asm as "i" (immediate) operands.
 */
#define SH_0 0
#define SH_1 1
#define SH_2 15
#define SH_3 63

#define NUM_RND_MODES 8
#define CONDREG_MASK 0x0f000000
#define CONDREG_SHIFT 24

/* A 64-bit FPR image viewed either as a double or as its raw bit pattern.
 * Used instead of casting a double* to an integer pointer, which violates
 * the strict-aliasing rules.
 */
typedef union {
   double d;
   unsigned long long u;
} fpr_bits_t;

/* Environment vector handed to main(); used to locate the auxiliary vector. */
static char ** my_envp;

/* Return a pointer to the slot just past the NULL that terminates my_envp,
 * which should be the start of the auxv.  my_envp must be non-NULL.
 */
static inline char** find_auxv(void)
{
   char **cursor = my_envp;

   /* Scan over the env vector looking for the ending NULL */
   while (*cursor != NULL)
      ++cursor;

   /* Step over the terminator itself. */
   return cursor + 1;
}

/* Return the AT_HWCAP value from the auxiliary vector.  A non-zero value is
 * cached, so the vector is walked at most once in that case.  Returns 0 if
 * my_envp has not been set or no AT_HWCAP entry is present.
 */
static unsigned long fetch_at_hwcap(void)
{
   static unsigned long auxv_hwcap = 0;
   const ElfW(auxv_t) *entry;

   if (auxv_hwcap)
      return auxv_hwcap;

   /* Nothing to scan until main() has recorded envp. */
   if (my_envp == NULL)
      return 0;

   for (entry = (const ElfW(auxv_t) *) find_auxv();
        entry->a_type != AT_NULL;
        entry++) {
      if (entry->a_type == AT_HWCAP) {
         auxv_hwcap = entry->a_un.a_val;
         break;
      }
   }

   return auxv_hwcap;
}

/* Check to see if the AUX vector has the bit set indicating the HW
 * supports the vsx instructions.  This implies the processor is
 * at least a POWER 7.
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
int get_vsx(void)
{
   unsigned long hwcap = fetch_at_hwcap();

   return (hwcap & PPC_FEATURE_HAS_VSX) == PPC_FEATURE_HAS_VSX ? 1 : 0;
}

/* The assembly-level instructions being tested */

/* One switch arm of a shift wrapper: emits "<insn> f18, f14, <sh>".
 * insn is a string literal; sh is one of the SH_* constants.
 */
#define DFP_SHIFT_CASE(insn, sh)                                      \
   case sh:                                                           \
      __asm__ __volatile__ (insn " %0, %1, %2"                        \
                            : "=f" (f18) : "f" (f14), "i" (sh));      \
      break

/* dscri: f18 = f14 shifted by `shift`, which must be one of SH_0..SH_3. */
static void _test_dscri (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscri", SH_0);
   DFP_SHIFT_CASE("dscri", SH_1);
   DFP_SHIFT_CASE("dscri", SH_2);
   DFP_SHIFT_CASE("dscri", SH_3);
   default:
      printf(" dscri, unsupported shift case %d\n", shift);
      break;
   }
}

/* dscli: f18 = f14 shifted by `shift`, which must be one of SH_0..SH_3. */
static void _test_dscli (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscli", SH_0);
   DFP_SHIFT_CASE("dscli", SH_1);
   DFP_SHIFT_CASE("dscli", SH_2);
   DFP_SHIFT_CASE("dscli", SH_3);
   default:
      printf(" dscli, unsupported shift case %d\n", shift);
      break;
   }
}

static void _test_dctdp (void)
{
   __asm__ __volatile__ ("dctdp %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_drsp (void)
{
   __asm__ __volatile__ ("drsp %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dctfix (void)
{
   __asm__ __volatile__ ("dctfix %0, %1" : "=f" (f18) : "f" (f14));
}

/* Power 7 and newer processors support this instruction */
static void _test_dcffix (void)
{
   __asm__ __volatile__ ("dcffix %0, %1" : "=f" (f18) : "f" (f14));
}

/* dscriq: quad form of dscri; `shift` must be one of SH_0..SH_3. */
static void _test_dscriq (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscriq", SH_0);
   DFP_SHIFT_CASE("dscriq", SH_1);
   DFP_SHIFT_CASE("dscriq", SH_2);
   DFP_SHIFT_CASE("dscriq", SH_3);
   default:
      printf(" dscriq, unsupported shift case %d\n", shift);
      break;
   }
}

/* dscliq: quad form of dscli; `shift` must be one of SH_0..SH_3. */
static void _test_dscliq (int shift)
{
   switch(shift) {
   DFP_SHIFT_CASE("dscliq", SH_0);
   DFP_SHIFT_CASE("dscliq", SH_1);
   DFP_SHIFT_CASE("dscliq", SH_2);
   DFP_SHIFT_CASE("dscliq", SH_3);
   default:
      printf(" dscliq, unsupported shift case %d\n", shift);
      break;
   }
}

static void _test_dctqpq (void)
{
   __asm__ __volatile__ ("dctqpq %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dctfixq (void)
{
   __asm__ __volatile__ ("dctfixq %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_drdpq (void)
{
   __asm__ __volatile__ ("drdpq %0, %1" : "=f" (f18) : "f" (f14));
}

static void _test_dcffixq (void)
{
   __asm__ __volatile__ ("dcffixq %0, %1" : "=f" (f18) : "f" (f14));
}

/* test_func_t deliberately has an unspecified parameter list so that one
 * table field can hold both the (void) wrappers and the (int) shift wrappers.
 */
typedef void (*test_func_t)();
typedef void (*test_func_main_t)(int);
typedef void (*test_func_shift_t)(int);

/* One entry of the top-level test table run by main(). */
typedef struct test_table
{
   test_func_main_t test_category;
   const char * name;
} test_table_t;

/* 128-bit DFP operands, stored as pairs: element 2*n is loaded into f14 and
 * element 2*n+1 into f15 by the test drivers.
 */
static unsigned long long dfp128_vals[] = {
   // Some finite numbers
   0x2207c00000000000ULL, 0x0000000000000e50ULL,
   0x2f07c00000000000ULL, 0x000000000014c000ULL,  //large number
   0xa207c00000000000ULL, 0x00000000000000e0ULL,
   0x2206c00000000000ULL, 0x00000000000000cfULL,
   0xa205c00000000000ULL, 0x000000010a395bcfULL,
   0x6209400000fd0000ULL, 0x00253f1f534acdd4ULL, // a small number
   0x000400000089b000ULL, 0x0a6000d000000049ULL, // very small number
   // flavors of zero
   0x2208000000000000ULL, 0x0000000000000000ULL,
   0xa208000000000000ULL, 0x0000000000000000ULL, // negative
   0xa248000000000000ULL, 0x0000000000000000ULL,
   // flavors of NAN
   0x7c00000000000000ULL, 0x0000000000000000ULL, // quiet
   0xfc00000000000000ULL, 0xc00100035b007700ULL,
   0x7e00000000000000ULL, 0xfe000000d0e0a0d0ULL, // signaling
   // flavors of Infinity
   0x7800000000000000ULL, 0x0000000000000000ULL,
   0xf800000000000000ULL, 0x0000000000000000ULL, // negative
   0xf900000000000000ULL, 0x0000000000000000ULL
};

/* Signed 64-bit integer operands for dcffix / dcffixq. */
static unsigned long long int64_vals[] = {
   // I64 values
   0x0ULL,                // zero
   0x1ULL,                // one
   0xffffffffffffffffULL, // minus one
   0x2386f26fc0ffffULL,   // 9999999999999999
   0xffdc790d903f0001ULL, // -9999999999999999
   0x462d53c8abac0ULL,    // 1234567890123456
   0xfffb9d2ac3754540ULL, // -1234567890123456
};

/* 64-bit DFP operands. */
static unsigned long long dfp64_vals[] = {
   // various finite numbers
   0x2234000000000e50ULL,
   0x223400000014c000ULL,
   0xa2340000000000e0ULL,// negative
   0x22240000000000cfULL,
   0xa21400010a395bcfULL,// negative
   0x6e4d3f1f534acdd4ULL,// large number
   0x000400000089b000ULL,// very small number
   // flavors of zero
   0x2238000000000000ULL,
   0xa238000000000000ULL,
   0x4248000000000000ULL,
   // flavors of NAN
   0x7e34000000000111ULL,
   0xfe000000d0e0a0d0ULL,//signaling
   0xfc00000000000000ULL,//quiet
   // flavors of Infinity
   0x7800000000000000ULL,
   0xf800000000000000ULL,//negative
   0x7a34000000000000ULL,
};

/* One test case.  fra_idx selects the source operand from a value table;
 * frb_idx is only meaningful to the shift tests, where it is the shift
 * amount rather than an index.
 */
typedef struct dfp_test_args {
   int fra_idx;
   int frb_idx;
} dfp_test_args_t;

/* Indices into int64_vals, used by dfp_dcffix_dcffixq_tests */
static dfp_test_args_t int64_args_x1[] = {
  /*  {int64 input val, unused } */
   {0, 0},
   {1, 0},
   {2, 0},
   {3, 0},
   {4, 0},
   {5, 0},
   {6, 0},
};

/* {value index, shift amount} pairs used by dfp_two_arg_tests.  The value
 * index refers to dfp64_vals for LONG_TEST entries and to a pair in
 * dfp128_vals for QUAD_TEST entries.
 */
static dfp_test_args_t dfp_2args_x1[] = {
  /*  {dfp_arg, shift_arg} */
   {0, SH_0},
   {0, SH_1},
   {0, SH_2},
   {0, SH_3},
   {5, SH_0},
   {5, SH_1},
   {5, SH_2},
   {5, SH_3},
   {6, SH_0},
   {6, SH_1},
   {6, SH_2},
   {6, SH_3},
   {7, SH_0},
   {7, SH_1},
   {7, SH_2},
   {7, SH_3},
   {10, SH_0},
   {10, SH_1},
   {10, SH_2},
   {10, SH_3},
   {13, SH_0},
   {13, SH_1},
   {13, SH_2},
   {13, SH_3},
};

/* Value indices used by dfp_one_arg_tests (dfp64_vals for LONG_TEST
 * entries, a pair in dfp128_vals for QUAD_TEST entries).
 */
static dfp_test_args_t dfp_1args_x1[] = {
  /*  {dfp_arg, unused} */
   {0, 0},
   {1, 0},
   {2, 0},
   {3, 0},
   {4, 0},
   {5, 0},
   {6, 0},
   {7, 0},
   {8, 0},
   {9, 0},
   {10, 0},
   {11, 0},
   {12, 0},
   {13, 0},
   {14, 0},
};

/* NOTE: LONG_TEST is 0 and QUAD_TEST is 1, so always compare against the
 * enumerator explicitly; a bare truth test on the value selects QUAD_TEST.
 */
typedef enum {
   LONG_TEST,
   QUAD_TEST
} precision_type_t;

/* Description of one instruction under test.
 *   test_func    - wrapper that executes the instruction
 *   name         - mnemonic printed in front of every result line
 *   targs        - table of test cases
 *   num_tests    - number of leading entries of targs to run
 *   precision    - selects the value table and the output layout
 *   op           - label printed between operand(s) and result
 *   cr_supported - set in the tables; not consulted by the drivers below
 */
typedef struct dfp_test
{
   test_func_t test_func;
   const char * name;
   dfp_test_args_t * targs;
   int num_tests;
   precision_type_t precision;
   const char * op;
   Bool cr_supported;
} dfp_test_t;

/* The dcffix and dcffixq tests are a little different in that they both take
 * an I64 input.
 */
static dfp_test_t
dfp_dcffix_dcffixq_tests[] = {
   { &_test_dcffixq,"dcffixq", int64_args_x1, 7, QUAD_TEST, "I64S->D128", True},
   /* Power 7 instruction */
   { &_test_dcffix, "dcffix",  int64_args_x1, 7, LONG_TEST, "I64S->D64", True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

static dfp_test_t
dfp_one_arg_tests[] = {
   { &_test_dctdp,  "dctdp",   dfp_1args_x1, 15, LONG_TEST, "D32->D64", True},
   { &_test_drsp,   "drsp",    dfp_1args_x1, 15, LONG_TEST, "D64->D32", True},
   { &_test_dctfix, "dctfix",  dfp_1args_x1, 15, LONG_TEST, "D64->I64S", True},
   { &_test_dctqpq, "dctqpq",  dfp_1args_x1, 15, QUAD_TEST, "D64->D128", True},
   { &_test_dctfixq,"dctfixq", dfp_1args_x1, 15, QUAD_TEST, "D128->I64S", True},
   { &_test_drdpq,  "drdpq",   dfp_1args_x1, 15, QUAD_TEST, "D128->D64", True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

/* NOTE(review): dfp_2args_x1 has 24 entries but only the first 20 are run,
 * so the four cases using value index 13 are never executed.  The count is
 * left unchanged here because it may be intentional -- please confirm.
 */
static dfp_test_t
dfp_two_arg_tests[] = {
   { &_test_dscri,  "dscri",   dfp_2args_x1, 20, LONG_TEST, ">>", True},
   { &_test_dscli,  "dscli",   dfp_2args_x1, 20, LONG_TEST, "<<", True},
   { &_test_dscriq, "dscriq",  dfp_2args_x1, 20, QUAD_TEST, ">>", True},
   { &_test_dscliq, "dscliq",  dfp_2args_x1, 20, QUAD_TEST, "<<", True},
   { NULL, NULL, NULL, 0, LONG_TEST, NULL, False}
};

/* Install rnd_mode as the rounding mode: read the FPSCR, replace bits
 * 32..35 (counting from the least significant bit) of its 64-bit image with
 * rnd_mode, and write the image back through SET_FPSCR_DRN.
 *
 * Clobbers f14, so callers must load their test operand afterwards.
 */
void set_rounding_mode(unsigned long long rnd_mode)
{
   fpr_bits_t fpscr;

   __asm__ __volatile__ ("mffs %0" : "=f"(f14));
   fpscr.d = f14;

   fpscr.u &= 0xFFFFFFF0FFFFFFFFULL;
   fpscr.u |= (rnd_mode << 32);

   f14 = fpscr.d;
   SET_FPSCR_DRN;
}

/* Run every entry of dfp_one_arg_tests under each of the NUM_RND_MODES
 * rounding modes and print operand(s) and result(s) in hex.
 *
 * unused - ignored; present so the function fits test_func_main_t.
 */
static void test_dfp_one_arg_ops(int unused)
{
   int k;

   (void)unused;

   for (k = 0; dfp_one_arg_tests[k].test_func != NULL; k++) {
      const dfp_test_t *test_group = &dfp_one_arg_tests[k];
      test_func_t func = test_group->test_func;
      unsigned long round_mode;

      /* Do each test with each of the possible rounding modes */
      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         int i;

         printf("\ntest with rounding mode %lu \n", round_mode);

         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group->num_tests; i++) {
            int idx = test_group->targs[i].fra_idx;
            fpr_bits_t src, srcx, res;

            srcx.u = 0;
            if (test_group->precision == LONG_TEST) {
               src.u = dfp64_vals[idx];
            } else {
               src.u = dfp128_vals[idx * 2];
               srcx.u = dfp128_vals[(idx * 2) + 1];
            }

            f14 = src.d;
            if (test_group->precision == QUAD_TEST)
               f15 = srcx.d;

            (*func)();
            res.d = f18;

            printf("%s %016llx", test_group->name, src.u);

            if (test_group->precision == LONG_TEST) {
               printf(" %s => %016llx", test_group->op, res.u);
            } else {
               fpr_bits_t resx;

               resx.d = f19;
               printf(" %016llx %s ==> %016llx %016llx",
                      srcx.u, test_group->op, res.u, resx.u);
            }
            printf("\n");
         }
      }

      printf( "\n" );
   }
}

/* Shift instructions: first argument is the DFP source, second argument
 * is 6 bit shift amount.
 *
 * Run every entry of dfp_two_arg_tests under each of the NUM_RND_MODES
 * rounding modes and print operand(s), shift amount and result(s).
 *
 * unused - ignored; present so the function fits test_func_main_t.
 */
static void test_dfp_two_arg_ops(int unused)
{
   int k;

   (void)unused;

   for (k = 0; dfp_two_arg_tests[k].test_func != NULL; k++) {
      const dfp_test_t *test_group = &dfp_two_arg_tests[k];
      test_func_shift_t func = test_group->test_func;
      unsigned long round_mode;

      /* Do each test with each of the possible rounding modes */
      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         int i;

         printf("\ntest with rounding mode %lu \n", round_mode);

         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group->num_tests; i++) {
            int idx = test_group->targs[i].fra_idx;
            /* int, matching both the wrapper's parameter and "%-3d". */
            int shift_by = test_group->targs[i].frb_idx;
            fpr_bits_t src, srcx, res;

            srcx.u = 0;
            if (test_group->precision == LONG_TEST) {
               src.u = dfp64_vals[idx];
            } else {
               src.u = dfp128_vals[idx * 2];
               srcx.u = dfp128_vals[(idx * 2) + 1];
            }

            f14 = src.d;
            if (test_group->precision == QUAD_TEST)
               f15 = srcx.d;

            (*func)(shift_by);
            res.d = f18;

            printf("%s %016llx", test_group->name, src.u);

            /* Compare against LONG_TEST explicitly: LONG_TEST is 0, so a
             * bare "if (precision)" would pick this branch for QUAD tests
             * and drop the second result register.
             */
            if (test_group->precision == LONG_TEST) {
               printf(" %s %-3d => %016llx",
                      test_group->op, shift_by, res.u);
            } else {
               fpr_bits_t resx;

               resx.d = f19;
               printf(" %016llx %s %-3d ==> %016llx %016llx",
                      srcx.u, test_group->op, shift_by, res.u, resx.u);
            }
            printf("\n" );
         }
      }

      printf( "\n" );
   }
}

/* Run the integer-to-DFP conversions in dfp_dcffix_dcffixq_tests under each
 * of the NUM_RND_MODES rounding modes.
 *
 * has_vsx - non-zero when the processor reports VSX (POWER 7 or newer);
 *           when zero the dcffix entry is skipped.
 */
static void test_dcffix_dcffixq(int has_vsx)
{
   int k;

   for (k = 0; dfp_dcffix_dcffixq_tests[k].test_func != NULL; k++) {
      const dfp_test_t *test_group = &dfp_dcffix_dcffixq_tests[k];
      test_func_t func = test_group->test_func;
      unsigned int round_mode;

      /* The test instruction is dcffix it is supported on POWER 7
       * and newer processors.  Skip if not POWER 7 or newer.
       */
      if ((!has_vsx) && (strcmp("dcffix", test_group->name) == 0))
         continue;

      /* Do each test with each of the possible rounding modes */
      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
         int i;

         printf("\ntest with rounding mode %u \n", round_mode);

         /* The set_rounding_mode() uses the global value f14.  Call the
          * function before setting up the test for the specific instruction
          * to avoid conflicts using f14.
          */
         set_rounding_mode(round_mode);

         for (i = 0; i < test_group->num_tests; i++) {
            fpr_bits_t src, res;

            /* The instructions take I64 inputs */
            src.u = int64_vals[test_group->targs[i].fra_idx];
            f14 = src.d;

            (*func)();
            res.d = f18;

            printf("%s %016llx", test_group->name, src.u);

            /* Explicit comparison; see the enum note on LONG_TEST == 0. */
            if (test_group->precision == LONG_TEST) {
               printf(" %s => %016llx", test_group->op, res.u);
            } else {
               fpr_bits_t resx;

               resx.d = f19;
               printf(" %s ==> %016llx %016llx",
                      test_group->op, res.u, resx.u);
            }
            printf("\n" );
         }
      }

      printf( "\n" );
   }
}

/* Test categories run by main(), in order.  The "fomat" spelling is part of
 * the program's output and is deliberately left untouched.
 */
static test_table_t
all_tests[] =
{
   { &test_dfp_one_arg_ops,
     "Test DFP fomat conversion instructions" },
   { &test_dfp_two_arg_ops,
     "Test DFP shift instructions" },
   { test_dcffix_dcffixq,
     "Test DCFFIX and DCFFIXQ instructions" },
   { NULL, NULL }
};
#endif // HAS_DFP

/* Print each category heading and run the category.  Without HAS_DFP the
 * program does nothing and returns 0.
 */
int main(int argc, char ** argv, char ** envp) {
   (void)argc;
   (void)argv;
   (void)envp;

#if defined(HAS_DFP)
   int i, has_vsx;

   /* If the processor has the VSX functionality then it is POWER 7
    * or newer.
    */
   my_envp = envp;
   has_vsx = get_vsx();

   for (i = 0; all_tests[i].test_category != NULL; i++) {
      printf( "%s\n", all_tests[i].name );
      /* Call through the table's own pointer type (void (*)(int)). */
      (*all_tests[i].test_category)(has_vsx);
   }

#endif // HAS_DFP
   return 0;
}