1 /* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj (at) us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23 #ifdef HAS_VSX 24 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <malloc.h> 30 #include <altivec.h> 31 #include <math.h> 32 33 #ifndef __powerpc64__ 34 typedef uint32_t HWord_t; 35 #else 36 typedef uint64_t HWord_t; 37 #endif /* __powerpc64__ */ 38 39 typedef unsigned char Bool; 40 #define True 1 41 #define False 0 42 register HWord_t r14 __asm__ ("r14"); 43 register HWord_t r15 __asm__ ("r15"); 44 register HWord_t r16 __asm__ ("r16"); 45 register HWord_t r17 __asm__ ("r17"); 46 register double f14 __asm__ ("fr14"); 47 register double f15 __asm__ ("fr15"); 48 register double f16 __asm__ ("fr16"); 49 register double f17 __asm__ ("fr17"); 50 51 static volatile unsigned int div_flags, div_xer; 52 53 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 54 55 #define SET_CR(_arg) \ 56 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 57 58 #define SET_XER(_arg) \ 59 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 60 61 #define GET_CR(_lval) \ 62 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 63 64 #define GET_XER(_lval) \ 65 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 66 67 #define GET_CR_XER(_lval_cr,_lval_xer) \ 68 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 69 70 #define SET_CR_ZERO \ 71 SET_CR(0) 72 73 #define SET_XER_ZERO \ 74 SET_XER(0) 75 76 #define SET_CR_XER_ZERO \ 77 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 78 79 #define SET_FPSCR_ZERO \ 80 do { double _d = 0.0; \ 81 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 82 } while (0) 83 84 85 typedef void (*test_func_t)(void); 86 typedef struct test_table test_table_t; 87 88 89 /* These functions below that construct a table of floating point 90 * values were lifted from none/tests/ppc32/jm-insns.c. 91 */ 92 93 #if defined (DEBUG_ARGS_BUILD) 94 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 95 #else 96 #define AB_DPRINTF(fmt, args...) do { } while (0) 97 #endif 98 99 static inline void register_farg (void *farg, 100 int s, uint16_t _exp, uint64_t mant) 101 { 102 uint64_t tmp; 103 104 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 105 *(uint64_t *)farg = tmp; 106 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 107 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 108 } 109 110 111 typedef struct fp_test_args { 112 int fra_idx; 113 int frb_idx; 114 } fp_test_args_t; 115 116 117 fp_test_args_t two_arg_fp_tests[] = { 118 {8, 8}, 119 {8, 14}, 120 {15, 16}, 121 {8, 5}, 122 {8, 4}, 123 {8, 7}, 124 {8, 9}, 125 {8, 11}, 126 {14, 8}, 127 {14, 14}, 128 {14, 6}, 129 {14, 5}, 130 {14, 4}, 131 {14, 7}, 132 {14, 9}, 133 {14, 11}, 134 {6, 8}, 135 {6, 14}, 136 {6, 6}, 137 {6, 5}, 138 {6, 4}, 139 {6, 7}, 140 {6, 9}, 141 {6, 11}, 142 {5, 8}, 143 {5, 14}, 144 {5, 6}, 145 {5, 5}, 146 {5, 4}, 147 {5, 7}, 148 {5, 9}, 149 {5, 11}, 150 {4, 8}, 151 {4, 14}, 152 {4, 6}, 153 {4, 5}, 154 {4, 1}, 155 {4, 7}, 156 {4, 9}, 157 {4, 11}, 158 {7, 8}, 159 {7, 14}, 160 {7, 6}, 161 {7, 5}, 162 {7, 4}, 163 {7, 7}, 164 {7, 9}, 165 {7, 11}, 166 {10, 8}, 167 {10, 14}, 168 {12, 6}, 169 {12, 5}, 170 {10, 4}, 171 {10, 7}, 172 {10, 9}, 173 {10, 11}, 174 {12, 8 }, 175 {12, 14}, 176 {12, 6}, 177 {15, 16}, 178 {15, 16}, 179 {9, 11}, 180 {11, 11}, 181 {11, 12}, 182 {16, 18}, 183 {17, 16}, 184 {19, 19}, 185 {19, 18} 186 }; 187 188 189 static int nb_special_fargs; 190 static double * spec_fargs; 191 static float * spec_sp_fargs; 192 193 static void build_special_fargs_table(void) 194 { 195 /* 196 Entry Sign Exp fraction Special value 197 0 0 3fd 0x8000000000000ULL Positive finite number 198 1 0 404 0xf000000000000ULL ... 199 2 0 001 0x8000000b77501ULL ... 200 3 0 7fe 0x800000000051bULL ... 201 4 0 012 0x3214569900000ULL ... 202 5 0 000 0x0000000000000ULL +0.0 (+zero) 203 6 1 000 0x0000000000000ULL -0.0 (-zero) 204 7 0 7ff 0x0000000000000ULL +infinity 205 8 1 7ff 0x0000000000000ULL -infinity 206 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 207 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 208 11 0 7ff 0x8000000000000ULL +QNaN 209 12 1 7ff 0x8000000000000ULL -QNaN 210 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 211 14 1 40d 0x0650f5a07b353ULL Negative finite number 212 15 0 412 0x32585a9900000ULL A few more positive finite numbers 213 16 0 413 0x82511a2000000ULL ... 214 17 . . . . . . . . . . . . . . . . . . . . . . . 215 18 . . . . . . . . . . . . . . . . . . . . . . . 216 19 . . . . . . . . . . . . . . . . . . . . . . . 217 */ 218 219 uint64_t mant; 220 uint16_t _exp; 221 int s; 222 int j, i = 0; 223 224 if (spec_fargs) 225 return; 226 227 spec_fargs = malloc( 20 * sizeof(double) ); 228 spec_sp_fargs = malloc( 20 * sizeof(float) ); 229 230 // #0 231 s = 0; 232 _exp = 0x3fd; 233 mant = 0x8000000000000ULL; 234 register_farg(&spec_fargs[i++], s, _exp, mant); 235 236 // #1 237 s = 0; 238 _exp = 0x404; 239 mant = 0xf000000000000ULL; 240 register_farg(&spec_fargs[i++], s, _exp, mant); 241 242 // #2 243 s = 0; 244 _exp = 0x001; 245 mant = 0x8000000b77501ULL; 246 register_farg(&spec_fargs[i++], s, _exp, mant); 247 248 // #3 249 s = 0; 250 _exp = 0x7fe; 251 mant = 0x800000000051bULL; 252 register_farg(&spec_fargs[i++], s, _exp, mant); 253 254 // #4 255 s = 0; 256 _exp = 0x012; 257 mant = 0x3214569900000ULL; 258 register_farg(&spec_fargs[i++], s, _exp, mant); 259 260 261 /* Special values */ 262 /* +0.0 : 0 0x000 0x0000000000000 */ 263 // #5 264 s = 0; 265 _exp = 0x000; 266 mant = 0x0000000000000ULL; 267 register_farg(&spec_fargs[i++], s, _exp, mant); 268 269 /* -0.0 : 1 0x000 0x0000000000000 */ 270 // #6 271 s = 1; 272 _exp = 0x000; 273 mant = 0x0000000000000ULL; 274 register_farg(&spec_fargs[i++], s, _exp, mant); 275 276 /* +infinity : 0 0x7FF 0x0000000000000 */ 277 // #7 278 s = 0; 279 _exp = 0x7FF; 280 mant = 0x0000000000000ULL; 281 register_farg(&spec_fargs[i++], s, _exp, mant); 282 283 /* -infinity : 1 0x7FF 0x0000000000000 */ 284 // #8 285 s = 1; 286 _exp = 0x7FF; 287 mant = 0x0000000000000ULL; 288 register_farg(&spec_fargs[i++], s, _exp, mant); 289 290 /* +QNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 291 // #9 292 s = 0; 293 _exp = 0x7FF; 294 mant = 0x7FFFFFFFFFFFFULL; 295 register_farg(&spec_fargs[i++], s, _exp, mant); 296 297 /* -QNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 298 // #10 299 s = 1; 300 _exp = 0x7FF; 301 mant = 0x7FFFFFFFFFFFFULL; 302 register_farg(&spec_fargs[i++], s, _exp, mant); 303 304 /* +SNaN : 0 0x7FF 0x8000000000000 */ 305 // #11 306 s = 0; 307 _exp = 0x7FF; 308 mant = 0x8000000000000ULL; 309 register_farg(&spec_fargs[i++], s, _exp, mant); 310 311 /* -SNaN : 1 0x7FF 0x8000000000000 */ 312 // #12 313 s = 1; 314 _exp = 0x7FF; 315 mant = 0x8000000000000ULL; 316 register_farg(&spec_fargs[i++], s, _exp, mant); 317 318 /* denormalized value */ 319 // #13 320 s = 1; 321 _exp = 0x000; 322 mant = 0x8340000078000ULL; 323 register_farg(&spec_fargs[i++], s, _exp, mant); 324 325 /* Negative finite number */ 326 // #14 327 s = 1; 328 _exp = 0x40d; 329 mant = 0x0650f5a07b353ULL; 330 register_farg(&spec_fargs[i++], s, _exp, mant); 331 332 /* A few positive finite numbers ... */ 333 // #15 334 s = 0; 335 _exp = 0x412; 336 mant = 0x32585a9900000ULL; 337 register_farg(&spec_fargs[i++], s, _exp, mant); 338 339 // #16 340 s = 0; 341 _exp = 0x413; 342 mant = 0x82511a2000000ULL; 343 register_farg(&spec_fargs[i++], s, _exp, mant); 344 345 // #17 346 s = 0; 347 _exp = 0x403; 348 mant = 0x12ef5a9300000ULL; 349 register_farg(&spec_fargs[i++], s, _exp, mant); 350 351 // #18 352 s = 0; 353 _exp = 0x405; 354 mant = 0x14bf5d2300000ULL; 355 register_farg(&spec_fargs[i++], s, _exp, mant); 356 357 // #19 358 s = 0; 359 _exp = 0x409; 360 mant = 0x76bf982440000ULL; 361 register_farg(&spec_fargs[i++], s, _exp, mant); 362 363 nb_special_fargs = i; 364 for (j = 0; j < i; j++) { 365 spec_sp_fargs[j] = spec_fargs[j]; 366 } 367 } 368 369 370 struct test_table 371 { 372 test_func_t test_category; 373 char * name; 374 }; 375 376 /* Type of input for floating point operations.*/ 377 typedef enum { 378 SINGLE_TEST, 379 DOUBLE_TEST 380 } precision_type_t; 381 382 typedef enum { 383 VX_SCALAR_CONV_TO_WORD, 384 VX_CONV_TO_SINGLE, 385 VX_CONV_TO_DOUBLE, 386 VX_ESTIMATE, 387 VX_DEFAULT 388 } vx_fp_test_type; 389 390 static vector unsigned int vec_out, vec_inA, vec_inB; 391 392 /* This function is for checking the reciprocal and reciprocal square root 393 * estimate instructions. 394 */ 395 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx) 396 { 397 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is 398 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions 399 * does an actual reciprocal calculation versus estimation, so the answer we get back from 400 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of 401 * precision) and the estimate may still be within expected tolerances. On top of that, 402 * we can't count on these estimates always being the same across implementations. 403 * For example, with the fre[s] instruction (which should be correct to within one part 404 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111, 405 * one implementation could return 1.0111_1111_0000 and another implementation could return 406 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a 407 * single bit in common. 408 * 409 * The upshot is we can't validate the VEX output for these instructions by comparing against 410 * stored bit patterns. We must check that the result is within expected tolerances. 411 */ 412 413 414 /* A mask to be used for validation as a last resort. 415 * Only use 12 bits of precision for reasons discussed above. 416 */ 417 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL 418 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00 419 420 Bool result = False; 421 Bool dp_test = type == DOUBLE_TEST; 422 double src_dp, res_dp; 423 float src_sp, res_sp; 424 src_dp = res_dp = 0; 425 src_sp = res_sp = 0; 426 #define SRC (dp_test ? src_dp : src_sp) 427 #define RES (dp_test ? res_dp : res_sp) 428 Bool src_is_negative = False; 429 Bool res_is_negative = False; 430 unsigned long long * dst_dp = NULL; 431 unsigned int * dst_sp = NULL; 432 if (dp_test) { 433 unsigned long long * src_dp_ull; 434 dst_dp = (unsigned long long *) &vec_out; 435 src_dp = spec_fargs[idx]; 436 src_dp_ull = (unsigned long long *) &src_dp; 437 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False; 438 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False; 439 memcpy(&res_dp, &dst_dp[output_vec_idx], 8); 440 } else { 441 unsigned int * src_sp_uint; 442 dst_sp = (unsigned int *) &vec_out; 443 src_sp = spec_sp_fargs[idx]; 444 src_sp_uint = (unsigned int *) &src_sp; 445 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False; 446 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False; 447 memcpy(&res_sp, &dst_sp[output_vec_idx], 4); 448 } 449 450 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p 451 if (isnan(SRC)) 452 return isnan(RES); 453 if (fpclassify(SRC) == FP_ZERO) 454 return isinf(RES); 455 if (!src_is_negative && isinf(SRC)) 456 return !res_is_negative && (fpclassify(RES) == FP_ZERO); 457 if (is_rsqrte) { 458 if (src_is_negative) 459 return isnan(RES); 460 } else { 461 if (src_is_negative && isinf(SRC)) 462 return res_is_negative && (fpclassify(RES) == FP_ZERO); 463 } 464 if (dp_test) { 465 double calc_diff; 466 double real_diff; 467 double recip_divisor; 468 double div_result; 469 double calc_diff_tmp; 470 471 if (is_rsqrte) 472 recip_divisor = sqrt(src_dp); 473 else 474 recip_divisor = src_dp; 475 476 div_result = 1.0/recip_divisor; 477 calc_diff_tmp = recip_divisor * 16384.0; 478 if (isnormal(calc_diff_tmp)) { 479 calc_diff = fabs(1.0/calc_diff_tmp); 480 real_diff = fabs(res_dp - div_result); 481 result = ( ( res_dp == div_result ) 482 || ( real_diff <= calc_diff ) ); 483 } else { 484 /* Unable to compute theoretical difference, so we fall back to masking out 485 * un-precise bits. 486 */ 487 unsigned long long * div_result_dp = (unsigned long long *) &div_result; 488 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP); 489 } 490 /* For debug use . . . 491 if (!result) { 492 unsigned long long * dv = &div_result; 493 unsigned long long * rd = &real_diff; 494 unsigned long long * cd = &calc_diff; 495 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 496 *dv, *rd, *cd); 497 } 498 */ 499 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2) 500 float calc_diff; 501 float real_diff; 502 float div_result; 503 float calc_diff_tmp; 504 float recip_divisor = sqrt(src_sp); 505 506 div_result = 1.0/recip_divisor; 507 calc_diff_tmp = recip_divisor * 16384.0; 508 if (isnormal(calc_diff_tmp)) { 509 calc_diff = fabsf(1.0/calc_diff_tmp); 510 real_diff = fabsf(res_sp - div_result); 511 result = ( ( res_sp == div_result ) 512 || ( real_diff <= calc_diff ) ); 513 } else { 514 /* Unable to compute theoretical difference, so we fall back to masking out 515 * un-precise bits. 516 */ 517 unsigned int * div_result_sp = (unsigned int *) &div_result; 518 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP); 519 } 520 /* For debug use . . . 521 if (!result) { 522 unsigned long long * dv = &div_result; 523 unsigned long long * rd = &real_diff; 524 unsigned long long * cd = &calc_diff; 525 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 526 *dv, *rd, *cd); 527 } 528 */ 529 } 530 return result; 531 } 532 533 typedef struct vx_fp_test 534 { 535 test_func_t test_func; 536 const char * name; 537 fp_test_args_t * targs; 538 int num_tests; 539 precision_type_t precision; 540 vx_fp_test_type type; 541 const char * op; 542 } vx_fp_test_t; 543 544 545 static Bool do_dot; 546 547 static void test_xvredp(void) 548 { 549 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 550 } 551 552 static void test_xsredp(void) 553 { 554 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 555 } 556 557 static void test_xvrsqrtedp(void) 558 { 559 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 560 } 561 562 static void test_xsrsqrtedp(void) 563 { 564 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 565 } 566 567 static void test_xvrsqrtesp(void) 568 { 569 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 570 } 571 572 static void test_xstsqrtdp(void) 573 { 574 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB)); 575 } 576 577 static void test_xvtsqrtdp(void) 578 { 579 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB)); 580 } 581 582 static void test_xvtsqrtsp(void) 583 { 584 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB)); 585 } 586 587 static void test_xvsqrtdp(void) 588 { 589 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 590 } 591 592 static void test_xvsqrtsp(void) 593 { 594 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 595 } 596 597 static void test_xvtdivdp(void) 598 { 599 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 600 } 601 602 static void test_xvtdivsp(void) 603 { 604 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 605 } 606 607 static void test_xscvdpsp(void) 608 { 609 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 610 } 611 612 static void test_xscvdpuxws(void) 613 { 614 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 615 } 616 617 static void test_xscvspdp(void) 618 { 619 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 620 } 621 622 static void test_xvcvdpsp(void) 623 { 624 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 625 } 626 627 static void test_xvcvdpuxds(void) 628 { 629 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 630 } 631 632 static void test_xvcvdpuxws(void) 633 { 634 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 635 } 636 637 static void test_xvcvspdp(void) 638 { 639 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 640 } 641 642 static void test_xvcvspsxds(void) 643 { 644 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 645 } 646 647 static void test_xvcvspuxds(void) 648 { 649 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 650 } 651 652 static void test_xvcvdpsxds(void) 653 { 654 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 655 } 656 657 static void test_xvcvspuxws(void) 658 { 659 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 660 } 661 662 static void test_xvcvsxddp(void) 663 { 664 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 665 } 666 667 static void test_xvcvuxddp(void) 668 { 669 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 670 } 671 672 static void test_xvcvsxdsp(void) 673 { 674 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 675 } 676 677 static void test_xvcvuxdsp(void) 678 { 679 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 680 } 681 682 static void test_xvcvsxwdp(void) 683 { 684 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 685 } 686 687 static void test_xvcvuxwdp(void) 688 { 689 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 690 } 691 692 static void test_xvcvsxwsp(void) 693 { 694 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 695 } 696 697 static void test_xvcvuxwsp(void) 698 { 699 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 700 } 701 702 static void test_xsrdpic(void) 703 { 704 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 705 } 706 707 static void test_xsrdpiz(void) 708 { 709 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 710 } 711 712 static void test_xsrdpi(void) 713 { 714 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 715 } 716 717 static void test_xvabsdp(void) 718 { 719 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 720 } 721 722 static void test_xvnabsdp(void) 723 { 724 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 725 } 726 727 static void test_xvnegdp(void) 728 { 729 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 730 } 731 732 static void test_xvabssp(void) 733 { 734 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 735 } 736 737 static void test_xvnabssp(void) 738 { 739 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 740 } 741 742 static void test_xvrdpi(void) 743 { 744 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 745 } 746 747 static void test_xvrdpic(void) 748 { 749 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 750 } 751 752 static void test_xvrdpim(void) 753 { 754 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 755 } 756 757 static void test_xvrdpip(void) 758 { 759 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 760 } 761 762 static void test_xvrdpiz(void) 763 { 764 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 765 } 766 767 static void test_xvrspi(void) 768 { 769 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 770 } 771 772 static void test_xvrspic(void) 773 { 774 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 775 } 776 777 static void test_xvrspim(void) 778 { 779 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 780 } 781 782 static void test_xvrspip(void) 783 { 784 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 785 } 786 787 static void test_xvrspiz(void) 788 { 789 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 790 } 791 792 static vx_fp_test_t 793 vsx_one_fp_arg_tests[] = { 794 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 795 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 796 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 797 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 798 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 799 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"}, 800 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"}, 801 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 802 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"}, 803 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 804 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 805 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 806 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 807 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 808 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 809 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 810 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 811 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 812 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 813 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 814 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 815 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"}, 816 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"}, 817 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"}, 818 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"}, 819 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"}, 820 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 821 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 822 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 823 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 824 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 825 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 826 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 827 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 828 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 829 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 830 { NULL, NULL, NULL, 0, 0, 0, NULL} 831 }; 832 833 static vx_fp_test_t 834 vx_tdivORtsqrt_tests[] = { 835 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 836 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 837 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"}, 838 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"}, 839 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"}, 840 { NULL, NULL, NULL, 0 , 0, 0, NULL} 841 }; 842 843 static unsigned long long doubleWord[] = { 0, 844 0xffffffff00000000LL, 845 0x00000000ffffffffLL, 846 0xffffffffffffffffLL, 847 0x89abcde123456789LL, 848 0x0102030405060708LL, 849 0x00000000a0b1c2d3LL, 850 0x1111222233334444LL 851 }; 852 853 static unsigned int singleWord[] = {0, 854 0xffff0000, 855 0x0000ffff, 856 0xffffffff, 857 0x89a73522, 858 0x01020304, 859 0x0000abcd, 860 0x11223344 861 }; 862 863 typedef struct vx_intToFp_test 864 { 865 test_func_t test_func; 866 const char * name; 867 void * targs; 868 int num_tests; 869 precision_type_t precision; 870 vx_fp_test_type type; 871 } vx_intToFp_test_t; 872 873 static vx_intToFp_test_t 874 intToFp_tests[] = { 875 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 876 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 877 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 878 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 879 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 880 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 881 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 882 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 883 { NULL, NULL, NULL, 0, 0 } 884 }; 885 886 static Bool do_OE; 887 typedef enum { 888 DIV_BASE = 1, 889 DIV_OE = 2, 890 DIV_DOT = 4, 891 } div_type_t; 892 /* Possible divde type combinations are: 893 * - base 894 * - base+dot 895 * - base+OE 896 * - base+OE+dot 897 */ 898 #ifdef __powerpc64__ 899 static void test_divdeu(void) 900 { 901 int divdeu_type = DIV_BASE; 902 if (do_OE) 903 divdeu_type |= DIV_OE; 904 if (do_dot) 905 divdeu_type |= DIV_DOT; 906 907 switch (divdeu_type) { 908 case 1: 909 SET_CR_XER_ZERO; 910 __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 911 GET_CR_XER(div_flags, div_xer); 912 break; 913 case 3: 914 SET_CR_XER_ZERO; 915 __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 916 GET_CR_XER(div_flags, div_xer); 917 break; 918 case 5: 919 SET_CR_XER_ZERO; 920 __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 921 GET_CR_XER(div_flags, div_xer); 922 break; 923 case 7: 924 SET_CR_XER_ZERO; 925 __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 926 GET_CR_XER(div_flags, div_xer); 927 break; 928 default: 929 fprintf(stderr, "Invalid divdeu type. Exiting\n"); 930 exit(1); 931 } 932 } 933 #endif 934 935 static void test_divwe(void) 936 { 937 int divwe_type = DIV_BASE; 938 if (do_OE) 939 divwe_type |= DIV_OE; 940 if (do_dot) 941 divwe_type |= DIV_DOT; 942 943 switch (divwe_type) { 944 case 1: 945 SET_CR_XER_ZERO; 946 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 947 GET_CR_XER(div_flags, div_xer); 948 break; 949 case 3: 950 SET_CR_XER_ZERO; 951 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 952 GET_CR_XER(div_flags, div_xer); 953 break; 954 case 5: 955 SET_CR_XER_ZERO; 956 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 957 GET_CR_XER(div_flags, div_xer); 958 break; 959 case 7: 960 SET_CR_XER_ZERO; 961 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 962 GET_CR_XER(div_flags, div_xer); 963 break; 964 default: 965 fprintf(stderr, "Invalid divweu type. Exiting\n"); 966 exit(1); 967 } 968 } 969 970 971 typedef struct simple_test { 972 test_func_t test_func; 973 char * name; 974 precision_type_t precision; 975 } simple_test_t; 976 977 978 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 979 { 980 int a_idx, b_idx, i; 981 void * inA, * inB; 982 void * vec_src = swap_inputs ? &vec_out : &vec_inB; 983 984 for (i = 0; i < 4; i++) { 985 a_idx = targs->fra_idx; 986 b_idx = targs->frb_idx; 987 inA = (void *)&spec_sp_fargs[a_idx]; 988 inB = (void *)&spec_sp_fargs[b_idx]; 989 // copy single precision FP into vector element i 990 memcpy(((void *)&vec_inA) + (i * 4), inA, 4); 991 memcpy(vec_src + (i * 4), inB, 4); 992 targs++; 993 } 994 } 995 996 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 997 { 998 int a_idx, b_idx, i; 999 void * inA, * inB; 1000 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB; 1001 1002 for (i = 0; i < 2; i++) { 1003 a_idx = targs->fra_idx; 1004 b_idx = targs->frb_idx; 1005 inA = (void *)&spec_fargs[a_idx]; 1006 inB = (void *)&spec_fargs[b_idx]; 1007 // copy double precision FP into vector element i 1008 memcpy(((void *)&vec_inA) + (i * 8), inA, 8); 1009 memcpy(vec_src + (i * 8), inB, 8); 1010 targs++; 1011 } 1012 } 1013 1014 #define VX_NOT_CMP_OP 0xffffffff 1015 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out) 1016 { 1017 int a_idx, b_idx, k; 1018 char * name = malloc(20); 1019 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1020 int loops = dp ? 2 : 4; 1021 fp_test_args_t * targs = &test_group->targs[i]; 1022 unsigned long long * frA_dp, * frB_dp, * dst_dp; 1023 unsigned int * frA_sp, *frB_sp, * dst_sp; 1024 strcpy(name, test_group->name); 1025 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : "")); 1026 for (k = 0; k < loops; k++) { 1027 a_idx = targs->fra_idx; 1028 b_idx = targs->frb_idx; 1029 if (k) 1030 printf(" AND "); 1031 if (dp) { 1032 frA_dp = (unsigned long long *)&spec_fargs[a_idx]; 1033 frB_dp = (unsigned long long *)&spec_fargs[b_idx]; 1034 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp); 1035 } else { 1036 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx]; 1037 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx]; 1038 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp); 1039 } 1040 targs++; 1041 } 1042 if (cc != VX_NOT_CMP_OP) 1043 printf(" ? cc=%x", cc); 1044 1045 if (print_vec_out) { 1046 if (dp) { 1047 dst_dp = (unsigned long long *) &vec_out; 1048 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1049 } else { 1050 dst_sp = (unsigned int *) &vec_out; 1051 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1052 } 1053 } else { 1054 printf("\n"); 1055 } 1056 free(name); 1057 } 1058 1059 1060 1061 static void test_vsx_one_fp_arg(void) 1062 { 1063 test_func_t func; 1064 int k; 1065 k = 0; 1066 build_special_fargs_table(); 1067 1068 while ((func = vsx_one_fp_arg_tests[k].test_func)) { 1069 int idx, i; 1070 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k]; 1071 Bool estimate = (test_group.type == VX_ESTIMATE); 1072 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1073 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False; 1074 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1075 Bool sparse_sp = False; 1076 int stride = dp ? 2 : 4; 1077 int loops = is_scalar ? 1 : stride; 1078 stride = is_scalar ? 1: stride; 1079 1080 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1081 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op 1082 * or 1083 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op 1084 * 1085 * For the vector op case, we need to adjust stride from '4' to '2', since 1086 * we'll only be loading two values per loop into the input register. 1087 */ 1088 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) { 1089 sparse_sp = True; 1090 stride = 2; 1091 } 1092 1093 for (i = 0; i < test_group.num_tests; i+=stride) { 1094 unsigned int * pv; 1095 void * inB; 1096 1097 pv = (unsigned int *)&vec_out; 1098 // clear vec_out 1099 for (idx = 0; idx < 4; idx++, pv++) 1100 *pv = 0; 1101 1102 if (dp) { 1103 int j; 1104 unsigned long long * frB_dp, *dst_dp; 1105 for (j = 0; j < loops; j++) { 1106 inB = (void *)&spec_fargs[i + j]; 1107 // copy double precision FP into vector element i 1108 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1109 } 1110 // execute test insn 1111 (*func)(); 1112 dst_dp = (unsigned long long *) &vec_out; 1113 printf("#%d: %s ", i/stride, test_group.name); 1114 for (j = 0; j < loops; j++) { 1115 if (j) 1116 printf("; "); 1117 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1118 printf("%s(%016llx)", test_group.op, *frB_dp); 1119 if (estimate) { 1120 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, j); 1121 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1122 /* For debugging . . . 1123 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]); 1124 */ 1125 } else { 1126 vx_fp_test_type type = test_group.type; 1127 switch (type) { 1128 case VX_SCALAR_CONV_TO_WORD: 1129 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL); 1130 break; 1131 case VX_CONV_TO_SINGLE: 1132 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL); 1133 break; 1134 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . . 1135 printf(" = %016llx", dst_dp[j]); 1136 } 1137 } 1138 } 1139 printf("\n"); 1140 } else { 1141 int j, skip_slot; 1142 unsigned int * frB_sp, * dst_sp = NULL; 1143 unsigned long long * dst_dp = NULL; 1144 if (sparse_sp) { 1145 skip_slot = 1; 1146 loops = 2; 1147 } else { 1148 skip_slot = 0; 1149 } 1150 for (j = 0; j < loops; j++) { 1151 inB = (void *)&spec_sp_fargs[i + j]; 1152 // copy single precision FP into vector element i 1153 if (skip_slot && j > 0) 1154 memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4); 1155 else 1156 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1157 } 1158 // execute test insn 1159 (*func)(); 1160 if (test_group.type == VX_CONV_TO_DOUBLE) 1161 dst_dp = (unsigned long long *) &vec_out; 1162 else 1163 dst_sp = (unsigned int *) &vec_out; 1164 // print result 1165 printf("#%d: %s ", i/stride, test_group.name); 1166 for (j = 0; j < loops; j++) { 1167 if (j) 1168 printf("; "); 1169 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1170 printf("%s(%08x)", test_group.op, *frB_sp); 1171 if (estimate) { 1172 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, j); 1173 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1174 } else { 1175 if (test_group.type == VX_CONV_TO_DOUBLE) 1176 printf(" = %016llx", dst_dp[j]); 1177 else 1178 /* Special case: Current VEX implementation for fsqrts (single precision) 1179 * uses the same implementation as that used for double precision fsqrt. 1180 * However, I've found that for xvsqrtsp, the result from that implementation 1181 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the 1182 * output to appear very different if you end up with a carry. But for the given 1183 * inputs in this testcase, we can simply mask out these bits. 1184 */ 1185 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]); 1186 } 1187 } 1188 printf("\n"); 1189 } 1190 } 1191 k++; 1192 printf( "\n" ); 1193 } 1194 } 1195 1196 static void test_int_to_fp_convert(void) 1197 { 1198 test_func_t func; 1199 int k; 1200 k = 0; 1201 1202 while ((func = intToFp_tests[k].test_func)) { 1203 int idx, i; 1204 vx_intToFp_test_t test_group = intToFp_tests[k]; 1205 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1206 Bool sparse_sp = False; 1207 int stride = dp ? 2 : 4; 1208 int loops = stride; 1209 1210 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1211 * |___ int___|_Unused_|___int___|__Unused__| // for vector op 1212 * or 1213 * We need to adjust stride from '4' to '2', since we'll only be loading 1214 * two values per loop into the input register. 1215 */ 1216 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) { 1217 sparse_sp = True; 1218 stride = 2; 1219 } 1220 1221 for (i = 0; i < test_group.num_tests; i+=stride) { 1222 unsigned int * pv; 1223 void * inB; 1224 1225 pv = (unsigned int *)&vec_out; 1226 // clear vec_out 1227 for (idx = 0; idx < 4; idx++, pv++) 1228 *pv = 0; 1229 1230 if (dp) { 1231 int j; 1232 unsigned long long *dst_dw, * targs = test_group.targs; 1233 for (j = 0; j < loops; j++) { 1234 inB = (void *)&targs[i + j]; 1235 // copy doubleword into vector element i 1236 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1237 } 1238 // execute test insn 1239 (*func)(); 1240 dst_dw = (unsigned long long *) &vec_out; 1241 printf("#%d: %s ", i/stride, test_group.name); 1242 for (j = 0; j < loops; j++) { 1243 if (j) 1244 printf("; "); 1245 printf("conv(%016llx)", targs[i + j]); 1246 1247 if (test_group.type == VX_CONV_TO_SINGLE) 1248 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL); 1249 else 1250 printf(" = %016llx", dst_dw[j]); 1251 } 1252 printf("\n"); 1253 } else { 1254 int j, skip_slot; 1255 unsigned int * dst_sp = NULL; 1256 unsigned int * targs = test_group.targs; 1257 unsigned long long * dst_dp = NULL; 1258 if (sparse_sp) { 1259 skip_slot = 1; 1260 loops = 2; 1261 } else { 1262 skip_slot = 0; 1263 } 1264 for (j = 0; j < loops; j++) { 1265 inB = (void *)&targs[i + j]; 1266 // copy single word into vector element i 1267 if (skip_slot && j > 0) 1268 memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4); 1269 else 1270 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1271 } 1272 // execute test insn 1273 (*func)(); 1274 if (test_group.type == VX_CONV_TO_DOUBLE) 1275 dst_dp = (unsigned long long *) &vec_out; 1276 else 1277 dst_sp = (unsigned int *) &vec_out; 1278 // print result 1279 printf("#%d: %s ", i/stride, test_group.name); 1280 for (j = 0; j < loops; j++) { 1281 if (j) 1282 printf("; "); 1283 printf("conv(%08x)", targs[i + j]); 1284 if (test_group.type == VX_CONV_TO_DOUBLE) 1285 printf(" = %016llx", dst_dp[j]); 1286 else 1287 printf(" = %08x", dst_sp[j]); 1288 } 1289 printf("\n"); 1290 } 1291 } 1292 k++; 1293 printf( "\n" ); 1294 } 1295 } 1296 1297 1298 1299 // The div doubleword test data 1300 signed long long div_dw_tdata[13][2] = { 1301 { 4, -4 }, 1302 { 4, -3 }, 1303 { 4, 4 }, 1304 { 4, -5 }, 1305 { 3, 8 }, 1306 { 0x8000000000000000ULL, 0xa }, 1307 { 0x50c, -1 }, 1308 { 0x50c, -4096 }, 1309 { 0x1234fedc, 0x8000a873 }, 1310 { 0xabcd87651234fedcULL, 0xa123b893 }, 1311 { 0x123456789abdcULL, 0 }, 1312 { 0, 2 }, 1313 { 0x77, 0xa3499 } 1314 }; 1315 #define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2) 1316 1317 // The div word test data 1318 unsigned int div_w_tdata[6][2] = { 1319 { 0, 2 }, 1320 { 2, 0 }, 1321 { 0x7abc1234, 0xf0000000 }, 1322 { 0xfabc1234, 5 }, 1323 { 77, 66 }, 1324 { 5, 0xfabc1234 }, 1325 }; 1326 #define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2) 1327 1328 typedef struct div_ext_test 1329 { 1330 test_func_t test_func; 1331 const char *name; 1332 int num_tests; 1333 div_type_t div_type; 1334 precision_type_t precision; 1335 } div_ext_test_t; 1336 1337 static div_ext_test_t div_tests[] = { 1338 #ifdef __powerpc64__ 1339 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST }, 1340 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST }, 1341 #endif 1342 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST }, 1343 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST }, 1344 { NULL, NULL, 0, 0, 0 } 1345 }; 1346 1347 static void test_div_extensions(void) 1348 { 1349 test_func_t func; 1350 int k; 1351 k = 0; 1352 1353 while ((func = div_tests[k].test_func)) { 1354 int i, repeat = 1; 1355 div_ext_test_t test_group = div_tests[k]; 1356 do_dot = False; 1357 1358 again: 1359 for (i = 0; i < test_group.num_tests; i++) { 1360 unsigned int condreg; 1361 1362 if (test_group.div_type == DIV_OE) 1363 do_OE = True; 1364 else 1365 do_OE = False; 1366 1367 if (test_group.precision == DOUBLE_TEST) { 1368 r14 = div_dw_tdata[i][0]; 1369 r15 = div_dw_tdata[i][1]; 1370 } else { 1371 r14 = div_w_tdata[i][0]; 1372 r15 = div_w_tdata[i][1]; 1373 } 1374 // execute test insn 1375 (*func)(); 1376 condreg = (div_flags & 0xf0000000) >> 28; 1377 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1378 if (test_group.precision == DOUBLE_TEST) { 1379 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;", 1380 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17); 1381 } else { 1382 printf("0x%08x00000000 / 0x%08x = 0x%08x;", 1383 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17); 1384 } 1385 printf(" CR=%x; XER=%x\n", condreg, div_xer); 1386 } 1387 printf("\n"); 1388 if (repeat) { 1389 repeat = 0; 1390 do_dot = True; 1391 goto again; 1392 } 1393 k++; 1394 printf( "\n" ); 1395 } 1396 } 1397 1398 1399 static void test_vx_tdivORtsqrt(void) 1400 { 1401 test_func_t func; 1402 int k, crx; 1403 unsigned int flags; 1404 k = 0; 1405 do_dot = False; 1406 build_special_fargs_table(); 1407 1408 while ((func = vx_tdivORtsqrt_tests[k].test_func)) { 1409 int idx, i; 1410 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k]; 1411 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1412 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1413 Bool two_args = test_group.targs ? True : False; 1414 int stride = dp ? 2 : 4; 1415 int loops = is_scalar ? 1 : stride; 1416 stride = is_scalar ? 1: stride; 1417 1418 for (i = 0; i < test_group.num_tests; i+=stride) { 1419 unsigned int * pv; 1420 void * inB; 1421 1422 pv = (unsigned int *)&vec_out; 1423 // clear vec_out 1424 for (idx = 0; idx < 4; idx++, pv++) 1425 *pv = 0; 1426 1427 if (dp) { 1428 int j; 1429 unsigned long long * frB_dp; 1430 if (two_args) { 1431 setup_dp_fp_args(&test_group.targs[i], False); 1432 } else { 1433 for (j = 0; j < loops; j++) { 1434 inB = (void *)&spec_fargs[i + j]; 1435 // copy double precision FP into vector element i 1436 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1437 } 1438 } 1439 // execute test insn 1440 // Must do set/get of CRs immediately before/after calling the asm func 1441 // to avoid CRs being modified by other instructions. 1442 SET_FPSCR_ZERO; 1443 SET_CR_XER_ZERO; 1444 (*func)(); 1445 GET_CR(flags); 1446 // assumes using CR1 1447 crx = (flags & 0x0f000000) >> 24; 1448 if (two_args) { 1449 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1450 } else { 1451 printf("#%d: %s ", i/stride, test_group.name); 1452 for (j = 0; j < loops; j++) { 1453 if (j) 1454 printf("; "); 1455 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1456 printf("%s(%016llx)", test_group.op, *frB_dp); 1457 } 1458 printf( " ? %x (CRx)\n", crx); 1459 } 1460 } else { 1461 int j; 1462 unsigned int * frB_sp; 1463 if (two_args) { 1464 setup_sp_fp_args(&test_group.targs[i], False); 1465 } else { 1466 for (j = 0; j < loops; j++) { 1467 inB = (void *)&spec_sp_fargs[i + j]; 1468 // copy single precision FP into vector element i 1469 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1470 } 1471 } 1472 // execute test insn 1473 SET_FPSCR_ZERO; 1474 SET_CR_XER_ZERO; 1475 (*func)(); 1476 GET_CR(flags); 1477 crx = (flags & 0x0f000000) >> 24; 1478 // print result 1479 if (two_args) { 1480 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1481 } else { 1482 printf("#%d: %s ", i/stride, test_group.name); 1483 for (j = 0; j < loops; j++) { 1484 if (j) 1485 printf("; "); 1486 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1487 printf("%s(%08x)", test_group.op, *frB_sp); 1488 } 1489 printf( " ? %x (CRx)\n", crx); 1490 } 1491 } 1492 } 1493 k++; 1494 printf( "\n" ); 1495 } 1496 } 1497 1498 1499 static void test_ftsqrt(void) 1500 { 1501 int i, crx; 1502 unsigned int flags; 1503 unsigned long long * frbp; 1504 build_special_fargs_table(); 1505 1506 1507 for (i = 0; i < nb_special_fargs; i++) { 1508 f14 = spec_fargs[i]; 1509 frbp = (unsigned long long *)&spec_fargs[i]; 1510 SET_FPSCR_ZERO; 1511 SET_CR_XER_ZERO; 1512 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14)); 1513 GET_CR(flags); 1514 crx = (flags & 0x0f000000) >> 24; 1515 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx); 1516 } 1517 printf( "\n" ); 1518 } 1519 1520 static void 1521 test_popcntw(void) 1522 { 1523 #ifdef __powerpc64__ 1524 uint64_t res; 1525 unsigned long long src = 0x9182736405504536ULL; 1526 r14 = src; 1527 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1528 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res); 1529 #else 1530 uint32_t res; 1531 unsigned int src = 0x9182730E; 1532 r14 = src; 1533 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1534 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res); 1535 #endif 1536 printf( "\n" ); 1537 } 1538 1539 1540 static test_table_t 1541 all_tests[] = 1542 { 1543 1544 { &test_vsx_one_fp_arg, 1545 "Test VSX vector and scalar single argument instructions"} , 1546 { &test_int_to_fp_convert, 1547 "Test VSX vector integer to float conversion instructions" }, 1548 { &test_div_extensions, 1549 "Test div extensions" }, 1550 { &test_ftsqrt, 1551 "Test ftsqrt instruction" }, 1552 { &test_vx_tdivORtsqrt, 1553 "Test vector and scalar tdiv and tsqrt instructions" }, 1554 { &test_popcntw, 1555 "Test popcntw instruction" }, 1556 { NULL, NULL } 1557 }; 1558 #endif // HAS_VSX 1559 1560 int main(int argc, char *argv[]) 1561 { 1562 #ifdef HAS_VSX 1563 1564 test_table_t aTest; 1565 test_func_t func; 1566 int i = 0; 1567 1568 while ((func = all_tests[i].test_category)) { 1569 aTest = all_tests[i]; 1570 printf( "%s\n", aTest.name ); 1571 (*func)(); 1572 i++; 1573 } 1574 if (spec_fargs) 1575 free(spec_fargs); 1576 if (spec_sp_fargs) 1577 free(spec_sp_fargs); 1578 1579 #endif // HAS _VSX 1580 1581 return 0; 1582 } 1583