1 /* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj (at) us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23 #ifdef HAS_VSX 24 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <malloc.h> 30 #include <altivec.h> 31 #include <math.h> 32 33 #ifndef __powerpc64__ 34 typedef uint32_t HWord_t; 35 #else 36 typedef uint64_t HWord_t; 37 #endif /* __powerpc64__ */ 38 39 #ifdef VGP_ppc64le_linux 40 #define isLE 1 41 #else 42 #define isLE 0 43 #endif 44 45 typedef unsigned char Bool; 46 #define True 1 47 #define False 0 48 register HWord_t r14 __asm__ ("r14"); 49 register HWord_t r15 __asm__ ("r15"); 50 register HWord_t r16 __asm__ ("r16"); 51 register HWord_t r17 __asm__ ("r17"); 52 register double f14 __asm__ ("fr14"); 53 register double f15 __asm__ ("fr15"); 54 register double f16 __asm__ ("fr16"); 55 register double f17 __asm__ ("fr17"); 56 57 static volatile unsigned int div_flags, div_xer; 58 59 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 60 61 #define SET_CR(_arg) \ 62 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 63 64 #define SET_XER(_arg) \ 65 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 66 67 #define GET_CR(_lval) \ 68 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 69 70 #define GET_XER(_lval) \ 71 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 72 73 #define GET_CR_XER(_lval_cr,_lval_xer) \ 74 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 75 76 #define SET_CR_ZERO \ 77 SET_CR(0) 78 79 #define SET_XER_ZERO \ 80 SET_XER(0) 81 82 #define SET_CR_XER_ZERO \ 83 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 84 85 #define SET_FPSCR_ZERO \ 86 do { double _d = 0.0; \ 87 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 88 } while (0) 89 90 91 typedef void (*test_func_t)(void); 92 typedef struct test_table test_table_t; 93 94 95 /* These functions below that construct a table of floating point 96 * values were lifted from none/tests/ppc32/jm-insns.c. 97 */ 98 99 #if defined (DEBUG_ARGS_BUILD) 100 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 101 #else 102 #define AB_DPRINTF(fmt, args...) do { } while (0) 103 #endif 104 105 static inline void register_farg (void *farg, 106 int s, uint16_t _exp, uint64_t mant) 107 { 108 uint64_t tmp; 109 110 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 111 *(uint64_t *)farg = tmp; 112 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 113 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 114 } 115 116 static inline void register_sp_farg (void *farg, 117 int s, uint16_t _exp, uint32_t mant) 118 { 119 uint32_t tmp; 120 tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant; 121 *(uint32_t *)farg = tmp; 122 } 123 124 125 typedef struct fp_test_args { 126 int fra_idx; 127 int frb_idx; 128 } fp_test_args_t; 129 130 131 fp_test_args_t two_arg_fp_tests[] = { 132 {8, 8}, 133 {8, 14}, 134 {15, 16}, 135 {8, 5}, 136 {8, 4}, 137 {8, 7}, 138 {8, 9}, 139 {8, 11}, 140 {14, 8}, 141 {14, 14}, 142 {14, 6}, 143 {14, 5}, 144 {14, 4}, 145 {14, 7}, 146 {14, 9}, 147 {14, 11}, 148 {6, 8}, 149 {6, 14}, 150 {6, 6}, 151 {6, 5}, 152 {6, 4}, 153 {6, 7}, 154 {6, 9}, 155 {6, 11}, 156 {5, 8}, 157 {5, 14}, 158 {5, 6}, 159 {5, 5}, 160 {5, 4}, 161 {5, 7}, 162 {5, 9}, 163 {5, 11}, 164 {4, 8}, 165 {4, 14}, 166 {4, 6}, 167 {4, 5}, 168 {4, 1}, 169 {4, 7}, 170 {4, 9}, 171 {4, 11}, 172 {7, 8}, 173 {7, 14}, 174 {7, 6}, 175 {7, 5}, 176 {7, 4}, 177 {7, 7}, 178 {7, 9}, 179 {7, 11}, 180 {10, 8}, 181 {10, 14}, 182 {12, 6}, 183 {12, 5}, 184 {10, 4}, 185 {10, 7}, 186 {10, 9}, 187 {10, 11}, 188 {12, 8 }, 189 {12, 14}, 190 {12, 6}, 191 {15, 16}, 192 {15, 16}, 193 {9, 11}, 194 {11, 11}, 195 {11, 12}, 196 {16, 18}, 197 {17, 16}, 198 {19, 19}, 199 {19, 18} 200 }; 201 202 203 static int nb_special_fargs; 204 static double * spec_fargs; 205 static float * spec_sp_fargs; 206 207 static void build_special_fargs_table(void) 208 { 209 /* 210 Entry Sign Exp fraction Special value 211 0 0 3fd 0x8000000000000ULL Positive finite number 212 1 0 404 0xf000000000000ULL ... 213 2 0 001 0x8000000b77501ULL ... 214 3 0 7fe 0x800000000051bULL ... 215 4 0 012 0x3214569900000ULL ... 216 5 0 000 0x0000000000000ULL +0.0 (+zero) 217 6 1 000 0x0000000000000ULL -0.0 (-zero) 218 7 0 7ff 0x0000000000000ULL +infinity 219 8 1 7ff 0x0000000000000ULL -infinity 220 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 221 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 222 11 0 7ff 0x8000000000000ULL +QNaN 223 12 1 7ff 0x8000000000000ULL -QNaN 224 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 225 14 1 40d 0x0650f5a07b353ULL Negative finite number 226 15 0 412 0x32585a9900000ULL A few more positive finite numbers 227 16 0 413 0x82511a2000000ULL ... 228 17 . . . . . . . . . . . . . . . . . . . . . . . 229 18 . . . . . . . . . . . . . . . . . . . . . . . 230 19 . . . . . . . . . . . . . . . . . . . . . . . 231 */ 232 233 uint64_t mant; 234 uint32_t mant_sp; 235 uint16_t _exp; 236 int s; 237 int j, i = 0; 238 239 if (spec_fargs) 240 return; 241 242 spec_fargs = malloc( 20 * sizeof(double) ); 243 spec_sp_fargs = malloc( 20 * sizeof(float) ); 244 245 // #0 246 s = 0; 247 _exp = 0x3fd; 248 mant = 0x8000000000000ULL; 249 register_farg(&spec_fargs[i++], s, _exp, mant); 250 251 // #1 252 s = 0; 253 _exp = 0x404; 254 mant = 0xf000000000000ULL; 255 register_farg(&spec_fargs[i++], s, _exp, mant); 256 257 // #2 258 s = 0; 259 _exp = 0x001; 260 mant = 0x8000000b77501ULL; 261 register_farg(&spec_fargs[i++], s, _exp, mant); 262 263 // #3 264 s = 0; 265 _exp = 0x7fe; 266 mant = 0x800000000051bULL; 267 register_farg(&spec_fargs[i++], s, _exp, mant); 268 269 // #4 270 s = 0; 271 _exp = 0x012; 272 mant = 0x3214569900000ULL; 273 register_farg(&spec_fargs[i++], s, _exp, mant); 274 275 276 /* Special values */ 277 /* +0.0 : 0 0x000 0x0000000000000 */ 278 // #5 279 s = 0; 280 _exp = 0x000; 281 mant = 0x0000000000000ULL; 282 register_farg(&spec_fargs[i++], s, _exp, mant); 283 284 /* -0.0 : 1 0x000 0x0000000000000 */ 285 // #6 286 s = 1; 287 _exp = 0x000; 288 mant = 0x0000000000000ULL; 289 register_farg(&spec_fargs[i++], s, _exp, mant); 290 291 /* +infinity : 0 0x7FF 0x0000000000000 */ 292 // #7 293 s = 0; 294 _exp = 0x7FF; 295 mant = 0x0000000000000ULL; 296 register_farg(&spec_fargs[i++], s, _exp, mant); 297 298 /* -infinity : 1 0x7FF 0x0000000000000 */ 299 // #8 300 s = 1; 301 _exp = 0x7FF; 302 mant = 0x0000000000000ULL; 303 register_farg(&spec_fargs[i++], s, _exp, mant); 304 305 /* 306 * This comment applies to values #9 and #10 below: 307 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision, 308 * so we can't just copy the double-precision value to the corresponding slot in the 309 * single-precision array (i.e., in the loop at the end of this function). Instead, we 310 * have to manually set the bits using register_sp_farg(). 311 */ 312 313 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 314 // #9 315 s = 0; 316 _exp = 0x7FF; 317 mant = 0x7FFFFFFFFFFFFULL; 318 register_farg(&spec_fargs[i++], s, _exp, mant); 319 _exp = 0xff; 320 mant_sp = 0x3FFFFF; 321 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 322 323 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 324 // #10 325 s = 1; 326 _exp = 0x7FF; 327 mant = 0x7FFFFFFFFFFFFULL; 328 register_farg(&spec_fargs[i++], s, _exp, mant); 329 _exp = 0xff; 330 mant_sp = 0x3FFFFF; 331 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 332 333 /* +QNaN : 0 0x7FF 0x8000000000000 */ 334 // #11 335 s = 0; 336 _exp = 0x7FF; 337 mant = 0x8000000000000ULL; 338 register_farg(&spec_fargs[i++], s, _exp, mant); 339 340 /* -QNaN : 1 0x7FF 0x8000000000000 */ 341 // #12 342 s = 1; 343 _exp = 0x7FF; 344 mant = 0x8000000000000ULL; 345 register_farg(&spec_fargs[i++], s, _exp, mant); 346 347 /* denormalized value */ 348 // #13 349 s = 1; 350 _exp = 0x000; 351 mant = 0x8340000078000ULL; 352 register_farg(&spec_fargs[i++], s, _exp, mant); 353 354 /* Negative finite number */ 355 // #14 356 s = 1; 357 _exp = 0x40d; 358 mant = 0x0650f5a07b353ULL; 359 register_farg(&spec_fargs[i++], s, _exp, mant); 360 361 /* A few positive finite numbers ... */ 362 // #15 363 s = 0; 364 _exp = 0x412; 365 mant = 0x32585a9900000ULL; 366 register_farg(&spec_fargs[i++], s, _exp, mant); 367 368 // #16 369 s = 0; 370 _exp = 0x413; 371 mant = 0x82511a2000000ULL; 372 register_farg(&spec_fargs[i++], s, _exp, mant); 373 374 // #17 375 s = 0; 376 _exp = 0x403; 377 mant = 0x12ef5a9300000ULL; 378 register_farg(&spec_fargs[i++], s, _exp, mant); 379 380 // #18 381 s = 0; 382 _exp = 0x405; 383 mant = 0x14bf5d2300000ULL; 384 register_farg(&spec_fargs[i++], s, _exp, mant); 385 386 // #19 387 s = 0; 388 _exp = 0x409; 389 mant = 0x76bf982440000ULL; 390 register_farg(&spec_fargs[i++], s, _exp, mant); 391 392 nb_special_fargs = i; 393 for (j = 0; j < i; j++) { 394 if (!(j == 9 || j == 10)) 395 spec_sp_fargs[j] = spec_fargs[j]; 396 } 397 } 398 399 400 struct test_table 401 { 402 test_func_t test_category; 403 char * name; 404 }; 405 406 /* Type of input for floating point operations.*/ 407 typedef enum { 408 SINGLE_TEST, 409 DOUBLE_TEST 410 } precision_type_t; 411 412 typedef enum { 413 VX_SCALAR_CONV_TO_WORD, 414 VX_CONV_TO_SINGLE, 415 VX_CONV_TO_DOUBLE, 416 VX_ESTIMATE, 417 VX_DEFAULT 418 } vx_fp_test_type; 419 420 static vector unsigned int vec_out, vec_inA, vec_inB; 421 422 /* This function is for checking the reciprocal and reciprocal square root 423 * estimate instructions. 424 */ 425 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx) 426 { 427 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is 428 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions 429 * does an actual reciprocal calculation versus estimation, so the answer we get back from 430 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of 431 * precision) and the estimate may still be within expected tolerances. On top of that, 432 * we can't count on these estimates always being the same across implementations. 433 * For example, with the fre[s] instruction (which should be correct to within one part 434 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111, 435 * one implementation could return 1.0111_1111_0000 and another implementation could return 436 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a 437 * single bit in common. 438 * 439 * The upshot is we can't validate the VEX output for these instructions by comparing against 440 * stored bit patterns. We must check that the result is within expected tolerances. 441 */ 442 443 444 /* A mask to be used for validation as a last resort. 445 * Only use 12 bits of precision for reasons discussed above. 446 */ 447 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL 448 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00 449 450 Bool result = False; 451 Bool dp_test = type == DOUBLE_TEST; 452 double src_dp, res_dp; 453 float src_sp, res_sp; 454 src_dp = res_dp = 0; 455 src_sp = res_sp = 0; 456 #define SRC (dp_test ? src_dp : src_sp) 457 #define RES (dp_test ? res_dp : res_sp) 458 Bool src_is_negative = False; 459 Bool res_is_negative = False; 460 unsigned long long * dst_dp = NULL; 461 unsigned int * dst_sp = NULL; 462 if (dp_test) { 463 unsigned long long * src_dp_ull; 464 dst_dp = (unsigned long long *) &vec_out; 465 src_dp = spec_fargs[idx]; 466 src_dp_ull = (unsigned long long *) &src_dp; 467 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False; 468 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False; 469 memcpy(&res_dp, &dst_dp[output_vec_idx], 8); 470 } else { 471 unsigned int * src_sp_uint; 472 dst_sp = (unsigned int *) &vec_out; 473 src_sp = spec_sp_fargs[idx]; 474 src_sp_uint = (unsigned int *) &src_sp; 475 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False; 476 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False; 477 memcpy(&res_sp, &dst_sp[output_vec_idx], 4); 478 } 479 480 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p 481 if (isnan(SRC)) 482 return isnan(RES); 483 if (fpclassify(SRC) == FP_ZERO) 484 return isinf(RES); 485 if (!src_is_negative && isinf(SRC)) 486 return !res_is_negative && (fpclassify(RES) == FP_ZERO); 487 if (is_rsqrte) { 488 if (src_is_negative) 489 return isnan(RES); 490 } else { 491 if (src_is_negative && isinf(SRC)) 492 return res_is_negative && (fpclassify(RES) == FP_ZERO); 493 } 494 if (dp_test) { 495 double calc_diff; 496 double real_diff; 497 double recip_divisor; 498 double div_result; 499 double calc_diff_tmp; 500 501 if (is_rsqrte) 502 recip_divisor = sqrt(src_dp); 503 else 504 recip_divisor = src_dp; 505 506 div_result = 1.0/recip_divisor; 507 calc_diff_tmp = recip_divisor * 16384.0; 508 if (isnormal(calc_diff_tmp)) { 509 calc_diff = fabs(1.0/calc_diff_tmp); 510 real_diff = fabs(res_dp - div_result); 511 result = ( ( res_dp == div_result ) 512 || ( real_diff <= calc_diff ) ); 513 } else { 514 /* Unable to compute theoretical difference, so we fall back to masking out 515 * un-precise bits. 516 */ 517 unsigned long long * div_result_dp = (unsigned long long *) &div_result; 518 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP); 519 } 520 /* For debug use . . . 521 if (!result) { 522 unsigned long long * dv = &div_result; 523 unsigned long long * rd = &real_diff; 524 unsigned long long * cd = &calc_diff; 525 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 526 *dv, *rd, *cd); 527 } 528 */ 529 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2) 530 float calc_diff; 531 float real_diff; 532 float div_result; 533 float calc_diff_tmp; 534 float recip_divisor = sqrt(src_sp); 535 536 div_result = 1.0/recip_divisor; 537 calc_diff_tmp = recip_divisor * 16384.0; 538 if (isnormal(calc_diff_tmp)) { 539 calc_diff = fabsf(1.0/calc_diff_tmp); 540 real_diff = fabsf(res_sp - div_result); 541 result = ( ( res_sp == div_result ) 542 || ( real_diff <= calc_diff ) ); 543 } else { 544 /* Unable to compute theoretical difference, so we fall back to masking out 545 * un-precise bits. 546 */ 547 unsigned int * div_result_sp = (unsigned int *) &div_result; 548 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP); 549 } 550 /* For debug use . . . 551 if (!result) { 552 unsigned long long * dv = &div_result; 553 unsigned long long * rd = &real_diff; 554 unsigned long long * cd = &calc_diff; 555 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n", 556 *dv, *rd, *cd); 557 } 558 */ 559 } 560 return result; 561 } 562 563 typedef struct vx_fp_test 564 { 565 test_func_t test_func; 566 const char * name; 567 fp_test_args_t * targs; 568 int num_tests; 569 precision_type_t precision; 570 vx_fp_test_type type; 571 const char * op; 572 } vx_fp_test_t; 573 574 575 static Bool do_dot; 576 577 static void test_xvredp(void) 578 { 579 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 580 } 581 582 static void test_xsredp(void) 583 { 584 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 585 } 586 587 static void test_xvrsqrtedp(void) 588 { 589 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 590 } 591 592 static void test_xsrsqrtedp(void) 593 { 594 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 595 } 596 597 static void test_xvrsqrtesp(void) 598 { 599 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 600 } 601 602 static void test_xstsqrtdp(void) 603 { 604 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB)); 605 } 606 607 static void test_xvtsqrtdp(void) 608 { 609 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB)); 610 } 611 612 static void test_xvtsqrtsp(void) 613 { 614 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB)); 615 } 616 617 static void test_xvsqrtdp(void) 618 { 619 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 620 } 621 622 static void test_xvsqrtsp(void) 623 { 624 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 625 } 626 627 static void test_xvtdivdp(void) 628 { 629 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 630 } 631 632 static void test_xvtdivsp(void) 633 { 634 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB)); 635 } 636 637 static void test_xscvdpsp(void) 638 { 639 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 640 } 641 642 static void test_xscvdpuxws(void) 643 { 644 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 645 } 646 647 static void test_xscvspdp(void) 648 { 649 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 650 } 651 652 static void test_xvcvdpsp(void) 653 { 654 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 655 } 656 657 static void test_xvcvdpuxds(void) 658 { 659 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 660 } 661 662 static void test_xvcvdpuxws(void) 663 { 664 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 665 } 666 667 static void test_xvcvspdp(void) 668 { 669 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 670 } 671 672 static void test_xvcvspsxds(void) 673 { 674 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 675 } 676 677 static void test_xvcvspuxds(void) 678 { 679 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 680 } 681 682 static void test_xvcvdpsxds(void) 683 { 684 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 685 } 686 687 static void test_xvcvspuxws(void) 688 { 689 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 690 } 691 692 static void test_xvcvsxddp(void) 693 { 694 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 695 } 696 697 static void test_xvcvuxddp(void) 698 { 699 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 700 } 701 702 static void test_xvcvsxdsp(void) 703 { 704 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 705 } 706 707 static void test_xvcvuxdsp(void) 708 { 709 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 710 } 711 712 static void test_xvcvsxwdp(void) 713 { 714 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 715 } 716 717 static void test_xvcvuxwdp(void) 718 { 719 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 720 } 721 722 static void test_xvcvsxwsp(void) 723 { 724 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 725 } 726 727 static void test_xvcvuxwsp(void) 728 { 729 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 730 } 731 732 static void test_xsrdpic(void) 733 { 734 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 735 } 736 737 static void test_xsrdpiz(void) 738 { 739 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 740 } 741 742 static void test_xsrdpi(void) 743 { 744 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 745 } 746 747 static void test_xvabsdp(void) 748 { 749 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 750 } 751 752 static void test_xvnabsdp(void) 753 { 754 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 755 } 756 757 static void test_xvnegdp(void) 758 { 759 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 760 } 761 762 static void test_xvabssp(void) 763 { 764 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 765 } 766 767 static void test_xvnabssp(void) 768 { 769 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 770 } 771 772 static void test_xvrdpi(void) 773 { 774 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 775 } 776 777 static void test_xvrdpic(void) 778 { 779 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 780 } 781 782 static void test_xvrdpim(void) 783 { 784 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 785 } 786 787 static void test_xvrdpip(void) 788 { 789 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 790 } 791 792 static void test_xvrdpiz(void) 793 { 794 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 795 } 796 797 static void test_xvrspi(void) 798 { 799 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 800 } 801 802 static void test_xvrspic(void) 803 { 804 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 805 } 806 807 static void test_xvrspim(void) 808 { 809 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 810 } 811 812 static void test_xvrspip(void) 813 { 814 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 815 } 816 817 static void test_xvrspiz(void) 818 { 819 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 820 } 821 822 static vx_fp_test_t 823 vsx_one_fp_arg_tests[] = { 824 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 825 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 826 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 827 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 828 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"}, 829 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"}, 830 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"}, 831 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 832 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"}, 833 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 834 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 835 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 836 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 837 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 838 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 839 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 840 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"}, 841 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"}, 842 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 843 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 844 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 845 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"}, 846 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"}, 847 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"}, 848 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"}, 849 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"}, 850 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 851 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 852 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 853 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 854 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"}, 855 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 856 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 857 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 858 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 859 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"}, 860 { NULL, NULL, NULL, 0, 0, 0, NULL} 861 }; 862 863 static vx_fp_test_t 864 vx_tdivORtsqrt_tests[] = { 865 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 866 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"}, 867 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"}, 868 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"}, 869 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"}, 870 { NULL, NULL, NULL, 0 , 0, 0, NULL} 871 }; 872 873 static unsigned long long doubleWord[] = { 0, 874 0xffffffff00000000LL, 875 0x00000000ffffffffLL, 876 0xffffffffffffffffLL, 877 0x89abcde123456789LL, 878 0x0102030405060708LL, 879 0x00000000a0b1c2d3LL, 880 0x1111222233334444LL 881 }; 882 883 static unsigned int singleWord[] = {0, 884 0xffff0000, 885 0x0000ffff, 886 0xffffffff, 887 0x89a73522, 888 0x01020304, 889 0x0000abcd, 890 0x11223344 891 }; 892 893 typedef struct vx_intToFp_test 894 { 895 test_func_t test_func; 896 const char * name; 897 void * targs; 898 int num_tests; 899 precision_type_t precision; 900 vx_fp_test_type type; 901 } vx_intToFp_test_t; 902 903 static vx_intToFp_test_t 904 intToFp_tests[] = { 905 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 906 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE }, 907 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 908 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE }, 909 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 910 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE }, 911 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 912 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE }, 913 { NULL, NULL, NULL, 0, 0 } 914 }; 915 916 static Bool do_OE; 917 typedef enum { 918 DIV_BASE = 1, 919 DIV_OE = 2, 920 DIV_DOT = 4, 921 } div_type_t; 922 /* Possible divde type combinations are: 923 * - base 924 * - base+dot 925 * - base+OE 926 * - base+OE+dot 927 */ 928 #ifdef __powerpc64__ 929 static void test_divdeu(void) 930 { 931 int divdeu_type = DIV_BASE; 932 if (do_OE) 933 divdeu_type |= DIV_OE; 934 if (do_dot) 935 divdeu_type |= DIV_DOT; 936 937 switch (divdeu_type) { 938 case 1: 939 SET_CR_XER_ZERO; 940 __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 941 GET_CR_XER(div_flags, div_xer); 942 break; 943 case 3: 944 SET_CR_XER_ZERO; 945 __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 946 GET_CR_XER(div_flags, div_xer); 947 break; 948 case 5: 949 SET_CR_XER_ZERO; 950 __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 951 GET_CR_XER(div_flags, div_xer); 952 break; 953 case 7: 954 SET_CR_XER_ZERO; 955 __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 956 GET_CR_XER(div_flags, div_xer); 957 break; 958 default: 959 fprintf(stderr, "Invalid divdeu type. Exiting\n"); 960 exit(1); 961 } 962 } 963 #endif 964 965 static void test_divwe(void) 966 { 967 int divwe_type = DIV_BASE; 968 if (do_OE) 969 divwe_type |= DIV_OE; 970 if (do_dot) 971 divwe_type |= DIV_DOT; 972 973 switch (divwe_type) { 974 case 1: 975 SET_CR_XER_ZERO; 976 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 977 GET_CR_XER(div_flags, div_xer); 978 break; 979 case 3: 980 SET_CR_XER_ZERO; 981 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 982 GET_CR_XER(div_flags, div_xer); 983 break; 984 case 5: 985 SET_CR_XER_ZERO; 986 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 987 GET_CR_XER(div_flags, div_xer); 988 break; 989 case 7: 990 SET_CR_XER_ZERO; 991 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15)); 992 GET_CR_XER(div_flags, div_xer); 993 break; 994 default: 995 fprintf(stderr, "Invalid divweu type. Exiting\n"); 996 exit(1); 997 } 998 } 999 1000 1001 typedef struct simple_test { 1002 test_func_t test_func; 1003 char * name; 1004 precision_type_t precision; 1005 } simple_test_t; 1006 1007 1008 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1009 { 1010 int a_idx, b_idx, i; 1011 void * inA, * inB; 1012 void * vec_src = swap_inputs ? &vec_out : &vec_inB; 1013 1014 for (i = 0; i < 4; i++) { 1015 a_idx = targs->fra_idx; 1016 b_idx = targs->frb_idx; 1017 inA = (void *)&spec_sp_fargs[a_idx]; 1018 inB = (void *)&spec_sp_fargs[b_idx]; 1019 // copy single precision FP into vector element i 1020 memcpy(((void *)&vec_inA) + (i * 4), inA, 4); 1021 memcpy(vec_src + (i * 4), inB, 4); 1022 targs++; 1023 } 1024 } 1025 1026 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs) 1027 { 1028 int a_idx, b_idx, i; 1029 void * inA, * inB; 1030 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB; 1031 1032 for (i = 0; i < 2; i++) { 1033 a_idx = targs->fra_idx; 1034 b_idx = targs->frb_idx; 1035 inA = (void *)&spec_fargs[a_idx]; 1036 inB = (void *)&spec_fargs[b_idx]; 1037 // copy double precision FP into vector element i 1038 memcpy(((void *)&vec_inA) + (i * 8), inA, 8); 1039 memcpy(vec_src + (i * 8), inB, 8); 1040 targs++; 1041 } 1042 } 1043 1044 #define VX_NOT_CMP_OP 0xffffffff 1045 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out) 1046 { 1047 int a_idx, b_idx, k; 1048 char * name = malloc(20); 1049 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0; 1050 int loops = dp ? 2 : 4; 1051 fp_test_args_t * targs = &test_group->targs[i]; 1052 unsigned long long * frA_dp, * frB_dp, * dst_dp; 1053 unsigned int * frA_sp, *frB_sp, * dst_sp; 1054 strcpy(name, test_group->name); 1055 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : "")); 1056 for (k = 0; k < loops; k++) { 1057 a_idx = targs->fra_idx; 1058 b_idx = targs->frb_idx; 1059 if (k) 1060 printf(" AND "); 1061 if (dp) { 1062 frA_dp = (unsigned long long *)&spec_fargs[a_idx]; 1063 frB_dp = (unsigned long long *)&spec_fargs[b_idx]; 1064 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp); 1065 } else { 1066 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx]; 1067 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx]; 1068 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp); 1069 } 1070 targs++; 1071 } 1072 if (cc != VX_NOT_CMP_OP) 1073 printf(" ? cc=%x", cc); 1074 1075 if (print_vec_out) { 1076 if (dp) { 1077 dst_dp = (unsigned long long *) &vec_out; 1078 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]); 1079 } else { 1080 dst_sp = (unsigned int *) &vec_out; 1081 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]); 1082 } 1083 } else { 1084 printf("\n"); 1085 } 1086 free(name); 1087 } 1088 1089 1090 1091 static void test_vsx_one_fp_arg(void) 1092 { 1093 test_func_t func; 1094 int k; 1095 k = 0; 1096 build_special_fargs_table(); 1097 1098 while ((func = vsx_one_fp_arg_tests[k].test_func)) { 1099 int idx, i; 1100 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k]; 1101 Bool estimate = (test_group.type == VX_ESTIMATE); 1102 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1103 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False; 1104 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1105 Bool sparse_sp = False; 1106 int stride = dp ? 2 : 4; 1107 int loops = is_scalar ? 1 : stride; 1108 stride = is_scalar ? 1: stride; 1109 1110 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1111 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op 1112 * or 1113 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op 1114 * 1115 * For the vector op case, we need to adjust stride from '4' to '2', since 1116 * we'll only be loading two values per loop into the input register. 1117 */ 1118 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) { 1119 sparse_sp = True; 1120 stride = 2; 1121 } 1122 1123 for (i = 0; i < test_group.num_tests; i+=stride) { 1124 unsigned int * pv; 1125 void * inB, * vecB_void_ptr = (void *)&vec_inB; 1126 1127 pv = (unsigned int *)&vec_out; 1128 // clear vec_out 1129 for (idx = 0; idx < 4; idx++, pv++) 1130 *pv = 0; 1131 1132 if (dp) { 1133 int j; 1134 unsigned long long * frB_dp, *dst_dp; 1135 for (j = 0; j < loops; j++) { 1136 inB = (void *)&spec_fargs[i + j]; 1137 // copy double precision FP into vector element i 1138 if (isLE && is_scalar) 1139 vecB_void_ptr += 8; 1140 memcpy(vecB_void_ptr + (j * 8), inB, 8); 1141 } 1142 // execute test insn 1143 (*func)(); 1144 dst_dp = (unsigned long long *) &vec_out; 1145 if (isLE && is_scalar) 1146 dst_dp++; 1147 printf("#%d: %s ", i/stride, test_group.name); 1148 for (j = 0; j < loops; j++) { 1149 if (j) 1150 printf("; "); 1151 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1152 printf("%s(%016llx)", test_group.op, *frB_dp); 1153 if (estimate) { 1154 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j); 1155 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1156 /* For debugging . . . 1157 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]); 1158 */ 1159 } else { 1160 vx_fp_test_type type = test_group.type; 1161 switch (type) { 1162 case VX_SCALAR_CONV_TO_WORD: 1163 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL); 1164 break; 1165 case VX_CONV_TO_SINGLE: 1166 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL); 1167 break; 1168 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . . 1169 printf(" = %016llx", dst_dp[j]); 1170 } 1171 } 1172 } 1173 printf("\n"); 1174 } else { 1175 int j; 1176 unsigned int * frB_sp, * dst_sp = NULL; 1177 unsigned long long * dst_dp = NULL; 1178 if (sparse_sp) 1179 loops = 2; 1180 for (j = 0; j < loops; j++) { 1181 inB = (void *)&spec_sp_fargs[i + j]; 1182 // copy single precision FP into vector element i 1183 if (sparse_sp) { 1184 if (isLE) 1185 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4); 1186 else 1187 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4); 1188 } else { 1189 if (isLE && is_scalar) 1190 vecB_void_ptr += 12; 1191 memcpy(vecB_void_ptr + (j * 4), inB, 4); 1192 } 1193 } 1194 // execute test insn 1195 (*func)(); 1196 if (test_group.type == VX_CONV_TO_DOUBLE) { 1197 dst_dp = (unsigned long long *) &vec_out; 1198 if (isLE && is_scalar) 1199 dst_dp++; 1200 } else { 1201 dst_sp = (unsigned int *) &vec_out; 1202 if (isLE && is_scalar) 1203 dst_sp += 3; 1204 } 1205 // print result 1206 printf("#%d: %s ", i/stride, test_group.name); 1207 for (j = 0; j < loops; j++) { 1208 if (j) 1209 printf("; "); 1210 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1211 printf("%s(%08x)", test_group.op, *frB_sp); 1212 if (estimate) { 1213 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j); 1214 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1215 } else { 1216 if (test_group.type == VX_CONV_TO_DOUBLE) 1217 printf(" = %016llx", dst_dp[j]); 1218 else 1219 /* Special case: Current VEX implementation for fsqrts (single precision) 1220 * uses the same implementation as that used for double precision fsqrt. 1221 * However, I've found that for xvsqrtsp, the result from that implementation 1222 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the 1223 * output to appear very different if you end up with a carry. But for the given 1224 * inputs in this testcase, we can simply mask out these bits. 1225 */ 1226 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]); 1227 } 1228 } 1229 printf("\n"); 1230 } 1231 } 1232 k++; 1233 printf( "\n" ); 1234 } 1235 } 1236 1237 static void test_int_to_fp_convert(void) 1238 { 1239 test_func_t func; 1240 int k; 1241 k = 0; 1242 1243 while ((func = intToFp_tests[k].test_func)) { 1244 int idx, i; 1245 vx_intToFp_test_t test_group = intToFp_tests[k]; 1246 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1247 Bool sparse_sp = False; 1248 int stride = dp ? 2 : 4; 1249 int loops = stride; 1250 1251 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1252 * |___ int___|_Unused_|___int___|__Unused__| // for vector op 1253 * or 1254 * We need to adjust stride from '4' to '2', since we'll only be loading 1255 * two values per loop into the input register. 1256 */ 1257 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) { 1258 sparse_sp = True; 1259 stride = 2; 1260 } 1261 1262 for (i = 0; i < test_group.num_tests; i+=stride) { 1263 unsigned int * pv; 1264 void * inB; 1265 1266 pv = (unsigned int *)&vec_out; 1267 // clear vec_out 1268 for (idx = 0; idx < 4; idx++, pv++) 1269 *pv = 0; 1270 1271 if (dp) { 1272 int j; 1273 unsigned long long *dst_dw, * targs = test_group.targs; 1274 for (j = 0; j < loops; j++) { 1275 inB = (void *)&targs[i + j]; 1276 // copy doubleword into vector element i 1277 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1278 } 1279 // execute test insn 1280 (*func)(); 1281 dst_dw = (unsigned long long *) &vec_out; 1282 printf("#%d: %s ", i/stride, test_group.name); 1283 for (j = 0; j < loops; j++) { 1284 if (j) 1285 printf("; "); 1286 printf("conv(%016llx)", targs[i + j]); 1287 1288 if (test_group.type == VX_CONV_TO_SINGLE) 1289 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL); 1290 else 1291 printf(" = %016llx", dst_dw[j]); 1292 } 1293 printf("\n"); 1294 } else { 1295 int j; 1296 unsigned int * dst_sp = NULL; 1297 unsigned int * targs = test_group.targs; 1298 unsigned long long * dst_dp = NULL; 1299 void * vecB_void_ptr = (void *)&vec_inB; 1300 if (sparse_sp) 1301 loops = 2; 1302 for (j = 0; j < loops; j++) { 1303 inB = (void *)&targs[i + j]; 1304 // copy single word into vector element i 1305 if (sparse_sp) { 1306 if (isLE) 1307 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4); 1308 else 1309 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4); 1310 } else { 1311 memcpy(vecB_void_ptr + (j * 4), inB, 4); 1312 } 1313 } 1314 // execute test insn 1315 (*func)(); 1316 if (test_group.type == VX_CONV_TO_DOUBLE) 1317 dst_dp = (unsigned long long *) &vec_out; 1318 else 1319 dst_sp = (unsigned int *) &vec_out; 1320 // print result 1321 printf("#%d: %s ", i/stride, test_group.name); 1322 for (j = 0; j < loops; j++) { 1323 if (j) 1324 printf("; "); 1325 printf("conv(%08x)", targs[i + j]); 1326 if (test_group.type == VX_CONV_TO_DOUBLE) 1327 printf(" = %016llx", dst_dp[j]); 1328 else 1329 printf(" = %08x", dst_sp[j]); 1330 } 1331 printf("\n"); 1332 } 1333 } 1334 k++; 1335 printf( "\n" ); 1336 } 1337 } 1338 1339 1340 1341 // The div doubleword test data 1342 signed long long div_dw_tdata[13][2] = { 1343 { 4, -4 }, 1344 { 4, -3 }, 1345 { 4, 4 }, 1346 { 4, -5 }, 1347 { 3, 8 }, 1348 { 0x8000000000000000ULL, 0xa }, 1349 { 0x50c, -1 }, 1350 { 0x50c, -4096 }, 1351 { 0x1234fedc, 0x8000a873 }, 1352 { 0xabcd87651234fedcULL, 0xa123b893 }, 1353 { 0x123456789abdcULL, 0 }, 1354 { 0, 2 }, 1355 { 0x77, 0xa3499 } 1356 }; 1357 #define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2) 1358 1359 // The div word test data 1360 unsigned int div_w_tdata[6][2] = { 1361 { 0, 2 }, 1362 { 2, 0 }, 1363 { 0x7abc1234, 0xf0000000 }, 1364 { 0xfabc1234, 5 }, 1365 { 77, 66 }, 1366 { 5, 0xfabc1234 }, 1367 }; 1368 #define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2) 1369 1370 typedef struct div_ext_test 1371 { 1372 test_func_t test_func; 1373 const char *name; 1374 int num_tests; 1375 div_type_t div_type; 1376 precision_type_t precision; 1377 } div_ext_test_t; 1378 1379 static div_ext_test_t div_tests[] = { 1380 #ifdef __powerpc64__ 1381 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST }, 1382 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST }, 1383 #endif 1384 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST }, 1385 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST }, 1386 { NULL, NULL, 0, 0, 0 } 1387 }; 1388 1389 static void test_div_extensions(void) 1390 { 1391 test_func_t func; 1392 int k; 1393 k = 0; 1394 1395 while ((func = div_tests[k].test_func)) { 1396 int i, repeat = 1; 1397 div_ext_test_t test_group = div_tests[k]; 1398 do_dot = False; 1399 1400 again: 1401 for (i = 0; i < test_group.num_tests; i++) { 1402 unsigned int condreg; 1403 1404 if (test_group.div_type == DIV_OE) 1405 do_OE = True; 1406 else 1407 do_OE = False; 1408 1409 if (test_group.precision == DOUBLE_TEST) { 1410 r14 = div_dw_tdata[i][0]; 1411 r15 = div_dw_tdata[i][1]; 1412 } else { 1413 r14 = div_w_tdata[i][0]; 1414 r15 = div_w_tdata[i][1]; 1415 } 1416 // execute test insn 1417 (*func)(); 1418 condreg = (div_flags & 0xf0000000) >> 28; 1419 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : ""); 1420 if (test_group.precision == DOUBLE_TEST) { 1421 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;", 1422 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17); 1423 } else { 1424 printf("0x%08x00000000 / 0x%08x = 0x%08x;", 1425 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17); 1426 } 1427 printf(" CR=%x; XER=%x\n", condreg, div_xer); 1428 } 1429 printf("\n"); 1430 if (repeat) { 1431 repeat = 0; 1432 do_dot = True; 1433 goto again; 1434 } 1435 k++; 1436 printf( "\n" ); 1437 } 1438 } 1439 1440 1441 static void test_vx_tdivORtsqrt(void) 1442 { 1443 test_func_t func; 1444 int k, crx; 1445 unsigned int flags; 1446 k = 0; 1447 do_dot = False; 1448 build_special_fargs_table(); 1449 1450 while ((func = vx_tdivORtsqrt_tests[k].test_func)) { 1451 int idx, i; 1452 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k]; 1453 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False; 1454 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1455 Bool two_args = test_group.targs ? True : False; 1456 int stride = dp ? 2 : 4; 1457 int loops = is_scalar ? 1 : stride; 1458 stride = is_scalar ? 1: stride; 1459 1460 for (i = 0; i < test_group.num_tests; i+=stride) { 1461 unsigned int * pv; 1462 void * inB, * vecB_void_ptr = (void *)&vec_inB; 1463 1464 pv = (unsigned int *)&vec_out; 1465 // clear vec_out 1466 for (idx = 0; idx < 4; idx++, pv++) 1467 *pv = 0; 1468 1469 if (dp) { 1470 int j; 1471 unsigned long long * frB_dp; 1472 if (two_args) { 1473 setup_dp_fp_args(&test_group.targs[i], False); 1474 } else { 1475 for (j = 0; j < loops; j++) { 1476 inB = (void *)&spec_fargs[i + j]; 1477 // copy double precision FP into vector element i 1478 if (isLE && is_scalar) 1479 vecB_void_ptr += 8; 1480 memcpy(vecB_void_ptr + (j * 8), inB, 8); 1481 } 1482 } 1483 // execute test insn 1484 // Must do set/get of CRs immediately before/after calling the asm func 1485 // to avoid CRs being modified by other instructions. 1486 SET_FPSCR_ZERO; 1487 SET_CR_XER_ZERO; 1488 (*func)(); 1489 GET_CR(flags); 1490 // assumes using CR1 1491 crx = (flags & 0x0f000000) >> 24; 1492 if (two_args) { 1493 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1494 } else { 1495 printf("#%d: %s ", i/stride, test_group.name); 1496 for (j = 0; j < loops; j++) { 1497 if (j) 1498 printf("; "); 1499 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1500 printf("%s(%016llx)", test_group.op, *frB_dp); 1501 } 1502 printf( " ? %x (CRx)\n", crx); 1503 } 1504 } else { 1505 int j; 1506 unsigned int * frB_sp; 1507 if (two_args) { 1508 setup_sp_fp_args(&test_group.targs[i], False); 1509 } else { 1510 for (j = 0; j < loops; j++) { 1511 inB = (void *)&spec_sp_fargs[i + j]; 1512 // copy single precision FP into vector element i 1513 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1514 } 1515 } 1516 // execute test insn 1517 SET_FPSCR_ZERO; 1518 SET_CR_XER_ZERO; 1519 (*func)(); 1520 GET_CR(flags); 1521 crx = (flags & 0x0f000000) >> 24; 1522 // print result 1523 if (two_args) { 1524 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/); 1525 } else { 1526 printf("#%d: %s ", i/stride, test_group.name); 1527 for (j = 0; j < loops; j++) { 1528 if (j) 1529 printf("; "); 1530 frB_sp = (unsigned int *)&spec_sp_fargs[i + j]; 1531 printf("%s(%08x)", test_group.op, *frB_sp); 1532 } 1533 printf( " ? %x (CRx)\n", crx); 1534 } 1535 } 1536 } 1537 k++; 1538 printf( "\n" ); 1539 } 1540 } 1541 1542 1543 static void test_ftsqrt(void) 1544 { 1545 int i, crx; 1546 unsigned int flags; 1547 unsigned long long * frbp; 1548 build_special_fargs_table(); 1549 1550 1551 for (i = 0; i < nb_special_fargs; i++) { 1552 f14 = spec_fargs[i]; 1553 frbp = (unsigned long long *)&spec_fargs[i]; 1554 SET_FPSCR_ZERO; 1555 SET_CR_XER_ZERO; 1556 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14)); 1557 GET_CR(flags); 1558 crx = (flags & 0x0f000000) >> 24; 1559 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx); 1560 } 1561 printf( "\n" ); 1562 } 1563 1564 static void 1565 test_popcntw(void) 1566 { 1567 #ifdef __powerpc64__ 1568 uint64_t res; 1569 unsigned long long src = 0x9182736405504536ULL; 1570 r14 = src; 1571 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1572 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res); 1573 #else 1574 uint32_t res; 1575 unsigned int src = 0x9182730E; 1576 r14 = src; 1577 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14)); 1578 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res); 1579 #endif 1580 printf( "\n" ); 1581 } 1582 1583 1584 static test_table_t 1585 all_tests[] = 1586 { 1587 1588 { &test_vsx_one_fp_arg, 1589 "Test VSX vector and scalar single argument instructions"} , 1590 { &test_int_to_fp_convert, 1591 "Test VSX vector integer to float conversion instructions" }, 1592 { &test_div_extensions, 1593 "Test div extensions" }, 1594 { &test_ftsqrt, 1595 "Test ftsqrt instruction" }, 1596 { &test_vx_tdivORtsqrt, 1597 "Test vector and scalar tdiv and tsqrt instructions" }, 1598 { &test_popcntw, 1599 "Test popcntw instruction" }, 1600 { NULL, NULL } 1601 }; 1602 #endif // HAS_VSX 1603 1604 int main(int argc, char *argv[]) 1605 { 1606 #ifdef HAS_VSX 1607 1608 test_table_t aTest; 1609 test_func_t func; 1610 int i = 0; 1611 1612 while ((func = all_tests[i].test_category)) { 1613 aTest = all_tests[i]; 1614 printf( "%s\n", aTest.name ); 1615 (*func)(); 1616 i++; 1617 } 1618 if (spec_fargs) 1619 free(spec_fargs); 1620 if (spec_sp_fargs) 1621 free(spec_sp_fargs); 1622 1623 #endif // HAS _VSX 1624 1625 return 0; 1626 } 1627