1 /* Copyright (C) 2013 IBM 2 3 Authors: Carl Love <carll (at) us.ibm.com> 4 Maynard Johnson <maynardj (at) us.ibm.com> 5 6 This program is free software; you can redistribute it and/or 7 modify it under the terms of the GNU General Public License as 8 published by the Free Software Foundation; either version 2 of the 9 License, or (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307, USA. 20 21 The GNU General Public License is contained in the file COPYING. 22 23 This program is based heavily on the test_isa_2_06_part*.c source files. 24 */ 25 26 #include <stdio.h> 27 28 #ifdef HAS_ISA_2_07 29 30 #include <stdint.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <malloc.h> 34 #include <altivec.h> 35 #include <math.h> 36 37 #ifndef __powerpc64__ 38 typedef uint32_t HWord_t; 39 #else 40 typedef uint64_t HWord_t; 41 #endif /* __powerpc64__ */ 42 43 register HWord_t r14 __asm__ ("r14"); 44 register HWord_t r15 __asm__ ("r15"); 45 register HWord_t r16 __asm__ ("r16"); 46 register HWord_t r17 __asm__ ("r17"); 47 register double f14 __asm__ ("fr14"); 48 register double f15 __asm__ ("fr15"); 49 register double f16 __asm__ ("fr16"); 50 register double f17 __asm__ ("fr17"); 51 52 static volatile unsigned int cond_reg; 53 54 #define True 1 55 #define False 0 56 57 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 58 59 #define SET_CR(_arg) \ 60 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 61 62 #define SET_XER(_arg) \ 63 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 64 65 #define GET_CR(_lval) \ 66 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 67 68 #define GET_XER(_lval) \ 69 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 70 71 #define GET_CR_XER(_lval_cr,_lval_xer) \ 72 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 73 74 #define SET_CR_ZERO \ 75 SET_CR(0) 76 77 #define SET_XER_ZERO \ 78 SET_XER(0) 79 80 #define SET_CR_XER_ZERO \ 81 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 82 83 #define SET_FPSCR_ZERO \ 84 do { double _d = 0.0; \ 85 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 86 } while (0) 87 88 typedef unsigned char Bool; 89 90 91 /* These functions below that construct a table of floating point 92 * values were lifted from none/tests/ppc32/jm-insns.c. 93 */ 94 95 #if defined (DEBUG_ARGS_BUILD) 96 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 97 #else 98 #define AB_DPRINTF(fmt, args...) do { } while (0) 99 #endif 100 101 static inline void register_farg (void *farg, 102 int s, uint16_t _exp, uint64_t mant) 103 { 104 uint64_t tmp; 105 106 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 107 *(uint64_t *)farg = tmp; 108 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 109 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 110 } 111 112 static inline void register_sp_farg (void *farg, 113 int s, uint16_t _exp, uint32_t mant) 114 { 115 uint32_t tmp; 116 tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant; 117 *(uint32_t *)farg = tmp; 118 } 119 120 121 typedef struct fp_test_args { 122 int fra_idx; 123 int frb_idx; 124 } fp_test_args_t; 125 126 static int nb_special_fargs; 127 static double * spec_fargs; 128 static float * spec_sp_fargs; 129 130 static void build_special_fargs_table(void) 131 { 132 /* 133 * Double precision: 134 * Sign goes from zero to one (1 bit) 135 * Exponent goes from 0 to ((1 << 12) - 1) (11 bits) 136 * Mantissa goes from 1 to ((1 << 52) - 1) (52 bits) 137 * + special values: 138 * +0.0 : 0 0x000 0x0000000000000 => 0x0000000000000000 139 * -0.0 : 1 0x000 0x0000000000000 => 0x8000000000000000 140 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000 141 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000 142 * +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF 143 * -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF 144 * +QNaN : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000 145 * -QNaN : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000 146 * (8 values) 147 * 148 * Single precision 149 * Sign: 1 bit 150 * Exponent: 8 bits 151 * Mantissa: 23 bits 152 * +0.0 : 0 0x00 0x000000 => 0x00000000 153 * -0.0 : 1 0x00 0x000000 => 0x80000000 154 * +infinity : 0 0xFF 0x000000 => 0x7F800000 155 * -infinity : 1 0xFF 0x000000 => 0xFF800000 156 * +SNaN : 0 0xFF 0x3FFFFF => 0x7FBFFFFF 157 * -SNaN : 1 0xFF 0x3FFFFF => 0xFFBFFFFF 158 * +QNaN : 0 0xFF 0x400000 => 0x7FC00000 159 * -QNaN : 1 0xFF 0x400000 => 0xFFC00000 160 */ 161 162 uint64_t mant; 163 uint32_t mant_sp; 164 uint16_t _exp; 165 int s; 166 int j, i = 0; 167 168 if (spec_fargs) 169 return; 170 171 spec_fargs = malloc( 20 * sizeof(double) ); 172 spec_sp_fargs = malloc( 20 * sizeof(float) ); 173 174 // #0 175 s = 0; 176 _exp = 0x3fd; 177 mant = 0x8000000000000ULL; 178 register_farg(&spec_fargs[i++], s, _exp, mant); 179 180 // #1 181 s = 0; 182 _exp = 0x404; 183 mant = 0xf000000000000ULL; 184 register_farg(&spec_fargs[i++], s, _exp, mant); 185 186 // #2 187 s = 0; 188 _exp = 0x001; 189 mant = 0x8000000b77501ULL; 190 register_farg(&spec_fargs[i++], s, _exp, mant); 191 192 // #3 193 s = 0; 194 _exp = 0x7fe; 195 mant = 0x800000000051bULL; 196 register_farg(&spec_fargs[i++], s, _exp, mant); 197 198 // #4 199 s = 0; 200 _exp = 0x012; 201 mant = 0x3214569900000ULL; 202 register_farg(&spec_fargs[i++], s, _exp, mant); 203 204 /* Special values */ 205 /* +0.0 : 0 0x000 0x0000000000000 */ 206 // #5 207 s = 0; 208 _exp = 0x000; 209 mant = 0x0000000000000ULL; 210 register_farg(&spec_fargs[i++], s, _exp, mant); 211 212 /* -0.0 : 1 0x000 0x0000000000000 */ 213 // #6 214 s = 1; 215 _exp = 0x000; 216 mant = 0x0000000000000ULL; 217 register_farg(&spec_fargs[i++], s, _exp, mant); 218 219 /* +infinity : 0 0x7FF 0x0000000000000 */ 220 // #7 221 s = 0; 222 _exp = 0x7FF; 223 mant = 0x0000000000000ULL; 224 register_farg(&spec_fargs[i++], s, _exp, mant); 225 226 /* -infinity : 1 0x7FF 0x0000000000000 */ 227 // #8 228 s = 1; 229 _exp = 0x7FF; 230 mant = 0x0000000000000ULL; 231 register_farg(&spec_fargs[i++], s, _exp, mant); 232 233 /* 234 * This comment applies to values #9 and #10 below: 235 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision, 236 * so we can't just copy the double-precision value to the corresponding slot in the 237 * single-precision array (i.e., in the loop at the end of this function). Instead, we 238 * have to manually set the bits using register_sp_farg(). 239 */ 240 241 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 242 // #9 243 s = 0; 244 _exp = 0x7FF; 245 mant = 0x7FFFFFFFFFFFFULL; 246 register_farg(&spec_fargs[i++], s, _exp, mant); 247 _exp = 0xff; 248 mant_sp = 0x3FFFFF; 249 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 250 251 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 252 // #10 253 s = 1; 254 _exp = 0x7FF; 255 mant = 0x7FFFFFFFFFFFFULL; 256 register_farg(&spec_fargs[i++], s, _exp, mant); 257 _exp = 0xff; 258 mant_sp = 0x3FFFFF; 259 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp); 260 261 /* +QNaN : 0 0x7FF 0x8000000000000 */ 262 // #11 263 s = 0; 264 _exp = 0x7FF; 265 mant = 0x8000000000000ULL; 266 register_farg(&spec_fargs[i++], s, _exp, mant); 267 268 /* -QNaN : 1 0x7FF 0x8000000000000 */ 269 // #12 270 s = 1; 271 _exp = 0x7FF; 272 mant = 0x8000000000000ULL; 273 register_farg(&spec_fargs[i++], s, _exp, mant); 274 275 /* denormalized value */ 276 // #13 277 s = 1; 278 _exp = 0x000; 279 mant = 0x8340000078000ULL; 280 register_farg(&spec_fargs[i++], s, _exp, mant); 281 282 /* Negative finite number */ 283 // #14 284 s = 1; 285 _exp = 0x40d; 286 mant = 0x0650f5a07b353ULL; 287 register_farg(&spec_fargs[i++], s, _exp, mant); 288 289 /* A few positive finite numbers ... */ 290 // #15 291 s = 0; 292 _exp = 0x412; 293 mant = 0x32585a9900000ULL; 294 register_farg(&spec_fargs[i++], s, _exp, mant); 295 296 // #16 297 s = 0; 298 _exp = 0x413; 299 mant = 0x82511a2000000ULL; 300 register_farg(&spec_fargs[i++], s, _exp, mant); 301 302 // #17 303 s = 0; 304 _exp = 0x403; 305 mant = 0x12ef5a9300000ULL; 306 register_farg(&spec_fargs[i++], s, _exp, mant); 307 308 // #18 309 s = 0; 310 _exp = 0x405; 311 mant = 0x14bf5d2300000ULL; 312 register_farg(&spec_fargs[i++], s, _exp, mant); 313 314 // #19 315 s = 0; 316 _exp = 0x409; 317 mant = 0x76bf982440000ULL; 318 register_farg(&spec_fargs[i++], s, _exp, mant); 319 320 321 nb_special_fargs = i; 322 for (j = 0; j < i; j++) { 323 if (!(j == 9 || j == 10)) 324 spec_sp_fargs[j] = spec_fargs[j]; 325 } 326 } 327 328 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0, 329 0, 0, 0, 0 }; 330 331 332 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001, 333 0x89abcdef, 334 0x00112233, 335 0x74556677, 336 0x00001abb, 337 0x00000001, 338 0x31929394, 339 0xa1a2a3a4, 340 }; 341 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0]) 342 #define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4) 343 344 typedef void (*test_func_t)(void); 345 346 struct test_table 347 { 348 test_func_t test_category; 349 char * name; 350 }; 351 352 353 typedef enum { 354 SINGLE_TEST, 355 SINGLE_TEST_SINGLE_RES, 356 DOUBLE_TEST, 357 DOUBLE_TEST_SINGLE_RES 358 } precision_type_t; 359 #define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST)) 360 361 typedef enum { 362 VX_FP_SMAS, // multiply add single precision result 363 VX_FP_SMSS, // multiply sub single precision result 364 VX_FP_SNMAS, // negative multiply add single precision result 365 VX_FP_SNMSS, // negative multiply sub single precision result 366 VX_FP_OTHER, 367 VX_CONV_WORD, 368 VX_ESTIMATE, 369 VX_CONV_TO_SINGLE, 370 VX_CONV_TO_DOUBLE, 371 VX_SCALAR_CONV_TO_WORD, 372 VX_SCALAR_SP_TO_VECTOR_SP, 373 VX_DEFAULT 374 } vx_fp_test_type; 375 376 typedef enum { 377 VSX_LOAD = 1, 378 VSX_LOAD_SPLAT, 379 VSX_STORE, 380 } vsx_ldst_type; 381 382 typedef enum { 383 VSX_AND = 1, 384 VSX_NAND, 385 VSX_ANDC, 386 VSX_OR, 387 VSX_ORC, 388 VSX_NOR, 389 VSX_XOR, 390 VSX_EQV, 391 } vsx_log_op; 392 393 struct vx_fp_test1 394 { 395 test_func_t test_func; 396 const char *name; 397 fp_test_args_t * targs; 398 int num_tests; 399 vx_fp_test_type test_type; 400 }; 401 402 struct ldst_test 403 { 404 test_func_t test_func; 405 const char *name; 406 precision_type_t precision; 407 void * base_addr; 408 uint32_t offset; 409 vsx_ldst_type type; 410 }; 411 412 struct vx_fp_test2 413 { 414 test_func_t test_func; 415 const char *name; 416 fp_test_args_t * targs; 417 int num_tests; 418 precision_type_t precision; 419 vx_fp_test_type test_type; 420 const char * op; 421 }; 422 423 struct xs_conv_test 424 { 425 test_func_t test_func; 426 const char *name; 427 int num_tests; 428 }; 429 430 struct simple_test 431 { 432 test_func_t test_func; 433 const char *name; 434 }; 435 436 struct vsx_logic_test 437 { 438 test_func_t test_func; 439 const char *name; 440 vsx_log_op op; 441 }; 442 443 typedef struct vsx_logic_test logic_test_t; 444 typedef struct ldst_test ldst_test_t; 445 typedef struct simple_test xs_conv_test_t; 446 typedef struct vx_fp_test1 vx_fp_test_basic_t; 447 typedef struct vx_fp_test2 vx_fp_test2_t; 448 typedef struct test_table test_table_t; 449 450 451 static vector unsigned int vec_out, vec_inA, vec_inB; 452 453 static void test_xscvdpspn(void) 454 { 455 __asm__ __volatile__ ("xscvdpspn %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 456 } 457 458 static void test_xscvspdpn(void) 459 { 460 __asm__ __volatile__ ("xscvspdpn %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 461 } 462 463 static int do_asp; 464 static void test_xsmadds(void) 465 { 466 if (do_asp) 467 __asm__ __volatile__ ("xsmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 468 else 469 __asm__ __volatile__ ("xsmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 470 } 471 472 static void test_xsmsubs(void) 473 { 474 if (do_asp) 475 __asm__ __volatile__ ("xsmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 476 else 477 __asm__ __volatile__ ("xsmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 478 } 479 480 static void test_xscvsxdsp (void) 481 { 482 __asm__ __volatile__ ("xscvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 483 } 484 485 static void test_xscvuxdsp (void) 486 { 487 __asm__ __volatile__ ("xscvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 488 } 489 490 static void test_xsnmadds(void) 491 { 492 if (do_asp) 493 __asm__ __volatile__ ("xsnmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 494 else 495 __asm__ __volatile__ ("xsnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 496 } 497 498 static void test_xsnmsubs(void) 499 { 500 if (do_asp) 501 __asm__ __volatile__ ("xsnmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 502 else 503 __asm__ __volatile__ ("xsnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 504 } 505 506 static void test_stxsspx(void) 507 { 508 __asm__ __volatile__ ("stxsspx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); 509 } 510 511 static void test_stxsiwx(void) 512 { 513 __asm__ __volatile__ ("stxsiwx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); 514 } 515 516 static void test_lxsiwax(void) 517 { 518 __asm__ __volatile__ ("lxsiwax %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 519 } 520 521 static void test_lxsiwzx(void) 522 { 523 __asm__ __volatile__ ("lxsiwzx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 524 } 525 526 static void test_lxsspx(void) 527 { 528 __asm__ __volatile__ ("lxsspx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 529 } 530 531 static void test_xssqrtsp(void) 532 { 533 __asm__ __volatile__ ("xssqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 534 } 535 536 static void test_xsrsqrtesp(void) 537 { 538 __asm__ __volatile__ ("xsrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 539 } 540 541 /* Three argument instuctions */ 542 static void test_xxleqv(void) 543 { 544 __asm__ __volatile__ ("xxleqv %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 545 } 546 547 static void test_xxlorc(void) 548 { 549 __asm__ __volatile__ ("xxlorc %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 550 } 551 552 static void test_xxlnand(void) 553 { 554 __asm__ __volatile__ ("xxlnand %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 555 } 556 557 static void test_xsaddsp(void) 558 { 559 __asm__ __volatile__ ("xsaddsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB)); 560 } 561 562 static void test_xssubsp(void) 563 { 564 __asm__ __volatile__ ("xssubsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB)); 565 } 566 567 static void test_xsdivsp(void) 568 { 569 __asm__ __volatile__ ("xsdivsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB)); 570 } 571 572 static void test_xsmulsp(void) 573 { 574 __asm__ __volatile__ ("xsmulsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 575 } 576 577 static void test_xsresp(void) 578 { 579 __asm__ __volatile__ ("xsresp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 580 } 581 static void test_xsrsp(void) 582 { 583 __asm__ __volatile__ ("xsrsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 584 } 585 586 fp_test_args_t vx_math_tests[] = { 587 {8, 8}, 588 {8, 14}, 589 {8, 6}, 590 {8, 5}, 591 {8, 4}, 592 {8, 7}, 593 {8, 9}, 594 {8, 11}, 595 {14, 8}, 596 {14, 14}, 597 {14, 6}, 598 {14, 5}, 599 {14, 4}, 600 {14, 7}, 601 {14, 9}, 602 {14, 11}, 603 {6, 8}, 604 {6, 14}, 605 {6, 6}, 606 {6, 5}, 607 {6, 4}, 608 {6, 7}, 609 {6, 9}, 610 {6, 11}, 611 {5, 8}, 612 {5, 14}, 613 {5, 6}, 614 {5, 5}, 615 {5, 4}, 616 {5, 7}, 617 {5, 9}, 618 {5, 11}, 619 {4, 8}, 620 {4, 14}, 621 {4, 6}, 622 {4, 5}, 623 {4, 1}, 624 {4, 7}, 625 {4, 9}, 626 {4, 11}, 627 {7, 8}, 628 {7, 14}, 629 {7, 6}, 630 {7, 5}, 631 {7, 4}, 632 {7, 7}, 633 {7, 9}, 634 {7, 11}, 635 {10, 8}, 636 {10, 14}, 637 {10, 6}, 638 {10, 5}, 639 {10, 4}, 640 {10, 7}, 641 {10, 9}, 642 {10, 11}, 643 {12, 8}, 644 {12, 14}, 645 {12, 6}, 646 {12, 5}, 647 {12, 4}, 648 {12, 7}, 649 {12, 9}, 650 {12, 11}, 651 {8, 8}, 652 {8, 14}, 653 {8, 6}, 654 {8, 5}, 655 {8, 4}, 656 {8, 7}, 657 {8, 9}, 658 {8, 11}, 659 {14, 8}, 660 {14, 14}, 661 {14, 6}, 662 {14, 5}, 663 {14, 4}, 664 {14, 7}, 665 {14, 9}, 666 {14, 11}, 667 {6, 8}, 668 {6, 14}, 669 {6, 6}, 670 {6, 5}, 671 {6, 4}, 672 {6, 7}, 673 {6, 9}, 674 {6, 11}, 675 {5, 8}, 676 {5, 14}, 677 {5, 6}, 678 {5, 5}, 679 {5, 4}, 680 {5, 7}, 681 {5, 9}, 682 {5, 11}, 683 {4, 8}, 684 {4, 14}, 685 {4, 6}, 686 {4, 5}, 687 {4, 1}, 688 {4, 7}, 689 {4, 9}, 690 {4, 11}, 691 {7, 8}, 692 {7, 14}, 693 {7, 6}, 694 {7, 5}, 695 {7, 4}, 696 {7, 7}, 697 {7, 9}, 698 {7, 11}, 699 {10, 8}, 700 {10, 14}, 701 {10, 6}, 702 {10, 5}, 703 {10, 4}, 704 {10, 7}, 705 {10, 9}, 706 {10, 11}, 707 {12, 8}, 708 {12, 14}, 709 {12, 6}, 710 {12, 5}, 711 {12, 4}, 712 {12, 7}, 713 {12, 9}, 714 {12, 11} 715 }; 716 717 // These are all double precision inputs with double word outputs (mostly converted to single precision) 718 static vx_fp_test_basic_t vx_fp_tests[] = { 719 { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS}, 720 { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS}, 721 { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER}, 722 { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER}, 723 { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS}, 724 { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS}, 725 { NULL, NULL, NULL, 0, 0 } 726 }; 727 728 static vx_fp_test2_t 729 vsx_one_fp_arg_tests[] = { 730 { &test_xscvdpspn, "xscvdpspn", NULL, 20, SINGLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"}, 731 { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"}, 732 { &test_xsresp, "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"}, 733 { &test_xsrsp, "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"}, 734 { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"}, 735 { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"}, 736 { NULL, NULL, NULL, 0, 0, 0, NULL} 737 }; 738 739 // These are all double precision inputs with double word outputs (mostly converted to single precision) 740 static vx_fp_test_basic_t 741 vx_simple_scalar_fp_tests[] = { 742 { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT}, 743 { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT}, 744 { NULL, NULL, NULL, 0 , 0} 745 }; 746 747 static ldst_test_t 748 ldst_tests[] = { 749 { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE }, 750 { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE }, 751 { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD }, 752 { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 1, VSX_LOAD }, 753 { &test_lxsspx, "lxsspx", SINGLE_TEST, NULL, 0, VSX_LOAD }, 754 { NULL, NULL, 0, NULL, 0, 0 } }; 755 756 static xs_conv_test_t 757 xs_conv_tests[] = { 758 { &test_xscvsxdsp, "xscvsxdsp"}, 759 { &test_xscvuxdsp, "xscvuxdsp"}, 760 { NULL, NULL} 761 }; 762 763 static logic_test_t 764 logic_tests[] = { 765 { &test_xxleqv, "xxleqv", VSX_EQV }, 766 { &test_xxlorc, "xxlorc", VSX_ORC }, 767 { &test_xxlnand, "xxlnand", VSX_NAND }, 768 { NULL, NULL} 769 }; 770 771 Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx) 772 { 773 /* NOTE: 774 * This function has been verified only with the xsresp and xsrsqrtes instructions. 775 * 776 * Technically, the number of bits of precision for xsresp and xsrsqrtesp is 777 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions 778 * does an actual reciprocal calculation versus estimation, so the answer we get back from 779 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of 780 * precision) and the estimate may still be within expected tolerances. On top of that, 781 * we can't count on these estimates always being the same across implementations. 782 * For example, with the fre[s] instruction (which should be correct to within one part 783 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111, 784 * one implementation could return 1.0111_1111_0000 and another implementation could return 785 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a 786 * single bit in common. 787 * 788 * The upshot is we can't validate the VEX output for these instructions by comparing against 789 * stored bit patterns. We must check that the result is within expected tolerances. 790 */ 791 792 /* A mask to be used for validation as a last resort. 793 * Only use 12 bits of precision for reasons discussed above. 794 */ 795 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000 796 797 798 Bool result = False; 799 double src_dp, res_dp; 800 float calc_diff = 0; 801 float real_diff = 0; 802 double recip_divisor; 803 float div_result; 804 float calc_diff_tmp; 805 806 src_dp = res_dp = 0; 807 Bool src_is_negative = False; 808 Bool res_is_negative = False; 809 unsigned long long * dst_dp = NULL; 810 unsigned long long * src_dp_ull; 811 dst_dp = (unsigned long long *) &vec_out; 812 src_dp = spec_fargs[idx]; 813 src_dp_ull = (unsigned long long *) &src_dp; 814 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False; 815 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False; 816 memcpy(&res_dp, &dst_dp[output_vec_idx], 8); 817 818 819 // Below are common rules 820 if (isnan(src_dp)) 821 return isnan(res_dp); 822 if (fpclassify(src_dp) == FP_ZERO) 823 return isinf(res_dp); 824 if (!src_is_negative && isinf(src_dp)) 825 return !res_is_negative && (fpclassify(res_dp) == FP_ZERO); 826 if (is_rsqrte) { 827 if (src_is_negative) 828 return isnan(res_dp); 829 } else { 830 if (src_is_negative && isinf(src_dp)) 831 return res_is_negative && (fpclassify(res_dp) == FP_ZERO); 832 } 833 834 if (is_rsqrte) 835 recip_divisor = sqrt(src_dp); 836 else 837 recip_divisor = src_dp; 838 839 /* The instructions handled by this function take a double precision 840 * input, perform a reciprocal estimate in double-precision, round 841 * the result to single precision and store into the destination 842 * register in double precision format. So, to check the result 843 * for accuracy, we use float (single precision) values. 844 */ 845 div_result = 1.0/recip_divisor; 846 calc_diff_tmp = recip_divisor * 16384.0; 847 if (isnormal(calc_diff_tmp)) { 848 calc_diff = fabs(1.0/calc_diff_tmp); 849 real_diff = fabs((float)res_dp - div_result); 850 result = ( ( res_dp == div_result ) 851 || ( real_diff <= calc_diff ) ); 852 #if FRES_DEBUG 853 unsigned int * dv = (unsigned int *)&div_result; 854 unsigned int * rd = (unsigned int *)&real_diff; 855 unsigned int * cd = (unsigned int *)&calc_diff; 856 printf("\n\t {computed div_result: %08x; real_diff: %08x; calc_diff: %08x}\n", 857 *dv, *rd, *cd); 858 #endif 859 860 } else { 861 /* Unable to compute theoretical difference, so we fall back to masking out 862 * un-precise bits. 863 */ 864 unsigned int * div_result_sp = (unsigned int *)&div_result; 865 float res_sp = (float)res_dp; 866 unsigned int * dst_sp = (unsigned int *)&res_sp; 867 #if FRES_DEBUG 868 unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp; 869 printf("Unable to compute theoretical difference, so we fall back to masking\n"); 870 printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n", 871 *calc_diff_tmp_sp, *div_result_sp, *dst_sp); 872 #endif 873 result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP); 874 } 875 return result; 876 } 877 878 static void test_vx_fp_ops(void) 879 { 880 881 test_func_t func; 882 int k; 883 char * test_name = (char *)malloc(20); 884 k = 0; 885 886 build_special_fargs_table(); 887 while ((func = vx_fp_tests[k].test_func)) { 888 int i, repeat = 0; 889 unsigned long long * frap, * frbp, * dst; 890 vx_fp_test_basic_t test_group = vx_fp_tests[k]; 891 vx_fp_test_type test_type = test_group.test_type; 892 893 switch (test_type) { 894 case VX_FP_SMAS: 895 case VX_FP_SMSS: 896 case VX_FP_SNMAS: 897 case VX_FP_SNMSS: 898 if (test_type == VX_FP_SMAS) 899 strcpy(test_name, "xsmadd"); 900 else if (test_type == VX_FP_SMSS) 901 strcpy(test_name, "xsmsub"); 902 else if (test_type == VX_FP_SNMAS) 903 strcpy(test_name, "xsnmadd"); 904 else 905 strcpy(test_name, "xsnmsub"); 906 907 if (!repeat) { 908 repeat = 1; 909 strcat(test_name, "asp"); 910 do_asp = 1; 911 } 912 break; 913 case VX_FP_OTHER: 914 strcpy(test_name, test_group.name); 915 break; 916 default: 917 printf("ERROR: Invalid VX FP test type %d\n", test_type); 918 exit(1); 919 } 920 921 again: 922 for (i = 0; i < test_group.num_tests; i++) { 923 unsigned int * inA, * inB, * pv; 924 925 fp_test_args_t aTest = test_group.targs[i]; 926 inA = (unsigned int *)&spec_fargs[aTest.fra_idx]; 927 inB = (unsigned int *)&spec_fargs[aTest.frb_idx]; 928 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 929 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 930 int idx; 931 unsigned long long vsr_XT; 932 pv = (unsigned int *)&vec_out; 933 934 // Only need to copy one doubleword into each vector's element 0 935 memcpy(&vec_inA, inA, 8); 936 memcpy(&vec_inB, inB, 8); 937 938 // clear vec_out 939 for (idx = 0; idx < 4; idx++, pv++) 940 *pv = 0; 941 942 if (test_type != VX_FP_OTHER) { 943 /* Then we need a third src argument, which is stored in element 0 of 944 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>mdp cases, VSX[XT] holds 945 * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds 946 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test 947 * data (input args, result) contain only two inputs, so I arbitrarily 948 * use spec_fargs elements 4 and 14 (alternating) for the third source 949 * argument. We can use the same input data for a given pair of 950 * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus 951 * the expected result should be the same. 952 */ 953 int extra_arg_idx; 954 if (i % 2) 955 extra_arg_idx = 4; 956 else 957 extra_arg_idx = 14; 958 959 if (repeat) { 960 /* We're on the first time through of one of the VX_FP_SMx 961 * test types, meaning we're testing a xs<ZZZ>adp case, thus 962 * we have to swap inputs as described above: 963 * src2 <= VSX[XT] 964 * src3 <= VSX[XB] 965 */ 966 memcpy(&vec_out, inB, 8); // src2 967 memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8); //src3 968 frbp = (unsigned long long *)&spec_fargs[extra_arg_idx]; 969 } else { 970 // Don't need to init src2, as it's done before the switch() 971 memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8); //src3 972 } 973 memcpy(&vsr_XT, &vec_out, 8); 974 } 975 976 (*func)(); 977 dst = (unsigned long long *) &vec_out; 978 979 if (test_type == VX_FP_OTHER) 980 printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, 981 *frap, *frbp, *dst); 982 else 983 printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i, 984 test_name, vsr_XT, *frap, *frbp, *dst ); 985 986 } 987 /* 988 { 989 // Debug code. Keep this block commented out except when debugging. 990 double result, expected; 991 memcpy(&result, dst, 8); 992 memcpy(&expected, &aTest.dp_bin_result, 8); 993 printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n", 994 spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx], 995 expected, result ); 996 } 997 */ 998 printf( "\n" ); 999 1000 if (repeat) { 1001 repeat = 0; 1002 strcat(test_name, "UNKNOWN"); 1003 switch (test_type) { 1004 case VX_FP_SMAS: 1005 case VX_FP_SMSS: 1006 case VX_FP_SNMAS: 1007 case VX_FP_SNMSS: 1008 if (test_type == VX_FP_SMAS) 1009 strcpy(test_name, "xsmadd"); 1010 else if (test_type == VX_FP_SMSS) 1011 strcpy(test_name, "xsmsub"); 1012 else if (test_type == VX_FP_SNMAS) 1013 strcpy(test_name, "xsnmadd"); 1014 else 1015 strcpy(test_name, "xsnmsub"); 1016 1017 do_asp = 0; 1018 strcat(test_name, "msp"); 1019 break; 1020 default: 1021 break; 1022 } 1023 goto again; 1024 } 1025 k++; 1026 } 1027 printf( "\n" ); 1028 free(test_name); 1029 } 1030 1031 1032 static void test_vsx_one_fp_arg(void) 1033 { 1034 test_func_t func; 1035 int k; 1036 k = 0; 1037 build_special_fargs_table(); 1038 1039 while ((func = vsx_one_fp_arg_tests[k].test_func)) { 1040 int idx, i; 1041 unsigned long long *dst_dp; 1042 unsigned int * dst_sp; 1043 vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k]; 1044 /* size of source operands */ 1045 Bool dp = ((test_group.precision == DOUBLE_TEST) || 1046 (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False; 1047 /* size of result */ 1048 Bool dp_res = IS_DP_RESULT(test_group.precision); 1049 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False; 1050 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False; 1051 Bool sparse_sp = False; 1052 int stride = dp ? 2 : 4; 1053 int loops = is_scalar ? 1 : stride; 1054 stride = is_scalar ? 1: stride; 1055 1056 /* For conversions of single to double, the 128-bit input register is sparsely populated: 1057 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op 1058 * or 1059 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op 1060 * 1061 * For the vector op case, we need to adjust stride from '4' to '2', since 1062 * we'll only be loading two values per loop into the input register. 1063 */ 1064 if (!dp && !is_scalar && test_group.test_type == VX_CONV_TO_DOUBLE) { 1065 sparse_sp = True; 1066 stride = 2; 1067 } 1068 1069 for (i = 0; i < test_group.num_tests; i+=stride) { 1070 unsigned int * pv; 1071 void * inB; 1072 1073 pv = (unsigned int *)&vec_out; 1074 // clear vec_out 1075 for (idx = 0; idx < 4; idx++, pv++) 1076 *pv = 0; 1077 1078 if (dp) { 1079 int j; 1080 unsigned long long * frB_dp; 1081 for (j = 0; j < loops; j++) { 1082 inB = (void *)&spec_fargs[i + j]; 1083 // copy double precision FP into vector element i 1084 memcpy(((void *)&vec_inB) + (j * 8), inB, 8); 1085 } 1086 // execute test insn 1087 (*func)(); 1088 if (dp_res) 1089 dst_dp = (unsigned long long *) &vec_out; 1090 else 1091 dst_sp = (unsigned int *) &vec_out; 1092 1093 printf("#%d: %s ", i/stride, test_group.name); 1094 for (j = 0; j < loops; j++) { 1095 if (j) 1096 printf("; "); 1097 frB_dp = (unsigned long long *)&spec_fargs[i + j]; 1098 printf("%s(%016llx)", test_group.op, *frB_dp); 1099 if (test_group.test_type == VX_ESTIMATE) 1100 { 1101 Bool res; 1102 res = check_reciprocal_estimate(is_sqrt, i + j, j); 1103 printf(" ==> %s)", res ? "PASS" : "FAIL"); 1104 } else if (dp_res) { 1105 printf(" = %016llx", dst_dp[j]); 1106 } else { 1107 printf(" = %08x", dst_sp[j]); 1108 } 1109 } 1110 printf("\n"); 1111 } else { // single precision test type 1112 int j; 1113 // Clear input vector 1114 pv = (unsigned int *)&vec_inB; 1115 for (idx = 0; idx < 4; idx++, pv++) 1116 *pv = 0; 1117 1118 if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) { 1119 /* Take a single-precision value stored in double word element 0 1120 * of src in double-precision format and convert to single- 1121 * precision and store in word element 0 of dst. 1122 */ 1123 double input = spec_sp_fargs[i]; 1124 memcpy(((void *)&vec_inB), (void *)&input, 8); 1125 } else { 1126 int skip_slot; 1127 if (sparse_sp) { 1128 skip_slot = 1; 1129 loops = 2; 1130 } else { 1131 skip_slot = 0; 1132 } 1133 for (j = 0; j < loops; j++) { 1134 inB = (void *)&spec_sp_fargs[i + j]; 1135 // copy single precision FP into vector element i 1136 1137 if (skip_slot && j > 0) 1138 memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4); 1139 else 1140 memcpy(((void *)&vec_inB) + (j * 4), inB, 4); 1141 } 1142 } 1143 // execute test insn 1144 (*func)(); 1145 if (dp_res) 1146 dst_dp = (unsigned long long *) &vec_out; 1147 else 1148 dst_sp = (unsigned int *) &vec_out; 1149 // print result 1150 printf("#%d: %s ", i/stride, test_group.name); 1151 for (j = 0; j < loops; j++) { 1152 if (j) 1153 printf("; "); 1154 printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i + j])); 1155 if (dp_res) 1156 printf(" = %016llx", dst_dp[j]); 1157 else 1158 printf(" = %08x", dst_sp[j]); 1159 } 1160 printf("\n"); 1161 } 1162 } 1163 k++; 1164 printf( "\n" ); 1165 } 1166 } 1167 1168 /* This function currently only supports two double precision input arguments. */ 1169 static void test_vsx_two_fp_arg(void) 1170 { 1171 test_func_t func; 1172 int k = 0; 1173 1174 build_special_fargs_table(); 1175 while ((func = vx_simple_scalar_fp_tests[k].test_func)) { 1176 unsigned long long * frap, * frbp, * dst; 1177 unsigned int * pv; 1178 int idx; 1179 vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k]; 1180 pv = (unsigned int *)&vec_out; 1181 // clear vec_out 1182 for (idx = 0; idx < 4; idx++, pv++) 1183 *pv = 0; 1184 1185 void * inA, * inB; 1186 int i; 1187 for (i = 0; i < test_group.num_tests; i++) { 1188 fp_test_args_t aTest = test_group.targs[i]; 1189 inA = (void *)&spec_fargs[aTest.fra_idx]; 1190 inB = (void *)&spec_fargs[aTest.frb_idx]; 1191 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1192 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1193 // Only need to copy one doubleword into each vector's element 0 1194 memcpy(&vec_inA, inA, 8); 1195 memcpy(&vec_inB, inB, 8); 1196 (*func)(); 1197 dst = (unsigned long long *) &vec_out; 1198 printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name, 1199 *frap, *frbp, *dst); 1200 } 1201 printf( "\n" ); 1202 k++; 1203 } 1204 } 1205 1206 /* This function handles the following cases: 1207 * 1) Single precision value stored in double-precision 1208 * floating-point format in doubleword element 0 of src VSX register 1209 * 2) Integer word value stored in word element 1 of src VSX register 1210 */ 1211 static void _do_store_test (ldst_test_t storeTest) 1212 { 1213 test_func_t func; 1214 unsigned int *dst32; 1215 unsigned int i, idx; 1216 unsigned int * pv = (unsigned int *) storeTest.base_addr; 1217 1218 func = storeTest.test_func; 1219 r14 = (HWord_t) storeTest.base_addr; 1220 r15 = (HWord_t) storeTest.offset; 1221 1222 if (storeTest.precision == DOUBLE_TEST_SINGLE_RES) { 1223 /* source is single precision stored in double precision format */ 1224 /* test some of the pre-defined single precision values */ 1225 for (i = 0; i < nb_special_fargs; i+=3) { 1226 // clear out storage destination 1227 for (idx = 0; idx < 4; idx++) 1228 *(pv + idx) = 0; 1229 1230 printf( "%s:", storeTest.name ); 1231 unsigned long long * dp; 1232 double input = spec_sp_fargs[i]; 1233 dp = (unsigned long long *)&input; 1234 memcpy(&vec_inA, dp, sizeof(unsigned long long)); 1235 printf(" %016llx ==> ", *dp); 1236 1237 // execute test insn 1238 (*func)(); 1239 dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset); 1240 printf( "%08x\n", *dst32); 1241 } 1242 } else { 1243 // source is an integer word 1244 for (i = 0; i < NUM_VIARGS_INTS; i++) { 1245 // clear out storage destination 1246 for (idx = 0; idx < 4; idx++) 1247 *(pv + idx) = 0; 1248 printf( "%s:", storeTest.name ); 1249 unsigned int * pi = (unsigned int *)&vec_inA; 1250 memcpy(pi + 1, &viargs[i], sizeof(unsigned int)); 1251 printf(" %08x ==> ", *(pi + 1)); 1252 1253 // execute test insn 1254 (*func)(); 1255 dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset); 1256 printf( "%08x\n", *dst32); 1257 } 1258 } 1259 printf("\n"); 1260 } 1261 1262 static void _do_load_test(ldst_test_t storeTest) 1263 { 1264 test_func_t func; 1265 unsigned int i; 1266 unsigned long long * dst_dp; 1267 1268 func = storeTest.test_func; 1269 r15 = (HWord_t) storeTest.offset; 1270 1271 if (storeTest.base_addr == NULL) { 1272 /* Test lxsspx: source is single precision value, so let's */ 1273 /* test some of the pre-defined single precision values. */ 1274 for (i = 0; i + storeTest.offset < nb_special_fargs; i+=3) { 1275 unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + storeTest.offset]; 1276 printf( "%s:", storeTest.name ); 1277 printf(" %08x ==> ", *sp); 1278 r14 = (HWord_t)&spec_sp_fargs[i]; 1279 1280 // execute test insn 1281 (*func)(); 1282 dst_dp = (unsigned long long *) &vec_out; 1283 printf("%016llx\n", *dst_dp); 1284 } 1285 } else { 1286 // source is an integer word 1287 for (i = 0; i < NUM_VIARGS_INTS; i++) { 1288 printf( "%s:", storeTest.name ); 1289 r14 = (HWord_t)&viargs[i + storeTest.offset]; 1290 printf(" %08x ==> ", viargs[i + storeTest.offset]); 1291 1292 // execute test insn 1293 (*func)(); 1294 dst_dp = (unsigned long long *) &vec_out; 1295 printf("%016llx\n", *dst_dp); 1296 } 1297 } 1298 printf("\n"); 1299 } 1300 1301 static void test_ldst(void) 1302 { 1303 int k = 0; 1304 1305 while (ldst_tests[k].test_func) { 1306 if (ldst_tests[k].type == VSX_STORE) 1307 _do_store_test(ldst_tests[k]); 1308 else { 1309 _do_load_test(ldst_tests[k]); 1310 } 1311 k++; 1312 printf("\n"); 1313 } 1314 } 1315 1316 static void test_xs_conv_ops(void) 1317 { 1318 1319 test_func_t func; 1320 int k = 0; 1321 1322 build_special_fargs_table(); 1323 while ((func = xs_conv_tests[k].test_func)) { 1324 int i; 1325 unsigned long long * dst; 1326 xs_conv_test_t test_group = xs_conv_tests[k]; 1327 for (i = 0; i < NUM_VIARGS_INTS; i++) { 1328 unsigned int * inB, * pv; 1329 int idx; 1330 inB = (unsigned int *)&viargs[i]; 1331 memcpy(&vec_inB, inB, 4); 1332 pv = (unsigned int *)&vec_out; 1333 // clear vec_out 1334 for (idx = 0; idx < 4; idx++, pv++) 1335 *pv = 0; 1336 (*func)(); 1337 dst = (unsigned long long *) &vec_out; 1338 printf("#%d: %s %08x => %016llx\n", i, test_group.name, viargs[i], *dst); 1339 } 1340 k++; 1341 printf("\n"); 1342 } 1343 printf( "\n" ); 1344 } 1345 1346 1347 static void test_vsx_logic(void) 1348 { 1349 logic_test_t aTest; 1350 test_func_t func; 1351 int k; 1352 k = 0; 1353 1354 while ((func = logic_tests[k].test_func)) { 1355 1356 unsigned int * pv; 1357 unsigned int * inA, * inB, * dst; 1358 int idx, i; 1359 aTest = logic_tests[k]; 1360 for (i = 0; i <= NUM_VIARGS_VECS; i+=4) { 1361 pv = (unsigned int *)&vec_out; 1362 inA = &viargs[i]; 1363 inB = &viargs[i]; 1364 memcpy(&vec_inA, inA, sizeof(vector unsigned int)); 1365 memcpy(&vec_inB, inB, sizeof(vector unsigned int)); 1366 // clear vec_out 1367 for (idx = 0; idx < 4; idx++, pv++) 1368 *pv = 0; 1369 1370 // execute test insn 1371 (*func)(); 1372 dst = (unsigned int*) &vec_out; 1373 1374 printf( "#%d: %10s ", k, aTest.name); 1375 printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]); 1376 printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]); 1377 printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]); 1378 } 1379 k++; 1380 } 1381 printf( "\n" ); 1382 } 1383 1384 1385 //---------------------------------------------------------- 1386 1387 static test_table_t all_tests[] = { 1388 { &test_vx_fp_ops, 1389 "Test VSX floating point instructions"}, 1390 { &test_vsx_one_fp_arg, 1391 "Test VSX vector and scalar single argument instructions"} , 1392 { &test_vsx_logic, 1393 "Test VSX logic instructions" }, 1394 { &test_xs_conv_ops, 1395 "Test VSX scalar integer conversion instructions" }, 1396 { &test_ldst, 1397 "Test VSX load/store dp to sp instructions" }, 1398 { &test_vsx_two_fp_arg, 1399 "Test VSX vector and scalar two argument instructions"} , 1400 { NULL, NULL } 1401 }; 1402 1403 #endif 1404 1405 int main(int argc, char *argv[]) 1406 { 1407 1408 #ifdef HAS_ISA_2_07 1409 test_table_t aTest; 1410 test_func_t func; 1411 int i = 0; 1412 1413 while ((func = all_tests[i].test_category)) { 1414 aTest = all_tests[i]; 1415 printf( "%s\n", aTest.name ); 1416 (*func)(); 1417 i++; 1418 } 1419 #else 1420 printf("NO ISA 2.07 SUPPORT\n"); 1421 #endif 1422 return 0; 1423 } 1424