1 /* Copyright (C) 2011 IBM 2 3 Author: Maynard Johnson <maynardj (at) us.ibm.com> 4 5 This program is free software; you can redistribute it and/or 6 modify it under the terms of the GNU General Public License as 7 published by the Free Software Foundation; either version 2 of the 8 License, or (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307, USA. 19 20 The GNU General Public License is contained in the file COPYING. 21 */ 22 23 #ifdef HAS_VSX 24 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <malloc.h> 30 #include <altivec.h> 31 32 #ifndef __powerpc64__ 33 typedef uint32_t HWord_t; 34 #else 35 typedef uint64_t HWord_t; 36 #endif /* __powerpc64__ */ 37 38 #ifdef VGP_ppc64le_linux 39 #define isLE 1 40 #else 41 #define isLE 0 42 #endif 43 44 register HWord_t r14 __asm__ ("r14"); 45 register HWord_t r15 __asm__ ("r15"); 46 register HWord_t r16 __asm__ ("r16"); 47 register HWord_t r17 __asm__ ("r17"); 48 register double f14 __asm__ ("fr14"); 49 register double f15 __asm__ ("fr15"); 50 register double f16 __asm__ ("fr16"); 51 register double f17 __asm__ ("fr17"); 52 53 static volatile unsigned int cond_reg; 54 55 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" 56 57 #define SET_CR(_arg) \ 58 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); 59 60 #define SET_XER(_arg) \ 61 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); 62 63 #define GET_CR(_lval) \ 64 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) 65 66 #define GET_XER(_lval) \ 67 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) 68 69 #define GET_CR_XER(_lval_cr,_lval_xer) \ 70 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0) 71 72 #define SET_CR_ZERO \ 73 SET_CR(0) 74 75 #define SET_XER_ZERO \ 76 SET_XER(0) 77 78 #define SET_CR_XER_ZERO \ 79 do { SET_CR_ZERO; SET_XER_ZERO; } while (0) 80 81 #define SET_FPSCR_ZERO \ 82 do { double _d = 0.0; \ 83 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ 84 } while (0) 85 86 87 typedef void (*test_func_t)(void); 88 typedef struct ldst_test ldst_test_t; 89 typedef struct vsx_logic_test logic_test_t; 90 typedef struct xs_conv_test xs_conv_test_t; 91 typedef struct p7_fp_test fp_test_t; 92 typedef struct vx_fp_test vx_fp_test_t; 93 typedef struct vsx_move_test move_test_t; 94 typedef struct vsx_permute_test permute_test_t; 95 typedef struct test_table test_table_t; 96 97 static double *fargs = NULL; 98 static int nb_fargs; 99 100 /* These functions below that construct a table of floating point 101 * values were lifted from none/tests/ppc32/jm-insns.c. 102 */ 103 104 #if defined (DEBUG_ARGS_BUILD) 105 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0) 106 #else 107 #define AB_DPRINTF(fmt, args...) do { } while (0) 108 #endif 109 110 static inline void register_farg (void *farg, 111 int s, uint16_t _exp, uint64_t mant) 112 { 113 uint64_t tmp; 114 115 tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant; 116 *(uint64_t *)farg = tmp; 117 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n", 118 s, _exp, mant, *(uint64_t *)farg, *(double *)farg); 119 } 120 121 static void build_fargs_table(void) 122 /* 123 * Double precision: 124 * Sign goes from zero to one (1 bit) 125 * Exponent goes from 0 to ((1 << 12) - 1) (11 bits) 126 * Mantissa goes from 1 to ((1 << 52) - 1) (52 bits) 127 * + special values: 128 * +0.0 : 0 0x000 0x0000000000000 => 0x0000000000000000 129 * -0.0 : 1 0x000 0x0000000000000 => 0x8000000000000000 130 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000 131 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000 132 * +QNaN : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000 133 * -QNaN : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000 134 * +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF 135 * -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF 136 * (8 values) 137 * 138 * Single precision 139 * Sign: 1 bit 140 * Exponent: 8 bits 141 * Mantissa: 23 bits 142 * +0.0 : 0 0x00 0x000000 => 0x00000000 143 * -0.0 : 1 0x00 0x000000 => 0x80000000 144 * +infinity : 0 0xFF 0x000000 => 0x7F800000 145 * -infinity : 1 0xFF 0x000000 => 0xFF800000 146 * +QNaN : 0 0xFF 0x400000 => 0x7FC00000 147 * -QNaN : 1 0xFF 0x400000 => 0xFFC00000 148 * +SNaN : 0 0xFF 0x3FFFFF => 0x7FBFFFFF 149 * -SNaN : 1 0xFF 0x3FFFFF => 0xFFBFFFFF 150 */ 151 { 152 uint64_t mant; 153 uint16_t _exp, e1; 154 int s; 155 int i=0; 156 157 if (nb_fargs) 158 return; 159 160 fargs = malloc( 16 * sizeof(double) ); 161 for (s = 0; s < 2; s++) { 162 for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) { 163 if (e1 >= 0x400) 164 e1 = 0x3fe; 165 _exp = e1; 166 for (mant = 0x0000000000001ULL; mant < (1ULL << 52); 167 /* Add 'random' bits */ 168 mant = ((mant + 0x4A6) << 29) + 0x359) { 169 register_farg( &fargs[i++], s, _exp, mant ); 170 } 171 if (e1 == 0x3fe) 172 break; 173 } 174 } 175 // add a few smaller values to fargs . . . 176 s = 0; 177 _exp = 0x002; 178 mant = 0x0000000000b01ULL; 179 register_farg(&fargs[i++], s, _exp, mant); 180 181 _exp = 0x000; 182 mant = 0x00000203f0b3dULL; 183 register_farg(&fargs[i++], s, _exp, mant); 184 185 mant = 0x00000005a203dULL; 186 register_farg(&fargs[i++], s, _exp, mant); 187 188 s = 1; 189 _exp = 0x002; 190 mant = 0x0000000000b01ULL; 191 register_farg(&fargs[i++], s, _exp, mant); 192 193 _exp = 0x000; 194 mant = 0x00000203f0b3dULL; 195 register_farg(&fargs[i++], s, _exp, mant); 196 197 nb_fargs = i; 198 } 199 200 201 typedef struct fp_test_args { 202 int fra_idx; 203 int frb_idx; 204 int cr_flags; 205 } fp_test_args_t; 206 207 208 fp_test_args_t ftdiv_tests[] = { 209 {0, 1, 0x8}, 210 {9, 1, 0xa}, 211 {1, 12, 0xa}, 212 {0, 2, 0xa}, 213 {1, 3, 0xa}, 214 {3, 0, 0xa}, 215 {0, 3, 0xa}, 216 {4, 0, 0xa}, 217 {7, 1, 0xe}, 218 {8, 1, 0xe}, 219 {1, 7, 0xe}, 220 {0, 13, 0xe}, 221 {5, 5, 0xe}, 222 {5, 6, 0xe}, 223 }; 224 225 fp_test_args_t xscmpX_tests[] = { 226 {8, 8, 0x2}, 227 {8, 14, 0x8}, 228 {8, 6, 0x8}, 229 {8, 5, 0x8}, 230 {8, 4, 0x8}, 231 {8, 7, 0x8}, 232 {8, 9, 0x1}, 233 {8, 11, 0x1}, 234 {14, 8, 0x4}, 235 {14, 14, 0x2}, 236 {14, 6, 0x8}, 237 {14, 5, 0x8}, 238 {14, 4, 0x8}, 239 {14, 7, 0x8}, 240 {14, 9, 0x1}, 241 {14, 11, 0x1}, 242 {6, 8, 0x4}, 243 {6, 14, 0x4}, 244 {6, 6, 0x2}, 245 {6, 5, 0x2}, 246 {6, 4, 0x8}, 247 {6, 7, 0x8}, 248 {6, 9, 0x1}, 249 {6, 11, 0x1}, 250 {5, 8, 0x4}, 251 {5, 14, 0x4}, 252 {5, 6, 0x2}, 253 {5, 5, 0x2}, 254 {5, 4, 0x8}, 255 {5, 7, 0x8}, 256 {5, 9, 0x1}, 257 {5, 11, 0x1}, 258 {4, 8, 0x4}, 259 {4, 14, 0x4}, 260 {4, 6, 0x4}, 261 {4, 5, 0x4}, 262 {4, 1, 0x8}, 263 {4, 7, 0x8}, 264 {4, 9, 0x1}, 265 {4, 11, 0x1}, 266 {7, 8, 0x4}, 267 {7, 14, 0x4}, 268 {7, 6, 0x4}, 269 {7, 5, 0x4}, 270 {7, 4, 0x4}, 271 {7, 7, 0x2}, 272 {7, 9, 0x1}, 273 {7, 11, 0x1}, 274 {10, 8, 0x1}, 275 {10, 14, 0x1}, 276 {10, 6, 0x1}, 277 {10, 5, 0x1}, 278 {10, 4, 0x1}, 279 {10, 7, 0x1}, 280 {10, 9, 0x1}, 281 {10, 11, 0x1}, 282 {12, 8, 0x1}, 283 {12, 14, 0x1}, 284 {12, 6, 0x1}, 285 {12, 5, 0x1}, 286 {12, 4, 0x1}, 287 {12, 7, 0x1}, 288 {12, 9, 0x1}, 289 {12, 11, 0x1}, 290 }; 291 292 fp_test_args_t xsadddp_tests[] = { 293 {8, 8, 0x0}, 294 {8, 14, 0x0}, 295 {8, 6, 0x0}, 296 {8, 5, 0x0}, 297 {8, 4, 0x0}, 298 {8, 7, 0x0}, 299 {8, 9, 0x0}, 300 {8, 11, 0x0}, 301 {14, 8, 0x0}, 302 {14, 14, 0x0}, 303 {14, 6, 0x0}, 304 {14, 5, 0x0}, 305 {14, 4, 0x0}, 306 {14, 7, 0x0}, 307 {14, 9, 0x0}, 308 {14, 11, 0x0}, 309 {6, 8, 0x0}, 310 {6, 14, 0x0}, 311 {6, 6, 0x0}, 312 {6, 5, 0x0}, 313 {6, 4, 0x0}, 314 {6, 7, 0x0}, 315 {6, 9, 0x0}, 316 {6, 11, 0x0}, 317 {5, 8, 0x0}, 318 {5, 14, 0x0}, 319 {5, 6, 0x0}, 320 {5, 5, 0x0}, 321 {5, 4, 0x0}, 322 {5, 7, 0x0}, 323 {5, 9, 0x0}, 324 {5, 11, 0x0}, 325 {4, 8, 0x0}, 326 {4, 14, 0x0}, 327 {4, 6, 0x0}, 328 {4, 5, 0x0}, 329 {4, 1, 0x0}, 330 {4, 7, 0x0}, 331 {4, 9, 0x0}, 332 {4, 11, 0x0}, 333 {7, 8, 0x0}, 334 {7, 14, 0x0}, 335 {7, 6, 0x0}, 336 {7, 5, 0x0}, 337 {7, 4, 0x0}, 338 {7, 7, 0x0}, 339 {7, 9, 0x0}, 340 {7, 11, 0x0}, 341 {10, 8, 0x0}, 342 {10, 14, 0x0}, 343 {10, 6, 0x0}, 344 {10, 5, 0x0}, 345 {10, 4, 0x0}, 346 {10, 7, 0x0}, 347 {10, 9, 0x0}, 348 {10, 11, 0x0}, 349 {12, 8, 0x0}, 350 {12, 14, 0x0}, 351 {12, 6, 0x0}, 352 {12, 5, 0x0}, 353 {12, 4, 0x0}, 354 {12, 7, 0x0}, 355 {12, 9, 0x0}, 356 {12, 11, 0x0}, 357 }; 358 359 fp_test_args_t xsdivdp_tests[] = { 360 {8, 8, 0x0}, 361 {8, 14, 0x0}, 362 {8, 6, 0x0}, 363 {8, 5, 0x0}, 364 {8, 4, 0x0}, 365 {8, 7, 0x0}, 366 {8, 9, 0x0}, 367 {8, 11, 0x0}, 368 {14, 8, 0x0}, 369 {14, 14, 0x0}, 370 {14, 6, 0x0}, 371 {14, 5, 0x0}, 372 {14, 4, 0x0}, 373 {14, 7, 0x0}, 374 {14, 9, 0x0}, 375 {14, 11, 0x0}, 376 {6, 8, 0x0}, 377 {6, 14, 0x0}, 378 {6, 6, 0x0}, 379 {6, 5, 0x0}, 380 {6, 4, 0x0}, 381 {6, 7, 0x0}, 382 {6, 9, 0x0}, 383 {6, 11, 0x0}, 384 {5, 8, 0x0}, 385 {5, 14, 0x0}, 386 {5, 6, 0x0}, 387 {5, 5, 0x0}, 388 {5, 4, 0x0}, 389 {5, 7, 0x0}, 390 {5, 9, 0x0}, 391 {5, 11, 0x0}, 392 {4, 8, 0x0}, 393 {4, 14, 0x0}, 394 {4, 6, 0x0}, 395 {4, 5, 0x0}, 396 {4, 1, 0x0}, 397 {4, 7, 0x0}, 398 {4, 9, 0x0}, 399 {4, 11, 0x0}, 400 {7, 8, 0x0}, 401 {7, 14, 0x0}, 402 {7, 6, 0x0}, 403 {7, 5, 0x0}, 404 {7, 4, 0x0}, 405 {7, 7, 0x0}, 406 {7, 9, 0x0}, 407 {7, 11, 0x0}, 408 {10, 8, 0x0}, 409 {10, 14, 0x0}, 410 {10, 6, 0x0}, 411 {10, 5, 0x0}, 412 {10, 4, 0x0}, 413 {10, 7, 0x0}, 414 {10, 9, 0x0}, 415 {10, 11, 0x0}, 416 {12, 8, 0x0}, 417 {12, 14, 0x0}, 418 {12, 6, 0x0}, 419 {12, 5, 0x0}, 420 {12, 4, 0x0}, 421 {12, 7, 0x0}, 422 {12, 9, 0x0}, 423 {12, 11, 0x0}, 424 }; 425 426 fp_test_args_t xsmaddXdp_tests[] = { 427 {8, 8, 0x0}, 428 {8, 14, 0x0}, 429 {8, 6, 0x0}, 430 {8, 5, 0x0}, 431 {8, 4, 0x0}, 432 {8, 7, 0x0}, 433 {8, 9, 0x0}, 434 {8, 11, 0x0}, 435 {14, 8, 0x0}, 436 {14, 14, 0x0}, 437 {14, 6, 0x0}, 438 {14, 5, 0x0}, 439 {14, 4, 0x0}, 440 {14, 7, 0x0}, 441 {14, 9, 0x0}, 442 {14, 11, 0x0}, 443 {6, 8, 0x0}, 444 {6, 14, 0x0}, 445 {6, 6, 0x0}, 446 {6, 5, 0x0}, 447 {6, 4, 0x0}, 448 {6, 7, 0x0}, 449 {6, 9, 0x0}, 450 {6, 11, 0x0}, 451 {5, 8, 0x0}, 452 {5, 14, 0x0}, 453 {5, 6, 0x0}, 454 {5, 5, 0x0}, 455 {5, 4, 0x0}, 456 {5, 7, 0x0}, 457 {5, 9, 0x0}, 458 {5, 11, 0x0}, 459 {4, 8, 0x0}, 460 {4, 14, 0x0}, 461 {4, 6, 0x0}, 462 {4, 5, 0x0}, 463 {4, 1, 0x0}, 464 {4, 7, 0x0}, 465 {4, 9, 0x0}, 466 {4, 11, 0x0}, 467 {7, 8, 0x0}, 468 {7, 14, 0x0}, 469 {7, 6, 0x0}, 470 {7, 5, 0x0}, 471 {7, 4, 0x0}, 472 {7, 7, 0x0}, 473 {7, 9, 0x0}, 474 {7, 11, 0x0}, 475 {10, 8, 0x0}, 476 {10, 14, 0x0}, 477 {10, 6, 0x0}, 478 {10, 5, 0x0}, 479 {10, 4, 0x0}, 480 {10, 7, 0x0}, 481 {10, 9, 0x0}, 482 {10, 11, 0x0}, 483 {12, 8, 0x0}, 484 {12, 14, 0x0}, 485 {12, 6, 0x0}, 486 {12, 5, 0x0}, 487 {12, 4, 0x0}, 488 {12, 7, 0x0}, 489 {12, 9, 0x0}, 490 {12, 11, 0x0}, 491 }; 492 493 fp_test_args_t xsmsubXdp_tests[] = { 494 {8, 8, 0x0}, 495 {8, 14, 0x0}, 496 {8, 6, 0x0}, 497 {8, 5, 0x0}, 498 {8, 4, 0x0}, 499 {8, 7, 0x0}, 500 {8, 9, 0x0}, 501 {8, 11, 0x0}, 502 {14, 8, 0x0}, 503 {14, 14, 0x0}, 504 {14, 6, 0x0}, 505 {14, 5, 0x0}, 506 {14, 4, 0x0}, 507 {14, 7, 0x0}, 508 {14, 9, 0x0}, 509 {14, 11, 0x0}, 510 {6, 8, 0x0}, 511 {6, 14, 0x0}, 512 {6, 6, 0x0}, 513 {6, 5, 0x0}, 514 {6, 4, 0x0}, 515 {6, 7, 0x0}, 516 {6, 9, 0x0}, 517 {6, 11, 0x0}, 518 {5, 8, 0x0}, 519 {5, 14, 0x0}, 520 {5, 6, 0x0}, 521 {5, 5, 0x0}, 522 {5, 4, 0x0}, 523 {5, 7, 0x0}, 524 {5, 9, 0x0}, 525 {5, 11, 0x0}, 526 {4, 8, 0x0}, 527 {4, 14, 0x0}, 528 {4, 6, 0x0}, 529 {4, 5, 0x0}, 530 {4, 1, 0x0}, 531 {4, 7, 0x0}, 532 {4, 9, 0x0}, 533 {4, 11, 0x0}, 534 {7, 8, 0x0}, 535 {7, 14, 0x0}, 536 {7, 6, 0x0}, 537 {7, 5, 0x0}, 538 {7, 4, 0x0}, 539 {7, 7, 0x0}, 540 {7, 9, 0x0}, 541 {7, 11, 0x0}, 542 {10, 8, 0x0}, 543 {10, 14, 0x0}, 544 {10, 6, 0x0}, 545 {10, 5, 0x0}, 546 {10, 4, 0x0}, 547 {10, 7, 0x0}, 548 {10, 9, 0x0}, 549 {10, 11, 0x0}, 550 {12, 8, 0x0}, 551 {12, 14, 0x0}, 552 {12, 6, 0x0}, 553 {12, 5, 0x0}, 554 {12, 4, 0x0}, 555 {12, 7, 0x0}, 556 {12, 9, 0x0}, 557 {12, 11, 0x0}, 558 }; 559 560 fp_test_args_t xsnmaddXdp_tests[] = { 561 {8, 8, 0x0}, 562 {8, 14, 0x0}, 563 {8, 6, 0x0}, 564 {8, 5, 0x0}, 565 {8, 4, 0x0}, 566 {8, 7, 0x0}, 567 {8, 9, 0x0}, 568 {8, 11, 0x0}, 569 {14, 8, 0x0}, 570 {14, 14, 0x0}, 571 {14, 6, 0x0}, 572 {14, 5, 0x0}, 573 {14, 4, 0x0}, 574 {14, 7, 0x0}, 575 {14, 9, 0x0}, 576 {14, 11, 0x0}, 577 {6, 8, 0x0}, 578 {6, 14, 0x0}, 579 {6, 6, 0x0}, 580 {6, 5, 0x0}, 581 {6, 4, 0x0}, 582 {6, 7, 0x0}, 583 {6, 9, 0x0}, 584 {6, 11, 0x0}, 585 {5, 8, 0x0}, 586 {5, 14, 0x0}, 587 {5, 6, 0x0}, 588 {5, 5, 0x0}, 589 {5, 4, 0x0}, 590 {5, 7, 0x0}, 591 {5, 9, 0x0}, 592 {5, 11, 0x0}, 593 {4, 8, 0x0}, 594 {4, 14, 0x0}, 595 {4, 6, 0x0}, 596 {4, 5, 0x0}, 597 {4, 1, 0x0}, 598 {4, 7, 0x0}, 599 {4, 9, 0x0}, 600 {4, 11, 0x0}, 601 {7, 8, 0x0}, 602 {7, 14, 0x0}, 603 {7, 6, 0x0}, 604 {7, 5, 0x0}, 605 {7, 4, 0x0}, 606 {7, 7, 0x0}, 607 {7, 9, 0x0}, 608 {7, 11, 0x0}, 609 {10, 8, 0x0}, 610 {10, 14, 0x0}, 611 {10, 6, 0x0}, 612 {10, 5, 0x0}, 613 {10, 4, 0x0}, 614 {10, 7, 0x0}, 615 {10, 9, 0x0}, 616 {10, 11, 0x0}, 617 {12, 8, 0x0}, 618 {12, 14, 0x0}, 619 {12, 6, 0x0}, 620 {12, 5, 0x0}, 621 {12, 4, 0x0}, 622 {12, 7, 0x0}, 623 {12, 9, 0x0}, 624 {12, 11, 0x0}, 625 }; 626 627 fp_test_args_t xsmuldp_tests[] = { 628 {8, 8, 0x0}, 629 {8, 14, 0x0}, 630 {8, 6, 0x0}, 631 {8, 5, 0x0}, 632 {8, 4, 0x0}, 633 {8, 7, 0x0}, 634 {8, 9, 0x0}, 635 {8, 11, 0x0}, 636 {14, 8, 0x0}, 637 {14, 14, 0x0}, 638 {14, 6, 0x0}, 639 {14, 5, 0x0}, 640 {14, 4, 0x0}, 641 {14, 7, 0x0}, 642 {14, 9, 0x0}, 643 {14, 11, 0x0}, 644 {6, 8, 0x0}, 645 {6, 14, 0x0}, 646 {6, 6, 0x0}, 647 {6, 5, 0x0}, 648 {6, 4, 0x0}, 649 {6, 7, 0x0}, 650 {6, 9, 0x0}, 651 {6, 11, 0x0}, 652 {5, 8, 0x0}, 653 {5, 14, 0x0}, 654 {5, 6, 0x0}, 655 {5, 5, 0x0}, 656 {5, 4, 0x0}, 657 {5, 7, 0x0}, 658 {5, 9, 0x0}, 659 {5, 11, 0x0}, 660 {4, 8, 0x0}, 661 {4, 14, 0x0}, 662 {4, 6, 0x0}, 663 {4, 5, 0x0}, 664 {4, 1, 0x0}, 665 {4, 7, 0x0}, 666 {4, 9, 0x0}, 667 {4, 11, 0x0}, 668 {7, 8, 0x0}, 669 {7, 14, 0x0}, 670 {7, 6, 0x0}, 671 {7, 5, 0x0}, 672 {7, 4, 0x0}, 673 {7, 7, 0x0}, 674 {7, 9, 0x0}, 675 {7, 11, 0x0}, 676 {10, 8, 0x0}, 677 {10, 14, 0x0}, 678 {10, 6, 0x0}, 679 {10, 5, 0x0}, 680 {10, 4, 0x0}, 681 {10, 7, 0x0}, 682 {10, 9, 0x0}, 683 {10, 11, 0x0}, 684 {12, 8, 0x0}, 685 {12, 14, 0x0}, 686 {12, 6, 0x0}, 687 {12, 5, 0x0}, 688 {12, 4, 0x0}, 689 {12, 7, 0x0}, 690 {12, 9, 0x0}, 691 {12, 11, 0x0}, 692 }; 693 694 fp_test_args_t xssubdp_tests[] = { 695 {8, 8, 0x0}, 696 {8, 14, 0x0}, 697 {8, 6, 0x0}, 698 {8, 5, 0x0}, 699 {8, 4, 0x0}, 700 {8, 7, 0x0}, 701 {8, 9, 0x0}, 702 {8, 11, 0x0}, 703 {14, 8, 0x0}, 704 {14, 14, 0x0}, 705 {14, 6, 0x0}, 706 {14, 5, 0x0}, 707 {14, 4, 0x0}, 708 {14, 7, 0x0}, 709 {14, 9, 0x0}, 710 {14, 11, 0x0}, 711 {6, 8, 0x0}, 712 {6, 14, 0x0}, 713 {6, 6, 0x0}, 714 {6, 5, 0x0}, 715 {6, 4, 0x0}, 716 {6, 7, 0x0}, 717 {6, 9, 0x0}, 718 {6, 11, 0x0}, 719 {5, 8, 0x0}, 720 {5, 14, 0x0}, 721 {5, 6, 0x0}, 722 {5, 5, 0x0}, 723 {5, 4, 0x0}, 724 {5, 7, 0x0}, 725 {5, 9, 0x0}, 726 {5, 11, 0x0}, 727 {4, 8, 0x0}, 728 {4, 14, 0x0}, 729 {4, 6, 0x0}, 730 {4, 5, 0x0}, 731 {4, 1, 0x0}, 732 {4, 7, 0x0}, 733 {4, 9, 0x0}, 734 {4, 11, 0x0}, 735 {7, 8, 0x0}, 736 {7, 14, 0x0}, 737 {7, 6, 0x0}, 738 {7, 5, 0x0}, 739 {7, 4, 0x0}, 740 {7, 7, 0x0}, 741 {7, 9, 0x0}, 742 {7, 11, 0x0}, 743 {10, 8, 0x0}, 744 {10, 14, 0x0}, 745 {10, 6, 0x0}, 746 {10, 5, 0x0}, 747 {10, 4, 0x0}, 748 {10, 7, 0x0}, 749 {10, 9, 0x0}, 750 {10, 11, 0x0}, 751 {12, 8, 0x0}, 752 {12, 14, 0x0}, 753 {12, 6, 0x0}, 754 {12, 5, 0x0}, 755 {12, 4, 0x0}, 756 {12, 7, 0x0}, 757 {12, 9, 0x0}, 758 {12, 11, 0x0}, 759 }; 760 761 762 763 static int nb_special_fargs; 764 static double * spec_fargs; 765 766 static void build_special_fargs_table(void) 767 { 768 /* The special floating point values created below are for 769 * use in the ftdiv tests for setting the fe_flag and fg_flag, 770 * but they can also be used for other tests (e.g., xscmpudp). 771 * 772 * Note that fl_flag is 'always '1' on ppc64 Linux. 773 * 774 Entry Sign Exp fraction Special value 775 0 0 3fd 0x8000000000000ULL Positive finite number 776 1 0 404 0xf000000000000ULL ... 777 2 0 001 0x8000000b77501ULL ... 778 3 0 7fe 0x800000000051bULL ... 779 4 0 012 0x3214569900000ULL ... 780 5 0 000 0x0000000000000ULL +0.0 (+zero) 781 6 1 000 0x0000000000000ULL -0.0 (-zero) 782 7 0 7ff 0x0000000000000ULL +infinity 783 8 1 7ff 0x0000000000000ULL -infinity 784 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN 785 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN 786 11 0 7ff 0x8000000000000ULL +QNaN 787 12 1 7ff 0x8000000000000ULL -QNaN 788 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction) 789 14 1 40d 0x0650f5a07b353ULL Negative finite number 790 */ 791 792 uint64_t mant; 793 uint16_t _exp; 794 int s; 795 int i = 0; 796 797 if (spec_fargs) 798 return; 799 800 spec_fargs = malloc( 16 * sizeof(double) ); 801 802 // #0 803 s = 0; 804 _exp = 0x3fd; 805 mant = 0x8000000000000ULL; 806 register_farg(&spec_fargs[i++], s, _exp, mant); 807 808 // #1 809 s = 0; 810 _exp = 0x404; 811 mant = 0xf000000000000ULL; 812 register_farg(&spec_fargs[i++], s, _exp, mant); 813 814 /* None of the ftdiv tests succeed. 815 * FRA = value #0; FRB = value #1 816 * ea_ = -2; e_b = 5 817 * fl_flag || fg_flag || fe_flag = 100 818 */ 819 820 /************************************************* 821 * fe_flag tests 822 * 823 *************************************************/ 824 825 /* fe_flag <- 1 if FRA is a NaN 826 * FRA = value #9; FRB = value #1 827 * e_a = 1024; e_b = 5 828 * fl_flag || fg_flag || fe_flag = 101 829 */ 830 831 /* fe_flag <- 1 if FRB is a NaN 832 * FRA = value #1; FRB = value #12 833 * e_a = 5; e_b = 1024 834 * fl_flag || fg_flag || fe_flag = 101 835 */ 836 837 /* fe_flag <- 1 if e_b <= -1022 838 * FRA = value #0; FRB = value #2 839 * e_a = -2; e_b = -1022 840 * fl_flag || fg_flag || fe_flag = 101 841 * 842 */ 843 // #2 844 s = 0; 845 _exp = 0x001; 846 mant = 0x8000000b77501ULL; 847 register_farg(&spec_fargs[i++], s, _exp, mant); 848 849 /* fe_flag <- 1 if e_b >= 1021 850 * FRA = value #1; FRB = value #3 851 * e_a = 5; e_b = 1023 852 * fl_flag || fg_flag || fe_flag = 101 853 */ 854 // #3 855 s = 0; 856 _exp = 0x7fe; 857 mant = 0x800000000051bULL; 858 register_farg(&spec_fargs[i++], s, _exp, mant); 859 860 /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023 861 * Let FRA = value #3 and FRB be value #0. 862 * e_a = 1023; e_b = -2 863 * fl_flag || fg_flag || fe_flag = 101 864 */ 865 866 /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023 867 * Let FRA = value #0 above and FRB be value #3 above 868 * e_a = -2; e_b = 1023 869 * fl_flag || fg_flag || fe_flag = 101 870 */ 871 872 /* fe_flag <- 1 if FRA != 0 && e_a <= -970 873 * Let FRA = value #4 and FRB be value #0 874 * e_a = -1005; e_b = -2 875 * fl_flag || fg_flag || fe_flag = 101 876 */ 877 // #4 878 s = 0; 879 _exp = 0x012; 880 mant = 0x3214569900000ULL; 881 register_farg(&spec_fargs[i++], s, _exp, mant); 882 883 /************************************************* 884 * fg_flag tests 885 * 886 *************************************************/ 887 /* fg_flag <- 1 if FRA is an Infinity 888 * NOTE: FRA = Inf also sets fe_flag 889 * Do two tests, using values #7 and #8 (+/- Inf) for FRA. 890 * Test 1: 891 * Let FRA be value #7 and FRB be value #1 892 * e_a = 1024; e_b = 5 893 * fl_flag || fg_flag || fe_flag = 111 894 * 895 * Test 2: 896 * Let FRA be value #8 and FRB be value #1 897 * e_a = 1024; e_b = 5 898 * fl_flag || fg_flag || fe_flag = 111 899 * 900 */ 901 902 /* fg_flag <- 1 if FRB is an Infinity 903 * NOTE: FRB = Inf also sets fe_flag 904 * Let FRA be value #1 and FRB be value #7 905 * e_a = 5; e_b = 1024 906 * fl_flag || fg_flag || fe_flag = 111 907 */ 908 909 /* fg_flag <- 1 if FRB is denormalized 910 * NOTE: e_b < -1022 ==> fe_flag <- 1 911 * Let FRA be value #0 and FRB be value #13 912 * e_a = -2; e_b = -1023 913 * fl_flag || fg_flag || fe_flag = 111 914 */ 915 916 /* fg_flag <- 1 if FRB is +zero 917 * NOTE: FRA = Inf also sets fe_flag 918 * Let FRA = val #5; FRB = val #5 919 * ea_ = -1023; e_b = -1023 920 * fl_flag || fg_flag || fe_flag = 111 921 */ 922 923 /* fg_flag <- 1 if FRB is -zero 924 * NOTE: FRA = Inf also sets fe_flag 925 * Let FRA = val #5; FRB = val #6 926 * ea_ = -1023; e_b = -1023 927 * fl_flag || fg_flag || fe_flag = 111 928 */ 929 930 /* Special values */ 931 /* +0.0 : 0 0x000 0x0000000000000 */ 932 // #5 933 s = 0; 934 _exp = 0x000; 935 mant = 0x0000000000000ULL; 936 register_farg(&spec_fargs[i++], s, _exp, mant); 937 938 /* -0.0 : 1 0x000 0x0000000000000 */ 939 // #6 940 s = 1; 941 _exp = 0x000; 942 mant = 0x0000000000000ULL; 943 register_farg(&spec_fargs[i++], s, _exp, mant); 944 945 /* +infinity : 0 0x7FF 0x0000000000000 */ 946 // #7 947 s = 0; 948 _exp = 0x7FF; 949 mant = 0x0000000000000ULL; 950 register_farg(&spec_fargs[i++], s, _exp, mant); 951 952 /* -infinity : 1 0x7FF 0x0000000000000 */ 953 // #8 954 s = 1; 955 _exp = 0x7FF; 956 mant = 0x0000000000000ULL; 957 register_farg(&spec_fargs[i++], s, _exp, mant); 958 959 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */ 960 // #9 961 s = 0; 962 _exp = 0x7FF; 963 mant = 0x7FFFFFFFFFFFFULL; 964 register_farg(&spec_fargs[i++], s, _exp, mant); 965 966 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */ 967 // #10 968 s = 1; 969 _exp = 0x7FF; 970 mant = 0x7FFFFFFFFFFFFULL; 971 register_farg(&spec_fargs[i++], s, _exp, mant); 972 973 /* +QNaN : 0 0x7FF 0x8000000000000 */ 974 // #11 975 s = 0; 976 _exp = 0x7FF; 977 mant = 0x8000000000000ULL; 978 register_farg(&spec_fargs[i++], s, _exp, mant); 979 980 /* -QNaN : 1 0x7FF 0x8000000000000 */ 981 // #12 982 s = 1; 983 _exp = 0x7FF; 984 mant = 0x8000000000000ULL; 985 register_farg(&spec_fargs[i++], s, _exp, mant); 986 987 /* denormalized value */ 988 // #13 989 s = 1; 990 _exp = 0x000; 991 mant = 0x8340000078000ULL; 992 register_farg(&spec_fargs[i++], s, _exp, mant); 993 994 /* Negative finite number */ 995 // #14 996 s = 1; 997 _exp = 0x40d; 998 mant = 0x0650f5a07b353ULL; 999 register_farg(&spec_fargs[i++], s, _exp, mant); 1000 1001 nb_special_fargs = i; 1002 } 1003 1004 1005 struct test_table 1006 { 1007 test_func_t test_category; 1008 char * name; 1009 }; 1010 1011 struct p7_fp_test 1012 { 1013 test_func_t test_func; 1014 const char *name; 1015 int single; // 1=single precision result; 0=double precision result 1016 }; 1017 1018 typedef enum { 1019 VX_FP_CMP, 1020 VX_FP_SMA, 1021 VX_FP_SMS, 1022 VX_FP_SNMA, 1023 VX_FP_OTHER 1024 } vx_fp_test_type; 1025 1026 struct vx_fp_test 1027 { 1028 test_func_t test_func; 1029 const char *name; 1030 fp_test_args_t * targs; 1031 int num_tests; 1032 vx_fp_test_type test_type; 1033 }; 1034 1035 struct xs_conv_test 1036 { 1037 test_func_t test_func; 1038 const char *name; 1039 int num_tests; 1040 }; 1041 1042 typedef enum { 1043 VSX_LOAD =1, 1044 VSX_LOAD_SPLAT, 1045 VSX_STORE 1046 } vsx_ldst_type; 1047 1048 struct ldst_test 1049 { 1050 test_func_t test_func; 1051 const char *name; 1052 void * base_addr; 1053 uint32_t offset; 1054 int num_words_to_process; 1055 vsx_ldst_type type; 1056 }; 1057 1058 typedef enum { 1059 VSX_AND = 1, 1060 VSX_XOR, 1061 VSX_ANDC, 1062 VSX_OR, 1063 VSX_NOR 1064 } vsx_log_op; 1065 1066 struct vsx_logic_test 1067 { 1068 test_func_t test_func; 1069 const char *name; 1070 vsx_log_op op; 1071 }; 1072 1073 struct vsx_move_test 1074 { 1075 test_func_t test_func; 1076 const char *name; 1077 }; 1078 1079 struct vsx_permute_test 1080 { 1081 test_func_t test_func; 1082 const char *name; 1083 unsigned int xa[4]; 1084 unsigned int xb[4]; 1085 }; 1086 1087 static vector unsigned int vec_out, vec_inA, vec_inB; 1088 1089 static void test_lxsdx(void) 1090 { 1091 __asm__ __volatile__ ("lxsdx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 1092 } 1093 1094 static void 1095 test_lxvd2x(void) 1096 { 1097 __asm__ __volatile__ ("lxvd2x %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 1098 } 1099 1100 static void test_lxvdsx(void) 1101 { 1102 __asm__ __volatile__ ("lxvdsx %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 1103 } 1104 1105 static void test_lxvw4x(void) 1106 { 1107 __asm__ __volatile__ ("lxvw4x %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15)); 1108 } 1109 1110 static void test_stxsdx(void) 1111 { 1112 __asm__ __volatile__ ("stxsdx %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); 1113 } 1114 1115 static void test_stxvd2x(void) 1116 { 1117 __asm__ __volatile__ ("stxvd2x %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); 1118 } 1119 1120 static void test_stxvw4x(void) 1121 { 1122 __asm__ __volatile__ ("stxvw4x %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15)); 1123 } 1124 1125 static void test_xxlxor(void) 1126 { 1127 __asm__ __volatile__ ("xxlxor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1128 } 1129 1130 static void test_xxlor(void) 1131 { 1132 __asm__ __volatile__ ("xxlor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1133 } 1134 1135 static void test_xxlnor(void) 1136 { 1137 __asm__ __volatile__ ("xxlnor %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1138 } 1139 1140 static void test_xxland(void) 1141 { 1142 __asm__ __volatile__ ("xxland %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1143 } 1144 1145 static void test_xxlandc(void) 1146 { 1147 __asm__ __volatile__ ("xxlandc %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1148 } 1149 1150 static void test_xxmrghw(void) 1151 { 1152 __asm__ __volatile__ ("xxmrghw %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1153 } 1154 1155 static void test_xxmrglw(void) 1156 { 1157 __asm__ __volatile__ ("xxmrglw %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1158 } 1159 1160 static void test_xxpermdi_00(void) 1161 { 1162 __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1163 } 1164 1165 static void test_xxpermdi_01(void) 1166 { 1167 __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1168 } 1169 1170 static void test_xxpermdi_10(void) 1171 { 1172 __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1173 } 1174 1175 static void test_xxpermdi_11(void) 1176 { 1177 __asm__ __volatile__ ("xxpermdi %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1178 } 1179 1180 static void test_xxsldwi_0(void) 1181 { 1182 __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1183 } 1184 1185 static void test_xxsldwi_1(void) 1186 { 1187 __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1188 } 1189 1190 static void test_xxsldwi_2(void) 1191 { 1192 __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1193 } 1194 1195 static void test_xxsldwi_3(void) 1196 { 1197 __asm__ __volatile__ ("xxsldwi %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1198 } 1199 1200 static void test_fcfids (void) 1201 { 1202 __asm__ __volatile__ ("fcfids %0, %1" : "=f" (f17): "d" (f14)); 1203 } 1204 1205 static void test_fcfidus (void) 1206 { 1207 __asm__ __volatile__ ("fcfidus %0, %1" : "=f" (f17): "d" (f14)); 1208 } 1209 1210 static void test_fcfidu (void) 1211 { 1212 __asm__ __volatile__ ("fcfidu %0, %1" : "=f" (f17): "d" (f14)); 1213 } 1214 1215 static void test_xsabsdp (void) 1216 { 1217 __asm__ __volatile__ ("xsabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1218 } 1219 1220 static void test_xscpsgndp (void) 1221 { 1222 __asm__ __volatile__ ("xscpsgndp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1223 } 1224 1225 static void test_xsnabsdp (void) 1226 { 1227 __asm__ __volatile__ ("xsnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1228 } 1229 1230 static void test_xsnegdp (void) 1231 { 1232 __asm__ __volatile__ ("xsnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1233 } 1234 1235 static int do_cmpudp; 1236 static void test_xscmp (void) 1237 { 1238 if (do_cmpudp) 1239 __asm__ __volatile__ ("xscmpudp cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB)); 1240 else 1241 __asm__ __volatile__ ("xscmpodp cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB)); 1242 } 1243 1244 static void test_xsadddp(void) 1245 { 1246 __asm__ __volatile__ ("xsadddp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1247 } 1248 1249 static void test_xsdivdp(void) 1250 { 1251 __asm__ __volatile__ ("xsdivdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1252 } 1253 1254 static int do_adp; 1255 static void test_xsmadd(void) 1256 { 1257 if (do_adp) 1258 __asm__ __volatile__ ("xsmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1259 else 1260 __asm__ __volatile__ ("xsmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1261 } 1262 1263 static void test_xsmsub(void) 1264 { 1265 if (do_adp) 1266 __asm__ __volatile__ ("xsmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1267 else 1268 __asm__ __volatile__ ("xsmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1269 } 1270 1271 static void test_xsnmadd(void) 1272 { 1273 if (do_adp) 1274 __asm__ __volatile__ ("xsnmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1275 else 1276 __asm__ __volatile__ ("xsnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1277 } 1278 1279 static void test_xsmuldp(void) 1280 { 1281 __asm__ __volatile__ ("xsmuldp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1282 } 1283 1284 static void test_xssubdp(void) 1285 { 1286 __asm__ __volatile__ ("xssubdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB)); 1287 } 1288 1289 static void test_xscvdpsxds (void) 1290 { 1291 __asm__ __volatile__ ("xscvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1292 } 1293 1294 static void test_xscvsxddp (void) 1295 { 1296 __asm__ __volatile__ ("xscvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1297 } 1298 1299 static void test_xscvuxddp (void) 1300 { 1301 __asm__ __volatile__ ("xscvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB)); 1302 } 1303 1304 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0, 1305 0, 0, 0, 0 }; 1306 1307 #define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0]) 1308 #define NUM_VSTG_VECS (NUM_VSTG_INTS/4) 1309 1310 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x01234567, 1311 0x89abcdef, 1312 0x00112233, 1313 0x44556677, 1314 0x8899aabb, 1315 0x91929394, 1316 0xa1a2a3a4, 1317 0xb1b2b3b4, 1318 0xc1c2c3c4, 1319 0xd1d2d3d4, 1320 0x7a6b5d3e 1321 }; 1322 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0]) 1323 #define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4) 1324 1325 static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD }, 1326 { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD }, 1327 { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD }, 1328 { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD }, 1329 { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT }, 1330 { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT }, 1331 { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD }, 1332 { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD }, 1333 { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE }, 1334 { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE }, 1335 { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE }, 1336 { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE }, 1337 { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE }, 1338 { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE }, 1339 { NULL, NULL, NULL, 0, 0, 0 } }; 1340 1341 static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR }, 1342 { &test_xxlor, "xxlor", VSX_OR } , 1343 { &test_xxlnor, "xxlnor", VSX_NOR }, 1344 { &test_xxland, "xxland", VSX_AND }, 1345 { &test_xxlandc, "xxlandc", VSX_ANDC }, 1346 { NULL, NULL, 0}}; 1347 1348 static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" }, 1349 { &test_xscpsgndp, "xscpsgndp" }, 1350 { &test_xsnabsdp, "xsnabsdp" }, 1351 { &test_xsnegdp, "xsnegdp" }, 1352 { NULL, NULL } 1353 1354 }; 1355 1356 static permute_test_t permute_tests[] = 1357 { 1358 { &test_xxmrghw, "xxmrghw", 1359 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1360 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1361 }, 1362 { &test_xxmrghw, "xxmrghw", 1363 { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */ 1364 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */ 1365 }, 1366 { &test_xxmrglw, "xxmrglw", 1367 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1368 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1369 }, 1370 { &test_xxmrglw, "xxmrglw", 1371 { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */ 1372 { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */ 1373 }, 1374 { &test_xxpermdi_00, "xxpermdi DM=00", 1375 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1376 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1377 }, 1378 { &test_xxpermdi_01, "xxpermdi DM=01", 1379 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1380 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1381 }, 1382 { &test_xxpermdi_10, "xxpermdi DM=10", 1383 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1384 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1385 }, 1386 { &test_xxpermdi_11, "xxpermdi DM=11", 1387 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1388 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1389 }, 1390 { &test_xxsldwi_0, "xxsldwi SHW=0", 1391 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1392 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1393 }, 1394 { &test_xxsldwi_1, "xxsldwi SHW=1", 1395 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1396 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1397 }, 1398 { &test_xxsldwi_2, "xxsldwi SHW=2", 1399 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1400 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1401 }, 1402 { &test_xxsldwi_3, "xxsldwi SHW=3", 1403 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */ 1404 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */ 1405 }, 1406 { NULL, NULL } 1407 }; 1408 1409 static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 }, 1410 { &test_fcfidus, "fcfidus", 1 }, 1411 { &test_fcfidu, "fcfidu", 1 }, 1412 { NULL, NULL, 0 }, 1413 1414 }; 1415 1416 static vx_fp_test_t vx_fp_tests[] = { 1417 { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP}, 1418 { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER}, 1419 { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER}, 1420 { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA}, 1421 { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS}, 1422 { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA}, 1423 { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER}, 1424 { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER}, 1425 { NULL, NULL, NULL, 0, 0 } 1426 }; 1427 1428 static xs_conv_test_t xs_conv_tests[] = { 1429 { &test_xscvdpsxds, "xscvdpsxds", 15}, 1430 { &test_xscvsxddp, "xscvsxddp", 15}, 1431 { &test_xscvuxddp, "xscvuxddp", 15}, 1432 { NULL, NULL, 0} 1433 }; 1434 1435 #ifdef __powerpc64__ 1436 static void test_ldbrx(void) 1437 { 1438 int i; 1439 HWord_t reg_out; 1440 unsigned char * byteIn, * byteOut; 1441 r14 = (HWord_t)viargs; 1442 // Just try the instruction an arbitrary number of times at different r15 offsets. 1443 for (i = 0; i < 3; i++) { 1444 int j, k; 1445 reg_out = 0; 1446 r15 = i * 4; 1447 __asm__ __volatile__ ("ldbrx %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15)); 1448 byteIn = ((unsigned char *)(r14 + r15)); 1449 byteOut = (unsigned char *)®_out; 1450 1451 printf("ldbrx:"); 1452 for (k = 0; k < 8; k++) { 1453 printf( " %02x", (byteIn[k])); 1454 } 1455 printf(" (reverse) =>"); 1456 for (j = 0; j < 8; j++) { 1457 printf( " %02x", (byteOut[j])); 1458 } 1459 printf("\n"); 1460 } 1461 printf( "\n" ); 1462 } 1463 1464 static void 1465 test_popcntd(void) 1466 { 1467 uint64_t res; 1468 unsigned long long src = 0x9182736405504536ULL; 1469 r14 = src; 1470 __asm__ __volatile__ ("popcntd %0, %1" : "=r" (res): "r" (r14)); 1471 printf("popcntd: 0x%llx => %d\n", src, (int)res); 1472 printf( "\n" ); 1473 } 1474 #endif 1475 1476 static void 1477 test_lfiwzx(void) 1478 { 1479 unsigned int i; 1480 unsigned int * src; 1481 uint64_t reg_out; 1482 r14 = (HWord_t)viargs; 1483 // Just try the instruction an arbitrary number of times at different r15 offsets. 1484 for (i = 0; i < 3; i++) { 1485 reg_out = 0; 1486 r15 = i * 4; 1487 __asm__ __volatile__ ("lfiwzx %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15)); 1488 src = ((unsigned int *)(r14 + r15)); 1489 printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out); 1490 1491 } 1492 printf( "\n" ); 1493 } 1494 1495 static void test_vx_fp_ops(void) 1496 { 1497 1498 test_func_t func; 1499 int k; 1500 char * test_name = (char *)malloc(20); 1501 k = 0; 1502 1503 build_special_fargs_table(); 1504 while ((func = vx_fp_tests[k].test_func)) { 1505 int i, condreg, repeat = 0; 1506 unsigned int flags; 1507 unsigned long long * frap, * frbp, * dst; 1508 vx_fp_test_t test_group = vx_fp_tests[k]; 1509 vx_fp_test_type test_type = test_group.test_type; 1510 1511 switch (test_type) { 1512 case VX_FP_CMP: 1513 strcpy(test_name, "xscmp"); 1514 if (!repeat) { 1515 repeat = 1; 1516 strcat(test_name, "udp"); 1517 do_cmpudp = 1; 1518 } 1519 break; 1520 case VX_FP_SMA: 1521 case VX_FP_SMS: 1522 case VX_FP_SNMA: 1523 if (test_type == VX_FP_SMA) 1524 strcpy(test_name, "xsmadd"); 1525 else if (test_type == VX_FP_SMS) 1526 strcpy(test_name, "xsmsub"); 1527 else 1528 strcpy(test_name, "xsnmadd"); 1529 if (!repeat) { 1530 repeat = 1; 1531 strcat(test_name, "adp"); 1532 do_adp = 1; 1533 } 1534 break; 1535 case VX_FP_OTHER: 1536 strcpy(test_name, test_group.name); 1537 break; 1538 default: 1539 printf("ERROR: Invalid VX FP test type %d\n", test_type); 1540 exit(1); 1541 } 1542 1543 again: 1544 for (i = 0; i < test_group.num_tests; i++) { 1545 unsigned int * inA, * inB, * pv; 1546 double * dpA = (double *)&vec_inA; 1547 double * dpB = (double *)&vec_inB; 1548 double * dpT = (double *)&vec_out; 1549 1550 fp_test_args_t aTest = test_group.targs[i]; 1551 inA = (unsigned int *)&spec_fargs[aTest.fra_idx]; 1552 inB = (unsigned int *)&spec_fargs[aTest.frb_idx]; 1553 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1554 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1555 // Only need to copy one doubleword into each vector's element 0 1556 if (isLE) { 1557 // With LE, vector element 0 is the second doubleword from the left 1558 memset(dpA, 0, 8); 1559 memset(dpB, 0, 8); 1560 dpA++; 1561 dpB++; 1562 } 1563 memcpy(dpA, inA, 8); 1564 memcpy(dpB, inB, 8); 1565 1566 switch (test_type) { 1567 case VX_FP_CMP: 1568 SET_FPSCR_ZERO; 1569 SET_CR_XER_ZERO; 1570 (*func)(); 1571 GET_CR(flags); 1572 condreg = (flags & 0x0f000000) >> 24; 1573 printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg); 1574 // printf("\tFRA: %e; FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]); 1575 if ( condreg != aTest.cr_flags) { 1576 printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg); 1577 } 1578 break; 1579 case VX_FP_SMA: 1580 case VX_FP_SMS: 1581 case VX_FP_SNMA: 1582 case VX_FP_OTHER: 1583 { 1584 int idx; 1585 unsigned long long vsr_XT; 1586 pv = (unsigned int *)&vec_out; 1587 // clear vec_out 1588 for (idx = 0; idx < 4; idx++, pv++) 1589 *pv = 0; 1590 1591 if (test_type != VX_FP_OTHER) { 1592 /* Then we need a third src argument, which is stored in element 0 of 1593 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>mdp cases, VSX[XT] holds 1594 * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds 1595 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test 1596 * data (input args) contain only two inputs, so I arbitrarily 1597 * use spec_fargs elements 4 and 14 (alternating) for the third source 1598 * argument. We can use the same input data for a given pair of 1599 * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus 1600 * the expected result should be the same. 1601 */ 1602 int extra_arg_idx; 1603 if (i % 2) 1604 extra_arg_idx = 4; 1605 else 1606 extra_arg_idx = 14; 1607 1608 if (repeat) { 1609 /* We're on the first time through of one of the VX_FP_SMx 1610 * test types, meaning we're testing a xs<ZZZ>adp case, thus we 1611 * have to swap inputs as described above: 1612 * src2 <= VSX[XT] 1613 * src3 <= VSX[XB] 1614 */ 1615 if (isLE) 1616 dpT++; 1617 memcpy(dpT, inB, 8); // src2 1618 memcpy(dpB, &spec_fargs[extra_arg_idx], 8); //src3 1619 frbp = (unsigned long long *)&spec_fargs[extra_arg_idx]; 1620 } else { 1621 // Don't need to init src2, as it's done before the switch() 1622 if (isLE) 1623 dpT++; 1624 memcpy(dpT, &spec_fargs[extra_arg_idx], 8); //src3 1625 } 1626 memcpy(&vsr_XT, dpT, 8); 1627 } 1628 1629 (*func)(); 1630 dst = (unsigned long long *) &vec_out; 1631 if (isLE) 1632 dst++; 1633 if (test_type == VX_FP_OTHER) 1634 printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst); 1635 else 1636 printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i, 1637 test_name, vsr_XT, *frap, *frbp, *dst ); 1638 1639 /* 1640 { 1641 // Debug code. Keep this block commented out except when debugging. 1642 double result, expected; 1643 memcpy(&result, dst, 8); 1644 memcpy(&expected, &aTest.dp_bin_result, 8); 1645 printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n", 1646 spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx], 1647 expected, result ); 1648 } 1649 */ 1650 break; 1651 } 1652 } 1653 1654 1655 } 1656 printf( "\n" ); 1657 1658 if (repeat) { 1659 repeat = 0; 1660 switch (test_type) { 1661 case VX_FP_CMP: 1662 strcpy(test_name, "xscmp"); 1663 strcat(test_name, "odp"); 1664 do_cmpudp = 0; 1665 break; 1666 case VX_FP_SMA: 1667 case VX_FP_SMS: 1668 case VX_FP_SNMA: 1669 if (test_type == VX_FP_SMA) 1670 strcpy(test_name, "xsmadd"); 1671 else if (test_type == VX_FP_SMS) 1672 strcpy(test_name, "xsmsub"); 1673 else 1674 strcpy(test_name, "xsnmadd"); 1675 strcat(test_name, "mdp"); 1676 do_adp = 0; 1677 break; 1678 case VX_FP_OTHER: 1679 break; 1680 } 1681 goto again; 1682 } 1683 k++; 1684 } 1685 printf( "\n" ); 1686 free(test_name); 1687 } 1688 1689 static void test_xs_conv_ops(void) 1690 { 1691 1692 test_func_t func; 1693 int k = 0; 1694 double * dpB = (double *)&vec_inB; 1695 if (isLE) { 1696 memset(dpB, 0, 8); 1697 dpB++; 1698 } 1699 1700 build_special_fargs_table(); 1701 while ((func = xs_conv_tests[k].test_func)) { 1702 int i; 1703 unsigned long long * frbp, * dst; 1704 xs_conv_test_t test_group = xs_conv_tests[k]; 1705 for (i = 0; i < test_group.num_tests; i++) { 1706 unsigned int * inB, * pv; 1707 int idx; 1708 inB = (unsigned int *)&spec_fargs[i]; 1709 frbp = (unsigned long long *)&spec_fargs[i]; 1710 1711 memcpy(dpB, inB, 8); 1712 pv = (unsigned int *)&vec_out; 1713 // clear vec_out 1714 for (idx = 0; idx < 4; idx++, pv++) 1715 *pv = 0; 1716 (*func)(); 1717 dst = (unsigned long long *) &vec_out; 1718 if (isLE) 1719 dst++; 1720 printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst); 1721 1722 } 1723 k++; 1724 printf("\n"); 1725 } 1726 printf( "\n" ); 1727 } 1728 1729 static void do_load_test(ldst_test_t loadTest) 1730 { 1731 test_func_t func; 1732 unsigned int *src, *dst; 1733 int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0; 1734 int i, j, m, k; 1735 i = j = 0; 1736 1737 func = loadTest.test_func; 1738 for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) { 1739 int again; 1740 j = 0; 1741 r14 += i * 16; 1742 do { 1743 unsigned int * pv = (unsigned int *)&vec_out; 1744 int idx; 1745 // clear vec_out 1746 for (idx = 0; idx < 4; idx++, pv+=idx) 1747 *pv = 0; 1748 1749 again = 0; 1750 r15 = j; 1751 1752 // execute test insn 1753 (*func)(); 1754 1755 src = (unsigned int*) (((unsigned char *)r14) + j); 1756 dst = (unsigned int*) &vec_out; 1757 1758 printf( "%s:", loadTest.name); 1759 for (m = 0; m < loadTest.num_words_to_process; m++) { 1760 printf( " %08x", src[splat ? m % 2 : m]); 1761 } 1762 printf( " =>"); 1763 m = 0; 1764 k = loadTest.num_words_to_process; 1765 if (isLE) { 1766 if (loadTest.num_words_to_process == 2) { 1767 m = 2; 1768 k += 2; 1769 } 1770 } 1771 1772 for (; m < k; m++) { 1773 printf( " %08x", dst[m]); 1774 } 1775 printf("\n"); 1776 if (j == 0 && loadTest.offset) { 1777 again = 1; 1778 j += loadTest.offset; 1779 } 1780 } 1781 while (again); 1782 } 1783 } 1784 1785 static void 1786 do_store_test ( ldst_test_t storeTest ) 1787 { 1788 test_func_t func; 1789 unsigned int *src, *dst; 1790 int m; 1791 1792 func = storeTest.test_func; 1793 r14 = (HWord_t) storeTest.base_addr; 1794 r15 = (HWord_t) storeTest.offset; 1795 unsigned int * pv = (unsigned int *) storeTest.base_addr; 1796 int idx; 1797 // clear out storage destination 1798 for (idx = 0; idx < 4; idx++, pv += idx) 1799 *pv = 0; 1800 1801 memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char)); 1802 1803 // execute test insn 1804 (*func)(); 1805 src = &viargs[0]; 1806 dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset); 1807 1808 printf( "%s:", storeTest.name ); 1809 for (m = 0; m < storeTest.num_words_to_process; m++) { 1810 printf( " %08x", src[m] ); 1811 } 1812 printf( " =>" ); 1813 for (m = 0; m < storeTest.num_words_to_process; m++) { 1814 printf( " %08x", dst[m] ); 1815 } 1816 printf( "\n" ); 1817 } 1818 1819 1820 static void test_ldst(void) 1821 { 1822 int k = 0; 1823 1824 while (ldst_tests[k].test_func) { 1825 if (ldst_tests[k].type == VSX_STORE) 1826 do_store_test(ldst_tests[k]); 1827 else 1828 do_load_test(ldst_tests[k]); 1829 k++; 1830 printf("\n"); 1831 } 1832 } 1833 1834 static void test_ftdiv(void) 1835 { 1836 int i, num_tests, crx; 1837 unsigned int flags; 1838 unsigned long long * frap, * frbp; 1839 build_special_fargs_table(); 1840 1841 num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0]; 1842 1843 for (i = 0; i < num_tests; i++) { 1844 fp_test_args_t aTest = ftdiv_tests[i]; 1845 f14 = spec_fargs[aTest.fra_idx]; 1846 f15 = spec_fargs[aTest.frb_idx]; 1847 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx]; 1848 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx]; 1849 SET_FPSCR_ZERO; 1850 SET_CR_XER_ZERO; 1851 __asm__ __volatile__ ("ftdiv cr1, %0, %1" : : "d" (f14), "d" (f15)); 1852 GET_CR(flags); 1853 crx = (flags & 0x0f000000) >> 24; 1854 printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx); 1855 // printf("\tFRA: %e; FRB: %e\n", f14, f15); 1856 if ( crx != aTest.cr_flags) { 1857 printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx); 1858 } 1859 } 1860 printf( "\n" ); 1861 } 1862 1863 1864 static void test_p7_fpops ( void ) 1865 { 1866 int k = 0; 1867 test_func_t func; 1868 1869 build_fargs_table(); 1870 while ((func = fp_tests[k].test_func)) { 1871 float res; 1872 double resd; 1873 unsigned long long u0; 1874 int i; 1875 int res32 = strcmp(fp_tests[k].name, "fcfidu"); 1876 1877 for (i = 0; i < nb_fargs; i++) { 1878 u0 = *(unsigned long long *) (&fargs[i]); 1879 f14 = fargs[i]; 1880 (*func)(); 1881 if (res32) { 1882 res = f17; 1883 printf( "%s %016llx => (raw sp) %08x)", 1884 fp_tests[k].name, u0, *((unsigned int *)&res)); 1885 } else { 1886 resd = f17; 1887 printf( "%s %016llx => (raw sp) %016llx)", 1888 fp_tests[k].name, u0, *(unsigned long long *)(&resd)); 1889 } 1890 printf( "\n" ); 1891 } 1892 1893 k++; 1894 printf( "\n" ); 1895 } 1896 } 1897 1898 static void test_vsx_logic(void) 1899 { 1900 logic_test_t aTest; 1901 test_func_t func; 1902 int k; 1903 k = 0; 1904 1905 while ((func = logic_tests[k].test_func)) { 1906 unsigned int * pv; 1907 int startA, startB; 1908 unsigned int * inA, * inB, * dst; 1909 int idx, i; 1910 startA = 0; 1911 aTest = logic_tests[k]; 1912 for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) { 1913 startB = startA + 4; 1914 pv = (unsigned int *)&vec_out; 1915 inA = &viargs[startA]; 1916 inB = &viargs[startB]; 1917 memcpy(&vec_inA, inA, sizeof(vector unsigned char)); 1918 memcpy(&vec_inB, inB, sizeof(vector unsigned char)); 1919 // clear vec_out 1920 for (idx = 0; idx < 4; idx++, pv++) 1921 *pv = 0; 1922 1923 // execute test insn 1924 (*func)(); 1925 dst = (unsigned int*) &vec_out; 1926 1927 printf( "%s:", aTest.name); 1928 printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name); 1929 printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]); 1930 printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]); 1931 1932 } 1933 k++; 1934 } 1935 printf( "\n" ); 1936 } 1937 1938 static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) = 1939 { 1940 { 0x0123456789abcdefULL, 0x0011223344556677ULL}, 1941 { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL}, 1942 { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL} 1943 }; 1944 #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0]) 1945 1946 static void test_move_ops (void) 1947 { 1948 move_test_t aTest; 1949 test_func_t func; 1950 int k; 1951 k = 0; 1952 1953 while ((func = move_tests[k].test_func)) { 1954 unsigned int * pv; 1955 int startA, startB; 1956 unsigned long long * inA, * inB, * dst; 1957 int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0); 1958 int idx; 1959 inA = NULL; 1960 aTest = move_tests[k]; 1961 for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) { 1962 inB = (unsigned long long *)&vec_args[startB]; 1963 memcpy(&vec_inB, inB, sizeof(vector unsigned char)); 1964 if (isLE) 1965 inB++; 1966 startA = 0; 1967 repeat: 1968 if (use_vecA) { 1969 inA = (unsigned long long *)&vec_args[startA]; 1970 memcpy(&vec_inA, inA, sizeof(vector unsigned char)); 1971 startA++; 1972 } 1973 pv = (unsigned int *)&vec_out; 1974 // clear vec_out 1975 for (idx = 0; idx < 4; idx++, pv++) 1976 *pv = 0; 1977 1978 // execute test insn 1979 (*func)(); 1980 dst = (unsigned long long *) &vec_out; 1981 if (isLE) { 1982 dst++; 1983 inA++; 1984 } 1985 1986 printf( "%s:", aTest.name); 1987 if (use_vecA) 1988 printf( " X[A]: %016llx ", *inA); 1989 printf( " X[B]: %016llx", *inB); 1990 printf(" => %016llx\n", *dst); 1991 1992 if (use_vecA && startA < NUM_VEC_ARGS_LONGS) 1993 goto repeat; 1994 } 1995 k++; 1996 printf( "\n" ); 1997 } 1998 } 1999 2000 static void test_permute_ops (void) 2001 { 2002 permute_test_t *aTest; 2003 unsigned int *dst = (unsigned int *) &vec_out; 2004 2005 for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++) 2006 { 2007 /* Grab test input and clear output vector. */ 2008 memcpy(&vec_inA, aTest->xa, sizeof(vec_inA)); 2009 memcpy(&vec_inB, aTest->xb, sizeof(vec_inB)); 2010 memset(dst, 0, sizeof(vec_out)); 2011 2012 /* execute test insn */ 2013 aTest->test_func(); 2014 2015 printf( "%s:\n", aTest->name); 2016 printf( " XA[%08x,%08x,%08x,%08x]\n", 2017 aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]); 2018 printf( " XB[%08x,%08x,%08x,%08x]\n", 2019 aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]); 2020 printf( " => XT[%08x,%08x,%08x,%08x]\n", 2021 dst[0], dst[1], dst[2], dst[3]); 2022 2023 } 2024 printf( "\n" ); 2025 } 2026 2027 static test_table_t all_tests[] = { { &test_ldst, 2028 "Test VSX load/store instructions" }, 2029 { &test_vsx_logic, 2030 "Test VSX logic instructions" }, 2031 #ifdef __powerpc64__ 2032 { &test_ldbrx, 2033 "Test ldbrx instruction" }, 2034 { &test_popcntd, 2035 "Test popcntd instruction" }, 2036 #endif 2037 { &test_lfiwzx, 2038 "Test lfiwzx instruction" }, 2039 { &test_p7_fpops, 2040 "Test P7 floating point convert instructions"}, 2041 { &test_ftdiv, 2042 "Test ftdiv instruction" }, 2043 { &test_move_ops, 2044 "Test VSX move instructions"}, 2045 { &test_permute_ops, 2046 "Test VSX permute instructions"}, 2047 { &test_vx_fp_ops, 2048 "Test VSX floating point instructions"}, 2049 { &test_xs_conv_ops, 2050 "Test VSX scalar integer conversion instructions" }, 2051 { NULL, NULL } 2052 }; 2053 #endif // HAS_VSX 2054 2055 int main(int argc, char *argv[]) 2056 { 2057 #ifdef HAS_VSX 2058 2059 test_table_t aTest; 2060 test_func_t func; 2061 int i = 0; 2062 2063 while ((func = all_tests[i].test_category)) { 2064 aTest = all_tests[i]; 2065 printf( "%s\n", aTest.name ); 2066 (*func)(); 2067 i++; 2068 } 2069 2070 #endif // HAS _VSX 2071 2072 return 0; 2073 } 2074