Home | History | Annotate | Download | only in ppc64
      1 /*  Copyright (C) 2011 IBM
      2 
      3  Author: Maynard Johnson <maynardj (at) us.ibm.com>
      4 
      5  This program is free software; you can redistribute it and/or
      6  modify it under the terms of the GNU General Public License as
      7  published by the Free Software Foundation; either version 2 of the
      8  License, or (at your option) any later version.
      9 
     10  This program is distributed in the hope that it will be useful, but
     11  WITHOUT ANY WARRANTY; without even the implied warranty of
     12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  General Public License for more details.
     14 
     15  You should have received a copy of the GNU General Public License
     16  along with this program; if not, write to the Free Software
     17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18  02111-1307, USA.
     19 
     20  The GNU General Public License is contained in the file COPYING.
     21  */
     22 
     23 #ifdef HAS_VSX
     24 
     25 #include <stdio.h>
     26 #include <stdint.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <malloc.h>
     30 #include <altivec.h>
     31 
/* HWord_t is an unsigned integer type chosen at compile time: 64 bits wide
 * when __powerpc64__ is defined, 32 bits wide otherwise. */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 when VGP_ppc64le_linux is defined and 0 in every other
 * configuration. */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
     43 
/* File-scope variables bound to specific registers with the GCC
 * "register ... __asm__" syntax: r14-r17 are HWord_t wide, fr14-fr17 are
 * accessed as double. */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* volatile, so every read and write is performed as written.
 * NOTE(review): no user of cond_reg is visible in this part of the file. */
static volatile unsigned int cond_reg;
     54 
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Issue "mtcr" with _arg as the input operand, declaring every CR field
 * clobbered.  Note: the expansion already ends in ';'. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Issue "mtxer" with _arg as the input operand, declaring "xer" clobbered.
 * Note: the expansion already ends in ';'. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Issue "mfcr" and place the result in _lval.  Unlike the SET_* macros,
 * the expansion has no trailing ';'. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Issue "mfxer" and place the result in _lval (no trailing ';'). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR first, then XER, into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* SET_CR with a zero operand. */
#define SET_CR_ZERO \
      SET_CR(0)

/* SET_XER with a zero operand. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Zero CR first, then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue "mtfsf 0xFF" with a double holding 0.0 as the source operand.
 * No clobbers are declared on this asm statement. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
     85 
     86 
/* Pointer to a function that takes no arguments and returns nothing
 * (going by its name, the type of a test routine). */
typedef void (*test_func_t)(void);
/* Short aliases for struct types.  The struct definitions themselves are
 * not in this part of the file. */
typedef struct ldst_test ldst_test_t;
typedef struct vsx_logic_test logic_test_t;
typedef struct xs_conv_test xs_conv_test_t;
typedef struct p7_fp_test fp_test_t;
typedef struct vx_fp_test vx_fp_test_t;
typedef struct vsx_move_test move_test_t;
typedef struct vsx_permute_test permute_test_t;
typedef struct test_table test_table_t;
     96 
/* Table of double-precision test values and the number of entries stored
 * in it.  Both are set by build_fargs_table(); until then fargs is NULL
 * and nb_fargs is 0. */
static double *fargs = NULL;
static int nb_fargs;
     99 
    100 /* These functions below that construct a table of floating point
    101  * values were lifted from none/tests/ppc32/jm-insns.c.
    102  */
    103 
    104 #if defined (DEBUG_ARGS_BUILD)
    105 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    106 #else
    107 #define AB_DPRINTF(fmt, args...) do { } while (0)
    108 #endif
    109 
    110 static inline void register_farg (void *farg,
    111                                   int s, uint16_t _exp, uint64_t mant)
    112 {
    113    uint64_t tmp;
    114 
    115    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    116    *(uint64_t *)farg = tmp;
    117    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    118               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    119 }
    120 
    121 static void build_fargs_table(void)
    122 /*
    123  * Double precision:
    124  * Sign goes from zero to one               (1 bit)
    125  * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    126  * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    127  * + special values:
    128  * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    129  * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    130  * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    131  * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    132  * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    133  * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    134  * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    135  * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    136  * (8 values)
    137  *
    138  * Single precision
    139  * Sign:     1 bit
    140  * Exponent: 8 bits
    141  * Mantissa: 23 bits
    142  * +0.0      : 0 0x00 0x000000 => 0x00000000
    143  * -0.0      : 1 0x00 0x000000 => 0x80000000
    144  * +infinity : 0 0xFF 0x000000 => 0x7F800000
    145  * -infinity : 1 0xFF 0x000000 => 0xFF800000
    146  * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    147  * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    148  * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    149  * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    150 */
    151 {
    152    uint64_t mant;
    153    uint16_t _exp, e1;
    154    int s;
    155    int i=0;
    156 
    157    if (nb_fargs)
    158       return;
    159 
    160    fargs = malloc( 16 * sizeof(double) );
    161    for (s = 0; s < 2; s++) {
    162       for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) {
    163          if (e1 >= 0x400)
    164             e1 = 0x3fe;
    165          _exp = e1;
    166          for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
    167          /* Add 'random' bits */
    168          mant = ((mant + 0x4A6) << 29) + 0x359) {
    169             register_farg( &fargs[i++], s, _exp, mant );
    170          }
    171          if (e1 == 0x3fe)
    172             break;
    173       }
    174    }
    175    // add a few smaller values to fargs . . .
    176    s = 0;
    177    _exp = 0x002;
    178    mant = 0x0000000000b01ULL;
    179    register_farg(&fargs[i++], s, _exp, mant);
    180 
    181    _exp = 0x000;
    182    mant = 0x00000203f0b3dULL;
    183    register_farg(&fargs[i++], s, _exp, mant);
    184 
    185    mant = 0x00000005a203dULL;
    186    register_farg(&fargs[i++], s, _exp, mant);
    187 
    188    s = 1;
    189    _exp = 0x002;
    190    mant = 0x0000000000b01ULL;
    191    register_farg(&fargs[i++], s, _exp, mant);
    192 
    193    _exp = 0x000;
    194    mant = 0x00000203f0b3dULL;
    195    register_farg(&fargs[i++], s, _exp, mant);
    196 
    197    nb_fargs = i;
    198 }
    199 
    200 
    201 typedef struct fp_test_args {
    202    int fra_idx;
    203    int frb_idx;
    204    int cr_flags;
    205 } fp_test_args_t;
    206 
    207 
    208 fp_test_args_t ftdiv_tests[] = {
    209                               {0, 1, 0x8},
    210                               {9, 1, 0xa},
    211                               {1, 12, 0xa},
    212                               {0, 2, 0xa},
    213                               {1, 3, 0xa},
    214                               {3, 0, 0xa},
    215                               {0, 3, 0xa},
    216                               {4, 0, 0xa},
    217                               {7, 1, 0xe},
    218                               {8, 1, 0xe},
    219                               {1, 7, 0xe},
    220                               {0, 13, 0xe},
    221                               {5, 5, 0xe},
    222                               {5, 6, 0xe},
    223 };
    224 
    225 fp_test_args_t xscmpX_tests[] = {
    226                                    {8, 8, 0x2},
    227                                    {8, 14, 0x8},
    228                                    {8, 6, 0x8},
    229                                    {8, 5, 0x8},
    230                                    {8, 4, 0x8},
    231                                    {8, 7, 0x8},
    232                                    {8, 9, 0x1},
    233                                    {8, 11, 0x1},
    234                                    {14, 8, 0x4},
    235                                    {14, 14, 0x2},
    236                                    {14, 6, 0x8},
    237                                    {14, 5, 0x8},
    238                                    {14, 4, 0x8},
    239                                    {14, 7, 0x8},
    240                                    {14, 9, 0x1},
    241                                    {14, 11, 0x1},
    242                                    {6, 8, 0x4},
    243                                    {6, 14, 0x4},
    244                                    {6, 6, 0x2},
    245                                    {6, 5, 0x2},
    246                                    {6, 4, 0x8},
    247                                    {6, 7, 0x8},
    248                                    {6, 9, 0x1},
    249                                    {6, 11, 0x1},
    250                                    {5, 8, 0x4},
    251                                    {5, 14, 0x4},
    252                                    {5, 6, 0x2},
    253                                    {5, 5, 0x2},
    254                                    {5, 4, 0x8},
    255                                    {5, 7, 0x8},
    256                                    {5, 9, 0x1},
    257                                    {5, 11, 0x1},
    258                                    {4, 8, 0x4},
    259                                    {4, 14, 0x4},
    260                                    {4, 6, 0x4},
    261                                    {4, 5, 0x4},
    262                                    {4, 1, 0x8},
    263                                    {4, 7, 0x8},
    264                                    {4, 9, 0x1},
    265                                    {4, 11, 0x1},
    266                                    {7, 8, 0x4},
    267                                    {7, 14, 0x4},
    268                                    {7, 6, 0x4},
    269                                    {7, 5, 0x4},
    270                                    {7, 4, 0x4},
    271                                    {7, 7, 0x2},
    272                                    {7, 9, 0x1},
    273                                    {7, 11, 0x1},
    274                                    {10, 8, 0x1},
    275                                    {10, 14, 0x1},
    276                                    {10, 6, 0x1},
    277                                    {10, 5, 0x1},
    278                                    {10, 4, 0x1},
    279                                    {10, 7, 0x1},
    280                                    {10, 9, 0x1},
    281                                    {10, 11, 0x1},
    282                                    {12, 8, 0x1},
    283                                    {12, 14, 0x1},
    284                                    {12, 6, 0x1},
    285                                    {12, 5, 0x1},
    286                                    {12, 4, 0x1},
    287                                    {12, 7, 0x1},
    288                                    {12, 9, 0x1},
    289                                    {12, 11, 0x1},
    290 };
    291 
    292 fp_test_args_t xsadddp_tests[] = {
    293                                    {8, 8, 0x0},
    294                                    {8, 14, 0x0},
    295                                    {8, 6, 0x0},
    296                                    {8, 5, 0x0},
    297                                    {8, 4, 0x0},
    298                                    {8, 7, 0x0},
    299                                    {8, 9, 0x0},
    300                                    {8, 11, 0x0},
    301                                    {14, 8, 0x0},
    302                                    {14, 14, 0x0},
    303                                    {14, 6, 0x0},
    304                                    {14, 5, 0x0},
    305                                    {14, 4, 0x0},
    306                                    {14, 7, 0x0},
    307                                    {14, 9, 0x0},
    308                                    {14, 11, 0x0},
    309                                    {6, 8, 0x0},
    310                                    {6, 14, 0x0},
    311                                    {6, 6, 0x0},
    312                                    {6, 5, 0x0},
    313                                    {6, 4, 0x0},
    314                                    {6, 7, 0x0},
    315                                    {6, 9, 0x0},
    316                                    {6, 11, 0x0},
    317                                    {5, 8, 0x0},
    318                                    {5, 14, 0x0},
    319                                    {5, 6, 0x0},
    320                                    {5, 5, 0x0},
    321                                    {5, 4, 0x0},
    322                                    {5, 7, 0x0},
    323                                    {5, 9, 0x0},
    324                                    {5, 11, 0x0},
    325                                    {4, 8, 0x0},
    326                                    {4, 14, 0x0},
    327                                    {4, 6, 0x0},
    328                                    {4, 5, 0x0},
    329                                    {4, 1, 0x0},
    330                                    {4, 7, 0x0},
    331                                    {4, 9, 0x0},
    332                                    {4, 11, 0x0},
    333                                    {7, 8, 0x0},
    334                                    {7, 14, 0x0},
    335                                    {7, 6, 0x0},
    336                                    {7, 5, 0x0},
    337                                    {7, 4, 0x0},
    338                                    {7, 7, 0x0},
    339                                    {7, 9, 0x0},
    340                                    {7, 11, 0x0},
    341                                    {10, 8, 0x0},
    342                                    {10, 14, 0x0},
    343                                    {10, 6, 0x0},
    344                                    {10, 5, 0x0},
    345                                    {10, 4, 0x0},
    346                                    {10, 7, 0x0},
    347                                    {10, 9, 0x0},
    348                                    {10, 11, 0x0},
    349                                    {12, 8, 0x0},
    350                                    {12, 14, 0x0},
    351                                    {12, 6, 0x0},
    352                                    {12, 5, 0x0},
    353                                    {12, 4, 0x0},
    354                                    {12, 7, 0x0},
    355                                    {12, 9, 0x0},
    356                                    {12, 11, 0x0},
    357 };
    358 
    359 fp_test_args_t xsdivdp_tests[] = {
    360                                    {8, 8, 0x0},
    361                                    {8, 14, 0x0},
    362                                    {8, 6, 0x0},
    363                                    {8, 5, 0x0},
    364                                    {8, 4, 0x0},
    365                                    {8, 7, 0x0},
    366                                    {8, 9, 0x0},
    367                                    {8, 11, 0x0},
    368                                    {14, 8, 0x0},
    369                                    {14, 14, 0x0},
    370                                    {14, 6, 0x0},
    371                                    {14, 5, 0x0},
    372                                    {14, 4, 0x0},
    373                                    {14, 7, 0x0},
    374                                    {14, 9, 0x0},
    375                                    {14, 11, 0x0},
    376                                    {6, 8, 0x0},
    377                                    {6, 14, 0x0},
    378                                    {6, 6, 0x0},
    379                                    {6, 5, 0x0},
    380                                    {6, 4, 0x0},
    381                                    {6, 7, 0x0},
    382                                    {6, 9, 0x0},
    383                                    {6, 11, 0x0},
    384                                    {5, 8, 0x0},
    385                                    {5, 14, 0x0},
    386                                    {5, 6, 0x0},
    387                                    {5, 5, 0x0},
    388                                    {5, 4, 0x0},
    389                                    {5, 7, 0x0},
    390                                    {5, 9, 0x0},
    391                                    {5, 11, 0x0},
    392                                    {4, 8, 0x0},
    393                                    {4, 14, 0x0},
    394                                    {4, 6, 0x0},
    395                                    {4, 5, 0x0},
    396                                    {4, 1, 0x0},
    397                                    {4, 7, 0x0},
    398                                    {4, 9, 0x0},
    399                                    {4, 11, 0x0},
    400                                    {7, 8, 0x0},
    401                                    {7, 14, 0x0},
    402                                    {7, 6, 0x0},
    403                                    {7, 5, 0x0},
    404                                    {7, 4, 0x0},
    405                                    {7, 7, 0x0},
    406                                    {7, 9, 0x0},
    407                                    {7, 11, 0x0},
    408                                    {10, 8, 0x0},
    409                                    {10, 14, 0x0},
    410                                    {10, 6, 0x0},
    411                                    {10, 5, 0x0},
    412                                    {10, 4, 0x0},
    413                                    {10, 7, 0x0},
    414                                    {10, 9, 0x0},
    415                                    {10, 11, 0x0},
    416                                    {12, 8, 0x0},
    417                                    {12, 14, 0x0},
    418                                    {12, 6, 0x0},
    419                                    {12, 5, 0x0},
    420                                    {12, 4, 0x0},
    421                                    {12, 7, 0x0},
    422                                    {12, 9, 0x0},
    423                                    {12, 11, 0x0},
    424 };
    425 
    426 fp_test_args_t xsmaddXdp_tests[] = {
    427                                    {8, 8, 0x0},
    428                                    {8, 14, 0x0},
    429                                    {8, 6, 0x0},
    430                                    {8, 5, 0x0},
    431                                    {8, 4, 0x0},
    432                                    {8, 7, 0x0},
    433                                    {8, 9, 0x0},
    434                                    {8, 11, 0x0},
    435                                    {14, 8, 0x0},
    436                                    {14, 14, 0x0},
    437                                    {14, 6, 0x0},
    438                                    {14, 5, 0x0},
    439                                    {14, 4, 0x0},
    440                                    {14, 7, 0x0},
    441                                    {14, 9, 0x0},
    442                                    {14, 11, 0x0},
    443                                    {6, 8, 0x0},
    444                                    {6, 14, 0x0},
    445                                    {6, 6, 0x0},
    446                                    {6, 5, 0x0},
    447                                    {6, 4, 0x0},
    448                                    {6, 7, 0x0},
    449                                    {6, 9, 0x0},
    450                                    {6, 11, 0x0},
    451                                    {5, 8, 0x0},
    452                                    {5, 14, 0x0},
    453                                    {5, 6, 0x0},
    454                                    {5, 5, 0x0},
    455                                    {5, 4, 0x0},
    456                                    {5, 7, 0x0},
    457                                    {5, 9, 0x0},
    458                                    {5, 11, 0x0},
    459                                    {4, 8, 0x0},
    460                                    {4, 14, 0x0},
    461                                    {4, 6, 0x0},
    462                                    {4, 5, 0x0},
    463                                    {4, 1, 0x0},
    464                                    {4, 7, 0x0},
    465                                    {4, 9, 0x0},
    466                                    {4, 11, 0x0},
    467                                    {7, 8, 0x0},
    468                                    {7, 14, 0x0},
    469                                    {7, 6, 0x0},
    470                                    {7, 5, 0x0},
    471                                    {7, 4, 0x0},
    472                                    {7, 7, 0x0},
    473                                    {7, 9, 0x0},
    474                                    {7, 11, 0x0},
    475                                    {10, 8, 0x0},
    476                                    {10, 14, 0x0},
    477                                    {10, 6, 0x0},
    478                                    {10, 5, 0x0},
    479                                    {10, 4, 0x0},
    480                                    {10, 7, 0x0},
    481                                    {10, 9, 0x0},
    482                                    {10, 11, 0x0},
    483                                    {12, 8, 0x0},
    484                                    {12, 14, 0x0},
    485                                    {12, 6, 0x0},
    486                                    {12, 5, 0x0},
    487                                    {12, 4, 0x0},
    488                                    {12, 7, 0x0},
    489                                    {12, 9, 0x0},
    490                                    {12, 11, 0x0},
    491 };
    492 
    493 fp_test_args_t xsmsubXdp_tests[] = {
    494                                    {8, 8, 0x0},
    495                                    {8, 14, 0x0},
    496                                    {8, 6, 0x0},
    497                                    {8, 5, 0x0},
    498                                    {8, 4, 0x0},
    499                                    {8, 7, 0x0},
    500                                    {8, 9, 0x0},
    501                                    {8, 11, 0x0},
    502                                    {14, 8, 0x0},
    503                                    {14, 14, 0x0},
    504                                    {14, 6, 0x0},
    505                                    {14, 5, 0x0},
    506                                    {14, 4, 0x0},
    507                                    {14, 7, 0x0},
    508                                    {14, 9, 0x0},
    509                                    {14, 11, 0x0},
    510                                    {6, 8, 0x0},
    511                                    {6, 14, 0x0},
    512                                    {6, 6, 0x0},
    513                                    {6, 5, 0x0},
    514                                    {6, 4, 0x0},
    515                                    {6, 7, 0x0},
    516                                    {6, 9, 0x0},
    517                                    {6, 11, 0x0},
    518                                    {5, 8, 0x0},
    519                                    {5, 14, 0x0},
    520                                    {5, 6, 0x0},
    521                                    {5, 5, 0x0},
    522                                    {5, 4, 0x0},
    523                                    {5, 7, 0x0},
    524                                    {5, 9, 0x0},
    525                                    {5, 11, 0x0},
    526                                    {4, 8, 0x0},
    527                                    {4, 14, 0x0},
    528                                    {4, 6, 0x0},
    529                                    {4, 5, 0x0},
    530                                    {4, 1, 0x0},
    531                                    {4, 7, 0x0},
    532                                    {4, 9, 0x0},
    533                                    {4, 11, 0x0},
    534                                    {7, 8, 0x0},
    535                                    {7, 14, 0x0},
    536                                    {7, 6, 0x0},
    537                                    {7, 5, 0x0},
    538                                    {7, 4, 0x0},
    539                                    {7, 7, 0x0},
    540                                    {7, 9, 0x0},
    541                                    {7, 11, 0x0},
    542                                    {10, 8, 0x0},
    543                                    {10, 14, 0x0},
    544                                    {10, 6, 0x0},
    545                                    {10, 5, 0x0},
    546                                    {10, 4, 0x0},
    547                                    {10, 7, 0x0},
    548                                    {10, 9, 0x0},
    549                                    {10, 11, 0x0},
    550                                    {12, 8, 0x0},
    551                                    {12, 14, 0x0},
    552                                    {12, 6, 0x0},
    553                                    {12, 5, 0x0},
    554                                    {12, 4, 0x0},
    555                                    {12, 7, 0x0},
    556                                    {12, 9, 0x0},
    557                                    {12, 11, 0x0},
    558 };
    559 
    560 fp_test_args_t xsnmaddXdp_tests[] = {
    561                                      {8, 8, 0x0},
    562                                      {8, 14, 0x0},
    563                                      {8, 6, 0x0},
    564                                      {8, 5, 0x0},
    565                                      {8, 4, 0x0},
    566                                      {8, 7, 0x0},
    567                                      {8, 9, 0x0},
    568                                      {8, 11, 0x0},
    569                                      {14, 8, 0x0},
    570                                      {14, 14, 0x0},
    571                                      {14, 6, 0x0},
    572                                      {14, 5, 0x0},
    573                                      {14, 4, 0x0},
    574                                      {14, 7, 0x0},
    575                                      {14, 9, 0x0},
    576                                      {14, 11, 0x0},
    577                                      {6, 8, 0x0},
    578                                      {6, 14, 0x0},
    579                                      {6, 6, 0x0},
    580                                      {6, 5, 0x0},
    581                                      {6, 4, 0x0},
    582                                      {6, 7, 0x0},
    583                                      {6, 9, 0x0},
    584                                      {6, 11, 0x0},
    585                                      {5, 8, 0x0},
    586                                      {5, 14, 0x0},
    587                                      {5, 6, 0x0},
    588                                      {5, 5, 0x0},
    589                                      {5, 4, 0x0},
    590                                      {5, 7, 0x0},
    591                                      {5, 9, 0x0},
    592                                      {5, 11, 0x0},
    593                                      {4, 8, 0x0},
    594                                      {4, 14, 0x0},
    595                                      {4, 6, 0x0},
    596                                      {4, 5, 0x0},
    597                                      {4, 1, 0x0},
    598                                      {4, 7, 0x0},
    599                                      {4, 9, 0x0},
    600                                      {4, 11, 0x0},
    601                                      {7, 8, 0x0},
    602                                      {7, 14, 0x0},
    603                                      {7, 6, 0x0},
    604                                      {7, 5, 0x0},
    605                                      {7, 4, 0x0},
    606                                      {7, 7, 0x0},
    607                                      {7, 9, 0x0},
    608                                      {7, 11, 0x0},
    609                                      {10, 8, 0x0},
    610                                      {10, 14, 0x0},
    611                                      {10, 6, 0x0},
    612                                      {10, 5, 0x0},
    613                                      {10, 4, 0x0},
    614                                      {10, 7, 0x0},
    615                                      {10, 9, 0x0},
    616                                      {10, 11, 0x0},
    617                                      {12, 8, 0x0},
    618                                      {12, 14, 0x0},
    619                                      {12, 6, 0x0},
    620                                      {12, 5, 0x0},
    621                                      {12, 4, 0x0},
    622                                      {12, 7, 0x0},
    623                                      {12, 9, 0x0},
    624                                      {12, 11, 0x0},
    625 };
    626 
    627 fp_test_args_t xsmuldp_tests[] = {
    628                                   {8, 8, 0x0},
    629                                   {8, 14, 0x0},
    630                                   {8, 6, 0x0},
    631                                   {8, 5, 0x0},
    632                                   {8, 4, 0x0},
    633                                   {8, 7, 0x0},
    634                                   {8, 9, 0x0},
    635                                   {8, 11, 0x0},
    636                                   {14, 8, 0x0},
    637                                   {14, 14, 0x0},
    638                                   {14, 6, 0x0},
    639                                   {14, 5, 0x0},
    640                                   {14, 4, 0x0},
    641                                   {14, 7, 0x0},
    642                                   {14, 9, 0x0},
    643                                   {14, 11, 0x0},
    644                                   {6, 8, 0x0},
    645                                   {6, 14, 0x0},
    646                                   {6, 6, 0x0},
    647                                   {6, 5, 0x0},
    648                                   {6, 4, 0x0},
    649                                   {6, 7, 0x0},
    650                                   {6, 9, 0x0},
    651                                   {6, 11, 0x0},
    652                                   {5, 8, 0x0},
    653                                   {5, 14, 0x0},
    654                                   {5, 6, 0x0},
    655                                   {5, 5, 0x0},
    656                                   {5, 4, 0x0},
    657                                   {5, 7, 0x0},
    658                                   {5, 9, 0x0},
    659                                   {5, 11, 0x0},
    660                                   {4, 8, 0x0},
    661                                   {4, 14, 0x0},
    662                                   {4, 6, 0x0},
    663                                   {4, 5, 0x0},
    664                                   {4, 1, 0x0},
    665                                   {4, 7, 0x0},
    666                                   {4, 9, 0x0},
    667                                   {4, 11, 0x0},
    668                                   {7, 8, 0x0},
    669                                   {7, 14, 0x0},
    670                                   {7, 6, 0x0},
    671                                   {7, 5, 0x0},
    672                                   {7, 4, 0x0},
    673                                   {7, 7, 0x0},
    674                                   {7, 9, 0x0},
    675                                   {7, 11, 0x0},
    676                                   {10, 8, 0x0},
    677                                   {10, 14, 0x0},
    678                                   {10, 6, 0x0},
    679                                   {10, 5, 0x0},
    680                                   {10, 4, 0x0},
    681                                   {10, 7, 0x0},
    682                                   {10, 9, 0x0},
    683                                   {10, 11, 0x0},
    684                                   {12, 8, 0x0},
    685                                   {12, 14, 0x0},
    686                                   {12, 6, 0x0},
    687                                   {12, 5, 0x0},
    688                                   {12, 4, 0x0},
    689                                   {12, 7, 0x0},
    690                                   {12, 9, 0x0},
    691                                   {12, 11, 0x0},
    692 };
    693 
    694 fp_test_args_t xssubdp_tests[] = {
    695                                   {8, 8, 0x0},
    696                                   {8, 14, 0x0},
    697                                   {8, 6, 0x0},
    698                                   {8, 5, 0x0},
    699                                   {8, 4, 0x0},
    700                                   {8, 7, 0x0},
    701                                   {8, 9, 0x0},
    702                                   {8, 11, 0x0},
    703                                   {14, 8, 0x0},
    704                                   {14, 14, 0x0},
    705                                   {14, 6, 0x0},
    706                                   {14, 5, 0x0},
    707                                   {14, 4, 0x0},
    708                                   {14, 7, 0x0},
    709                                   {14, 9, 0x0},
    710                                   {14, 11, 0x0},
    711                                   {6, 8, 0x0},
    712                                   {6, 14, 0x0},
    713                                   {6, 6, 0x0},
    714                                   {6, 5, 0x0},
    715                                   {6, 4, 0x0},
    716                                   {6, 7, 0x0},
    717                                   {6, 9, 0x0},
    718                                   {6, 11, 0x0},
    719                                   {5, 8, 0x0},
    720                                   {5, 14, 0x0},
    721                                   {5, 6, 0x0},
    722                                   {5, 5, 0x0},
    723                                   {5, 4, 0x0},
    724                                   {5, 7, 0x0},
    725                                   {5, 9, 0x0},
    726                                   {5, 11, 0x0},
    727                                   {4, 8, 0x0},
    728                                   {4, 14, 0x0},
    729                                   {4, 6, 0x0},
    730                                   {4, 5, 0x0},
    731                                   {4, 1, 0x0},
    732                                   {4, 7, 0x0},
    733                                   {4, 9, 0x0},
    734                                   {4, 11, 0x0},
    735                                   {7, 8, 0x0},
    736                                   {7, 14, 0x0},
    737                                   {7, 6, 0x0},
    738                                   {7, 5, 0x0},
    739                                   {7, 4, 0x0},
    740                                   {7, 7, 0x0},
    741                                   {7, 9, 0x0},
    742                                   {7, 11, 0x0},
    743                                   {10, 8, 0x0},
    744                                   {10, 14, 0x0},
    745                                   {10, 6, 0x0},
    746                                   {10, 5, 0x0},
    747                                   {10, 4, 0x0},
    748                                   {10, 7, 0x0},
    749                                   {10, 9, 0x0},
    750                                   {10, 11, 0x0},
    751                                   {12, 8, 0x0},
    752                                   {12, 14, 0x0},
    753                                   {12, 6, 0x0},
    754                                   {12, 5, 0x0},
    755                                   {12, 4, 0x0},
    756                                   {12, 7, 0x0},
    757                                   {12, 9, 0x0},
    758                                   {12, 11, 0x0},
    759 };
    760 
    761 
    762 
    763 static int nb_special_fargs;
    764 static double * spec_fargs;
    765 
    766 static void build_special_fargs_table(void)
    767 {
    768    /* The special floating point values created below are for
    769     * use in the ftdiv tests for setting the fe_flag and fg_flag,
    770     * but they can also be used for other tests (e.g., xscmpudp).
    771     *
    772     * Note that fl_flag is 'always '1' on ppc64 Linux.
    773     *
    774   Entry  Sign Exp   fraction                  Special value
    775    0      0   3fd   0x8000000000000ULL         Positive finite number
    776    1      0   404   0xf000000000000ULL         ...
    777    2      0   001   0x8000000b77501ULL         ...
    778    3      0   7fe   0x800000000051bULL         ...
    779    4      0   012   0x3214569900000ULL         ...
    780    5      0   000   0x0000000000000ULL         +0.0 (+zero)
    781    6      1   000   0x0000000000000ULL         -0.0 (-zero)
    782    7      0   7ff   0x0000000000000ULL         +infinity
    783    8      1   7ff   0x0000000000000ULL         -infinity
    784    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
    785    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
    786    11     0   7ff   0x8000000000000ULL         +QNaN
    787    12     1   7ff   0x8000000000000ULL         -QNaN
    788    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
    789    14     1   40d   0x0650f5a07b353ULL         Negative finite number
    790     */
    791 
    792    uint64_t mant;
    793    uint16_t _exp;
    794    int s;
    795    int i = 0;
    796 
    797    if (spec_fargs)
    798       return;
    799 
    800    spec_fargs = malloc( 16 * sizeof(double) );
    801 
    802    // #0
    803    s = 0;
    804    _exp = 0x3fd;
    805    mant = 0x8000000000000ULL;
    806    register_farg(&spec_fargs[i++], s, _exp, mant);
    807 
    808    // #1
    809    s = 0;
    810    _exp = 0x404;
    811    mant = 0xf000000000000ULL;
    812    register_farg(&spec_fargs[i++], s, _exp, mant);
    813 
    814    /* None of the ftdiv tests succeed.
    815     * FRA = value #0; FRB = value #1
    816     * ea_ = -2; e_b = 5
    817     * fl_flag || fg_flag || fe_flag = 100
    818     */
    819 
    820    /*************************************************
    821     *     fe_flag tests
    822     *
    823     *************************************************/
    824 
    825    /* fe_flag <- 1 if FRA is a NaN
    826     * FRA = value #9; FRB = value #1
    827     * e_a = 1024; e_b = 5
    828     * fl_flag || fg_flag || fe_flag = 101
    829     */
    830 
    831    /* fe_flag <- 1 if FRB is a NaN
    832     * FRA = value #1; FRB = value #12
    833     * e_a = 5; e_b = 1024
    834     * fl_flag || fg_flag || fe_flag = 101
    835     */
    836 
    837    /* fe_flag <- 1 if e_b <= -1022
    838     * FRA = value #0; FRB = value #2
    839     * e_a = -2; e_b = -1022
    840     * fl_flag || fg_flag || fe_flag = 101
    841     *
    842     */
    843    // #2
    844    s = 0;
    845    _exp = 0x001;
    846    mant = 0x8000000b77501ULL;
    847    register_farg(&spec_fargs[i++], s, _exp, mant);
    848 
    849    /* fe_flag <- 1 if e_b >= 1021
    850     * FRA = value #1; FRB = value #3
    851     * e_a = 5; e_b = 1023
    852     * fl_flag || fg_flag || fe_flag = 101
    853     */
    854    // #3
    855    s = 0;
    856    _exp = 0x7fe;
    857    mant = 0x800000000051bULL;
    858    register_farg(&spec_fargs[i++], s, _exp, mant);
    859 
    860    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
    861     * Let FRA = value #3 and FRB be value #0.
    862     * e_a = 1023; e_b = -2
    863     * fl_flag || fg_flag || fe_flag = 101
    864     */
    865 
    866    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
    867     * Let FRA = value #0 above and FRB be value #3 above
    868     * e_a = -2; e_b = 1023
    869     * fl_flag || fg_flag || fe_flag = 101
    870     */
    871 
    872    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
    873     * Let FRA = value #4 and FRB be value #0
    874     * e_a = -1005; e_b = -2
    875     * fl_flag || fg_flag || fe_flag = 101
    876    */
    877    // #4
    878    s = 0;
    879    _exp = 0x012;
    880    mant = 0x3214569900000ULL;
    881    register_farg(&spec_fargs[i++], s, _exp, mant);
    882 
    883    /*************************************************
    884     *     fg_flag tests
    885     *
    886     *************************************************/
    887    /* fg_flag <- 1 if FRA is an Infinity
    888     * NOTE: FRA = Inf also sets fe_flag
    889     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
    890     * Test 1:
    891     *   Let FRA be value #7 and FRB be value #1
    892     *   e_a = 1024; e_b = 5
    893     *   fl_flag || fg_flag || fe_flag = 111
    894     *
    895     * Test 2:
    896     *   Let FRA be value #8 and FRB be value #1
    897     *   e_a = 1024; e_b = 5
    898     *   fl_flag || fg_flag || fe_flag = 111
    899     *
    900     */
    901 
    902    /* fg_flag <- 1 if FRB is an Infinity
    903     * NOTE: FRB = Inf also sets fe_flag
    904     * Let FRA be value #1 and FRB be value #7
    905     * e_a = 5; e_b = 1024
    906     * fl_flag || fg_flag || fe_flag = 111
    907     */
    908 
    909    /* fg_flag <- 1 if FRB is denormalized
    910     * NOTE: e_b < -1022 ==> fe_flag <- 1
    911     * Let FRA be value #0 and FRB be value #13
    912     * e_a = -2; e_b = -1023
    913     * fl_flag || fg_flag || fe_flag = 111
    914     */
    915 
    916    /* fg_flag <- 1 if FRB is +zero
    917     * NOTE: FRA = Inf also sets fe_flag
    918     * Let FRA = val #5; FRB = val #5
    919     * ea_ = -1023; e_b = -1023
    920     * fl_flag || fg_flag || fe_flag = 111
    921     */
    922 
    923    /* fg_flag <- 1 if FRB is -zero
    924     * NOTE: FRA = Inf also sets fe_flag
    925     * Let FRA = val #5; FRB = val #6
    926     * ea_ = -1023; e_b = -1023
    927     * fl_flag || fg_flag || fe_flag = 111
    928     */
    929 
    930    /* Special values */
    931    /* +0.0      : 0 0x000 0x0000000000000 */
    932    // #5
    933    s = 0;
    934    _exp = 0x000;
    935    mant = 0x0000000000000ULL;
    936    register_farg(&spec_fargs[i++], s, _exp, mant);
    937 
    938    /* -0.0      : 1 0x000 0x0000000000000 */
    939    // #6
    940    s = 1;
    941    _exp = 0x000;
    942    mant = 0x0000000000000ULL;
    943    register_farg(&spec_fargs[i++], s, _exp, mant);
    944 
    945    /* +infinity : 0 0x7FF 0x0000000000000  */
    946    // #7
    947    s = 0;
    948    _exp = 0x7FF;
    949    mant = 0x0000000000000ULL;
    950    register_farg(&spec_fargs[i++], s, _exp, mant);
    951 
    952    /* -infinity : 1 0x7FF 0x0000000000000 */
    953    // #8
    954    s = 1;
    955    _exp = 0x7FF;
    956    mant = 0x0000000000000ULL;
    957    register_farg(&spec_fargs[i++], s, _exp, mant);
    958 
    959    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
    960    // #9
    961    s = 0;
    962    _exp = 0x7FF;
    963    mant = 0x7FFFFFFFFFFFFULL;
    964    register_farg(&spec_fargs[i++], s, _exp, mant);
    965 
    966    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
    967    // #10
    968    s = 1;
    969    _exp = 0x7FF;
    970    mant = 0x7FFFFFFFFFFFFULL;
    971    register_farg(&spec_fargs[i++], s, _exp, mant);
    972 
    973    /* +QNaN     : 0 0x7FF 0x8000000000000 */
    974    // #11
    975    s = 0;
    976    _exp = 0x7FF;
    977    mant = 0x8000000000000ULL;
    978    register_farg(&spec_fargs[i++], s, _exp, mant);
    979 
    980    /* -QNaN     : 1 0x7FF 0x8000000000000 */
    981    // #12
    982    s = 1;
    983    _exp = 0x7FF;
    984    mant = 0x8000000000000ULL;
    985    register_farg(&spec_fargs[i++], s, _exp, mant);
    986 
    987    /* denormalized value */
    988    // #13
    989    s = 1;
    990    _exp = 0x000;
    991    mant = 0x8340000078000ULL;
    992    register_farg(&spec_fargs[i++], s, _exp, mant);
    993 
    994    /* Negative finite number */
    995    // #14
    996    s = 1;
    997    _exp = 0x40d;
    998    mant = 0x0650f5a07b353ULL;
    999    register_farg(&spec_fargs[i++], s, _exp, mant);
   1000 
   1001    nb_special_fargs = i;
   1002 }
   1003 
   1004 
   1005 struct test_table
   1006 {
   1007    test_func_t test_category;
   1008    char * name;
   1009 };
   1010 
   1011 struct p7_fp_test
   1012 {
   1013    test_func_t test_func;
   1014    const char *name;
   1015    int single;  // 1=single precision result; 0=double precision result
   1016 };
   1017 
   1018 typedef enum {
   1019    VX_FP_CMP,
   1020    VX_FP_SMA,
   1021    VX_FP_SMS,
   1022    VX_FP_SNMA,
   1023    VX_FP_OTHER
   1024 } vx_fp_test_type;
   1025 
   1026 struct vx_fp_test
   1027 {
   1028    test_func_t test_func;
   1029    const char *name;
   1030    fp_test_args_t * targs;
   1031    int num_tests;
   1032    vx_fp_test_type test_type;
   1033 };
   1034 
   1035 struct xs_conv_test
   1036 {
   1037    test_func_t test_func;
   1038    const char *name;
   1039    int num_tests;
   1040 };
   1041 
   1042 typedef enum {
   1043    VSX_LOAD =1,
   1044    VSX_LOAD_SPLAT,
   1045    VSX_STORE
   1046 } vsx_ldst_type;
   1047 
   1048 struct ldst_test
   1049 {
   1050    test_func_t test_func;
   1051    const char *name;
   1052    void * base_addr;
   1053    uint32_t offset;
   1054    int num_words_to_process;
   1055    vsx_ldst_type type;
   1056 };
   1057 
   1058 typedef enum {
   1059    VSX_AND = 1,
   1060    VSX_XOR,
   1061    VSX_ANDC,
   1062    VSX_OR,
   1063    VSX_NOR
   1064 } vsx_log_op;
   1065 
   1066 struct vsx_logic_test
   1067 {
   1068    test_func_t test_func;
   1069    const char *name;
   1070    vsx_log_op op;
   1071 };
   1072 
   1073 struct vsx_move_test
   1074 {
   1075    test_func_t test_func;
   1076    const char *name;
   1077 };
   1078 
   1079 struct vsx_permute_test
   1080 {
   1081    test_func_t test_func;
   1082    const char *name;
   1083    unsigned int xa[4];
   1084    unsigned int xb[4];
   1085 };
   1086 
   1087 static vector unsigned int vec_out, vec_inA, vec_inB;
   1088 
   1089 static void test_lxsdx(void)
   1090 {
   1091    __asm__ __volatile__ ("lxsdx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1092 }
   1093 
   1094 static void
   1095 test_lxvd2x(void)
   1096 {
   1097    __asm__ __volatile__ ("lxvd2x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1098 }
   1099 
   1100 static void test_lxvdsx(void)
   1101 {
   1102    __asm__ __volatile__ ("lxvdsx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1103 }
   1104 
   1105 static void test_lxvw4x(void)
   1106 {
   1107    __asm__ __volatile__ ("lxvw4x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1108 }
   1109 
   1110 static void test_stxsdx(void)
   1111 {
   1112    __asm__ __volatile__ ("stxsdx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1113 }
   1114 
   1115 static void test_stxvd2x(void)
   1116 {
   1117    __asm__ __volatile__ ("stxvd2x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1118 }
   1119 
   1120 static void test_stxvw4x(void)
   1121 {
   1122    __asm__ __volatile__ ("stxvw4x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1123 }
   1124 
   1125 static void test_xxlxor(void)
   1126 {
   1127    __asm__ __volatile__ ("xxlxor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1128 }
   1129 
   1130 static void test_xxlor(void)
   1131 {
   1132    __asm__ __volatile__ ("xxlor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1133 }
   1134 
   1135 static void test_xxlnor(void)
   1136 {
   1137    __asm__ __volatile__ ("xxlnor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1138 }
   1139 
   1140 static void test_xxland(void)
   1141 {
   1142    __asm__ __volatile__ ("xxland          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1143 }
   1144 
   1145 static void test_xxlandc(void)
   1146 {
   1147    __asm__ __volatile__ ("xxlandc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1148 }
   1149 
   1150 static void test_xxmrghw(void)
   1151 {
   1152    __asm__ __volatile__ ("xxmrghw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1153 }
   1154 
   1155 static void test_xxmrglw(void)
   1156 {
   1157    __asm__ __volatile__ ("xxmrglw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1158 }
   1159 
   1160 static void test_xxpermdi_00(void)
   1161 {
   1162    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1163 }
   1164 
   1165 static void test_xxpermdi_01(void)
   1166 {
   1167    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1168 }
   1169 
   1170 static void test_xxpermdi_10(void)
   1171 {
   1172    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1173 }
   1174 
   1175 static void test_xxpermdi_11(void)
   1176 {
   1177    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1178 }
   1179 
   1180 static void test_xxsldwi_0(void)
   1181 {
   1182    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1183 }
   1184 
   1185 static void test_xxsldwi_1(void)
   1186 {
   1187    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1188 }
   1189 
   1190 static void test_xxsldwi_2(void)
   1191 {
   1192    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1193 }
   1194 
   1195 static void test_xxsldwi_3(void)
   1196 {
   1197    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1198 }
   1199 
   1200 static void test_fcfids (void)
   1201 {
   1202     __asm__ __volatile__ ("fcfids          %0, %1" : "=f" (f17): "d" (f14));
   1203 }
   1204 
   1205 static void test_fcfidus (void)
   1206 {
   1207     __asm__ __volatile__ ("fcfidus          %0, %1" : "=f" (f17): "d" (f14));
   1208 }
   1209 
   1210 static void test_fcfidu (void)
   1211 {
   1212     __asm__ __volatile__ ("fcfidu          %0, %1" : "=f" (f17): "d" (f14));
   1213 }
   1214 
   1215 static void test_xsabsdp (void)
   1216 {
   1217    __asm__ __volatile__ ("xsabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1218 }
   1219 
   1220 static void test_xscpsgndp (void)
   1221 {
   1222    __asm__ __volatile__ ("xscpsgndp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1223 }
   1224 
   1225 static void test_xsnabsdp (void)
   1226 {
   1227    __asm__ __volatile__ ("xsnabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1228 }
   1229 
   1230 static void test_xsnegdp (void)
   1231 {
   1232    __asm__ __volatile__ ("xsnegdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1233 }
   1234 
   1235 static int do_cmpudp;
   1236 static void test_xscmp (void)
   1237 {
   1238    if (do_cmpudp)
   1239       __asm__ __volatile__ ("xscmpudp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
   1240    else
   1241       __asm__ __volatile__ ("xscmpodp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
   1242 }
   1243 
   1244 static void test_xsadddp(void)
   1245 {
   1246    __asm__ __volatile__ ("xsadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1247 }
   1248 
   1249 static void test_xsdivdp(void)
   1250 {
   1251    __asm__ __volatile__ ("xsdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1252 }
   1253 
   1254 static int do_adp;
   1255 static void test_xsmadd(void)
   1256 {
   1257    if (do_adp)
   1258       __asm__ __volatile__ ("xsmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1259    else
   1260       __asm__ __volatile__ ("xsmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1261 }
   1262 
   1263 static void test_xsmsub(void)
   1264 {
   1265    if (do_adp)
   1266       __asm__ __volatile__ ("xsmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1267    else
   1268       __asm__ __volatile__ ("xsmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1269 }
   1270 
   1271 static void test_xsnmadd(void)
   1272 {
   1273    if (do_adp)
   1274       __asm__ __volatile__ ("xsnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1275    else
   1276       __asm__ __volatile__ ("xsnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1277 }
   1278 
   1279 static void test_xsmuldp(void)
   1280 {
   1281    __asm__ __volatile__ ("xsmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1282 }
   1283 
   1284 static void test_xssubdp(void)
   1285 {
   1286    __asm__ __volatile__ ("xssubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1287 }
   1288 
   1289 static void test_xscvdpsxds (void)
   1290 {
   1291    __asm__ __volatile__ ("xscvdpsxds          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1292 }
   1293 
   1294 static void test_xscvsxddp (void)
   1295 {
   1296    __asm__ __volatile__ ("xscvsxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1297 }
   1298 
   1299 static void test_xscvuxddp (void)
   1300 {
   1301    __asm__ __volatile__ ("xscvuxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1302 }
   1303 
   1304 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
   1305                                                               0, 0, 0, 0 };
   1306 
   1307 #define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0])
   1308 #define NUM_VSTG_VECS (NUM_VSTG_INTS/4)
   1309 
   1310 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x01234567,
   1311                                                                 0x89abcdef,
   1312                                                                 0x00112233,
   1313                                                                 0x44556677,
   1314                                                                 0x8899aabb,
   1315                                                                 0x91929394,
   1316                                                                 0xa1a2a3a4,
   1317                                                                 0xb1b2b3b4,
   1318                                                                 0xc1c2c3c4,
   1319                                                                 0xd1d2d3d4,
   1320                                                                 0x7a6b5d3e
   1321 };
   1322 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
   1323 #define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
   1324 
   1325 static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD },
   1326                                      { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD },
   1327                                      { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD },
   1328                                      { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD },
   1329                                      { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT },
   1330                                      { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT },
   1331                                      { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD },
   1332                                      { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD },
   1333                                      { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE },
   1334                                      { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE },
   1335                                      { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE },
   1336                                      { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE },
   1337                                      { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE },
   1338                                      { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE },
   1339                                      { NULL, NULL, NULL, 0, 0, 0 } };
   1340 
   1341 static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR },
   1342                                       { &test_xxlor, "xxlor", VSX_OR } ,
   1343                                       { &test_xxlnor, "xxlnor", VSX_NOR },
   1344                                       { &test_xxland, "xxland", VSX_AND },
   1345                                       { &test_xxlandc, "xxlandc", VSX_ANDC },
   1346                                       { NULL, NULL, 0}};
   1347 
   1348 static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" },
   1349                                     { &test_xscpsgndp, "xscpsgndp" },
   1350                                     { &test_xsnabsdp, "xsnabsdp" },
   1351                                     { &test_xsnegdp, "xsnegdp" },
   1352                                     { NULL, NULL }
   1353 
   1354 };
   1355 
   1356 static permute_test_t permute_tests[] =
   1357 {
   1358   { &test_xxmrghw, "xxmrghw",
   1359     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1360     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1361   },
   1362   { &test_xxmrghw, "xxmrghw",
   1363     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
   1364     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
   1365   },
   1366   { &test_xxmrglw, "xxmrglw",
   1367     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1368     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1369   },
   1370   { &test_xxmrglw, "xxmrglw",
   1371     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
   1372     { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
   1373   },
   1374   { &test_xxpermdi_00, "xxpermdi DM=00",
   1375     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1376     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1377   },
   1378   { &test_xxpermdi_01, "xxpermdi DM=01",
   1379     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1380     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1381   },
   1382   { &test_xxpermdi_10, "xxpermdi DM=10",
   1383     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1384     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1385   },
   1386   { &test_xxpermdi_11, "xxpermdi DM=11",
   1387     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1388     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1389   },
   1390   { &test_xxsldwi_0, "xxsldwi SHW=0",
   1391     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1392     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1393   },
   1394   { &test_xxsldwi_1, "xxsldwi SHW=1",
   1395     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1396     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1397   },
   1398   { &test_xxsldwi_2, "xxsldwi SHW=2",
   1399     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1400     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1401   },
   1402   { &test_xxsldwi_3, "xxsldwi SHW=3",
   1403     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1404     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1405   },
   1406   { NULL, NULL }
   1407 };
   1408 
   1409 static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 },
   1410                                 { &test_fcfidus, "fcfidus", 1 },
   1411                                 { &test_fcfidu, "fcfidu", 1 },
   1412                                 { NULL, NULL, 0 },
   1413 
   1414 };
   1415 
   1416 static vx_fp_test_t vx_fp_tests[] = {
   1417                                      { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP},
   1418                                      { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER},
   1419                                      { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER},
   1420                                      { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA},
   1421                                      { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS},
   1422                                      { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA},
   1423                                      { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER},
   1424                                      { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER},
   1425                                      { NULL, NULL, NULL, 0, 0 }
   1426 };
   1427 
   1428 static xs_conv_test_t xs_conv_tests[] = {
   1429                                          { &test_xscvdpsxds, "xscvdpsxds", 15},
   1430                                          { &test_xscvsxddp, "xscvsxddp", 15},
   1431                                          { &test_xscvuxddp, "xscvuxddp", 15},
   1432                                          { NULL, NULL, 0}
   1433 };
   1434 
   1435 #ifdef __powerpc64__
   1436 static void test_ldbrx(void)
   1437 {
   1438    int i;
   1439    HWord_t reg_out;
   1440    unsigned char * byteIn, * byteOut;
   1441    r14 = (HWord_t)viargs;
   1442    // Just try the instruction an arbitrary number of times at different r15 offsets.
   1443    for (i = 0; i < 3; i++) {
   1444       int j, k;
   1445       reg_out = 0;
   1446       r15 = i * 4;
   1447       __asm__ __volatile__ ("ldbrx          %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15));
   1448       byteIn = ((unsigned char *)(r14 + r15));
   1449       byteOut = (unsigned char *)&reg_out;
   1450 
   1451       printf("ldbrx:");
   1452       for (k = 0; k < 8; k++) {
   1453          printf( " %02x", (byteIn[k]));
   1454       }
   1455       printf(" (reverse) =>");
   1456       for (j = 0; j < 8; j++) {
   1457          printf( " %02x", (byteOut[j]));
   1458       }
   1459       printf("\n");
   1460    }
   1461    printf( "\n" );
   1462 }
   1463 
   1464 static void
   1465 test_popcntd(void)
   1466 {
   1467    uint64_t res;
   1468    unsigned long long src = 0x9182736405504536ULL;
   1469    r14 = src;
   1470    __asm__ __volatile__ ("popcntd          %0, %1" : "=r" (res): "r" (r14));
   1471    printf("popcntd: 0x%llx => %d\n", src, (int)res);
   1472    printf( "\n" );
   1473 }
   1474 #endif
   1475 
   1476 static void
   1477 test_lfiwzx(void)
   1478 {
   1479    unsigned int i;
   1480    unsigned int * src;
   1481    uint64_t reg_out;
   1482    r14 = (HWord_t)viargs;
   1483    // Just try the instruction an arbitrary number of times at different r15 offsets.
   1484    for (i = 0; i < 3; i++) {
   1485       reg_out = 0;
   1486       r15 = i * 4;
   1487       __asm__ __volatile__ ("lfiwzx          %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15));
   1488       src = ((unsigned int *)(r14 + r15));
   1489       printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out);
   1490 
   1491    }
   1492    printf( "\n" );
   1493 }
   1494 
   1495 static void test_vx_fp_ops(void)
   1496 {
   1497 
   1498    test_func_t func;
   1499    int k;
   1500    char * test_name = (char *)malloc(20);
   1501    k = 0;
   1502 
   1503    build_special_fargs_table();
   1504    while ((func = vx_fp_tests[k].test_func)) {
   1505       int i, condreg, repeat = 0;
   1506       unsigned int flags;
   1507       unsigned long long * frap, * frbp, * dst;
   1508       vx_fp_test_t test_group = vx_fp_tests[k];
   1509       vx_fp_test_type test_type = test_group.test_type;
   1510 
   1511       switch (test_type) {
   1512          case VX_FP_CMP:
   1513             strcpy(test_name, "xscmp");
   1514             if (!repeat) {
   1515                repeat = 1;
   1516                strcat(test_name, "udp");
   1517                do_cmpudp = 1;
   1518             }
   1519             break;
   1520          case VX_FP_SMA:
   1521          case VX_FP_SMS:
   1522          case VX_FP_SNMA:
   1523             if (test_type == VX_FP_SMA)
   1524                strcpy(test_name, "xsmadd");
   1525             else if (test_type == VX_FP_SMS)
   1526                strcpy(test_name, "xsmsub");
   1527             else
   1528                strcpy(test_name, "xsnmadd");
   1529             if (!repeat) {
   1530                repeat = 1;
   1531                strcat(test_name, "adp");
   1532                do_adp = 1;
   1533             }
   1534             break;
   1535          case VX_FP_OTHER:
   1536             strcpy(test_name, test_group.name);
   1537             break;
   1538          default:
   1539             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
   1540             exit(1);
   1541       }
   1542 
   1543 again:
   1544       for (i = 0; i < test_group.num_tests; i++) {
   1545          unsigned int * inA, * inB, * pv;
   1546          double * dpA = (double *)&vec_inA;
   1547          double * dpB = (double *)&vec_inB;
   1548          double * dpT = (double *)&vec_out;
   1549 
   1550          fp_test_args_t aTest = test_group.targs[i];
   1551          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
   1552          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
   1553          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1554          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1555          // Only need to copy one doubleword into each vector's element 0
   1556          if (isLE) {
   1557             // With LE, vector element 0 is the second doubleword from the left
   1558             memset(dpA, 0, 8);
   1559             memset(dpB, 0, 8);
   1560             dpA++;
   1561             dpB++;
   1562          }
   1563          memcpy(dpA, inA, 8);
   1564          memcpy(dpB, inB, 8);
   1565 
   1566          switch (test_type) {
   1567             case VX_FP_CMP:
   1568                SET_FPSCR_ZERO;
   1569                SET_CR_XER_ZERO;
   1570                (*func)();
   1571                GET_CR(flags);
   1572                condreg = (flags & 0x0f000000) >> 24;
   1573                printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg);
   1574               // printf("\tFRA: %e;  FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
   1575                if ( condreg != aTest.cr_flags) {
   1576                   printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg);
   1577                }
   1578                break;
   1579             case VX_FP_SMA:
   1580             case VX_FP_SMS:
   1581             case VX_FP_SNMA:
   1582             case VX_FP_OTHER:
   1583             {
   1584                int idx;
   1585                unsigned long long vsr_XT;
   1586                pv = (unsigned int *)&vec_out;
   1587                // clear vec_out
   1588                for (idx = 0; idx < 4; idx++, pv++)
   1589                   *pv = 0;
   1590 
   1591                if (test_type != VX_FP_OTHER) {
   1592                   /* Then we need a third src argument, which is stored in element 0 of
   1593                    * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
   1594                    * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
   1595                    * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
   1596                    * data (input args) contain only two inputs, so I arbitrarily
   1597                    * use spec_fargs elements 4 and 14 (alternating) for the third source
   1598                    * argument.  We can use the same input data for a given pair of
   1599                    * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
   1600                    * the expected result should be the same.
   1601                    */
   1602                   int extra_arg_idx;
   1603                   if (i % 2)
   1604                      extra_arg_idx = 4;
   1605                   else
   1606                      extra_arg_idx = 14;
   1607 
   1608                   if (repeat) {
   1609                      /* We're on the first time through of one of the VX_FP_SMx
   1610                       * test types, meaning we're testing a xs<ZZZ>adp case, thus we
   1611                       * have to swap inputs as described above:
   1612                       *    src2 <= VSX[XT]
   1613                       *    src3 <= VSX[XB]
   1614                       */
   1615                      if (isLE)
   1616                         dpT++;
   1617                      memcpy(dpT, inB, 8);  // src2
   1618                      memcpy(dpB, &spec_fargs[extra_arg_idx], 8);  //src3
   1619                      frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
   1620                   } else {
   1621                      // Don't need to init src2, as it's done before the switch()
   1622                      if (isLE)
   1623                         dpT++;
   1624                      memcpy(dpT, &spec_fargs[extra_arg_idx], 8);  //src3
   1625                   }
   1626                   memcpy(&vsr_XT, dpT, 8);
   1627                }
   1628 
   1629                (*func)();
   1630                dst = (unsigned long long *) &vec_out;
   1631                if (isLE)
   1632                   dst++;
   1633                if (test_type == VX_FP_OTHER)
   1634                   printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst);
   1635                else
   1636                   printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
   1637                           test_name, vsr_XT, *frap, *frbp, *dst );
   1638 
   1639                /*
   1640               {
   1641                   // Debug code.  Keep this block commented out except when debugging.
   1642                   double result, expected;
   1643                   memcpy(&result, dst, 8);
   1644                   memcpy(&expected, &aTest.dp_bin_result, 8);
   1645                   printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
   1646                           spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
   1647                           expected, result );
   1648                }
   1649               */
   1650                break;
   1651             }
   1652          }
   1653 
   1654 
   1655       }
   1656       printf( "\n" );
   1657 
   1658       if (repeat) {
   1659          repeat = 0;
   1660          switch (test_type) {
   1661             case VX_FP_CMP:
   1662                strcpy(test_name, "xscmp");
   1663                strcat(test_name, "odp");
   1664                do_cmpudp = 0;
   1665                break;
   1666             case VX_FP_SMA:
   1667             case VX_FP_SMS:
   1668             case VX_FP_SNMA:
   1669                if (test_type == VX_FP_SMA)
   1670                   strcpy(test_name, "xsmadd");
   1671                else if (test_type == VX_FP_SMS)
   1672                   strcpy(test_name, "xsmsub");
   1673                else
   1674                   strcpy(test_name, "xsnmadd");
   1675                strcat(test_name, "mdp");
   1676                do_adp = 0;
   1677                break;
   1678             case VX_FP_OTHER:
   1679                break;
   1680          }
   1681          goto again;
   1682       }
   1683       k++;
   1684    }
   1685    printf( "\n" );
   1686    free(test_name);
   1687 }
   1688 
   1689 static void test_xs_conv_ops(void)
   1690 {
   1691 
   1692    test_func_t func;
   1693    int k = 0;
   1694    double * dpB = (double *)&vec_inB;
   1695    if (isLE) {
   1696       memset(dpB, 0, 8);
   1697       dpB++;
   1698    }
   1699 
   1700    build_special_fargs_table();
   1701    while ((func = xs_conv_tests[k].test_func)) {
   1702       int i;
   1703       unsigned long long * frbp, * dst;
   1704       xs_conv_test_t test_group = xs_conv_tests[k];
   1705       for (i = 0; i < test_group.num_tests; i++) {
   1706          unsigned int * inB, * pv;
   1707          int idx;
   1708          inB = (unsigned int *)&spec_fargs[i];
   1709          frbp = (unsigned long long *)&spec_fargs[i];
   1710 
   1711          memcpy(dpB, inB, 8);
   1712          pv = (unsigned int *)&vec_out;
   1713          // clear vec_out
   1714          for (idx = 0; idx < 4; idx++, pv++)
   1715             *pv = 0;
   1716          (*func)();
   1717          dst = (unsigned long long *) &vec_out;
   1718          if (isLE)
   1719             dst++;
   1720          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst);
   1721 
   1722       }
   1723       k++;
   1724       printf("\n");
   1725    }
   1726    printf( "\n" );
   1727 }
   1728 
   1729 static void do_load_test(ldst_test_t loadTest)
   1730 {
   1731    test_func_t func;
   1732    unsigned int *src, *dst;
   1733    int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0;
   1734    int i, j, m, k;
   1735    i = j = 0;
   1736 
   1737    func = loadTest.test_func;
   1738    for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) {
   1739       int again;
   1740       j = 0;
   1741        r14 += i * 16;
   1742       do {
   1743          unsigned int * pv = (unsigned int *)&vec_out;
   1744          int idx;
   1745          // clear vec_out
   1746          for (idx = 0; idx < 4; idx++, pv+=idx)
   1747             *pv = 0;
   1748 
   1749          again = 0;
   1750          r15 = j;
   1751 
   1752          // execute test insn
   1753          (*func)();
   1754 
   1755          src = (unsigned int*) (((unsigned char *)r14) + j);
   1756          dst = (unsigned int*) &vec_out;
   1757 
   1758          printf( "%s:", loadTest.name);
   1759          for (m = 0; m < loadTest.num_words_to_process; m++) {
   1760             printf( " %08x", src[splat ? m % 2 : m]);
   1761          }
   1762          printf( " =>");
   1763          m = 0;
   1764          k = loadTest.num_words_to_process;
   1765          if (isLE) {
   1766             if (loadTest.num_words_to_process == 2) {
   1767                m = 2;
   1768                k += 2;
   1769             }
   1770          }
   1771 
   1772          for (; m < k; m++) {
   1773             printf( " %08x", dst[m]);
   1774          }
   1775          printf("\n");
   1776          if (j == 0 && loadTest.offset) {
   1777             again = 1;
   1778             j += loadTest.offset;
   1779          }
   1780       }
   1781       while (again);
   1782    }
   1783 }
   1784 
   1785 static void
   1786 do_store_test ( ldst_test_t storeTest )
   1787 {
   1788    test_func_t func;
   1789    unsigned int *src, *dst;
   1790    int m;
   1791 
   1792    func = storeTest.test_func;
   1793    r14 = (HWord_t) storeTest.base_addr;
   1794    r15 = (HWord_t) storeTest.offset;
   1795    unsigned int * pv = (unsigned int *) storeTest.base_addr;
   1796    int idx;
   1797    // clear out storage destination
   1798    for (idx = 0; idx < 4; idx++, pv += idx)
   1799       *pv = 0;
   1800 
   1801    memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char));
   1802 
   1803    // execute test insn
   1804    (*func)();
   1805    src = &viargs[0];
   1806    dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset);
   1807 
   1808    printf( "%s:", storeTest.name );
   1809    for (m = 0; m < storeTest.num_words_to_process; m++) {
   1810       printf( " %08x", src[m] );
   1811    }
   1812    printf( " =>" );
   1813    for (m = 0; m < storeTest.num_words_to_process; m++) {
   1814       printf( " %08x", dst[m] );
   1815    }
   1816    printf( "\n" );
   1817 }
   1818 
   1819 
   1820 static void test_ldst(void)
   1821 {
   1822    int k = 0;
   1823 
   1824    while (ldst_tests[k].test_func) {
   1825       if (ldst_tests[k].type == VSX_STORE)
   1826          do_store_test(ldst_tests[k]);
   1827       else
   1828          do_load_test(ldst_tests[k]);
   1829       k++;
   1830       printf("\n");
   1831    }
   1832 }
   1833 
   1834 static void test_ftdiv(void)
   1835 {
   1836    int i, num_tests, crx;
   1837    unsigned int flags;
   1838    unsigned long long * frap, * frbp;
   1839    build_special_fargs_table();
   1840 
   1841    num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0];
   1842 
   1843    for (i = 0; i < num_tests; i++) {
   1844       fp_test_args_t aTest = ftdiv_tests[i];
   1845       f14 = spec_fargs[aTest.fra_idx];
   1846       f15 = spec_fargs[aTest.frb_idx];
   1847       frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1848       frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1849       SET_FPSCR_ZERO;
   1850       SET_CR_XER_ZERO;
   1851       __asm__ __volatile__ ("ftdiv           cr1, %0, %1" : : "d" (f14), "d" (f15));
   1852       GET_CR(flags);
   1853       crx = (flags & 0x0f000000) >> 24;
   1854       printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx);
   1855 //      printf("\tFRA: %e;  FRB: %e\n", f14, f15);
   1856       if ( crx != aTest.cr_flags) {
   1857          printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx);
   1858       }
   1859    }
   1860    printf( "\n" );
   1861 }
   1862 
   1863 
   1864 static void test_p7_fpops ( void )
   1865 {
   1866    int k = 0;
   1867    test_func_t func;
   1868 
   1869    build_fargs_table();
   1870    while ((func = fp_tests[k].test_func)) {
   1871       float res;
   1872       double resd;
   1873       unsigned long long u0;
   1874       int i;
   1875       int res32 = strcmp(fp_tests[k].name, "fcfidu");
   1876 
   1877       for (i = 0; i < nb_fargs; i++) {
   1878          u0 = *(unsigned long long *) (&fargs[i]);
   1879          f14 = fargs[i];
   1880          (*func)();
   1881          if (res32) {
   1882             res = f17;
   1883             printf( "%s %016llx => (raw sp) %08x)",
   1884                     fp_tests[k].name, u0, *((unsigned int *)&res));
   1885          } else {
   1886             resd = f17;
   1887             printf( "%s %016llx => (raw sp) %016llx)",
   1888                     fp_tests[k].name, u0, *(unsigned long long *)(&resd));
   1889          }
   1890          printf( "\n" );
   1891       }
   1892 
   1893       k++;
   1894       printf( "\n" );
   1895    }
   1896 }
   1897 
   1898 static void test_vsx_logic(void)
   1899 {
   1900    logic_test_t aTest;
   1901    test_func_t func;
   1902    int k;
   1903    k = 0;
   1904 
   1905    while ((func = logic_tests[k].test_func)) {
   1906       unsigned int * pv;
   1907       int startA, startB;
   1908       unsigned int * inA, * inB, * dst;
   1909       int idx, i;
   1910       startA = 0;
   1911       aTest = logic_tests[k];
   1912       for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) {
   1913          startB = startA + 4;
   1914          pv = (unsigned int *)&vec_out;
   1915          inA = &viargs[startA];
   1916          inB = &viargs[startB];
   1917          memcpy(&vec_inA, inA, sizeof(vector unsigned char));
   1918          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
   1919          // clear vec_out
   1920          for (idx = 0; idx < 4; idx++, pv++)
   1921             *pv = 0;
   1922 
   1923          // execute test insn
   1924          (*func)();
   1925          dst = (unsigned int*) &vec_out;
   1926 
   1927          printf( "%s:", aTest.name);
   1928          printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name);
   1929          printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]);
   1930          printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
   1931 
   1932       }
   1933       k++;
   1934    }
   1935    printf( "\n" );
   1936 }
   1937 
   1938 static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) =
   1939 {
   1940  { 0x0123456789abcdefULL, 0x0011223344556677ULL},
   1941  { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL},
   1942  { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL}
   1943 };
   1944 #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0])
   1945 
   1946 static void test_move_ops (void)
   1947 {
   1948    move_test_t aTest;
   1949    test_func_t func;
   1950    int k;
   1951    k = 0;
   1952 
   1953    while ((func = move_tests[k].test_func)) {
   1954       unsigned int * pv;
   1955       int startA, startB;
   1956       unsigned long long * inA, * inB, * dst;
   1957       int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0);
   1958       int idx;
   1959       inA = NULL;
   1960       aTest = move_tests[k];
   1961       for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) {
   1962          inB = (unsigned long long *)&vec_args[startB];
   1963          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
   1964          if (isLE)
   1965             inB++;
   1966          startA = 0;
   1967 repeat:
   1968          if (use_vecA) {
   1969             inA = (unsigned long long *)&vec_args[startA];
   1970             memcpy(&vec_inA, inA, sizeof(vector unsigned char));
   1971             startA++;
   1972          }
   1973          pv = (unsigned int *)&vec_out;
   1974          // clear vec_out
   1975          for (idx = 0; idx < 4; idx++, pv++)
   1976             *pv = 0;
   1977 
   1978          // execute test insn
   1979          (*func)();
   1980          dst = (unsigned long long *) &vec_out;
   1981          if (isLE) {
   1982             dst++;
   1983             inA++;
   1984          }
   1985 
   1986          printf( "%s:", aTest.name);
   1987          if (use_vecA)
   1988             printf( " X[A]: %016llx ", *inA);
   1989          printf( " X[B]: %016llx", *inB);
   1990          printf(" => %016llx\n", *dst);
   1991 
   1992          if (use_vecA && startA < NUM_VEC_ARGS_LONGS)
   1993             goto repeat;
   1994       }
   1995       k++;
   1996       printf( "\n" );
   1997    }
   1998 }
   1999 
   2000 static void test_permute_ops (void)
   2001 {
   2002   permute_test_t *aTest;
   2003   unsigned int *dst = (unsigned int *) &vec_out;
   2004 
   2005   for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++)
   2006     {
   2007       /* Grab test input and clear output vector.  */
   2008       memcpy(&vec_inA, aTest->xa, sizeof(vec_inA));
   2009       memcpy(&vec_inB, aTest->xb, sizeof(vec_inB));
   2010       memset(dst, 0, sizeof(vec_out));
   2011 
   2012       /* execute test insn */
   2013       aTest->test_func();
   2014 
   2015       printf( "%s:\n", aTest->name);
   2016       printf( "        XA[%08x,%08x,%08x,%08x]\n",
   2017               aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]);
   2018       printf( "        XB[%08x,%08x,%08x,%08x]\n",
   2019               aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]);
   2020       printf( "   =>   XT[%08x,%08x,%08x,%08x]\n",
   2021               dst[0], dst[1], dst[2], dst[3]);
   2022 
   2023     }
   2024   printf( "\n" );
   2025 }
   2026 
   2027 static test_table_t all_tests[] = { { &test_ldst,
   2028                                        "Test VSX load/store instructions" },
   2029                                      { &test_vsx_logic,
   2030                                        "Test VSX logic instructions" },
   2031 #ifdef __powerpc64__
   2032                                      { &test_ldbrx,
   2033                                        "Test ldbrx instruction" },
   2034                                      { &test_popcntd,
   2035                                        "Test popcntd instruction" },
   2036 #endif
   2037                                      { &test_lfiwzx,
   2038                                        "Test lfiwzx instruction" },
   2039                                      { &test_p7_fpops,
   2040                                        "Test P7 floating point convert instructions"},
   2041                                      { &test_ftdiv,
   2042                                        "Test ftdiv instruction" },
   2043                                      { &test_move_ops,
   2044                                        "Test VSX move instructions"},
   2045                                      { &test_permute_ops,
   2046                                        "Test VSX permute instructions"},
   2047                                      { &test_vx_fp_ops,
   2048                                        "Test VSX floating point instructions"},
   2049                                      { &test_xs_conv_ops,
   2050                                        "Test VSX scalar integer conversion instructions" },
   2051                                      { NULL, NULL }
   2052 };
   2053 #endif // HAS_VSX
   2054 
   2055 int main(int argc, char *argv[])
   2056 {
   2057 #ifdef HAS_VSX
   2058 
   2059    test_table_t aTest;
   2060    test_func_t func;
   2061    int i = 0;
   2062 
   2063    while ((func = all_tests[i].test_category)) {
   2064       aTest = all_tests[i];
   2065       printf( "%s\n", aTest.name );
   2066       (*func)();
   2067       i++;
   2068    }
   2069 
   2070 #endif // HAS _VSX
   2071 
   2072    return 0;
   2073 }
   2074