Home | History | Annotate | Download | only in ppc32
      1 /*  Copyright (C) 2011 IBM
      2 
      3  Author: Maynard Johnson <maynardj (at) us.ibm.com>
      4 
      5  This program is free software; you can redistribute it and/or
      6  modify it under the terms of the GNU General Public License as
      7  published by the Free Software Foundation; either version 2 of the
      8  License, or (at your option) any later version.
      9 
     10  This program is distributed in the hope that it will be useful, but
     11  WITHOUT ANY WARRANTY; without even the implied warranty of
     12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  General Public License for more details.
     14 
     15  You should have received a copy of the GNU General Public License
     16  along with this program; if not, write to the Free Software
     17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18  02111-1307, USA.
     19 
     20  The GNU General Public License is contained in the file COPYING.
     21  */
     22 
     23 #ifdef HAS_VSX
     24 
     25 #include <stdio.h>
     26 #include <stdint.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <malloc.h>
     30 #include <altivec.h>
     31 
     32 #ifndef __powerpc64__
     33 typedef uint32_t HWord_t;
     34 #else
     35 typedef uint64_t HWord_t;
     36 #endif /* __powerpc64__ */
     37 
     38 static int errors;
     39 register HWord_t r14 __asm__ ("r14");
     40 register HWord_t r15 __asm__ ("r15");
     41 register HWord_t r16 __asm__ ("r16");
     42 register HWord_t r17 __asm__ ("r17");
     43 register double f14 __asm__ ("fr14");
     44 register double f15 __asm__ ("fr15");
     45 register double f16 __asm__ ("fr16");
     46 register double f17 __asm__ ("fr17");
     47 
     48 static volatile unsigned int cond_reg;
     49 
     50 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
     51 
     52 #define SET_CR(_arg) \
     53       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
     54 
     55 #define SET_XER(_arg) \
     56       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
     57 
     58 #define GET_CR(_lval) \
     59       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
     60 
     61 #define GET_XER(_lval) \
     62       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
     63 
     64 #define GET_CR_XER(_lval_cr,_lval_xer) \
     65    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
     66 
     67 #define SET_CR_ZERO \
     68       SET_CR(0)
     69 
     70 #define SET_XER_ZERO \
     71       SET_XER(0)
     72 
     73 #define SET_CR_XER_ZERO \
     74    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
     75 
     76 #define SET_FPSCR_ZERO \
     77    do { double _d = 0.0; \
     78         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
     79    } while (0)
     80 
     81 
     82 typedef void (*test_func_t)(void);
     83 typedef struct ldst_test ldst_test_t;
     84 typedef struct vsx_logic_test logic_test_t;
     85 typedef struct xs_conv_test xs_conv_test_t;
     86 typedef struct p7_fp_test fp_test_t;
     87 typedef struct vx_fp_test vx_fp_test_t;
     88 typedef struct vsx_move_test move_test_t;
     89 typedef struct vsx_permute_test permute_test_t;
     90 typedef struct test_table test_table_t;
     91 
     92 static double *fargs = NULL;
     93 static int nb_fargs;
     94 
     95 /* These functions below that construct a table of floating point
     96  * values were lifted from none/tests/ppc32/jm-insns.c.
     97  */
     98 
     99 #if defined (DEBUG_ARGS_BUILD)
    100 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    101 #else
    102 #define AB_DPRINTF(fmt, args...) do { } while (0)
    103 #endif
    104 
    105 static inline void register_farg (void *farg,
    106                                   int s, uint16_t _exp, uint64_t mant)
    107 {
    108    uint64_t tmp;
    109 
    110    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    111    *(uint64_t *)farg = tmp;
    112    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    113               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    114 }
    115 
    116 static void build_fargs_table(void)
    117 /*
    118  * Double precision:
    119  * Sign goes from zero to one               (1 bit)
    120  * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    121  * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    122  * + special values:
    123  * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    124  * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    125  * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    126  * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    127  * +QNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    128  * -QNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    129  * +SNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    130  * -SNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    131  * (8 values)
    132  *
    133  * Single precision
    134  * Sign:     1 bit
    135  * Exponent: 8 bits
    136  * Mantissa: 23 bits
    137  * +0.0      : 0 0x00 0x000000 => 0x00000000
    138  * -0.0      : 1 0x00 0x000000 => 0x80000000
    139  * +infinity : 0 0xFF 0x000000 => 0x7F800000
    140  * -infinity : 1 0xFF 0x000000 => 0xFF800000
    141  * +QNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    142  * -QNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    143  * +SNaN     : 0 0xFF 0x400000 => 0x7FC00000
    144  * -SNaN     : 1 0xFF 0x400000 => 0xFFC00000
    145 */
    146 {
    147    uint64_t mant;
    148    uint16_t _exp, e1;
    149    int s;
    150    int i=0;
    151 
    152    if (nb_fargs)
    153       return;
    154 
    155    fargs = malloc( 16 * sizeof(double) );
    156    for (s = 0; s < 2; s++) {
    157       for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) {
    158          if (e1 >= 0x400)
    159             e1 = 0x3fe;
    160          _exp = e1;
    161          for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
    162          /* Add 'random' bits */
    163          mant = ((mant + 0x4A6) << 29) + 0x359) {
    164             register_farg( &fargs[i++], s, _exp, mant );
    165          }
    166          if (e1 == 0x3fe)
    167             break;
    168       }
    169    }
    170    // add a few smaller values to fargs . . .
    171    s = 0;
    172    _exp = 0x002;
    173    mant = 0x0000000000b01ULL;
    174    register_farg(&fargs[i++], s, _exp, mant);
    175 
    176    _exp = 0x000;
    177    mant = 0x00000203f0b3dULL;
    178    register_farg(&fargs[i++], s, _exp, mant);
    179 
    180    mant = 0x00000005a203dULL;
    181    register_farg(&fargs[i++], s, _exp, mant);
    182 
    183    s = 1;
    184    _exp = 0x002;
    185    mant = 0x0000000000b01ULL;
    186    register_farg(&fargs[i++], s, _exp, mant);
    187 
    188    _exp = 0x000;
    189    mant = 0x00000203f0b3dULL;
    190    register_farg(&fargs[i++], s, _exp, mant);
    191 
    192    nb_fargs = i;
    193 }
    194 
    195 
    196 typedef struct ftdiv_test {
    197    int fra_idx;
    198    int frb_idx;
    199    int cr_flags;
    200 } ftdiv_test_args_t;
    201 
    202 typedef struct fp_test_args {
    203    int fra_idx;
    204    int frb_idx;
    205    int cr_flags;
    206    unsigned long long dp_bin_result;
    207 } fp_test_args_t;
    208 
    209 unsigned long long xscvuxddp_results[] = {
    210                                           0x43cfec0000000000ULL,
    211                                           0x43d013c000000000ULL,
    212                                           0x4338000000b77501ULL,
    213                                           0x43dffa0000000001ULL,
    214                                           0x4372321456990000ULL,
    215                                           0x0000000000000000ULL,
    216                                           0x43e0000000000000ULL,
    217                                           0x43dffc0000000000ULL,
    218                                           0x43effe0000000000ULL,
    219                                           0x43dffe0000000000ULL,
    220                                           0x43efff0000000000ULL,
    221                                           0x43dffe0000000000ULL,
    222                                           0x43efff0000000000ULL,
    223                                           0x43e00106800000f0ULL,
    224                                           0x43e81a0ca1eb40f6ULL
    225 };
    226 
    227 unsigned long long xscvsxddp_results[] = {
    228                                            0x43cfec0000000000ULL,
    229                                            0x43d013c000000000ULL,
    230                                            0x4338000000b77501ULL,
    231                                            0x43dffa0000000001ULL,
    232                                            0x4372321456990000ULL,
    233                                            0x0000000000000000ULL,
    234                                            0xc3e0000000000000ULL,
    235                                            0x43dffc0000000000ULL,
    236                                            0xc330000000000000ULL,
    237                                            0x43dffe0000000000ULL,
    238                                            0xc320000000000002ULL,
    239                                            0x43dffe0000000000ULL,
    240                                            0xc320000000000000ULL,
    241                                            0xc3dffdf2fffffe20ULL,
    242                                            0xc3cf97cd7852fc26ULL,
    243 };
    244 
    245 unsigned long long xscvdpsxds_results[] = {
    246                                            0x0000000000000000ULL,
    247                                            0x000000000000003eULL,
    248                                            0x0000000000000000ULL,
    249                                            0x7fffffffffffffffULL,
    250                                            0x0000000000000000ULL,
    251                                            0x0000000000000000ULL,
    252                                            0x0000000000000000ULL,
    253                                            0x7fffffffffffffffULL,
    254                                            0x8000000000000000ULL,
    255                                            0x8000000000000000ULL,
    256                                            0x8000000000000000ULL,
    257                                            0x8000000000000000ULL,
    258                                            0x8000000000000000ULL,
    259                                            0x0000000000000000ULL,
    260                                            0xffffffffffffbe6cULL
    261 };
    262 
    263 ftdiv_test_args_t ftdiv_tests[] = {
    264                               {0, 1, 0x8},
    265                               {9, 1, 0xa},
    266                               {1, 12, 0xa},
    267                               {0, 2, 0xa},
    268                               {1, 3, 0xa},
    269                               {3, 0, 0xa},
    270                               {0, 3, 0xa},
    271                               {4, 0, 0xa},
    272                               {7, 1, 0xe},
    273                               {8, 1, 0xe},
    274                               {1, 7, 0xe},
    275                               {0, 13, 0xe},
    276                               {5, 5, 0xe},
    277                               {5, 6, 0xe},
    278 };
    279 
    280 fp_test_args_t xscmpX_tests[] = {
    281                                    {8, 8, 0x2, 0ULL},
    282                                    {8, 14, 0x8, 0ULL},
    283                                    {8, 6, 0x8, 0ULL},
    284                                    {8, 5, 0x8, 0ULL},
    285                                    {8, 4, 0x8, 0ULL},
    286                                    {8, 7, 0x8, 0ULL},
    287                                    {8, 9, 0x1, 0ULL},
    288                                    {8, 11, 0x1, 0ULL},
    289                                    {14, 8, 0x4, 0ULL},
    290                                    {14, 14, 0x2, 0ULL},
    291                                    {14, 6, 0x8, 0ULL},
    292                                    {14, 5, 0x8, 0ULL},
    293                                    {14, 4, 0x8, 0ULL},
    294                                    {14, 7, 0x8, 0ULL},
    295                                    {14, 9, 0x1, 0ULL},
    296                                    {14, 11, 0x1, 0ULL},
    297                                    {6, 8, 0x4, 0ULL},
    298                                    {6, 14, 0x4, 0ULL},
    299                                    {6, 6, 0x2, 0ULL},
    300                                    {6, 5, 0x2, 0ULL},
    301                                    {6, 4, 0x8, 0ULL},
    302                                    {6, 7, 0x8, 0ULL},
    303                                    {6, 9, 0x1, 0ULL},
    304                                    {6, 11, 0x1, 0ULL},
    305                                    {5, 8, 0x4, 0ULL},
    306                                    {5, 14, 0x4, 0ULL},
    307                                    {5, 6, 0x2, 0ULL},
    308                                    {5, 5, 0x2, 0ULL},
    309                                    {5, 4, 0x8, 0ULL},
    310                                    {5, 7, 0x8, 0ULL},
    311                                    {5, 9, 0x1, 0ULL},
    312                                    {5, 11, 0x1, 0ULL},
    313                                    {4, 8, 0x4, 0ULL},
    314                                    {4, 14, 0x4, 0ULL},
    315                                    {4, 6, 0x4, 0ULL},
    316                                    {4, 5, 0x4, 0ULL},
    317                                    {4, 1, 0x8, 0ULL},
    318                                    {4, 7, 0x8, 0ULL},
    319                                    {4, 9, 0x1, 0ULL},
    320                                    {4, 11, 0x1, 0ULL},
    321                                    {7, 8, 0x4, 0ULL},
    322                                    {7, 14, 0x4, 0ULL},
    323                                    {7, 6, 0x4, 0ULL},
    324                                    {7, 5, 0x4, 0ULL},
    325                                    {7, 4, 0x4, 0ULL},
    326                                    {7, 7, 0x2, 0ULL},
    327                                    {7, 9, 0x1, 0ULL},
    328                                    {7, 11, 0x1, 0ULL},
    329                                    {10, 8, 0x1, 0ULL},
    330                                    {10, 14, 0x1, 0ULL},
    331                                    {10, 6, 0x1, 0ULL},
    332                                    {10, 5, 0x1, 0ULL},
    333                                    {10, 4, 0x1, 0ULL},
    334                                    {10, 7, 0x1, 0ULL},
    335                                    {10, 9, 0x1, 0ULL},
    336                                    {10, 11, 0x1, 0ULL},
    337                                    {12, 8, 0x1, 0ULL},
    338                                    {12, 14, 0x1, 0ULL},
    339                                    {12, 6, 0x1, 0ULL},
    340                                    {12, 5, 0x1, 0ULL},
    341                                    {12, 4, 0x1, 0ULL},
    342                                    {12, 7, 0x1, 0ULL},
    343                                    {12, 9, 0x1, 0ULL},
    344                                    {12, 11, 0x1, 0ULL},
    345 };
    346 
    347 fp_test_args_t xsadddp_tests[] = {
    348                                    {8, 8, 0x0,   0xfff0000000000000ULL},
    349                                    {8, 14, 0x0,  0xfff0000000000000ULL},
    350                                    {8, 6, 0x0,   0xfff0000000000000ULL},
    351                                    {8, 5, 0x0,   0xfff0000000000000ULL},
    352                                    {8, 4, 0x0,   0xfff0000000000000ULL},
    353                                    {8, 7, 0x0,   0x7ff8000000000000ULL},
    354                                    {8, 9, 0x0,   0x7fffffffffffffffULL},
    355                                    {8, 11, 0x0,  0x7ff8000000000000ULL},
    356                                    {14, 8, 0x0,  0xfff0000000000000ULL},
    357                                    {14, 14, 0x0, 0xc0e0650f5a07b353ULL},
    358                                    {14, 6, 0x0,  0xc0d0650f5a07b353ULL},
    359                                    {14, 5, 0x0,  0xc0d0650f5a07b353ULL},
    360                                    {14, 4, 0x0,  0xc0d0650f5a07b353ULL},
    361                                    {14, 7, 0x0,  0x7ff0000000000000ULL},
    362                                    {14, 9, 0x0,  0x7fffffffffffffffULL},
    363                                    {14, 11, 0x0, 0x7ff8000000000000ULL},
    364                                    {6, 8, 0x0,   0xfff0000000000000ULL},
    365                                    {6, 14, 0x0,  0xc0d0650f5a07b353ULL},
    366                                    {6, 6, 0x0,   0x8000000000000000ULL},
    367                                    {6, 5, 0x0,   0x0000000000000000ULL},
    368                                    {6, 4, 0x0,   0x0123214569900000ULL},
    369                                    {6, 7, 0x0,   0x7ff0000000000000ULL},
    370                                    {6, 9, 0x0,   0x7fffffffffffffffULL},
    371                                    {6, 11, 0x0,  0x7ff8000000000000ULL},
    372                                    {5, 8, 0x0,   0xfff0000000000000ULL},
    373                                    {5, 14, 0x0,  0xc0d0650f5a07b353ULL},
    374                                    {5, 6, 0x0,   0x0000000000000000ULL},
    375                                    {5, 5, 0x0,   0x0000000000000000ULL},
    376                                    {5, 4, 0x0,   0x0123214569900000ULL},
    377                                    {5, 7, 0x0,   0x7ff0000000000000ULL},
    378                                    {5, 9, 0x0,   0x7fffffffffffffffULL},
    379                                    {5, 11, 0x0,  0x7ff8000000000000ULL},
    380                                    {4, 8, 0x0,   0xfff0000000000000ULL},
    381                                    {4, 14, 0x0,  0xc0d0650f5a07b353ULL},
    382                                    {4, 6, 0x0,   0x0123214569900000ULL},
    383                                    {4, 5, 0x0,   0x0123214569900000ULL},
    384                                    {4, 1, 0x0,   0x404f000000000000ULL},
    385                                    {4, 7, 0x0,   0x7ff0000000000000ULL},
    386                                    {4, 9, 0x0,   0x7fffffffffffffffULL},
    387                                    {4, 11, 0x0,  0x7ff8000000000000ULL},
    388                                    {7, 8, 0x0,   0x7ff8000000000000ULL},
    389                                    {7, 14, 0x0,  0x7ff0000000000000ULL},
    390                                    {7, 6, 0x0,   0x7ff0000000000000ULL},
    391                                    {7, 5, 0x0,   0x7ff0000000000000ULL},
    392                                    {7, 4, 0x0,   0x7ff0000000000000ULL},
    393                                    {7, 7, 0x0,   0x7ff0000000000000ULL},
    394                                    {7, 9, 0x0,   0x7fffffffffffffffULL},
    395                                    {7, 11, 0x0,  0x7ff8000000000000ULL},
    396                                    {10, 8, 0x0,  0xffffffffffffffffULL},
    397                                    {10, 14, 0x0, 0xffffffffffffffffULL},
    398                                    {10, 6, 0x0,  0xffffffffffffffffULL},
    399                                    {10, 5, 0x0,  0xffffffffffffffffULL},
    400                                    {10, 4, 0x0,  0xffffffffffffffffULL},
    401                                    {10, 7, 0x0,  0xffffffffffffffffULL},
    402                                    {10, 9, 0x0,  0xffffffffffffffffULL},
    403                                    {10, 11, 0x0, 0xffffffffffffffffULL},
    404                                    {12, 8, 0x0,  0xfff8000000000000ULL},
    405                                    {12, 14, 0x0, 0xfff8000000000000ULL},
    406                                    {12, 6, 0x0,  0xfff8000000000000ULL},
    407                                    {12, 5, 0x0,  0xfff8000000000000ULL},
    408                                    {12, 4, 0x0,  0xfff8000000000000ULL},
    409                                    {12, 7, 0x0,  0xfff8000000000000ULL},
    410                                    {12, 9, 0x0,  0xfff8000000000000ULL},
    411                                    {12, 11, 0x0, 0xfff8000000000000ULL},
    412 };
    413 
    414 fp_test_args_t xsdivdp_tests[] = {
    415                                    {8, 8, 0x0,   0x7ff8000000000000ULL},
    416                                    {8, 14, 0x0,  0x7ff0000000000000ULL},
    417                                    {8, 6, 0x0,   0x7ff0000000000000ULL},
    418                                    {8, 5, 0x0,   0xfff0000000000000ULL},
    419                                    {8, 4, 0x0,   0xfff0000000000000ULL},
    420                                    {8, 7, 0x0,   0x7ff8000000000000ULL},
    421                                    {8, 9, 0x0,   0x7fffffffffffffffULL},
    422                                    {8, 11, 0x0,  0x7ff8000000000000ULL},
    423                                    {14, 8, 0x0,  0x0000000000000000ULL},
    424                                    {14, 14, 0x0, 0x3ff0000000000000ULL},
    425                                    {14, 6, 0x0,  0x7ff0000000000000ULL},
    426                                    {14, 5, 0x0,  0xfff0000000000000ULL},
    427                                    {14, 4, 0x0,  0xff9b6cb57ca13c00ULL},
    428                                    {14, 7, 0x0,  0x8000000000000000ULL},
    429                                    {14, 9, 0x0,  0x7fffffffffffffffULL},
    430                                    {14, 11, 0x0, 0x7ff8000000000000ULL},
    431                                    {6, 8, 0x0,   0x0000000000000000ULL},
    432                                    {6, 14, 0x0,  0x0000000000000000ULL},
    433                                    {6, 6, 0x0,   0x7ff8000000000000ULL},
    434                                    {6, 5, 0x0,   0x7ff8000000000000ULL},
    435                                    {6, 4, 0x0,   0x8000000000000000ULL},
    436                                    {6, 7, 0x0,   0x8000000000000000ULL},
    437                                    {6, 9, 0x0,   0x7fffffffffffffffULL},
    438                                    {6, 11, 0x0,  0x7ff8000000000000ULL},
    439                                    {5, 8, 0x0,   0x8000000000000000ULL},
    440                                    {5, 14, 0x0,  0x8000000000000000ULL},
    441                                    {5, 6, 0x0,   0x7ff8000000000000ULL},
    442                                    {5, 5, 0x0,   0x7ff8000000000000ULL},
    443                                    {5, 4, 0x0,   0x0000000000000000ULL},
    444                                    {5, 7, 0x0,   0x0000000000000000ULL},
    445                                    {5, 9, 0x0,   0x7fffffffffffffffULL},
    446                                    {5, 11, 0x0,  0x7ff8000000000000ULL},
    447                                    {4, 8, 0x0,   0x8000000000000000ULL},
    448                                    {4, 14, 0x0,  0x8042ab59d8b6ec87ULL},
    449                                    {4, 6, 0x0,   0xfff0000000000000ULL},
    450                                    {4, 5, 0x0,   0x7ff0000000000000ULL},
    451                                    {4, 1, 0x0,   0x00c3bf3f64b5ad6bULL},
    452                                    {4, 7, 0x0,   0x0000000000000000ULL},
    453                                    {4, 9, 0x0,   0x7fffffffffffffffULL},
    454                                    {4, 11, 0x0,  0x7ff8000000000000ULL},
    455                                    {7, 8, 0x0,   0x7ff8000000000000ULL},
    456                                    {7, 14, 0x0,  0xfff0000000000000ULL},
    457                                    {7, 6, 0x0,   0xfff0000000000000ULL},
    458                                    {7, 5, 0x0,   0x7ff0000000000000ULL},
    459                                    {7, 4, 0x0,   0x7ff0000000000000ULL},
    460                                    {7, 7, 0x0,   0x7ff8000000000000ULL},
    461                                    {7, 9, 0x0,   0x7fffffffffffffffULL},
    462                                    {7, 11, 0x0,  0x7ff8000000000000ULL},
    463                                    {10, 8, 0x0,  0xffffffffffffffffULL},
    464                                    {10, 14, 0x0, 0xffffffffffffffffULL},
    465                                    {10, 6, 0x0,  0xffffffffffffffffULL},
    466                                    {10, 5, 0x0,  0xffffffffffffffffULL},
    467                                    {10, 4, 0x0,  0xffffffffffffffffULL},
    468                                    {10, 7, 0x0,  0xffffffffffffffffULL},
    469                                    {10, 9, 0x0,  0xffffffffffffffffULL},
    470                                    {10, 11, 0x0, 0xffffffffffffffffULL},
    471                                    {12, 8, 0x0,  0xfff8000000000000ULL},
    472                                    {12, 14, 0x0, 0xfff8000000000000ULL},
    473                                    {12, 6, 0x0,  0xfff8000000000000ULL},
    474                                    {12, 5, 0x0,  0xfff8000000000000ULL},
    475                                    {12, 4, 0x0,  0xfff8000000000000ULL},
    476                                    {12, 7, 0x0,  0xfff8000000000000ULL},
    477                                    {12, 9, 0x0,  0xfff8000000000000ULL},
    478                                    {12, 11, 0x0, 0xfff8000000000000ULL},
    479 };
    480 
    481 fp_test_args_t xsmaddXdp_tests[] = {
    482                                    {8, 8, 0x0,   0x7ff8000000000000ULL},
    483                                    {8, 14, 0x0,  0xfff0000000000000ULL},
    484                                    {8, 6, 0x0,   0x7ff0000000000000ULL},
    485                                    {8, 5, 0x0,   0xfff0000000000000ULL},
    486                                    {8, 4, 0x0,   0x7ff0000000000000ULL},
    487                                    {8, 7, 0x0,   0x7ff8000000000000ULL},
    488                                    {8, 9, 0x0,   0x7fffffffffffffffULL},
    489                                    {8, 11, 0x0,  0x7ff8000000000000ULL},
    490                                    {14, 8, 0x0,  0xfff0000000000000ULL},
    491                                    {14, 14, 0x0, 0xc0d0650f5a07b353ULL},
    492                                    {14, 6, 0x0,  0x41b0cc9d05eec2a7ULL},
    493                                    {14, 5, 0x0,  0x82039a19ca8fcb5fULL},
    494                                    {14, 4, 0x0,  0x41b0cc9d05eec2a7ULL},
    495                                    {14, 7, 0x0,  0x7ff0000000000000ULL},
    496                                    {14, 9, 0x0,  0x7fffffffffffffffULL},
    497                                    {14, 11, 0x0, 0x7ff8000000000000ULL},
    498                                    {6, 8, 0x0,   0xfff0000000000000ULL},
    499                                    {6, 14, 0x0,  0xc0d0650f5a07b353ULL},
    500                                    {6, 6, 0x0,   0x0000000000000000ULL},
    501                                    {6, 5, 0x0,   0x0000000000000000ULL},
    502                                    {6, 4, 0x0,   0x0123214569900000ULL},
    503                                    {6, 7, 0x0,   0x7ff0000000000000ULL},
    504                                    {6, 9, 0x0,   0x7fffffffffffffffULL},
    505                                    {6, 11, 0x0,  0x7ff8000000000000ULL},
    506                                    {5, 8, 0x0,   0xfff0000000000000ULL},
    507                                    {5, 14, 0x0,  0xc0d0650f5a07b353ULL},
    508                                    {5, 6, 0x0,   0x8000000000000000ULL},
    509                                    {5, 5, 0x0,   0x0000000000000000ULL},
    510                                    {5, 4, 0x0,   0x0123214569900000ULL},
    511                                    {5, 7, 0x0,   0x7ff0000000000000ULL},
    512                                    {5, 9, 0x0,   0x7fffffffffffffffULL},
    513                                    {5, 11, 0x0,  0x7ff8000000000000ULL},
    514                                    {4, 8, 0x0,   0xfff0000000000000ULL},
    515                                    {4, 14, 0x0,  0xc0d0650f5a07b353ULL},
    516                                    {4, 6, 0x0,   0x82039a19ca8fcb5fULL},
    517                                    {4, 5, 0x0,   0x0000000000000000ULL},
    518                                    {4, 1, 0x0,   0x404f000000000000ULL},
    519                                    {4, 7, 0x0,   0x7ff0000000000000ULL},
    520                                    {4, 9, 0x0,   0x7fffffffffffffffULL},
    521                                    {4, 11, 0x0,  0x7ff8000000000000ULL},
    522                                    {7, 8, 0x0,   0xfff0000000000000ULL},
    523                                    {7, 14, 0x0,  0x7ff0000000000000ULL},
    524                                    {7, 6, 0x0,   0xfff0000000000000ULL},
    525                                    {7, 5, 0x0,   0x7ff0000000000000ULL},
    526                                    {7, 4, 0x0,   0xfff0000000000000ULL},
    527                                    {7, 7, 0x0,   0x7ff0000000000000ULL},
    528                                    {7, 9, 0x0,   0x7fffffffffffffffULL},
    529                                    {7, 11, 0x0,  0x7ff8000000000000ULL},
    530                                    {10, 8, 0x0,  0xffffffffffffffffULL},
    531                                    {10, 14, 0x0, 0xffffffffffffffffULL},
    532                                    {10, 6, 0x0,  0xffffffffffffffffULL},
    533                                    {10, 5, 0x0,  0xffffffffffffffffULL},
    534                                    {10, 4, 0x0,  0xffffffffffffffffULL},
    535                                    {10, 7, 0x0,  0xffffffffffffffffULL},
    536                                    {10, 9, 0x0,  0xffffffffffffffffULL},
    537                                    {10, 11, 0x0, 0xffffffffffffffffULL},
    538                                    {12, 8, 0x0,  0xfff8000000000000ULL},
    539                                    {12, 14, 0x0, 0xfff8000000000000ULL},
    540                                    {12, 6, 0x0,  0xfff8000000000000ULL},
    541                                    {12, 5, 0x0,  0xfff8000000000000ULL},
    542                                    {12, 4, 0x0,  0xfff8000000000000ULL},
    543                                    {12, 7, 0x0,  0xfff8000000000000ULL},
    544                                    {12, 9, 0x0,  0xfff8000000000000ULL},
    545                                    {12, 11, 0x0, 0xfff8000000000000ULL},
    546 };
    547 
    /* Result table for the xsmsubXdp tests.
     *
     * Every row is a four-field fp_test_args_t initializer.  The first two
     * fields only take values that are entry numbers of the special-value
     * table documented in build_special_fargs_table() (e.g. 8 = -infinity,
     * 7 = +infinity, 5 = +0.0, 6 = -0.0); the third field is 0x0 in every
     * row; the fourth is a 64-bit pattern.  The second field cycles through
     * 8, 14, 6, 5, 4, 7, 9, 11 for each first-field value, except that the
     * group with first field 4 uses 1 where the other groups use 4.
     *
     * NOTE(review): presumably the two indices select operands from
     * spec_fargs[] and the last field is the expected double-precision
     * result as raw bits -- confirm against the fp_test_args_t definition
     * and the test driver, neither of which is visible here.
     */
    548 fp_test_args_t xsmsubXdp_tests[] = {
    549                                    {8, 8, 0x0,   0x7ff0000000000000ULL},
    550                                    {8, 14, 0x0,  0xfff0000000000000ULL},
    551                                    {8, 6, 0x0,   0x7ff0000000000000ULL},
    552                                    {8, 5, 0x0,   0xfff0000000000000ULL},
    553                                    {8, 4, 0x0,   0x7ff0000000000000ULL},
    554                                    {8, 7, 0x0,   0xfff0000000000000ULL},
    555                                    {8, 9, 0x0,   0x7fffffffffffffffULL},
    556                                    {8, 11, 0x0,  0x7ff8000000000000ULL},
    557                                    {14, 8, 0x0,  0x7ff0000000000000ULL},
    558                                    {14, 14, 0x0, 0x40d0650f5a07b353ULL},
    559                                    {14, 6, 0x0,  0x41b0cc9d05eec2a7ULL},
    560                                    {14, 5, 0x0,  0x82039a19ca8fcb5fULL},
    561                                    {14, 4, 0x0,  0x41b0cc9d05eec2a7ULL},
    562                                    {14, 7, 0x0,  0xfff0000000000000ULL},
    563                                    {14, 9, 0x0,  0x7fffffffffffffffULL},
    564                                    {14, 11, 0x0, 0x7ff8000000000000ULL},
    565                                    {6, 8, 0x0,   0x7ff0000000000000ULL},
    566                                    {6, 14, 0x0,  0x40d0650f5a07b353ULL},
    567                                    {6, 6, 0x0,   0x0000000000000000ULL},
    568                                    {6, 5, 0x0,   0x8000000000000000ULL},
    569                                    {6, 4, 0x0,   0x8123214569900000ULL},
    570                                    {6, 7, 0x0,   0xfff0000000000000ULL},
    571                                    {6, 9, 0x0,   0x7fffffffffffffffULL},
    572                                    {6, 11, 0x0,  0x7ff8000000000000ULL},
    573                                    {5, 8, 0x0,   0x7ff0000000000000ULL},
    574                                    {5, 14, 0x0,  0x40d0650f5a07b353ULL},
    575                                    {5, 6, 0x0,   0x0000000000000000ULL},
    576                                    {5, 5, 0x0,   0x0000000000000000ULL},
    577                                    {5, 4, 0x0,   0x8123214569900000ULL},
    578                                    {5, 7, 0x0,   0xfff0000000000000ULL},
    579                                    {5, 9, 0x0,   0x7fffffffffffffffULL},
    580                                    {5, 11, 0x0,  0x7ff8000000000000ULL},
    581                                    {4, 8, 0x0,   0x7ff0000000000000ULL},
    582                                    {4, 14, 0x0,  0x40d0650f5a07b353ULL},
    583                                    {4, 6, 0x0,   0x82039a19ca8fcb5fULL},
    584                                    {4, 5, 0x0,   0x0000000000000000ULL},
    585                                    {4, 1, 0x0,   0xc04f000000000000ULL},
    586                                    {4, 7, 0x0,   0xfff0000000000000ULL},
    587                                    {4, 9, 0x0,   0x7fffffffffffffffULL},
    588                                    {4, 11, 0x0,  0x7ff8000000000000ULL},
    589                                    {7, 8, 0x0,   0x7ff8000000000000ULL},
    590                                    {7, 14, 0x0,  0x7ff0000000000000ULL},
    591                                    {7, 6, 0x0,   0xfff0000000000000ULL},
    592                                    {7, 5, 0x0,   0x7ff0000000000000ULL},
    593                                    {7, 4, 0x0,   0xfff0000000000000ULL},
    594                                    {7, 7, 0x0,   0x7ff8000000000000ULL},
    595                                    {7, 9, 0x0,   0x7fffffffffffffffULL},
    596                                    {7, 11, 0x0,  0x7ff8000000000000ULL},
    597                                    {10, 8, 0x0,  0xffffffffffffffffULL},
    598                                    {10, 14, 0x0, 0xffffffffffffffffULL},
    599                                    {10, 6, 0x0,  0xffffffffffffffffULL},
    600                                    {10, 5, 0x0,  0xffffffffffffffffULL},
    601                                    {10, 4, 0x0,  0xffffffffffffffffULL},
    602                                    {10, 7, 0x0,  0xffffffffffffffffULL},
    603                                    {10, 9, 0x0,  0xffffffffffffffffULL},
    604                                    {10, 11, 0x0, 0xffffffffffffffffULL},
    605                                    {12, 8, 0x0,  0xfff8000000000000ULL},
    606                                    {12, 14, 0x0, 0xfff8000000000000ULL},
    607                                    {12, 6, 0x0,  0xfff8000000000000ULL},
    608                                    {12, 5, 0x0,  0xfff8000000000000ULL},
    609                                    {12, 4, 0x0,  0xfff8000000000000ULL},
    610                                    {12, 7, 0x0,  0xfff8000000000000ULL},
    611                                    {12, 9, 0x0,  0xfff8000000000000ULL},
    612                                    {12, 11, 0x0, 0xfff8000000000000ULL},
    613 };
    614 
    /* Result table for the xsnmaddXdp tests.
     *
     * Every row is a four-field fp_test_args_t initializer.  The first two
     * fields only take values that are entry numbers of the special-value
     * table documented in build_special_fargs_table(); the third field is
     * 0x0 in every row; the fourth is a 64-bit pattern.  Rows come in groups
     * of eight per first-field value (8, 14, 6, 5, 4, 7, 10, 12).
     *
     * NOTE(review): presumably the two indices select operands from
     * spec_fargs[] and the last field is the expected double-precision
     * result as raw bits -- confirm against the fp_test_args_t definition
     * and the test driver, neither of which is visible here.
     */
    615 fp_test_args_t xsnmaddXdp_tests[] = {
    616                                      {8, 8, 0x0,   0x7ff8000000000000ULL},
    617                                      {8, 14, 0x0,  0x7ff0000000000000ULL},
    618                                      {8, 6, 0x0,   0xfff0000000000000ULL},
    619                                      {8, 5, 0x0,   0x7ff0000000000000ULL},
    620                                      {8, 4, 0x0,   0xfff0000000000000ULL},
    621                                      {8, 7, 0x0,   0x7ff8000000000000ULL},
    622                                      {8, 9, 0x0,   0x7fffffffffffffffULL},
    623                                      {8, 11, 0x0,  0x7ff8000000000000ULL},
    624                                      {14, 8, 0x0,  0x7ff0000000000000ULL},
    625                                      {14, 14, 0x0, 0x40d0650f5a07b353ULL},
    626                                      {14, 6, 0x0,  0xc1b0cc9d05eec2a7ULL},
    627                                      {14, 5, 0x0,  0x02039a19ca8fcb5fULL},
    628                                      {14, 4, 0x0,  0xc1b0cc9d05eec2a7ULL},
    629                                      {14, 7, 0x0,  0xfff0000000000000ULL},
    630                                      {14, 9, 0x0,  0x7fffffffffffffffULL},
    631                                      {14, 11, 0x0, 0x7ff8000000000000ULL},
    632                                      {6, 8, 0x0,   0x7ff0000000000000ULL},
    633                                      {6, 14, 0x0,  0x40d0650f5a07b353ULL},
    634                                      {6, 6, 0x0,   0x8000000000000000ULL},
    635                                      {6, 5, 0x0,   0x8000000000000000ULL},
    636                                      {6, 4, 0x0,   0x8123214569900000ULL},
    637                                      {6, 7, 0x0,   0xfff0000000000000ULL},
    638                                      {6, 9, 0x0,   0x7fffffffffffffffULL},
    639                                      {6, 11, 0x0,  0x7ff8000000000000ULL},
    640                                      {5, 8, 0x0,   0x7ff0000000000000ULL},
    641                                      {5, 14, 0x0,  0x40d0650f5a07b353ULL},
    642                                      {5, 6, 0x0,   0x0000000000000000ULL},
    643                                      {5, 5, 0x0,   0x8000000000000000ULL},
    644                                      {5, 4, 0x0,   0x8123214569900000ULL},
    645                                      {5, 7, 0x0,   0xfff0000000000000ULL},
    646                                      {5, 9, 0x0,   0x7fffffffffffffffULL},
    647                                      {5, 11, 0x0,  0x7ff8000000000000ULL},
    648                                      {4, 8, 0x0,   0x7ff0000000000000ULL},
    649                                      {4, 14, 0x0,  0x40d0650f5a07b353ULL},
    650                                      {4, 6, 0x0,   0x02039a19ca8fcb5fULL},
    651                                      {4, 5, 0x0,   0x8000000000000000ULL},
    652                                      {4, 1, 0x0,   0xc04f000000000000ULL},
    653                                      {4, 7, 0x0,   0xfff0000000000000ULL},
    654                                      {4, 9, 0x0,   0x7fffffffffffffffULL},
    655                                      {4, 11, 0x0,  0x7ff8000000000000ULL},
    656                                      {7, 8, 0x0,   0x7ff0000000000000ULL},
    657                                      {7, 14, 0x0,  0xfff0000000000000ULL},
    658                                      {7, 6, 0x0,   0x7ff0000000000000ULL},
    659                                      {7, 5, 0x0,   0xfff0000000000000ULL},
    660                                      {7, 4, 0x0,   0x7ff0000000000000ULL},
    661                                      {7, 7, 0x0,   0xfff0000000000000ULL},
    662                                      {7, 9, 0x0,   0x7fffffffffffffffULL},
    663                                      {7, 11, 0x0,  0x7ff8000000000000ULL},
    664                                      {10, 8, 0x0,  0xffffffffffffffffULL},
    665                                      {10, 14, 0x0, 0xffffffffffffffffULL},
    666                                      {10, 6, 0x0,  0xffffffffffffffffULL},
    667                                      {10, 5, 0x0,  0xffffffffffffffffULL},
    668                                      {10, 4, 0x0,  0xffffffffffffffffULL},
    669                                      {10, 7, 0x0,  0xffffffffffffffffULL},
    670                                      {10, 9, 0x0,  0xffffffffffffffffULL},
    671                                      {10, 11, 0x0, 0xffffffffffffffffULL},
    672                                      {12, 8, 0x0,  0xfff8000000000000ULL},
    673                                      {12, 14, 0x0, 0xfff8000000000000ULL},
    674                                      {12, 6, 0x0,  0xfff8000000000000ULL},
    675                                      {12, 5, 0x0,  0xfff8000000000000ULL},
    676                                      {12, 4, 0x0,  0xfff8000000000000ULL},
    677                                      {12, 7, 0x0,  0xfff8000000000000ULL},
    678                                      {12, 9, 0x0,  0xfff8000000000000ULL},
    679                                      {12, 11, 0x0, 0xfff8000000000000ULL},
    680 };
    681 
    /* Result table for the xsmuldp tests.
     *
     * Every row is a four-field fp_test_args_t initializer.  The first two
     * fields only take values that are entry numbers of the special-value
     * table documented in build_special_fargs_table(); the third field is
     * 0x0 in every row; the fourth is a 64-bit pattern.  Rows come in groups
     * of eight per first-field value (8, 14, 6, 5, 4, 7, 10, 12).
     *
     * NOTE(review): presumably the two indices select the multiplicands
     * from spec_fargs[] and the last field is the expected double-precision
     * product as raw bits -- confirm against the fp_test_args_t definition
     * and the test driver, neither of which is visible here.
     */
    682 fp_test_args_t xsmuldp_tests[] = {
    683                                   {8, 8, 0x0,   0x7ff0000000000000ULL},
    684                                   {8, 14, 0x0,  0x7ff0000000000000ULL},
    685                                   {8, 6, 0x0,   0x7ff8000000000000ULL},
    686                                   {8, 5, 0x0,   0x7ff8000000000000ULL},
    687                                   {8, 4, 0x0,   0xfff0000000000000ULL},
    688                                   {8, 7, 0x0,   0xfff0000000000000ULL},
    689                                   {8, 9, 0x0,   0x7fffffffffffffffULL},
    690                                   {8, 11, 0x0,  0x7ff8000000000000ULL},
    691                                   {14, 8, 0x0,  0x7ff0000000000000ULL},
    692                                   {14, 14, 0x0, 0x41b0cc9d05eec2a7ULL},
    693                                   {14, 6, 0x0,  0x0000000000000000ULL},
    694                                   {14, 5, 0x0,  0x8000000000000000ULL},
    695                                   {14, 4, 0x0,  0x82039a19ca8fcb5fULL},
    696                                   {14, 7, 0x0,  0xfff0000000000000ULL},
    697                                   {14, 9, 0x0,  0x7fffffffffffffffULL},
    698                                   {14, 11, 0x0, 0x7ff8000000000000ULL},
    699                                   {6, 8, 0x0,   0x7ff8000000000000ULL},
    700                                   {6, 14, 0x0,  0x0000000000000000ULL},
    701                                   {6, 6, 0x0,   0x0000000000000000ULL},
    702                                   {6, 5, 0x0,   0x8000000000000000ULL},
    703                                   {6, 4, 0x0,   0x8000000000000000ULL},
    704                                   {6, 7, 0x0,   0x7ff8000000000000ULL},
    705                                   {6, 9, 0x0,   0x7fffffffffffffffULL},
    706                                   {6, 11, 0x0,  0x7ff8000000000000ULL},
    707                                   {5, 8, 0x0,   0x7ff8000000000000ULL},
    708                                   {5, 14, 0x0,  0x8000000000000000ULL},
    709                                   {5, 6, 0x0,   0x8000000000000000ULL},
    710                                   {5, 5, 0x0,   0x0000000000000000ULL},
    711                                   {5, 4, 0x0,   0x0000000000000000ULL},
    712                                   {5, 7, 0x0,   0x7ff8000000000000ULL},
    713                                   {5, 9, 0x0,   0x7fffffffffffffffULL},
    714                                   {5, 11, 0x0,  0x7ff8000000000000ULL},
    715                                   {4, 8, 0x0,   0xfff0000000000000ULL},
    716                                   {4, 14, 0x0,  0x82039a19ca8fcb5fULL},
    717                                   {4, 6, 0x0,   0x8000000000000000ULL},
    718                                   {4, 5, 0x0,   0x0000000000000000ULL},
    719                                   {4, 1, 0x0,   0x0182883b3e438000ULL},
    720                                   {4, 7, 0x0,   0x7ff0000000000000ULL},
    721                                   {4, 9, 0x0,   0x7fffffffffffffffULL},
    722                                   {4, 11, 0x0,  0x7ff8000000000000ULL},
    723                                   {7, 8, 0x0,   0xfff0000000000000ULL},
    724                                   {7, 14, 0x0,  0xfff0000000000000ULL},
    725                                   {7, 6, 0x0,   0x7ff8000000000000ULL},
    726                                   {7, 5, 0x0,   0x7ff8000000000000ULL},
    727                                   {7, 4, 0x0,   0x7ff0000000000000ULL},
    728                                   {7, 7, 0x0,   0x7ff0000000000000ULL},
    729                                   {7, 9, 0x0,   0x7fffffffffffffffULL},
    730                                   {7, 11, 0x0,  0x7ff8000000000000ULL},
    731                                   {10, 8, 0x0,  0xffffffffffffffffULL},
    732                                   {10, 14, 0x0, 0xffffffffffffffffULL},
    733                                   {10, 6, 0x0,  0xffffffffffffffffULL},
    734                                   {10, 5, 0x0,  0xffffffffffffffffULL},
    735                                   {10, 4, 0x0,  0xffffffffffffffffULL},
    736                                   {10, 7, 0x0,  0xffffffffffffffffULL},
    737                                   {10, 9, 0x0,  0xffffffffffffffffULL},
    738                                   {10, 11, 0x0, 0xffffffffffffffffULL},
    739                                   {12, 8, 0x0,  0xfff8000000000000ULL},
    740                                   {12, 14, 0x0, 0xfff8000000000000ULL},
    741                                   {12, 6, 0x0,  0xfff8000000000000ULL},
    742                                   {12, 5, 0x0,  0xfff8000000000000ULL},
    743                                   {12, 4, 0x0,  0xfff8000000000000ULL},
    744                                   {12, 7, 0x0,  0xfff8000000000000ULL},
    745                                   {12, 9, 0x0,  0xfff8000000000000ULL},
    746                                   {12, 11, 0x0, 0xfff8000000000000ULL},
    747 };
    748 
    /* Result table for the xssubdp tests.
     *
     * Every row is a four-field fp_test_args_t initializer.  The first two
     * fields only take values that are entry numbers of the special-value
     * table documented in build_special_fargs_table(); the third field is
     * 0x0 in every row; the fourth is a 64-bit pattern.  Rows come in groups
     * of eight per first-field value (8, 14, 6, 5, 4, 7, 10, 12).
     *
     * NOTE(review): presumably the two indices select operands from
     * spec_fargs[] and the last field is the expected double-precision
     * difference as raw bits -- confirm against the fp_test_args_t
     * definition and the test driver, neither of which is visible here.
     */
    749 fp_test_args_t xssubdp_tests[] = {
    750                                   {8, 8, 0x0,   0x7ff8000000000000ULL},
    751                                   {8, 14, 0x0,  0xfff0000000000000ULL},
    752                                   {8, 6, 0x0,   0xfff0000000000000ULL},
    753                                   {8, 5, 0x0,   0xfff0000000000000ULL},
    754                                   {8, 4, 0x0,   0xfff0000000000000ULL},
    755                                   {8, 7, 0x0,   0xfff0000000000000ULL},
    756                                   {8, 9, 0x0,   0x7fffffffffffffffULL},
    757                                   {8, 11, 0x0,  0x7ff8000000000000ULL},
    758                                   {14, 8, 0x0,  0x7ff0000000000000ULL},
    759                                   {14, 14, 0x0, 0x0000000000000000ULL},
    760                                   {14, 6, 0x0,  0xc0d0650f5a07b353ULL},
    761                                   {14, 5, 0x0,  0xc0d0650f5a07b353ULL},
    762                                   {14, 4, 0x0,  0xc0d0650f5a07b353ULL},
    763                                   {14, 7, 0x0,  0xfff0000000000000ULL},
    764                                   {14, 9, 0x0,  0x7fffffffffffffffULL},
    765                                   {14, 11, 0x0, 0x7ff8000000000000ULL},
    766                                   {6, 8, 0x0,   0x7ff0000000000000ULL},
    767                                   {6, 14, 0x0,  0x40d0650f5a07b353ULL},
    768                                   {6, 6, 0x0,   0x0000000000000000ULL},
    769                                   {6, 5, 0x0,   0x8000000000000000ULL},
    770                                   {6, 4, 0x0,   0x8123214569900000ULL},
    771                                   {6, 7, 0x0,   0xfff0000000000000ULL},
    772                                   {6, 9, 0x0,   0x7fffffffffffffffULL},
    773                                   {6, 11, 0x0,  0x7ff8000000000000ULL},
    774                                   {5, 8, 0x0,   0x7ff0000000000000ULL},
    775                                   {5, 14, 0x0,  0x40d0650f5a07b353ULL},
    776                                   {5, 6, 0x0,   0x0000000000000000ULL},
    777                                   {5, 5, 0x0,   0x0000000000000000ULL},
    778                                   {5, 4, 0x0,   0x8123214569900000ULL},
    779                                   {5, 7, 0x0,   0xfff0000000000000ULL},
    780                                   {5, 9, 0x0,   0x7fffffffffffffffULL},
    781                                   {5, 11, 0x0,  0x7ff8000000000000ULL},
    782                                   {4, 8, 0x0,   0x7ff0000000000000ULL},
    783                                   {4, 14, 0x0,  0x40d0650f5a07b353ULL},
    784                                   {4, 6, 0x0,   0x0123214569900000ULL},
    785                                   {4, 5, 0x0,   0x0123214569900000ULL},
    786                                   {4, 1, 0x0,   0xc04f000000000000ULL},
    787                                   {4, 7, 0x0,   0xfff0000000000000ULL},
    788                                   {4, 9, 0x0,   0x7fffffffffffffffULL},
    789                                   {4, 11, 0x0,  0x7ff8000000000000ULL},
    790                                   {7, 8, 0x0,   0x7ff0000000000000ULL},
    791                                   {7, 14, 0x0,  0x7ff0000000000000ULL},
    792                                   {7, 6, 0x0,   0x7ff0000000000000ULL},
    793                                   {7, 5, 0x0,   0x7ff0000000000000ULL},
    794                                   {7, 4, 0x0,   0x7ff0000000000000ULL},
    795                                   {7, 7, 0x0,   0x7ff8000000000000ULL},
    796                                   {7, 9, 0x0,   0x7fffffffffffffffULL},
    797                                   {7, 11, 0x0,  0x7ff8000000000000ULL},
    798                                   {10, 8, 0x0,  0xffffffffffffffffULL},
    799                                   {10, 14, 0x0, 0xffffffffffffffffULL},
    800                                   {10, 6, 0x0,  0xffffffffffffffffULL},
    801                                   {10, 5, 0x0,  0xffffffffffffffffULL},
    802                                   {10, 4, 0x0,  0xffffffffffffffffULL},
    803                                   {10, 7, 0x0,  0xffffffffffffffffULL},
    804                                   {10, 9, 0x0,  0xffffffffffffffffULL},
    805                                   {10, 11, 0x0, 0xffffffffffffffffULL},
    806                                   {12, 8, 0x0,  0xfff8000000000000ULL},
    807                                   {12, 14, 0x0, 0xfff8000000000000ULL},
    808                                   {12, 6, 0x0,  0xfff8000000000000ULL},
    809                                   {12, 5, 0x0,  0xfff8000000000000ULL},
    810                                   {12, 4, 0x0,  0xfff8000000000000ULL},
    811                                   {12, 7, 0x0,  0xfff8000000000000ULL},
    812                                   {12, 9, 0x0,  0xfff8000000000000ULL},
    813                                   {12, 11, 0x0, 0xfff8000000000000ULL},
    814 };
    815 
    816 
    817 
    /* State owned by build_special_fargs_table(): spec_fargs is the
     * heap-allocated array of special double values (null until the table has
     * been built, which is what the builder's "already built" guard tests),
     * and nb_special_fargs is the number of entries the builder registered.
     */
    818 static int nb_special_fargs;
    819 static double * spec_fargs;
    820 
    821 static void build_special_fargs_table(void)
    822 {
    823    /* The special floating point values created below are for
    824     * use in the ftdiv tests for setting the fe_flag and fg_flag,
    825     * but they can also be used for other tests (e.g., xscmpudp).
    826     *
    827     * Note that fl_flag is 'always '1' on ppc64 Linux.
    828     *
    829   Entry  Sign Exp   fraction                  Special value
    830    0      0   3fd   0x8000000000000ULL         Positive finite number
    831    1      0   404   0xf000000000000ULL         ...
    832    2      0   001   0x8000000b77501ULL         ...
    833    3      0   7fe   0x800000000051bULL         ...
    834    4      0   012   0x3214569900000ULL         ...
    835    5      0   000   0x0000000000000ULL         +0.0 (+zero)
    836    6      1   000   0x0000000000000ULL         -0.0 (-zero)
    837    7      0   7ff   0x0000000000000ULL         +infinity
    838    8      1   7ff   0x0000000000000ULL         -infinity
    839    9      0   7ff   0x7FFFFFFFFFFFFULL         +QNaN
    840    10     1   7ff   0x7FFFFFFFFFFFFULL         -QNaN
    841    11     0   7ff   0x8000000000000ULL         +SNaN
    842    12     1   7ff   0x8000000000000ULL         -SNaN
    843    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
    844    14     1   40d   0x0650f5a07b353ULL         Negative finite number
    845     */
    846 
    847    uint64_t mant;
    848    uint16_t _exp;
    849    int s;
    850    int i = 0;
    851 
    852    if (spec_fargs)
    853       return;
    854 
    855    spec_fargs = malloc( 16 * sizeof(double) );
    856 
    857    // #0
    858    s = 0;
    859    _exp = 0x3fd;
    860    mant = 0x8000000000000ULL;
    861    register_farg(&spec_fargs[i++], s, _exp, mant);
    862 
    863    // #1
    864    s = 0;
    865    _exp = 0x404;
    866    mant = 0xf000000000000ULL;
    867    register_farg(&spec_fargs[i++], s, _exp, mant);
    868 
    869    /* None of the ftdiv tests succeed.
    870     * FRA = value #0; FRB = value #1
    871     * ea_ = -2; e_b = 5
    872     * fl_flag || fg_flag || fe_flag = 100
    873     */
    874 
    875    /*************************************************
    876     *     fe_flag tests
    877     *
    878     *************************************************/
    879 
    880    /* fe_flag <- 1 if FRA is a NaN
    881     * FRA = value #9; FRB = value #1
    882     * e_a = 1024; e_b = 5
    883     * fl_flag || fg_flag || fe_flag = 101
    884     */
    885 
    886    /* fe_flag <- 1 if FRB is a NaN
    887     * FRA = value #1; FRB = value #12
    888     * e_a = 5; e_b = 1024
    889     * fl_flag || fg_flag || fe_flag = 101
    890     */
    891 
    892    /* fe_flag <- 1 if e_b <= -1022
    893     * FRA = value #0; FRB = value #2
    894     * e_a = -2; e_b = -1022
    895     * fl_flag || fg_flag || fe_flag = 101
    896     *
    897     */
    898    // #2
    899    s = 0;
    900    _exp = 0x001;
    901    mant = 0x8000000b77501ULL;
    902    register_farg(&spec_fargs[i++], s, _exp, mant);
    903 
    904    /* fe_flag <- 1 if e_b >= 1021
    905     * FRA = value #1; FRB = value #3
    906     * e_a = 5; e_b = 1023
    907     * fl_flag || fg_flag || fe_flag = 101
    908     */
    909    // #3
    910    s = 0;
    911    _exp = 0x7fe;
    912    mant = 0x800000000051bULL;
    913    register_farg(&spec_fargs[i++], s, _exp, mant);
    914 
    915    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
    916     * Let FRA = value #3 and FRB be value #0.
    917     * e_a = 1023; e_b = -2
    918     * fl_flag || fg_flag || fe_flag = 101
    919     */
    920 
    921    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
    922     * Let FRA = value #0 above and FRB be value #3 above
    923     * e_a = -2; e_b = 1023
    924     * fl_flag || fg_flag || fe_flag = 101
    925     */
    926 
    927    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
    928     * Let FRA = value #4 and FRB be value #0
    929     * e_a = -1005; e_b = -2
    930     * fl_flag || fg_flag || fe_flag = 101
    931    */
    932    // #4
    933    s = 0;
    934    _exp = 0x012;
    935    mant = 0x3214569900000ULL;
    936    register_farg(&spec_fargs[i++], s, _exp, mant);
    937 
    938    /*************************************************
    939     *     fg_flag tests
    940     *
    941     *************************************************/
    942    /* fg_flag <- 1 if FRA is an Infinity
    943     * NOTE: FRA = Inf also sets fe_flag
    944     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
    945     * Test 1:
    946     *   Let FRA be value #7 and FRB be value #1
    947     *   e_a = 1024; e_b = 5
    948     *   fl_flag || fg_flag || fe_flag = 111
    949     *
    950     * Test 2:
    951     *   Let FRA be value #8 and FRB be value #1
    952     *   e_a = 1024; e_b = 5
    953     *   fl_flag || fg_flag || fe_flag = 111
    954     *
    955     */
    956 
    957    /* fg_flag <- 1 if FRB is an Infinity
    958     * NOTE: FRB = Inf also sets fe_flag
    959     * Let FRA be value #1 and FRB be value #7
    960     * e_a = 5; e_b = 1024
    961     * fl_flag || fg_flag || fe_flag = 111
    962     */
    963 
    964    /* fg_flag <- 1 if FRB is denormalized
    965     * NOTE: e_b < -1022 ==> fe_flag <- 1
    966     * Let FRA be value #0 and FRB be value #13
    967     * e_a = -2; e_b = -1023
    968     * fl_flag || fg_flag || fe_flag = 111
    969     */
    970 
    971    /* fg_flag <- 1 if FRB is +zero
    972     * NOTE: FRA = Inf also sets fe_flag
    973     * Let FRA = val #5; FRB = val #5
    974     * ea_ = -1023; e_b = -1023
    975     * fl_flag || fg_flag || fe_flag = 111
    976     */
    977 
    978    /* fg_flag <- 1 if FRB is -zero
    979     * NOTE: FRA = Inf also sets fe_flag
    980     * Let FRA = val #5; FRB = val #6
    981     * ea_ = -1023; e_b = -1023
    982     * fl_flag || fg_flag || fe_flag = 111
    983     */
    984 
    985    /* Special values */
    986    /* +0.0      : 0 0x000 0x0000000000000 */
    987    // #5
    988    s = 0;
    989    _exp = 0x000;
    990    mant = 0x0000000000000ULL;
    991    register_farg(&spec_fargs[i++], s, _exp, mant);
    992 
    993    /* -0.0      : 1 0x000 0x0000000000000 */
    994    // #6
    995    s = 1;
    996    _exp = 0x000;
    997    mant = 0x0000000000000ULL;
    998    register_farg(&spec_fargs[i++], s, _exp, mant);
    999 
   1000    /* +infinity : 0 0x7FF 0x0000000000000  */
   1001    // #7
   1002    s = 0;
   1003    _exp = 0x7FF;
   1004    mant = 0x0000000000000ULL;
   1005    register_farg(&spec_fargs[i++], s, _exp, mant);
   1006 
   1007    /* -infinity : 1 0x7FF 0x0000000000000 */
   1008    // #8
   1009    s = 1;
   1010    _exp = 0x7FF;
   1011    mant = 0x0000000000000ULL;
   1012    register_farg(&spec_fargs[i++], s, _exp, mant);
   1013 
   1014    /* +QNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
   1015    // #9
   1016    s = 0;
   1017    _exp = 0x7FF;
   1018    mant = 0x7FFFFFFFFFFFFULL;
   1019    register_farg(&spec_fargs[i++], s, _exp, mant);
   1020 
   1021    /* -QNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
   1022    // #10
   1023    s = 1;
   1024    _exp = 0x7FF;
   1025    mant = 0x7FFFFFFFFFFFFULL;
   1026    register_farg(&spec_fargs[i++], s, _exp, mant);
   1027 
   1028    /* +SNaN     : 0 0x7FF 0x8000000000000 */
   1029    // #11
   1030    s = 0;
   1031    _exp = 0x7FF;
   1032    mant = 0x8000000000000ULL;
   1033    register_farg(&spec_fargs[i++], s, _exp, mant);
   1034 
   1035    /* -SNaN     : 1 0x7FF 0x8000000000000 */
   1036    // #12
   1037    s = 1;
   1038    _exp = 0x7FF;
   1039    mant = 0x8000000000000ULL;
   1040    register_farg(&spec_fargs[i++], s, _exp, mant);
   1041 
   1042    /* denormalized value */
   1043    // #13
   1044    s = 1;
   1045    _exp = 0x000;
   1046    mant = 0x8340000078000ULL;
   1047    register_farg(&spec_fargs[i++], s, _exp, mant);
   1048 
   1049    /* Negative finite number */
   1050    // #14
   1051    s = 1;
   1052    _exp = 0x40d;
   1053    mant = 0x0650f5a07b353ULL;
   1054    register_farg(&spec_fargs[i++], s, _exp, mant);
   1055 
   1056    nb_special_fargs = i;
   1057 }
   1058 
   1059 
   /* Pairs a test_func_t entry point (test_category) with a name string. */
   1060 struct test_table
   1061 {
   1062    test_func_t test_category;
   1063    char * name;
   1064 };
   1065 
   /* Descriptor for one floating-point test: the function to run, its name,
    * and whether the result is single or double precision.
    */
   1066 struct p7_fp_test
   1067 {
   1068    test_func_t test_func;
   1069    const char *name;
   1070    int single;  // 1=single precision result; 0=double precision result
   1071 };
   1072 
   /* Category tag stored in struct vx_fp_test.test_type.
    * NOTE(review): the names suggest compare, multiply-add,
    * multiply-subtract, negative multiply-add and "everything else", but
    * the code that switches on these values is outside this chunk -- confirm
    * there.
    */
   1073 typedef enum {
   1074    VX_FP_CMP,
   1075    VX_FP_SMA,
   1076    VX_FP_SMS,
   1077    VX_FP_SNMA,
   1078    VX_FP_OTHER
   1079 } vx_fp_test_type;
   1080 
   /* Descriptor for one table-driven floating-point test: the function to
    * run, its name, a pointer to an fp_test_args_t table (targs) with
    * num_tests giving a count, and the test's category.
    * NOTE(review): num_tests is presumably the number of rows in targs --
    * confirm against the tables that instantiate this struct.
    */
   1081 struct vx_fp_test
   1082 {
   1083    test_func_t test_func;
   1084    const char *name;
   1085    fp_test_args_t * targs;
   1086    int num_tests;
   1087    vx_fp_test_type test_type;
   1088 };
   1089 
   /* Descriptor for one conversion test: the function to run, its name, a
    * pointer to an array of 64-bit values (results) and a count (num_tests).
    * NOTE(review): results presumably holds the expected outputs, one per
    * test -- confirm against the driver.
    */
   1090 struct xs_conv_test
   1091 {
   1092    test_func_t test_func;
   1093    const char *name;
   1094    unsigned long long * results;
   1095    int num_tests;
   1096 };
   1097 
   /* Kind of memory operation recorded in struct ldst_test.type.  Numbering
    * starts at 1, so a zero value matches none of the enumerators.
    */
   1098 typedef enum {
   1099    VSX_LOAD =1,
   1100    VSX_LOAD_SPLAT,
   1101    VSX_STORE
   1102 } vsx_ldst_type;
   1103 
   /* Descriptor for one load/store test: the function to run, its name, a
    * base address and byte offset, a word count, and the operation kind.
    * NOTE(review): base_addr and offset are presumably placed in r14 and
    * r15 for the load/store wrappers below -- confirm against the driver.
    */
   1104 struct ldst_test
   1105 {
   1106    test_func_t test_func;
   1107    const char *name;
   1108    void * base_addr;
   1109    uint32_t offset;
   1110    int num_words_to_process;
   1111    vsx_ldst_type type;
   1112 };
   1113 
   /* Logical operation recorded in struct vsx_logic_test.op.  Numbering
    * starts at 1, so a zero value matches none of the enumerators.
    */
   1114 typedef enum {
   1115    VSX_AND = 1,
   1116    VSX_XOR,
   1117    VSX_ANDC,
   1118    VSX_OR,
   1119    VSX_NOR
   1120 } vsx_log_op;
   1121 
   /* Descriptor for one logical-instruction test: the function to run, its
    * name, and which logical operation it performs.
    */
   1122 struct vsx_logic_test
   1123 {
   1124    test_func_t test_func;
   1125    const char *name;
   1126    vsx_log_op op;
   1127 };
   1128 
   /* Descriptor for one move-type test: the function to run, its name, two
    * operand indices and the expected 64-bit result.
    * NOTE(review): xa_idx / xb_idx presumably index spec_fargs[] -- confirm
    * against the driver.
    */
   1129 struct vsx_move_test
   1130 {
   1131    test_func_t test_func;
   1132    const char *name;
   1133    int xa_idx, xb_idx;
   1134    unsigned long long expected_result;
   1135 };
   1136 
   /* Descriptor for one permute/merge test: the function to run, its name,
    * the two input operands and the expected output, each given as four
    * unsigned int words.
    */
   1137 struct vsx_permute_test
   1138 {
   1139    test_func_t test_func;
   1140    const char *name;
   1141    unsigned int xa[4];
   1142    unsigned int xb[4];
   1143    unsigned int expected_output[4];
   1144 };
   1145 
   /* Vector operands shared by the inline-asm wrappers below: vec_out
    * receives the result of an instruction, vec_inA and vec_inB supply its
    * inputs.
    */
   1146 static vector unsigned int vec_out, vec_inA, vec_inB;
   1147 
   /* Execute one lxsdx (load VSX scalar doubleword, indexed form).
    * The target is vec_out ("wa": any VSX register); the address operands
    * are the global register variables r14 ("b": base register) and r15
    * (index register).  The caller sets r14/r15 before the call.
    */
   1148 static void test_lxsdx(void)
   1149 {
   1150    __asm__ __volatile__ ("lxsdx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1151 }
   1152 
   /* Execute one lxvd2x (load VSX vector of two doublewords, indexed form).
    * The target is vec_out; the address operands are the global register
    * variables r14 (base) and r15 (index), set by the caller.
    */
   1153 static void
   1154 test_lxvd2x(void)
   1155 {
   1156    __asm__ __volatile__ ("lxvd2x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1157 }
   1158 
   /* Execute one lxvdsx (load VSX vector doubleword and splat, indexed form).
    * The target is vec_out; the address operands are the global register
    * variables r14 (base) and r15 (index), set by the caller.
    */
   1159 static void test_lxvdsx(void)
   1160 {
   1161    __asm__ __volatile__ ("lxvdsx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1162 }
   1163 
   /* Execute one lxvw4x (load VSX vector of four words, indexed form).
    * The target is vec_out; the address operands are the global register
    * variables r14 (base) and r15 (index), set by the caller.
    */
   1164 static void test_lxvw4x(void)
   1165 {
   1166    __asm__ __volatile__ ("lxvw4x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
   1167 }
   1168 
   /* Execute one stxsdx (store VSX scalar doubleword, indexed form).
    * The source is vec_inA ("wa": any VSX register); the address operands
    * are the global register variables r14 (base) and r15 (index), set by
    * the caller.  There are no output operands.
    */
   1169 static void test_stxsdx(void)
   1170 {
   1171    __asm__ __volatile__ ("stxsdx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1172 }
   1173 
   1174 static void test_stxvd2x(void)
   1175 {
   1176    __asm__ __volatile__ ("stxvd2x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1177 }
   1178 
   1179 static void test_stxvw4x(void)
   1180 {
   1181    __asm__ __volatile__ ("stxvw4x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
   1182 }
   1183 
   1184 static void test_xxlxor(void)
   1185 {
   1186    __asm__ __volatile__ ("xxlxor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1187 }
   1188 
   1189 static void test_xxlor(void)
   1190 {
   1191    __asm__ __volatile__ ("xxlor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1192 }
   1193 
   1194 static void test_xxlnor(void)
   1195 {
   1196    __asm__ __volatile__ ("xxlnor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1197 }
   1198 
   1199 static void test_xxland(void)
   1200 {
   1201    __asm__ __volatile__ ("xxland          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1202 }
   1203 
   1204 static void test_xxlandc(void)
   1205 {
   1206    __asm__ __volatile__ ("xxlandc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1207 }
   1208 
   1209 static void test_xxmrghw(void)
   1210 {
   1211    __asm__ __volatile__ ("xxmrghw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1212 }
   1213 
   1214 static void test_xxmrglw(void)
   1215 {
   1216    __asm__ __volatile__ ("xxmrglw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1217 }
   1218 
   1219 static void test_xxpermdi_00(void)
   1220 {
   1221    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1222 }
   1223 
   1224 static void test_xxpermdi_01(void)
   1225 {
   1226    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1227 }
   1228 
   1229 static void test_xxpermdi_10(void)
   1230 {
   1231    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1232 }
   1233 
   1234 static void test_xxpermdi_11(void)
   1235 {
   1236    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1237 }
   1238 
   1239 static void test_xxsldwi_0(void)
   1240 {
   1241    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1242 }
   1243 
   1244 static void test_xxsldwi_1(void)
   1245 {
   1246    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1247 }
   1248 
   1249 static void test_xxsldwi_2(void)
   1250 {
   1251    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1252 }
   1253 
   1254 static void test_xxsldwi_3(void)
   1255 {
   1256    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1257 }
   1258 
   1259 static void test_fcfids (void)
   1260 {
   1261     __asm__ __volatile__ ("fcfids          %0, %1" : "=f" (f17): "d" (f14));
   1262 }
   1263 
   1264 static void test_fcfidus (void)
   1265 {
   1266     __asm__ __volatile__ ("fcfidus          %0, %1" : "=f" (f17): "d" (f14));
   1267 }
   1268 
   1269 static void test_fcfidu (void)
   1270 {
   1271     __asm__ __volatile__ ("fcfidu          %0, %1" : "=f" (f17): "d" (f14));
   1272 }
   1273 
   1274 static void test_xsabsdp (void)
   1275 {
   1276    __asm__ __volatile__ ("xsabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1277 }
   1278 
   1279 static void test_xscpsgndp (void)
   1280 {
   1281    __asm__ __volatile__ ("xscpsgndp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1282 }
   1283 
   1284 static void test_xsnabsdp (void)
   1285 {
   1286    __asm__ __volatile__ ("xsnabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1287 }
   1288 
   1289 static void test_xsnegdp (void)
   1290 {
   1291    __asm__ __volatile__ ("xsnegdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1292 }
   1293 
   1294 static int do_cmpudp;
   1295 static void test_xscmp (void)
   1296 {
   1297    if (do_cmpudp)
   1298       __asm__ __volatile__ ("xscmpudp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
   1299    else
   1300       __asm__ __volatile__ ("xscmpodp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
   1301 }
   1302 
   1303 static void test_xsadddp(void)
   1304 {
   1305    __asm__ __volatile__ ("xsadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1306 }
   1307 
   1308 static void test_xsdivdp(void)
   1309 {
   1310    __asm__ __volatile__ ("xsdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1311 }
   1312 
   1313 static int do_adp;
   1314 static void test_xsmadd(void)
   1315 {
   1316    if (do_adp)
   1317       __asm__ __volatile__ ("xsmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1318    else
   1319       __asm__ __volatile__ ("xsmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1320 }
   1321 
   1322 static void test_xsmsub(void)
   1323 {
   1324    if (do_adp)
   1325       __asm__ __volatile__ ("xsmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1326    else
   1327       __asm__ __volatile__ ("xsmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1328 }
   1329 
   1330 static void test_xsnmadd(void)
   1331 {
   1332    if (do_adp)
   1333       __asm__ __volatile__ ("xsnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1334    else
   1335       __asm__ __volatile__ ("xsnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1336 }
   1337 
   1338 static void test_xsmuldp(void)
   1339 {
   1340    __asm__ __volatile__ ("xsmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1341 }
   1342 
   1343 static void test_xssubdp(void)
   1344 {
   1345    __asm__ __volatile__ ("xssubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   1346 }
   1347 
   1348 static void test_xscvdpsxds (void)
   1349 {
   1350    __asm__ __volatile__ ("xscvdpsxds          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1351 }
   1352 
   1353 static void test_xscvsxddp (void)
   1354 {
   1355    __asm__ __volatile__ ("xscvsxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1356 }
   1357 
   1358 static void test_xscvuxddp (void)
   1359 {
   1360    __asm__ __volatile__ ("xscvuxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
   1361 }
   1362 
   1363 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
   1364                                                               0, 0, 0, 0 };
   1365 
   1366 #define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0])
   1367 #define NUM_VSTG_VECS (NUM_VSTG_INTS/4)
   1368 
   1369 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x01234567,
   1370                                                                 0x89abcdef,
   1371                                                                 0x00112233,
   1372                                                                 0x44556677,
   1373                                                                 0x8899aabb,
   1374                                                                 0x91929394,
   1375                                                                 0xa1a2a3a4,
   1376                                                                 0xb1b2b3b4,
   1377                                                                 0xc1c2c3c4,
   1378                                                                 0xd1d2d3d4,
   1379                                                                 0x7a6b5d3e
   1380 };
   1381 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
   1382 #define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
   1383 
   1384 static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD },
   1385                                      { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD },
   1386                                      { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD },
   1387                                      { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD },
   1388                                      { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT },
   1389                                      { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT },
   1390                                      { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD },
   1391                                      { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD },
   1392                                      { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE },
   1393                                      { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE },
   1394                                      { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE },
   1395                                      { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE },
   1396                                      { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE },
   1397                                      { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE },
   1398                                      { NULL, NULL, NULL, 0, 0, 0 } };
   1399 
   1400 static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR },
   1401                                       { &test_xxlor, "xxlor", VSX_OR } ,
   1402                                       { &test_xxlnor, "xxlnor", VSX_NOR },
   1403                                       { &test_xxland, "xxland", VSX_AND },
   1404                                       { &test_xxlandc, "xxlandc", VSX_ANDC },
   1405                                       { NULL, NULL}};
   1406 
   1407 static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp", 0, 4, 0x0899aabb91929394ULL },
   1408                                     { &test_xscpsgndp, "xscpsgndp", 4, 0, 0x8123456789abcdefULL },
   1409                                     { &test_xsnabsdp, "xsnabsdp", 7, 3, 0xc45566778899aabbULL, },
   1410                                     { &test_xsnegdp, "xsnegdp", 0, 7, 0x31b2b3b4c1c2c3c4ULL, },
   1411                                     { NULL, NULL, 0, 0, 0 }
   1412 
   1413 };
   1414 
   1415 static permute_test_t permute_tests[] =
   1416 {
   1417   { &test_xxmrghw, "xxmrghw",
   1418     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1419     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1420     { 0x11111111, 0x55555555, 0x22222222, 0x66666666 }  /* XT expected output */
   1421   },
   1422   { &test_xxmrghw, "xxmrghw",
   1423     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
   1424     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
   1425     { 0x00112233, 0x11111111, 0x44556677, 0x22222222 }  /* XT expected output */
   1426   },
   1427   { &test_xxmrglw, "xxmrglw",
   1428     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1429     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1430     { 0x33333333, 0x77777777, 0x44444444, 0x88888888 }  /* XT expected output */
   1431   },
   1432   { &test_xxmrglw, "xxmrglw",
   1433     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
   1434     { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
   1435     { 0x8899aabb, 0x33333333, 0xccddeeff, 0x44444444}  /* XT expected output */
   1436   },
   1437   { &test_xxpermdi_00, "xxpermdi DM=00",
   1438     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1439     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1440     { 0x11111111, 0x22222222, 0x55555555, 0x66666666 }  /* XT expected output */
   1441   },
   1442   { &test_xxpermdi_01, "xxpermdi DM=01",
   1443     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1444     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1445     { 0x11111111, 0x22222222, 0x77777777, 0x88888888 }  /* XT expected output */
   1446   },
   1447   { &test_xxpermdi_10, "xxpermdi DM=10",
   1448     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1449     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1450     { 0x33333333, 0x44444444, 0x55555555, 0x66666666 }  /* XT expected output */
   1451   },
   1452   { &test_xxpermdi_11, "xxpermdi DM=11",
   1453     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1454     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1455     { 0x33333333, 0x44444444, 0x77777777, 0x88888888 }  /* XT expected output */
   1456   },
   1457   { &test_xxsldwi_0, "xxsldwi SHW=0",
   1458     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1459     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1460     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }  /* XT expected output */
   1461   },
   1462   { &test_xxsldwi_1, "xxsldwi SHW=1",
   1463     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1464     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1465     { 0x22222222, 0x33333333, 0x44444444, 0x55555555 }  /* XT expected output */
   1466   },
   1467   { &test_xxsldwi_2, "xxsldwi SHW=2",
   1468     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1469     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1470     { 0x33333333, 0x44444444, 0x55555555, 0x66666666 }  /* XT expected output */
   1471   },
   1472   { &test_xxsldwi_3, "xxsldwi SHW=3",
   1473     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
   1474     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
   1475     { 0x44444444, 0x55555555, 0x66666666, 0x77777777 }  /* XT expected output */
   1476   },
   1477   { NULL, NULL }
   1478 };
   1479 
   1480 static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 },
   1481                                 { &test_fcfidus, "fcfidus", 1 },
   1482                                 { &test_fcfidu, "fcfidu", 1 },
   1483                                 { NULL, NULL, 0 },
   1484 
   1485 };
   1486 
   1487 static vx_fp_test_t vx_fp_tests[] = {
   1488                                      { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP},
   1489                                      { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER},
   1490                                      { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER},
   1491                                      { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA},
   1492                                      { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS},
   1493                                      { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA},
   1494                                      { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER},
   1495                                      { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER},
   1496                                      { NULL, NULL, NULL, 0, 0 }
   1497 };
   1498 
   1499 static xs_conv_test_t xs_conv_tests[] = {
   1500                                          { &test_xscvdpsxds, "xscvdpsxds", xscvdpsxds_results, 15},
   1501                                          { &test_xscvsxddp, "xscvsxddp", xscvsxddp_results, 15},
   1502                                          { &test_xscvuxddp, "xscvuxddp", xscvuxddp_results, 15},
   1503                                          { NULL, NULL, NULL, 0}
   1504 };
   1505 
   1506 #ifdef __powerpc64__
   1507 static void test_ldbrx(void)
   1508 {
   1509    int i, equality;
   1510    HWord_t reg_out;
   1511    unsigned char * byteIn, * byteOut;
   1512    r14 = (HWord_t)viargs;
   1513    // Just try the instruction an arbitrary number of times at different r15 offsets.
   1514    for (i = 0; i < 3; i++) {
   1515       int j, k;
   1516       reg_out = 0;
   1517       r15 = i * 4;
   1518       equality = 1;
   1519       __asm__ __volatile__ ("ldbrx          %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15));
   1520       byteIn = ((unsigned char *)(r14 + r15));
   1521       byteOut = (unsigned char *)&reg_out;
   1522 
   1523       printf("ldbrx:");
   1524       for (k = 0; k < 7; k++) {
   1525          printf( " %02x", (byteIn[k]));
   1526       }
   1527       printf(" (reverse) =>");
   1528       for (j = 0; j < 8; j++) {
   1529          printf( " %02x", (byteOut[j]));
   1530       }
   1531       printf("\n");
   1532       for (j = 0, k = 7; j < 8; j++, k--) {
   1533          equality &= (byteIn[k] == byteOut[j]);
   1534       }
   1535       if (!equality) {
   1536          printf("FAILED: load with byte reversal is incorrect\n");
   1537          errors++;
   1538       }
   1539    }
   1540    printf( "\n" );
   1541 }
   1542 
   1543 static void
   1544 test_popcntd(void)
   1545 {
   1546    uint64_t res;
   1547    unsigned long long src = 0x9182736405504536ULL;
   1548    int i, answer = 0;
   1549    r14 = src;
   1550    __asm__ __volatile__ ("popcntd          %0, %1" : "=r" (res): "r" (r14));
   1551    for (i = 0; i < 64; i++) {
   1552       answer += (r14 & 1ULL);
   1553       r14 = r14 >> 1;
   1554    }
   1555    printf("popcntd: 0x%llx => %d\n", src, (int)res);
   1556    if (res!= answer) {
   1557       printf("Error: unexpected result from popcntd\n");
   1558       errors++;
   1559    }
   1560    printf( "\n" );
   1561 }
   1562 #endif
   1563 
   1564 static void
   1565 test_lfiwzx(void)
   1566 {
   1567    unsigned int i;
   1568    unsigned int * src;
   1569    uint64_t reg_out;
   1570    r14 = (HWord_t)viargs;
   1571    // Just try the instruction an arbitrary number of times at different r15 offsets.
   1572    for (i = 0; i < 3; i++) {
   1573       reg_out = 0;
   1574       r15 = i * 4;
   1575       __asm__ __volatile__ ("lfiwzx          %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15));
   1576       src = ((unsigned int *)(r14 + r15));
   1577       printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out);
   1578 
   1579       if (reg_out > 0xFFFFFFFFULL || *src != (unsigned int)reg_out) {
   1580          printf("FAILED: integer load to FP register is incorrect\n");
   1581          errors++;
   1582       }
   1583    }
   1584    printf( "\n" );
   1585 }
   1586 
   1587 static void test_vx_fp_ops(void)
   1588 {
   1589 
   1590    test_func_t func;
   1591    int k;
   1592    char * test_name = (char *)malloc(20);
   1593    k = 0;
   1594 
   1595    build_special_fargs_table();
   1596    while ((func = vx_fp_tests[k].test_func)) {
   1597       int i, condreg, repeat = 0;
   1598       unsigned int flags;
   1599       unsigned long long * frap, * frbp, * dst;
   1600       vx_fp_test_t test_group = vx_fp_tests[k];
   1601       vx_fp_test_type test_type = test_group.test_type;
   1602 
   1603       switch (test_type) {
   1604          case VX_FP_CMP:
   1605             strcpy(test_name, "xscmp");
   1606             if (!repeat) {
   1607                repeat = 1;
   1608                strcat(test_name, "udp");
   1609                do_cmpudp = 1;
   1610             }
   1611             break;
   1612          case VX_FP_SMA:
   1613          case VX_FP_SMS:
   1614          case VX_FP_SNMA:
   1615             if (test_type == VX_FP_SMA)
   1616                strcpy(test_name, "xsmadd");
   1617             else if (test_type == VX_FP_SMS)
   1618                strcpy(test_name, "xsmsub");
   1619             else
   1620                strcpy(test_name, "xsnmadd");
   1621             if (!repeat) {
   1622                repeat = 1;
   1623                strcat(test_name, "adp");
   1624                do_adp = 1;
   1625             }
   1626             break;
   1627          case VX_FP_OTHER:
   1628             strcpy(test_name, test_group.name);
   1629             break;
   1630          default:
   1631             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
   1632             exit(1);
   1633       }
   1634 
   1635 again:
   1636       for (i = 0; i < test_group.num_tests; i++) {
   1637          unsigned int * inA, * inB, * pv;
   1638 
   1639          fp_test_args_t aTest = test_group.targs[i];
   1640          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
   1641          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
   1642          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1643          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1644          // Only need to copy one doubleword into each vector's element 0
   1645          memcpy(&vec_inA, inA, 8);
   1646          memcpy(&vec_inB, inB, 8);
   1647 
   1648          switch (test_type) {
   1649             case VX_FP_CMP:
   1650                SET_FPSCR_ZERO;
   1651                SET_CR_XER_ZERO;
   1652                (*func)();
   1653                GET_CR(flags);
   1654                condreg = (flags & 0x0f000000) >> 24;
   1655                printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg);
   1656               // printf("\tFRA: %e;  FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
   1657                if ( condreg != aTest.cr_flags) {
   1658                   printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg);
   1659                   errors++;
   1660                }
   1661                break;
   1662             case VX_FP_SMA:
   1663             case VX_FP_SMS:
   1664             case VX_FP_SNMA:
   1665             case VX_FP_OTHER:
   1666             {
   1667                int idx;
   1668                unsigned long long vsr_XT;
   1669                pv = (unsigned int *)&vec_out;
   1670                // clear vec_out
   1671                for (idx = 0; idx < 4; idx++, pv++)
   1672                   *pv = 0;
   1673 
   1674                if (test_type != VX_FP_OTHER) {
   1675                   /* Then we need a third src argument, which is stored in element 0 of
   1676                    * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
   1677                    * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
   1678                    * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
   1679                    * data (input args, result) contain only two inputs, so I arbitrarily
   1680                    * use spec_fargs elements 4 and 14 (alternating) for the third source
   1681                    * argument.  We can use the same input data for a given pair of
   1682                    * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
   1683                    * the expected result should be the same.
   1684                    */
   1685                   int extra_arg_idx;
   1686                   if (i % 2)
   1687                      extra_arg_idx = 4;
   1688                   else
   1689                      extra_arg_idx = 14;
   1690 
   1691                      //memcpy(&vec_out, &spec_fargs[14], 8);
   1692 
   1693                   if (repeat) {
   1694                      /* We're on the first time through of one of the VX_FP_SMx
   1695                       * test types, meaning we're testing a xs<ZZZ>adp case, thus we
   1696                       * have to swap inputs as described above:
   1697                       *    src2 <= VSX[XT]
   1698                       *    src3 <= VSX[XB]
   1699                       */
   1700                      memcpy(&vec_out, inB, 8);  // src2
   1701                      memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8);  //src3
   1702                      frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
   1703                   } else {
   1704                      // Don't need to init src2, as it's done before the switch()
   1705                      memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8);  //src3
   1706                   }
   1707                   memcpy(&vsr_XT, &vec_out, 8);
   1708                }
   1709 
   1710                (*func)();
   1711                dst = (unsigned long long *) &vec_out;
   1712                if (test_type == VX_FP_OTHER)
   1713                   printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst);
   1714                else
   1715                   printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
   1716                           test_name, vsr_XT, *frap, *frbp, *dst );
   1717 
   1718                if ( *dst != aTest.dp_bin_result) {
   1719                   printf("Error: Expected result %016llx; actual result %016llx\n", aTest.dp_bin_result, *dst);
   1720                   errors++;
   1721                }
   1722                /*
   1723               {
   1724                   // Debug code.  Keep this block commented out except when debugging.
   1725                   double result, expected;
   1726                   memcpy(&result, dst, 8);
   1727                   memcpy(&expected, &aTest.dp_bin_result, 8);
   1728                   printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
   1729                           spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
   1730                           expected, result );
   1731                }
   1732               */
   1733                break;
   1734             }
   1735          }
   1736 
   1737 
   1738       }
   1739       printf( "\n" );
   1740 
   1741       if (repeat) {
   1742          repeat = 0;
   1743          switch (test_type) {
   1744             case VX_FP_CMP:
   1745                strcpy(test_name, "xscmp");
   1746                strcat(test_name, "odp");
   1747                do_cmpudp = 0;
   1748                break;
   1749             case VX_FP_SMA:
   1750             case VX_FP_SMS:
   1751             case VX_FP_SNMA:
   1752                if (test_type == VX_FP_SMA)
   1753                   strcpy(test_name, "xsmadd");
   1754                else if (test_type == VX_FP_SMS)
   1755                   strcpy(test_name, "xsmsub");
   1756                else
   1757                   strcpy(test_name, "xsnmadd");
   1758                strcat(test_name, "mdp");
   1759                do_adp = 0;
   1760                break;
   1761             case VX_FP_OTHER:
   1762                break;
   1763          }
   1764          goto again;
   1765       }
   1766       k++;
   1767    }
   1768    printf( "\n" );
   1769    free(test_name);
   1770 }
   1771 
   1772 static void test_xs_conv_ops(void)
   1773 {
   1774 
   1775    test_func_t func;
   1776    int k = 0;
   1777 
   1778    build_special_fargs_table();
   1779    while ((func = xs_conv_tests[k].test_func)) {
   1780       int i;
   1781       unsigned long long * frbp, * dst;
   1782       xs_conv_test_t test_group = xs_conv_tests[k];
   1783       for (i = 0; i < test_group.num_tests; i++) {
   1784          unsigned int * inB, * pv;
   1785          int idx;
   1786          unsigned long long exp_result = test_group.results[i];
   1787          inB = (unsigned int *)&spec_fargs[i];
   1788          frbp = (unsigned long long *)&spec_fargs[i];
   1789          memcpy(&vec_inB, inB, 8);
   1790          pv = (unsigned int *)&vec_out;
   1791          // clear vec_out
   1792          for (idx = 0; idx < 4; idx++, pv++)
   1793             *pv = 0;
   1794          (*func)();
   1795          dst = (unsigned long long *) &vec_out;
   1796          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst);
   1797 
   1798          if ( *dst != exp_result) {
   1799             printf("Error: Expected result %016llx; actual result %016llx\n", exp_result, *dst);
   1800             errors++;
   1801          }
   1802       }
   1803       k++;
   1804       printf("\n");
   1805    }
   1806    printf( "\n" );
   1807 }
   1808 
   1809 static void do_load_test(ldst_test_t loadTest)
   1810 {
   1811    test_func_t func;
   1812    unsigned int *src, *dst;
   1813    int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0;
   1814    int i, j, m, equality;
   1815    i = j = 0;
   1816 
   1817    func = loadTest.test_func;
   1818    for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) {
   1819       int again;
   1820       j = 0;
   1821        r14 += i * 16;
   1822       do {
   1823          unsigned int * pv = (unsigned int *)&vec_out;
   1824          int idx;
   1825          // clear vec_out
   1826          for (idx = 0; idx < 4; idx++, pv+=idx)
   1827             *pv = 0;
   1828 
   1829          again = 0;
   1830          r15 = j;
   1831 
   1832          // execute test insn
   1833          (*func)();
   1834 
   1835          src = (unsigned int*) (((unsigned char *)r14) + j);
   1836          dst = (unsigned int*) &vec_out;
   1837 
   1838          printf( "%s:", loadTest.name);
   1839          for (m = 0; m < loadTest.num_words_to_process; m++) {
   1840             printf( " %08x", src[splat ? m % 2 : m]);
   1841          }
   1842          printf( " =>");
   1843          for (m = 0; m < loadTest.num_words_to_process; m++) {
   1844             printf( " %08x", dst[m]);
   1845          }
   1846          printf("\n");
   1847          equality = 1;
   1848          for (m = 0; m < loadTest.num_words_to_process; m++) {
   1849             equality = equality && (src[splat ? m % 2 : m] == dst[m]);
   1850          }
   1851 
   1852          if (!equality) {
   1853             printf("FAILED: loaded vector is incorrect\n");
   1854             errors++;
   1855          }
   1856 
   1857          if (j == 0 && loadTest.offset) {
   1858             again = 1;
   1859             j += loadTest.offset;
   1860          }
   1861       }
   1862       while (again);
   1863    }
   1864 }
   1865 
   1866 static void
   1867 do_store_test ( ldst_test_t storeTest )
   1868 {
   1869    test_func_t func;
   1870    unsigned int *src, *dst;
   1871    int m, equality;
   1872 
   1873    func = storeTest.test_func;
   1874    r14 = (HWord_t) storeTest.base_addr;
   1875    r15 = (HWord_t) storeTest.offset;
   1876    unsigned int * pv = (unsigned int *) storeTest.base_addr;
   1877    int idx;
   1878    // clear out storage destination
   1879    for (idx = 0; idx < 4; idx++, pv += idx)
   1880       *pv = 0;
   1881 
   1882    memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char));
   1883 
   1884    // execute test insn
   1885    (*func)();
   1886    src = &viargs[0];
   1887    dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset);
   1888 
   1889    printf( "%s:", storeTest.name );
   1890    for (m = 0; m < storeTest.num_words_to_process; m++) {
   1891       printf( " %08x", src[m] );
   1892    }
   1893    printf( " =>" );
   1894    for (m = 0; m < storeTest.num_words_to_process; m++) {
   1895       printf( " %08x", dst[m] );
   1896    }
   1897    printf( "\n" );
   1898    equality = 1;
   1899    for (m = 0; m < storeTest.num_words_to_process; m++) {
   1900       equality = equality && (src[m] == dst[m]);
   1901    }
   1902 
   1903    if (!equality) {
   1904       printf( "FAILED: vector store result is incorrect\n" );
   1905       errors++;
   1906    }
   1907 
   1908 }
   1909 
   1910 
   1911 static void test_ldst(void)
   1912 {
   1913    int k = 0;
   1914 
   1915    while (ldst_tests[k].test_func) {
   1916       if (ldst_tests[k].type == VSX_STORE)
   1917          do_store_test(ldst_tests[k]);
   1918       else
   1919          do_load_test(ldst_tests[k]);
   1920       k++;
   1921       printf("\n");
   1922    }
   1923 }
   1924 
   1925 static void test_ftdiv(void)
   1926 {
   1927    int i, num_tests, crx;
   1928    unsigned int flags;
   1929    unsigned long long * frap, * frbp;
   1930    build_special_fargs_table();
   1931 
   1932    num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0];
   1933 
   1934    for (i = 0; i < num_tests; i++) {
   1935       ftdiv_test_args_t aTest = ftdiv_tests[i];
   1936       f14 = spec_fargs[aTest.fra_idx];
   1937       f15 = spec_fargs[aTest.frb_idx];
   1938       frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1939       frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1940       SET_FPSCR_ZERO;
   1941       SET_CR_XER_ZERO;
   1942       __asm__ __volatile__ ("ftdiv           cr1, %0, %1" : : "d" (f14), "d" (f15));
   1943       GET_CR(flags);
   1944       crx = (flags & 0x0f000000) >> 24;
   1945       printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx);
   1946 //      printf("\tFRA: %e;  FRB: %e\n", f14, f15);
   1947       if ( crx != aTest.cr_flags) {
   1948          printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx);
   1949          errors++;
   1950       }
   1951    }
   1952    printf( "\n" );
   1953 }
   1954 
   1955 
   1956 static void test_p7_fpops ( void )
   1957 {
   1958    int k = 0;
   1959    test_func_t func;
   1960 
   1961    build_fargs_table();
   1962    while ((func = fp_tests[k].test_func)) {
   1963       float res;
   1964       double resd;
   1965       unsigned long long u0;
   1966       int i;
   1967       int res32 = strcmp(fp_tests[k].name, "fcfidu");
   1968 
   1969       for (i = 0; i < nb_fargs; i++) {
   1970          u0 = *(unsigned long long *) (&fargs[i]);
   1971          f14 = fargs[i];
   1972          (*func)();
   1973          if (res32) {
   1974             res = f17;
   1975             printf( "%s %016llx => (raw sp) %08x)",
   1976                     fp_tests[k].name, u0, *((unsigned int *)&res));
   1977          } else {
   1978             resd = f17;
   1979             printf( "%s %016llx => (raw sp) %016llx)",
   1980                     fp_tests[k].name, u0, *(unsigned long long *)(&resd));
   1981          }
   1982          printf( "\n" );
   1983       }
   1984 
   1985       k++;
   1986       printf( "\n" );
   1987    }
   1988 }
   1989 
   1990 static void test_vsx_logic(void)
   1991 {
   1992    logic_test_t aTest;
   1993    test_func_t func;
   1994    int equality, k;
   1995    k = 0;
   1996 
   1997    while ((func = logic_tests[k].test_func)) {
   1998       unsigned int * pv;
   1999       int startA, startB;
   2000       unsigned int * inA, * inB, * dst;
   2001       int idx, i;
   2002       startA = 0;
   2003       aTest = logic_tests[k];
   2004       for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) {
   2005          startB = startA + 4;
   2006          pv = (unsigned int *)&vec_out;
   2007          inA = &viargs[startA];
   2008          inB = &viargs[startB];
   2009          memcpy(&vec_inA, inA, sizeof(vector unsigned char));
   2010          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
   2011          // clear vec_out
   2012          for (idx = 0; idx < 4; idx++, pv++)
   2013             *pv = 0;
   2014 
   2015          // execute test insn
   2016          (*func)();
   2017          dst = (unsigned int*) &vec_out;
   2018 
   2019          printf( "%s:", aTest.name);
   2020          printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name);
   2021          printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]);
   2022          printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
   2023 
   2024          equality = 1;
   2025          for (idx = 0; idx < 4; idx++) {
   2026             switch (aTest.op) {
   2027                case VSX_AND:
   2028                   equality &= (dst[idx] == (inA[idx] & inB[idx]));
   2029                   break;
   2030                case VSX_ANDC:
   2031                   equality &= (dst[idx] == (inA[idx] & ~inB[idx]));
   2032                   break;
   2033                case VSX_NOR:
   2034                   equality &= (dst[idx] == ~(inA[idx] | inB[idx]));
   2035                   break;
   2036                case VSX_XOR:
   2037                   equality &= (dst[idx] == (inA[idx] ^ inB[idx]));
   2038                   break;
   2039                case VSX_OR:
   2040                   equality &= (dst[idx] == (inA[idx] | inB[idx]));
   2041                   break;
   2042                default:
   2043                   fprintf(stderr, "Error in test_vsx_logic(): unknown VSX logical op %d\n", aTest.op);
   2044                   exit(1);
   2045             }
   2046          }
   2047          if (!equality) {
   2048             printf( "FAILED: vector out is incorrect\n" );
   2049             errors++;
   2050          }
   2051       }
   2052       k++;
   2053    }
   2054    printf( "\n" );
   2055 }
   2056 
   2057 static void test_move_ops (void)
   2058 {
   2059    move_test_t aTest;
   2060    test_func_t func;
   2061    int equality, k;
   2062    k = 0;
   2063 
   2064    while ((func = move_tests[k].test_func)) {
   2065       unsigned int * pv;
   2066       int startA, startB;
   2067       unsigned int * inA, * inB, * dst;
   2068       unsigned long long exp_out;
   2069       int idx;
   2070       aTest = move_tests[k];
   2071       exp_out = aTest.expected_result;
   2072       startA = aTest.xa_idx;
   2073       startB = aTest.xb_idx;
   2074       pv = (unsigned int *)&vec_out;
   2075       inA = &viargs[startA];
   2076       inB = &viargs[startB];
   2077       memcpy(&vec_inA, inA, sizeof(vector unsigned char));
   2078       memcpy(&vec_inB, inB, sizeof(vector unsigned char));
   2079       // clear vec_out
   2080       for (idx = 0; idx < 4; idx++, pv++)
   2081          *pv = 0;
   2082 
   2083       // execute test insn
   2084       (*func)();
   2085       dst = (unsigned int*) &vec_out;
   2086 
   2087       printf( "%s:", aTest.name);
   2088       printf( " %08x %08x %s", inA[0], inA[1], aTest.name);
   2089       printf( " %08x %08xx", inB[0], inB[1]);
   2090       printf(" => %08x %08x\n", dst[0], dst[1]);
   2091 
   2092       equality = 1;
   2093       pv = (unsigned int *)&exp_out;
   2094       for (idx = 0; idx < 2; idx++) {
   2095          equality &= (dst[idx] == pv[idx]);
   2096       }
   2097       if (!equality) {
   2098          printf( "FAILED: vector out is incorrect\n" );
   2099          errors++;
   2100       }
   2101       k++;
   2102       printf( "\n" );
   2103    }
   2104 }
   2105 
   2106 static void test_permute_ops (void)
   2107 {
   2108   permute_test_t *aTest;
   2109   unsigned int *dst = (unsigned int *) &vec_out;
   2110 
   2111   for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++)
   2112     {
   2113       /* Grab test input and clear output vector.  */
   2114       memcpy(&vec_inA, aTest->xa, sizeof(vec_inA));
   2115       memcpy(&vec_inB, aTest->xb, sizeof(vec_inB));
   2116       memset(dst, 0, sizeof(vec_out));
   2117 
   2118       /* execute test insn */
   2119       aTest->test_func();
   2120 
   2121       printf( "%s:\n", aTest->name);
   2122       printf( "        XA[%08x,%08x,%08x,%08x]\n",
   2123               aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]);
   2124       printf( "        XB[%08x,%08x,%08x,%08x]\n",
   2125               aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]);
   2126       printf( "   =>   XT[%08x,%08x,%08x,%08x]\n",
   2127               dst[0], dst[1], dst[2], dst[3]);
   2128 
   2129       if (memcmp (dst, &aTest->expected_output, sizeof(vec_out)))
   2130        {
   2131          printf( "FAILED: vector out is incorrect\n" );
   2132          errors++;
   2133        }
   2134     }
   2135   printf( "\n" );
   2136 }
   2137 
   2138 static test_table_t all_tests[] = { { &test_ldst,
   2139                                        "Test VSX load/store instructions" },
   2140                                      { &test_vsx_logic,
   2141                                        "Test VSX logic instructions" },
   2142 #ifdef __powerpc64__
   2143                                      { &test_ldbrx,
   2144                                        "Test ldbrx instruction" },
   2145                                      { &test_popcntd,
   2146                                        "Test popcntd instruction" },
   2147 #endif
   2148                                      { &test_lfiwzx,
   2149                                        "Test lfiwzx instruction" },
   2150                                      { &test_p7_fpops,
   2151                                        "Test P7 floating point convert instructions"},
   2152                                      { &test_ftdiv,
   2153                                        "Test ftdiv instruction" },
   2154                                      { &test_move_ops,
   2155                                        "Test VSX move instructions"},
   2156                                      { &test_permute_ops,
   2157                                        "Test VSX permute instructions"},
   2158                                      { &test_vx_fp_ops,
   2159                                        "Test VSX floating point instructions"},
   2160                                      { &test_xs_conv_ops,
   2161                                        "Test VSX scalar integer conversion instructions" },
   2162                                      { NULL, NULL }
   2163 };
   2164 #endif // HAS_VSX
   2165 
   /*
    * Program entry point.  When built with HAS_VSX, prints each category
    * heading from all_tests and runs the category, then prints a summary
    * line based on the global error count.  Without HAS_VSX nothing is run.
    * Always returns 0.
    */
   int main(int argc, char *argv[])
   {
   #ifdef HAS_VSX

      test_table_t *entry;
      test_func_t func;

      for (entry = all_tests; (func = entry->test_category); entry++) {
         printf( "%s\n", entry->name );
         (*func)();
      }

      if (!errors)
         printf("Testcase PASSED\n");
      else
         printf("Testcase FAILED with %d errors \n", errors);

   #endif // HAS_VSX

      return 0;
   }
   2189