Home | History | Annotate | Download | only in ppc64
      1 /*  Copyright (C) 2013 IBM
      2 
      3  Authors: Carl Love  <carll (at) us.ibm.com>
      4           Maynard Johnson <maynardj (at) us.ibm.com>
      5 
      6  This program is free software; you can redistribute it and/or
      7  modify it under the terms of the GNU General Public License as
      8  published by the Free Software Foundation; either version 2 of the
      9  License, or (at your option) any later version.
     10 
     11  This program is distributed in the hope that it will be useful, but
     12  WITHOUT ANY WARRANTY; without even the implied warranty of
     13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  General Public License for more details.
     15 
     16  You should have received a copy of the GNU General Public License
     17  along with this program; if not, write to the Free Software
     18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     19  02111-1307, USA.
     20 
     21  The GNU General Public License is contained in the file COPYING.
     22 
     23  This program is based heavily on the test_isa_2_06_part*.c source files.
     24  */
     25 
     26 #include <stdio.h>
     27 
     28 #ifdef HAS_ISA_2_07
     29 
     30 #include <stdint.h>
     31 #include <stdlib.h>
     32 #include <string.h>
     33 #include <malloc.h>
     34 #include <altivec.h>
     35 #include <math.h>
     36 
/* HWord_t is the integer type used for the general-purpose register
 * variables below: 32 bits when __powerpc64__ is not defined, 64 bits
 * otherwise.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* Global register variables bound to fixed GPRs r14-r17.  The load/store
 * test functions in this file pass r14 and r15 to the instruction under
 * test as its two address operands.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
/* Global register variables bound to fixed FPRs fr14-fr17. */
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* NOTE(review): presumably holds a condition-register snapshot; it is not
 * referenced in the portion of the file reviewed here -- confirm usage.
 */
static volatile unsigned int cond_reg;

/* Values for the Bool type. */
#define True  1
#define False 0
     56 
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register with mtcr; every CR field is
 * declared clobbered.  NOTE: the expansion already ends in ';', so
 * "SET_CR(x);" expands to two statements -- do not use it as the
 * unbraced body of an if/else.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg to XER with mtxer.  Same trailing-';' caveat as SET_CR. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval with mfcr (no trailing ';'). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval with mfxer (no trailing ';'). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Write zero to the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Write zero to XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Write zero to CR and then to XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF from a double holding 0.0, i.e. zero
 * the FPSCR as the macro name says.
 */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

/* Boolean type used with the True/False constants. */
typedef unsigned char Bool;
     89 
     90 
     91 /* These functions below that construct a table of floating point
     92  * values were lifted from none/tests/ppc32/jm-insns.c.
     93  */
     94 
     95 #if defined (DEBUG_ARGS_BUILD)
     96 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
     97 #else
     98 #define AB_DPRINTF(fmt, args...) do { } while (0)
     99 #endif
    100 
    101 static inline void register_farg (void *farg,
    102                                   int s, uint16_t _exp, uint64_t mant)
    103 {
    104    uint64_t tmp;
    105 
    106    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    107    *(uint64_t *)farg = tmp;
    108    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    109               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    110 }
    111 
    112 static inline void register_sp_farg (void *farg,
    113                                      int s, uint16_t _exp, uint32_t mant)
    114 {
    115    uint32_t tmp;
    116    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
    117    *(uint32_t *)farg = tmp;
    118 }
    119 
    120 
/* One two-operand test case, expressed as a pair of table indices.
 * NOTE(review): presumably these index spec_fargs / spec_sp_fargs below;
 * the consuming code is outside the portion reviewed here -- confirm.
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first operand  */
   int frb_idx;   /* index of the second operand */
} fp_test_args_t;

/* Number of entries in the two tables below; set by
 * build_special_fargs_table().
 */
static int nb_special_fargs;
/* Heap-allocated table of double-precision test values; NULL until
 * build_special_fargs_table() has run.
 */
static double * spec_fargs;
/* Heap-allocated single-precision counterpart of spec_fargs, filled
 * index-for-index by build_special_fargs_table().
 */
static float * spec_sp_fargs;
    129 
    130 static void build_special_fargs_table(void)
    131 {
    132    /*
    133     * Double precision:
    134     * Sign goes from zero to one               (1 bit)
    135     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    136     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    137     * + special values:
    138     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    139     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    140     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    141     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    142     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    143     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    144     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    145     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    146     * (8 values)
    147     *
    148     * Single precision
    149     * Sign:     1 bit
    150     * Exponent: 8 bits
    151     * Mantissa: 23 bits
    152     * +0.0      : 0 0x00 0x000000 => 0x00000000
    153     * -0.0      : 1 0x00 0x000000 => 0x80000000
    154     * +infinity : 0 0xFF 0x000000 => 0x7F800000
    155     * -infinity : 1 0xFF 0x000000 => 0xFF800000
    156     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    157     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    158     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    159     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    160    */
    161 
    162    uint64_t mant;
    163    uint32_t mant_sp;
    164    uint16_t _exp;
    165    int s;
    166    int j, i = 0;
    167 
    168    if (spec_fargs)
    169       return;
    170 
    171    spec_fargs = malloc( 20 * sizeof(double) );
    172    spec_sp_fargs = malloc( 20 * sizeof(float) );
    173 
    174    // #0
    175    s = 0;
    176    _exp = 0x3fd;
    177    mant = 0x8000000000000ULL;
    178    register_farg(&spec_fargs[i++], s, _exp, mant);
    179 
    180    // #1
    181    s = 0;
    182    _exp = 0x404;
    183    mant = 0xf000000000000ULL;
    184    register_farg(&spec_fargs[i++], s, _exp, mant);
    185 
    186    // #2
    187    s = 0;
    188    _exp = 0x001;
    189    mant = 0x8000000b77501ULL;
    190    register_farg(&spec_fargs[i++], s, _exp, mant);
    191 
    192    // #3
    193    s = 0;
    194    _exp = 0x7fe;
    195    mant = 0x800000000051bULL;
    196    register_farg(&spec_fargs[i++], s, _exp, mant);
    197 
    198    // #4
    199    s = 0;
    200    _exp = 0x012;
    201    mant = 0x3214569900000ULL;
    202    register_farg(&spec_fargs[i++], s, _exp, mant);
    203 
    204    /* Special values */
    205    /* +0.0      : 0 0x000 0x0000000000000 */
    206    // #5
    207    s = 0;
    208    _exp = 0x000;
    209    mant = 0x0000000000000ULL;
    210    register_farg(&spec_fargs[i++], s, _exp, mant);
    211 
    212    /* -0.0      : 1 0x000 0x0000000000000 */
    213    // #6
    214    s = 1;
    215    _exp = 0x000;
    216    mant = 0x0000000000000ULL;
    217    register_farg(&spec_fargs[i++], s, _exp, mant);
    218 
    219    /* +infinity : 0 0x7FF 0x0000000000000  */
    220    // #7
    221    s = 0;
    222    _exp = 0x7FF;
    223    mant = 0x0000000000000ULL;
    224    register_farg(&spec_fargs[i++], s, _exp, mant);
    225 
    226    /* -infinity : 1 0x7FF 0x0000000000000 */
    227    // #8
    228    s = 1;
    229    _exp = 0x7FF;
    230    mant = 0x0000000000000ULL;
    231    register_farg(&spec_fargs[i++], s, _exp, mant);
    232 
    233    /*
    234     * This comment applies to values #9 and #10 below:
    235     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    236     * so we can't just copy the double-precision value to the corresponding slot in the
    237     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    238     * have to manually set the bits using register_sp_farg().
    239     */
    240 
    241    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
    242    // #9
    243    s = 0;
    244    _exp = 0x7FF;
    245    mant = 0x7FFFFFFFFFFFFULL;
    246    register_farg(&spec_fargs[i++], s, _exp, mant);
    247    _exp = 0xff;
    248    mant_sp = 0x3FFFFF;
    249    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    250 
    251    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
    252    // #10
    253    s = 1;
    254    _exp = 0x7FF;
    255    mant = 0x7FFFFFFFFFFFFULL;
    256    register_farg(&spec_fargs[i++], s, _exp, mant);
    257    _exp = 0xff;
    258    mant_sp = 0x3FFFFF;
    259    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    260 
    261    /* +QNaN     : 0 0x7FF 0x8000000000000 */
    262    // #11
    263    s = 0;
    264    _exp = 0x7FF;
    265    mant = 0x8000000000000ULL;
    266    register_farg(&spec_fargs[i++], s, _exp, mant);
    267 
    268    /* -QNaN     : 1 0x7FF 0x8000000000000 */
    269    // #12
    270    s = 1;
    271    _exp = 0x7FF;
    272    mant = 0x8000000000000ULL;
    273    register_farg(&spec_fargs[i++], s, _exp, mant);
    274 
    275    /* denormalized value */
    276    // #13
    277    s = 1;
    278    _exp = 0x000;
    279    mant = 0x8340000078000ULL;
    280    register_farg(&spec_fargs[i++], s, _exp, mant);
    281 
    282    /* Negative finite number */
    283    // #14
    284    s = 1;
    285    _exp = 0x40d;
    286    mant = 0x0650f5a07b353ULL;
    287    register_farg(&spec_fargs[i++], s, _exp, mant);
    288 
    289    /* A few positive finite numbers ... */
    290    // #15
    291    s = 0;
    292    _exp = 0x412;
    293    mant = 0x32585a9900000ULL;
    294    register_farg(&spec_fargs[i++], s, _exp, mant);
    295 
    296    // #16
    297    s = 0;
    298    _exp = 0x413;
    299    mant = 0x82511a2000000ULL;
    300    register_farg(&spec_fargs[i++], s, _exp, mant);
    301 
    302    // #17
    303    s = 0;
    304    _exp = 0x403;
    305    mant = 0x12ef5a9300000ULL;
    306    register_farg(&spec_fargs[i++], s, _exp, mant);
    307 
    308    // #18
    309    s = 0;
    310    _exp = 0x405;
    311    mant = 0x14bf5d2300000ULL;
    312    register_farg(&spec_fargs[i++], s, _exp, mant);
    313 
    314    // #19
    315    s = 0;
    316    _exp = 0x409;
    317    mant = 0x76bf982440000ULL;
    318    register_farg(&spec_fargs[i++], s, _exp, mant);
    319 
    320 
    321    nb_special_fargs = i;
    322    for (j = 0; j < i; j++) {
    323       if (!(j == 9 || j == 10))
    324          spec_sp_fargs[j] = spec_fargs[j];
    325    }
    326 }
    327 
    328 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
    329                                                               0, 0, 0, 0 };
    330 
    331 
    332 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001,
    333                                                                 0x89abcdef,
    334                                                                 0x00112233,
    335                                                                 0x74556677,
    336                                                                 0x00001abb,
    337                                                                 0x00000001,
    338                                                                 0x31929394,
    339                                                                 0xa1a2a3a4,
    340 };
    341 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
    342 #define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
    343 
/* Signature shared by every test entry point in this file: no arguments,
 * no return value.  The instruction wrappers read their operands from,
 * and write their results to, file-scope variables.
 */
typedef void (*test_func_t)(void);

/* Pairs a test-category driver function with its printable name. */
struct test_table
{
   test_func_t test_category;
   char * name;
};
    351 
    352 
    353 typedef enum {
    354    SINGLE_TEST,
    355    SINGLE_TEST_SINGLE_RES,
    356    DOUBLE_TEST,
    357    DOUBLE_TEST_SINGLE_RES
    358 } precision_type_t;
    359 #define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))
    360 
/* Category tag carried by the floating-point test descriptors
 * (struct vx_fp_test1 / struct vx_fp_test2).
 */
typedef enum {
   VX_FP_SMAS,   // multiply add single precision result
   VX_FP_SMSS,   // multiply sub single precision result
   VX_FP_SNMAS,  // negative multiply add single precision result
   VX_FP_SNMSS,  // negative multiply sub single precision result
   VX_FP_OTHER,
   VX_CONV_WORD,
   VX_ESTIMATE,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_SCALAR_CONV_TO_WORD,
   VX_SCALAR_SP_TO_VECTOR_SP,
   VX_DEFAULT
} vx_fp_test_type;

/* Kind of load/store test.  Values start at 1, so the zero used in the
 * ldst_tests terminator entry is not a valid kind.
 */
typedef enum {
   VSX_LOAD = 1,
   VSX_LOAD_SPLAT,
   VSX_STORE,
} vsx_ldst_type;

/* Logical operation tag for the xxl* tests.  Values start at 1, so zero
 * is not a valid operation.
 */
typedef enum {
   VSX_AND = 1,
   VSX_NAND,
   VSX_ANDC,
   VSX_OR,
   VSX_ORC,
   VSX_NOR,
   VSX_XOR,
   VSX_EQV,
} vsx_log_op;
    392 
/* Descriptor for a two-operand floating-point test. */
struct vx_fp_test1
{
   test_func_t test_func;      /* wrapper that executes the instruction */
   const char *name;           /* printable name                        */
   fp_test_args_t * targs;     /* array of operand index pairs          */
   int num_tests;              /* number of entries of targs to run     */
    vx_fp_test_type test_type; /* category tag                          */
 };

/* Descriptor for a load/store test. */
struct ldst_test
{
   test_func_t test_func;      /* wrapper that executes the instruction */
   const char *name;           /* printable name                        */
   precision_type_t precision;
   void * base_addr;           /* base of the memory operand; NULL in one
                                  ldst_tests entry -- presumably filled in
                                  by the driver, TODO confirm            */
   uint32_t offset;            /* offset paired with base_addr; units are
                                  decided by the driver (not visible here) */
   vsx_ldst_type type;         /* load, load-splat or store             */
};

/* Descriptor for a floating-point test that also records operand/result
 * precision and a short operation label.
 */
struct vx_fp_test2
{
   test_func_t test_func;
   const char *name;
   fp_test_args_t * targs;     /* NULL in every vsx_one_fp_arg_tests entry */
   int num_tests;
   precision_type_t precision;
   vx_fp_test_type test_type;
   const char * op;            /* label such as "conv", "1/x", "sqrt"   */
};

/* NOTE: no typedef below refers to this struct; xs_conv_test_t is an alias
 * for struct simple_test.
 */
struct xs_conv_test
{
   test_func_t test_func;
   const char *name;
   int num_tests;
};

/* Minimal descriptor: wrapper function plus printable name. */
struct simple_test
{
   test_func_t test_func;
   const char *name;
};

/* Descriptor for a VSX logical-operation test. */
struct vsx_logic_test
{
   test_func_t test_func;
   const char *name;
   vsx_log_op op;              /* which logical operation is exercised  */
};

/* Short aliases used when declaring the test tables. */
typedef struct vsx_logic_test logic_test_t;
typedef struct ldst_test ldst_test_t;
typedef struct simple_test xs_conv_test_t;   /* simple_test, not xs_conv_test */
typedef struct vx_fp_test1 vx_fp_test_basic_t;
typedef struct vx_fp_test2 vx_fp_test2_t;
typedef struct test_table test_table_t;
    449 
    450 
/* Operand and result vectors shared by all instruction wrappers below:
 * a wrapper reads vec_inA and/or vec_inB and writes vec_out.
 */
static vector unsigned int vec_out, vec_inA, vec_inB;

/* Execute xscvdpspn: source vec_inB, destination vec_out. */
static void test_xscvdpspn(void)
{
   __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Execute xscvspdpn: source vec_inB, destination vec_out. */
static void test_xscvspdpn(void)
{
   __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Selects which form the fused multiply-add wrappers execute: non-zero
 * picks the "a" mnemonic (e.g. xsmaddasp), zero the "m" mnemonic
 * (e.g. xsmaddmsp).
 */
static int do_asp;
/* Execute xsmaddasp or xsmaddmsp, chosen by do_asp.  vec_out is a
 * read-write operand ("+wa"), so the caller must load it before the call;
 * vec_inA and vec_inB are the other two inputs.
 */
static void test_xsmadds(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xsmsubasp or xsmsubmsp, chosen by do_asp.  vec_out is read-write. */
static void test_xsmsubs(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xscvsxdsp: source vec_inB, destination vec_out. */
static void test_xscvsxdsp (void)
{
   __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Execute xscvuxdsp: source vec_inB, destination vec_out. */
static void test_xscvuxdsp (void)
{
   __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Execute xsnmaddasp or xsnmaddmsp, chosen by do_asp.  vec_out is read-write. */
static void test_xsnmadds(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xsnmsubasp or xsnmsubmsp, chosen by do_asp.  vec_out is read-write. */
static void test_xsnmsubs(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
    505 
    506 static void test_stxsspx(void)
    507 {
    508    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
    509 }
    510 
    511 static void test_stxsiwx(void)
    512 {
    513    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
    514 }
    515 
    516 static void test_lxsiwax(void)
    517 {
    518    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    519 }
    520 
    521 static void test_lxsiwzx(void)
    522 {
    523    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    524 }
    525 
    526 static void test_lxsspx(void)
    527 {
    528    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    529 }
    530 
/* One-operand wrappers: source vec_inB, destination vec_out. */

/* Execute xssqrtsp. */
static void test_xssqrtsp(void)
{
   __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Execute xsrsqrtesp. */
static void test_xsrsqrtesp(void)
{
   __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Three argument instructions: sources vec_inA and vec_inB, destination
 * vec_out.
 */
/* Execute xxleqv. */
static void test_xxleqv(void)
{
   __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xxlorc. */
static void test_xxlorc(void)
{
   __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xxlnand. */
static void test_xxlnand(void)
{
   __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xsaddsp. */
static void test_xsaddsp(void)
{
  __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

/* Execute xssubsp. */
static void test_xssubsp(void)
{
  __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

/* Execute xsdivsp. */
static void test_xsdivsp(void)
{
  __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

/* Execute xsmulsp. */
static void test_xsmulsp(void)
{
   __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

/* Execute xsresp: source vec_inB, destination vec_out. */
static void test_xsresp(void)
{
   __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
/* Execute xsrsp: source vec_inB, destination vec_out. */
static void test_xsrsp(void)
{
   __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
    585 
    586 fp_test_args_t vx_math_tests[] = {
    587                                   {8, 8},
    588                                   {8, 14},
    589                                   {8, 6},
    590                                   {8, 5},
    591                                   {8, 4},
    592                                   {8, 7},
    593                                   {8, 9},
    594                                   {8, 11},
    595                                   {14, 8},
    596                                   {14, 14},
    597                                   {14, 6},
    598                                   {14, 5},
    599                                   {14, 4},
    600                                   {14, 7},
    601                                   {14, 9},
    602                                   {14, 11},
    603                                   {6, 8},
    604                                   {6, 14},
    605                                   {6, 6},
    606                                   {6, 5},
    607                                   {6, 4},
    608                                   {6, 7},
    609                                   {6, 9},
    610                                   {6, 11},
    611                                   {5, 8},
    612                                   {5, 14},
    613                                   {5, 6},
    614                                   {5, 5},
    615                                   {5, 4},
    616                                   {5, 7},
    617                                   {5, 9},
    618                                   {5, 11},
    619                                   {4, 8},
    620                                   {4, 14},
    621                                   {4, 6},
    622                                   {4, 5},
    623                                   {4, 1},
    624                                   {4, 7},
    625                                   {4, 9},
    626                                   {4, 11},
    627                                   {7, 8},
    628                                   {7, 14},
    629                                   {7, 6},
    630                                   {7, 5},
    631                                   {7, 4},
    632                                   {7, 7},
    633                                   {7, 9},
    634                                   {7, 11},
    635                                   {10, 8},
    636                                   {10, 14},
    637                                   {10, 6},
    638                                   {10, 5},
    639                                   {10, 4},
    640                                   {10, 7},
    641                                   {10, 9},
    642                                   {10, 11},
    643                                   {12, 8},
    644                                   {12, 14},
    645                                   {12, 6},
    646                                   {12, 5},
    647                                   {12, 4},
    648                                   {12, 7},
    649                                   {12, 9},
    650                                   {12, 11},
    651                                   {8, 8},
    652                                   {8, 14},
    653                                   {8, 6},
    654                                   {8, 5},
    655                                   {8, 4},
    656                                   {8, 7},
    657                                   {8, 9},
    658                                   {8, 11},
    659                                   {14, 8},
    660                                   {14, 14},
    661                                   {14, 6},
    662                                   {14, 5},
    663                                   {14, 4},
    664                                   {14, 7},
    665                                   {14, 9},
    666                                   {14, 11},
    667                                   {6, 8},
    668                                   {6, 14},
    669                                   {6, 6},
    670                                   {6, 5},
    671                                   {6, 4},
    672                                   {6, 7},
    673                                   {6, 9},
    674                                   {6, 11},
    675                                   {5, 8},
    676                                   {5, 14},
    677                                   {5, 6},
    678                                   {5, 5},
    679                                   {5, 4},
    680                                   {5, 7},
    681                                   {5, 9},
    682                                   {5, 11},
    683                                   {4, 8},
    684                                   {4, 14},
    685                                   {4, 6},
    686                                   {4, 5},
    687                                   {4, 1},
    688                                   {4, 7},
    689                                   {4, 9},
    690                                   {4, 11},
    691                                   {7, 8},
    692                                   {7, 14},
    693                                   {7, 6},
    694                                   {7, 5},
    695                                   {7, 4},
    696                                   {7, 7},
    697                                   {7, 9},
    698                                   {7, 11},
    699                                   {10, 8},
    700                                   {10, 14},
    701                                   {10, 6},
    702                                   {10, 5},
    703                                   {10, 4},
    704                                   {10, 7},
    705                                   {10, 9},
    706                                   {10, 11},
    707                                   {12, 8},
    708                                   {12, 14},
    709                                   {12, 6},
    710                                   {12, 5},
    711                                   {12, 4},
    712                                   {12, 7},
    713                                   {12, 9},
    714                                   {12, 11}
    715 };
    716 
/* Test dispatch tables.  Every table ends with an entry whose function
 * pointer and name are NULL.
 */

// These are all double precision inputs with double word outputs (mostly converted to single precision)
/* Two-operand multiply / divide / fused multiply-add tests.  Each entry
 * runs the first 64 pairs of vx_math_tests.
 */
static vx_fp_test_basic_t vx_fp_tests[] = {
                                     { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
                                     { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
                                     { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
                                     { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
                                     { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
                                     { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
                                     { NULL, NULL, NULL, 0, 0 }
};

/* One-operand tests.  targs is NULL and num_tests is 20 for every entry.
 * NOTE(review): 20 matches the number of entries written by
 * build_special_fargs_table(); presumably the driver walks that table
 * directly -- confirm against the driver.
 */
static vx_fp_test2_t
vsx_one_fp_arg_tests[] = {
                          { &test_xscvdpspn, "xscvdpspn", NULL, 20, SINGLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
                          { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
                          { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                          { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
                          { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
                          { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
                          { NULL, NULL, NULL, 0, 0, 0, NULL}
};

// These are all double precision inputs with double word outputs (mostly converted to single precision)
/* Two-operand add / subtract tests over the first 64 pairs of
 * vx_math_tests.
 */
static vx_fp_test_basic_t
vx_simple_scalar_fp_tests[] = {
                          { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
                          { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
                          { NULL, NULL, NULL, 0 , 0}
};

/* Load/store tests.  Stores use vstg as base address and the integer
 * loads use viargs.  The lxsspx entry has a NULL base address --
 * presumably supplied by the driver at run time, TODO confirm.
 */
static ldst_test_t
ldst_tests[] = {
                    { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
                    { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
                    { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
                    { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 1, VSX_LOAD },
                    { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
                    { NULL, NULL, 0, NULL, 0, 0 } };

/* Integer doubleword to single-precision conversion tests. */
static xs_conv_test_t
xs_conv_tests[] = {
                   { &test_xscvsxdsp, "xscvsxdsp"},
                   { &test_xscvuxdsp, "xscvuxdsp"},
                   { NULL, NULL}
};

/* VSX logical-operation tests.  The terminator omits the op field, which
 * is therefore zero-initialised.
 */
static logic_test_t
logic_tests[] = {
                 { &test_xxleqv,  "xxleqv", VSX_EQV },
                 { &test_xxlorc,  "xxlorc", VSX_ORC },
                 { &test_xxlnand, "xxlnand", VSX_NAND },
                 { NULL, NULL}
};
    770 
    771 Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
    772 {
    773    /* NOTE:
    774     * This function has been verified only with the xsresp and xsrsqrtes instructions.
    775     *
    776     * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
    777     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    778     * does an actual reciprocal calculation versus estimation, so the answer we get back from
    779     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    780     * precision) and the estimate may still be within expected tolerances.  On top of that,
    781     * we can't count on these estimates always being the same across implementations.
    782     * For example, with the fre[s] instruction (which should be correct to within one part
    783     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    784     * one implementation could return 1.0111_1111_0000 and another implementation could return
    785     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    786     * single bit in common.
    787     *
    788     * The upshot is we can't validate the VEX output for these instructions by comparing against
    789     * stored bit patterns.  We must check that the result is within expected tolerances.
    790     */
    791 
    792    /* A mask to be used for validation as a last resort.
    793     * Only use 12 bits of precision for reasons discussed above.
    794     */
    795 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
    796 
    797 
    798    Bool result = False;
    799    double src_dp, res_dp;
    800    float calc_diff = 0;
    801    float real_diff = 0;
    802    double recip_divisor;
    803    float div_result;
    804    float calc_diff_tmp;
    805 
    806    src_dp = res_dp = 0;
    807    Bool src_is_negative = False;
    808    Bool res_is_negative = False;
    809    unsigned long long * dst_dp = NULL;
    810    unsigned long long * src_dp_ull;
    811    dst_dp = (unsigned long long *) &vec_out;
    812    src_dp = spec_fargs[idx];
    813    src_dp_ull = (unsigned long long *) &src_dp;
    814    src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
    815    res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
    816    memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
    817 
    818 
    819    // Below are common rules
    820    if (isnan(src_dp))
    821       return isnan(res_dp);
    822    if (fpclassify(src_dp) == FP_ZERO)
    823       return isinf(res_dp);
    824    if (!src_is_negative && isinf(src_dp))
    825       return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    826    if (is_rsqrte) {
    827       if (src_is_negative)
    828          return isnan(res_dp);
    829    } else {
    830       if (src_is_negative && isinf(src_dp))
    831          return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    832    }
    833 
    834    if (is_rsqrte)
    835       recip_divisor = sqrt(src_dp);
    836    else
    837       recip_divisor = src_dp;
    838 
    839    /* The instructions handled by this function take a double precision
    840     * input, perform a reciprocal estimate in double-precision, round
    841     * the result to single precision and store into the destination
    842     * register in double precision format.  So, to check the result
    843     * for accuracy, we use float (single precision) values.
    844     */
    845    div_result = 1.0/recip_divisor;
    846    calc_diff_tmp = recip_divisor * 16384.0;
    847    if (isnormal(calc_diff_tmp)) {
    848       calc_diff = fabs(1.0/calc_diff_tmp);
    849       real_diff = fabs((float)res_dp - div_result);
    850       result = ( ( res_dp == div_result )
    851                || ( real_diff <= calc_diff ) );
    852 #if FRES_DEBUG
    853       unsigned int * dv = (unsigned int *)&div_result;
    854       unsigned int * rd = (unsigned int *)&real_diff;
    855       unsigned int * cd = (unsigned int *)&calc_diff;
    856       printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
    857              *dv, *rd, *cd);
    858 #endif
    859 
    860    } else {
    861       /* Unable to compute theoretical difference, so we fall back to masking out
    862        * un-precise bits.
    863        */
    864       unsigned int * div_result_sp = (unsigned int *)&div_result;
    865       float res_sp = (float)res_dp;
    866       unsigned int * dst_sp = (unsigned int *)&res_sp;
    867 #if FRES_DEBUG
    868       unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
    869       printf("Unable to compute theoretical difference, so we fall back to masking\n");
    870       printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
    871              *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
    872 #endif
    873       result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
    874    }
    875    return result;
    876 }
    877 
    878 static void test_vx_fp_ops(void)
    879 {
    880 
    881    test_func_t func;
    882    int k;
    883    char * test_name = (char *)malloc(20);
    884    k = 0;
    885 
    886    build_special_fargs_table();
    887    while ((func = vx_fp_tests[k].test_func)) {
    888       int i, repeat = 0;
    889       unsigned long long * frap, * frbp, * dst;
    890       vx_fp_test_basic_t test_group = vx_fp_tests[k];
    891       vx_fp_test_type test_type = test_group.test_type;
    892 
    893       switch (test_type) {
    894          case VX_FP_SMAS:
    895          case VX_FP_SMSS:
    896          case VX_FP_SNMAS:
    897          case VX_FP_SNMSS:
    898             if (test_type == VX_FP_SMAS)
    899                strcpy(test_name, "xsmadd");
    900             else if (test_type == VX_FP_SMSS)
    901                strcpy(test_name, "xsmsub");
    902             else if (test_type == VX_FP_SNMAS)
    903                strcpy(test_name, "xsnmadd");
    904             else
    905                strcpy(test_name, "xsnmsub");
    906 
    907             if (!repeat) {
    908                repeat = 1;
    909                strcat(test_name, "asp");
    910                do_asp = 1;
    911             }
    912             break;
    913          case VX_FP_OTHER:
    914             strcpy(test_name, test_group.name);
    915             break;
    916          default:
    917             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
    918             exit(1);
    919       }
    920 
    921 again:
    922       for (i = 0; i < test_group.num_tests; i++) {
    923          unsigned int * inA, * inB, * pv;
    924 
    925          fp_test_args_t aTest = test_group.targs[i];
    926          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
    927          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
    928          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
    929          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
    930          int idx;
    931          unsigned long long vsr_XT;
    932          pv = (unsigned int *)&vec_out;
    933 
    934          // Only need to copy one doubleword into each vector's element 0
    935          memcpy(&vec_inA, inA, 8);
    936          memcpy(&vec_inB, inB, 8);
    937 
    938          // clear vec_out
    939          for (idx = 0; idx < 4; idx++, pv++)
    940             *pv = 0;
    941 
    942          if (test_type != VX_FP_OTHER) {
    943             /* Then we need a third src argument, which is stored in element 0 of
    944              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
    945              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
    946              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
    947              * data (input args, result) contain only two inputs, so I arbitrarily
    948              * use spec_fargs elements 4 and 14 (alternating) for the third source
    949              * argument.  We can use the same input data for a given pair of
    950              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
    951              * the expected result should be the same.
    952              */
    953             int extra_arg_idx;
    954             if (i % 2)
    955                extra_arg_idx = 4;
    956             else
    957                extra_arg_idx = 14;
    958 
    959             if (repeat) {
    960                /* We're on the first time through of one of the VX_FP_SMx
    961                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
    962                 * we have to swap inputs as described above:
    963                 *    src2 <= VSX[XT]
    964                 *    src3 <= VSX[XB]
    965                 */
    966                memcpy(&vec_out, inB, 8);  // src2
    967                memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8);  //src3
    968                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
    969             } else {
    970                // Don't need to init src2, as it's done before the switch()
    971                memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8);  //src3
    972             }
    973             memcpy(&vsr_XT, &vec_out, 8);
    974          }
    975 
    976          (*func)();
    977          dst = (unsigned long long *) &vec_out;
    978 
    979          if (test_type == VX_FP_OTHER)
    980             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
    981                    *frap, *frbp, *dst);
    982          else
    983             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
    984                     test_name, vsr_XT, *frap, *frbp, *dst );
    985 
    986       }
    987       /*
    988            {
    989                // Debug code.  Keep this block commented out except when debugging.
    990                double result, expected;
    991                memcpy(&result, dst, 8);
    992                memcpy(&expected, &aTest.dp_bin_result, 8);
    993                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
    994                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
    995                        expected, result );
    996             }
    997        */
    998       printf( "\n" );
    999 
   1000       if (repeat) {
   1001          repeat = 0;
   1002          strcat(test_name, "UNKNOWN");
   1003          switch (test_type) {
   1004             case VX_FP_SMAS:
   1005             case VX_FP_SMSS:
   1006             case VX_FP_SNMAS:
   1007             case VX_FP_SNMSS:
   1008                if (test_type == VX_FP_SMAS)
   1009                   strcpy(test_name, "xsmadd");
   1010                else if (test_type == VX_FP_SMSS)
   1011                   strcpy(test_name, "xsmsub");
   1012                else if (test_type == VX_FP_SNMAS)
   1013                   strcpy(test_name, "xsnmadd");
   1014                else
   1015                   strcpy(test_name, "xsnmsub");
   1016 
   1017                do_asp = 0;
   1018                strcat(test_name, "msp");
   1019                break;
   1020             default:
   1021                break;
   1022          }
   1023          goto again;
   1024       }
   1025       k++;
   1026    }
   1027    printf( "\n" );
   1028    free(test_name);
   1029 }
   1030 
   1031 
   1032 static void test_vsx_one_fp_arg(void)
   1033 {
   1034    test_func_t func;
   1035    int k;
   1036    k = 0;
   1037    build_special_fargs_table();
   1038 
   1039    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
   1040       int idx, i;
   1041       unsigned long long *dst_dp;
   1042       unsigned int * dst_sp;
   1043       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
   1044       /* size of source operands */
   1045       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
   1046 		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
   1047       /* size of result */
   1048       Bool dp_res = IS_DP_RESULT(test_group.precision);
   1049       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
   1050       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
   1051       Bool sparse_sp = False;
   1052       int stride = dp ? 2 : 4;
   1053       int loops = is_scalar ? 1 : stride;
   1054       stride = is_scalar ? 1: stride;
   1055 
   1056       /* For conversions of single to double, the 128-bit input register is sparsely populated:
   1057        *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
   1058        *                     or
   1059        *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
   1060        *
   1061        * For the vector op case, we need to adjust stride from '4' to '2', since
   1062        * we'll only be loading two values per loop into the input register.
   1063        */
   1064       if (!dp && !is_scalar && test_group.test_type == VX_CONV_TO_DOUBLE) {
   1065          sparse_sp = True;
   1066          stride = 2;
   1067       }
   1068 
   1069       for (i = 0; i < test_group.num_tests; i+=stride) {
   1070          unsigned int * pv;
   1071          void * inB;
   1072 
   1073          pv = (unsigned int *)&vec_out;
   1074          // clear vec_out
   1075          for (idx = 0; idx < 4; idx++, pv++)
   1076             *pv = 0;
   1077 
   1078          if (dp) {
   1079             int j;
   1080             unsigned long long * frB_dp;
   1081             for (j = 0; j < loops; j++) {
   1082                inB = (void *)&spec_fargs[i + j];
   1083                // copy double precision FP into vector element i
   1084                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
   1085             }
   1086             // execute test insn
   1087             (*func)();
   1088             if (dp_res)
   1089                dst_dp = (unsigned long long *) &vec_out;
   1090             else
   1091                dst_sp = (unsigned int *) &vec_out;
   1092 
   1093             printf("#%d: %s ", i/stride, test_group.name);
   1094             for (j = 0; j < loops; j++) {
   1095                if (j)
   1096                   printf("; ");
   1097                frB_dp = (unsigned long long *)&spec_fargs[i + j];
   1098                printf("%s(%016llx)", test_group.op, *frB_dp);
   1099                if (test_group.test_type == VX_ESTIMATE)
   1100                {
   1101                   Bool res;
   1102                   res = check_reciprocal_estimate(is_sqrt, i + j, j);
   1103                   printf(" ==> %s)", res ? "PASS" : "FAIL");
   1104                } else if (dp_res) {
   1105                   printf(" = %016llx", dst_dp[j]);
   1106                } else {
   1107                   printf(" = %08x", dst_sp[j]);
   1108                }
   1109             }
   1110             printf("\n");
   1111          } else {  // single precision test type
   1112             int j;
   1113             // Clear input vector
   1114             pv = (unsigned int *)&vec_inB;
   1115             for (idx = 0; idx < 4; idx++, pv++)
   1116                *pv = 0;
   1117 
   1118             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
   1119                /* Take a single-precision value stored in double word element 0
   1120                 * of src in double-precision format and convert to single-
   1121                 * precision and store in word element 0 of dst.
   1122                 */
   1123                double input = spec_sp_fargs[i];
   1124                memcpy(((void *)&vec_inB), (void *)&input, 8);
   1125             } else {
   1126                int skip_slot;
   1127                if (sparse_sp) {
   1128                   skip_slot = 1;
   1129                   loops = 2;
   1130                } else {
   1131                   skip_slot = 0;
   1132                }
   1133                for (j = 0; j < loops; j++) {
   1134                   inB = (void *)&spec_sp_fargs[i + j];
   1135                   // copy single precision FP into vector element i
   1136 
   1137                   if (skip_slot && j > 0)
   1138                      memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
   1139                   else
   1140                      memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
   1141                }
   1142             }
   1143             // execute test insn
   1144             (*func)();
   1145             if (dp_res)
   1146                dst_dp = (unsigned long long *) &vec_out;
   1147             else
   1148                dst_sp = (unsigned int *) &vec_out;
   1149             // print result
   1150             printf("#%d: %s ", i/stride, test_group.name);
   1151             for (j = 0; j < loops; j++) {
   1152                if (j)
   1153                   printf("; ");
   1154                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i + j]));
   1155                if (dp_res)
   1156                      printf(" = %016llx", dst_dp[j]);
   1157                else
   1158                   printf(" = %08x", dst_sp[j]);
   1159             }
   1160             printf("\n");
   1161          }
   1162       }
   1163       k++;
   1164       printf( "\n" );
   1165    }
   1166 }
   1167 
   1168 /* This function currently only supports two double precision input arguments. */
   1169 static void test_vsx_two_fp_arg(void)
   1170 {
   1171    test_func_t func;
   1172    int k = 0;
   1173 
   1174    build_special_fargs_table();
   1175    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
   1176       unsigned long long * frap, * frbp, * dst;
   1177       unsigned int * pv;
   1178       int idx;
   1179       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
   1180       pv = (unsigned int *)&vec_out;
   1181       // clear vec_out
   1182       for (idx = 0; idx < 4; idx++, pv++)
   1183          *pv = 0;
   1184 
   1185       void * inA, * inB;
   1186       int i;
   1187       for (i = 0; i < test_group.num_tests; i++) {
   1188          fp_test_args_t aTest = test_group.targs[i];
   1189          inA = (void *)&spec_fargs[aTest.fra_idx];
   1190          inB = (void *)&spec_fargs[aTest.frb_idx];
   1191          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1192          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1193          // Only need to copy one doubleword into each vector's element 0
   1194          memcpy(&vec_inA, inA, 8);
   1195          memcpy(&vec_inB, inB, 8);
   1196          (*func)();
   1197          dst = (unsigned long long *) &vec_out;
   1198          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
   1199                 *frap, *frbp, *dst);
   1200       }
   1201       printf( "\n" );
   1202       k++;
   1203    }
   1204 }
   1205 
   1206 /* This function handles the following cases:
   1207  *   1) Single precision value stored in double-precision
   1208  *      floating-point format in doubleword element 0 of src VSX register
   1209  *   2) Integer word value stored in word element 1 of src VSX register
   1210  */
   1211 static void _do_store_test (ldst_test_t storeTest)
   1212 {
   1213    test_func_t func;
   1214    unsigned int *dst32;
   1215    unsigned int i, idx;
   1216    unsigned int * pv = (unsigned int *) storeTest.base_addr;
   1217 
   1218    func = storeTest.test_func;
   1219    r14 = (HWord_t) storeTest.base_addr;
   1220    r15 = (HWord_t) storeTest.offset;
   1221 
   1222    if (storeTest.precision == DOUBLE_TEST_SINGLE_RES) {
   1223       /* source is single precision stored in double precision format */
   1224       /* test some of the pre-defined single precision values */
   1225       for (i = 0; i < nb_special_fargs; i+=3) {
   1226          // clear out storage destination
   1227          for (idx = 0; idx < 4; idx++)
   1228             *(pv + idx) = 0;
   1229 
   1230          printf( "%s:", storeTest.name );
   1231          unsigned long long * dp;
   1232          double input = spec_sp_fargs[i];
   1233          dp = (unsigned long long *)&input;
   1234          memcpy(&vec_inA, dp, sizeof(unsigned long long));
   1235          printf(" %016llx ==> ", *dp);
   1236 
   1237          // execute test insn
   1238          (*func)();
   1239          dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
   1240          printf( "%08x\n", *dst32);
   1241       }
   1242    } else {
   1243       // source is an integer word
   1244       for (i = 0; i < NUM_VIARGS_INTS; i++) {
   1245          // clear out storage destination
   1246          for (idx = 0; idx < 4; idx++)
   1247             *(pv + idx) = 0;
   1248          printf( "%s:", storeTest.name );
   1249          unsigned int * pi = (unsigned int *)&vec_inA;
   1250          memcpy(pi + 1, &viargs[i], sizeof(unsigned int));
   1251          printf(" %08x ==> ", *(pi + 1));
   1252 
   1253          // execute test insn
   1254          (*func)();
   1255          dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
   1256          printf( "%08x\n", *dst32);
   1257       }
   1258    }
   1259    printf("\n");
   1260 }
   1261 
   1262 static void _do_load_test(ldst_test_t storeTest)
   1263 {
   1264    test_func_t func;
   1265    unsigned int i;
   1266    unsigned long long * dst_dp;
   1267 
   1268    func = storeTest.test_func;
   1269    r15 = (HWord_t) storeTest.offset;
   1270 
   1271    if (storeTest.base_addr == NULL) {
   1272       /* Test lxsspx: source is single precision value, so let's */
   1273       /* test some of the pre-defined single precision values. */
   1274       for (i = 0; i + storeTest.offset < nb_special_fargs; i+=3) {
   1275          unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + storeTest.offset];
   1276          printf( "%s:", storeTest.name );
   1277          printf(" %08x ==> ", *sp);
   1278          r14 = (HWord_t)&spec_sp_fargs[i];
   1279 
   1280          // execute test insn
   1281          (*func)();
   1282          dst_dp = (unsigned long long *) &vec_out;
   1283          printf("%016llx\n", *dst_dp);
   1284       }
   1285    } else {
   1286       // source is an integer word
   1287       for (i = 0; i < NUM_VIARGS_INTS; i++) {
   1288          printf( "%s:", storeTest.name );
   1289          r14 = (HWord_t)&viargs[i + storeTest.offset];
   1290          printf(" %08x ==> ", viargs[i + storeTest.offset]);
   1291 
   1292          // execute test insn
   1293          (*func)();
   1294          dst_dp = (unsigned long long *) &vec_out;
   1295          printf("%016llx\n", *dst_dp);
   1296       }
   1297    }
   1298    printf("\n");
   1299 }
   1300 
   1301 static void test_ldst(void)
   1302 {
   1303    int k = 0;
   1304 
   1305    while (ldst_tests[k].test_func) {
   1306       if (ldst_tests[k].type == VSX_STORE)
   1307          _do_store_test(ldst_tests[k]);
   1308       else {
   1309          _do_load_test(ldst_tests[k]);
   1310       }
   1311       k++;
   1312       printf("\n");
   1313    }
   1314 }
   1315 
   1316 static void test_xs_conv_ops(void)
   1317 {
   1318 
   1319    test_func_t func;
   1320    int k = 0;
   1321 
   1322    build_special_fargs_table();
   1323    while ((func = xs_conv_tests[k].test_func)) {
   1324       int i;
   1325       unsigned long long * dst;
   1326       xs_conv_test_t test_group = xs_conv_tests[k];
   1327       for (i = 0; i < NUM_VIARGS_INTS; i++) {
   1328          unsigned int * inB, * pv;
   1329          int idx;
   1330          inB = (unsigned int *)&viargs[i];
   1331          memcpy(&vec_inB, inB, 4);
   1332          pv = (unsigned int *)&vec_out;
   1333          // clear vec_out
   1334          for (idx = 0; idx < 4; idx++, pv++)
   1335             *pv = 0;
   1336          (*func)();
   1337          dst = (unsigned long long *) &vec_out;
   1338          printf("#%d: %s %08x => %016llx\n", i, test_group.name, viargs[i], *dst);
   1339       }
   1340       k++;
   1341       printf("\n");
   1342    }
   1343    printf( "\n" );
   1344 }
   1345 
   1346 
   1347 static void test_vsx_logic(void)
   1348 {
   1349    logic_test_t aTest;
   1350    test_func_t func;
   1351    int k;
   1352    k = 0;
   1353 
   1354    while ((func = logic_tests[k].test_func)) {
   1355 
   1356       unsigned int * pv;
   1357       unsigned int * inA, * inB, * dst;
   1358       int idx, i;
   1359       aTest = logic_tests[k];
   1360       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
   1361          pv = (unsigned int *)&vec_out;
   1362          inA = &viargs[i];
   1363          inB = &viargs[i];
   1364          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
   1365          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
   1366          // clear vec_out
   1367          for (idx = 0; idx < 4; idx++, pv++)
   1368             *pv = 0;
   1369 
   1370          // execute test insn
   1371          (*func)();
   1372          dst = (unsigned int*) &vec_out;
   1373 
   1374          printf( "#%d: %10s ", k, aTest.name);
   1375          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
   1376          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
   1377          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
   1378       }
   1379       k++;
   1380    }
   1381    printf( "\n" );
   1382 }
   1383 
   1384 
   1385 //----------------------------------------------------------
   1386 
   1387 static test_table_t all_tests[] = {
   1388                                      { &test_vx_fp_ops,
   1389                                        "Test VSX floating point instructions"},
   1390                                      { &test_vsx_one_fp_arg,
   1391                                        "Test VSX vector and scalar single argument instructions"} ,
   1392                                      { &test_vsx_logic,
   1393                                        "Test VSX logic instructions" },
   1394                                      { &test_xs_conv_ops,
   1395                                        "Test VSX scalar integer conversion instructions" },
   1396                                      { &test_ldst,
   1397                                        "Test VSX load/store dp to sp instructions" },
   1398                                      { &test_vsx_two_fp_arg,
   1399                                        "Test VSX vector and scalar two argument instructions"} ,
   1400                                      { NULL, NULL }
   1401 };
   1402 
   1403 #endif
   1404 
   1405 int main(int argc, char *argv[])
   1406 {
   1407 
   1408 #ifdef HAS_ISA_2_07
   1409    test_table_t aTest;
   1410    test_func_t func;
   1411    int i = 0;
   1412 
   1413    while ((func = all_tests[i].test_category)) {
   1414       aTest = all_tests[i];
   1415       printf( "%s\n", aTest.name );
   1416       (*func)();
   1417       i++;
   1418    }
   1419 #else
   1420    printf("NO ISA 2.07 SUPPORT\n");
   1421 #endif
   1422    return 0;
   1423 }
   1424