Home | History | Annotate | Download | only in ppc32
      1 /*  Copyright (C) 2013 IBM
      2 
      3  Authors: Carl Love  <carll (at) us.ibm.com>
      4           Maynard Johnson <maynardj (at) us.ibm.com>
      5 
      6  This program is free software; you can redistribute it and/or
      7  modify it under the terms of the GNU General Public License as
      8  published by the Free Software Foundation; either version 2 of the
      9  License, or (at your option) any later version.
     10 
     11  This program is distributed in the hope that it will be useful, but
     12  WITHOUT ANY WARRANTY; without even the implied warranty of
     13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  General Public License for more details.
     15 
     16  You should have received a copy of the GNU General Public License
     17  along with this program; if not, write to the Free Software
     18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     19  02111-1307, USA.
     20 
     21  The GNU General Public License is contained in the file COPYING.
     22 
     23  This program is based heavily on the test_isa_2_06_part*.c source files.
     24  */
     25 
     26 #include <stdio.h>
     27 
     28 #ifdef HAS_ISA_2_07
     29 
     30 #include <stdint.h>
     31 #include <stdlib.h>
     32 #include <string.h>
     33 #include <malloc.h>
     34 #include <altivec.h>
     35 #include <math.h>
     36 
     37 #ifndef __powerpc64__
     38 typedef uint32_t HWord_t;
     39 #else
     40 typedef uint64_t HWord_t;
     41 #endif /* __powerpc64__ */
     42 
     43 #ifdef VGP_ppc64le_linux
     44 #define isLE 1
     45 #else
     46 #define isLE 0
     47 #endif
     48 
     49 register HWord_t r14 __asm__ ("r14");
     50 register HWord_t r15 __asm__ ("r15");
     51 register HWord_t r16 __asm__ ("r16");
     52 register HWord_t r17 __asm__ ("r17");
     53 register double f14 __asm__ ("fr14");
     54 register double f15 __asm__ ("fr15");
     55 register double f16 __asm__ ("fr16");
     56 register double f17 __asm__ ("fr17");
     57 
     58 static volatile unsigned int cond_reg;
     59 
     60 #define True  1
     61 #define False 0
     62 
     63 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
     64 
     65 #define SET_CR(_arg) \
     66       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
     67 
     68 #define SET_XER(_arg) \
     69       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
     70 
     71 #define GET_CR(_lval) \
     72       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
     73 
     74 #define GET_XER(_lval) \
     75       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
     76 
     77 #define GET_CR_XER(_lval_cr,_lval_xer) \
     78    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
     79 
     80 #define SET_CR_ZERO \
     81       SET_CR(0)
     82 
     83 #define SET_XER_ZERO \
     84       SET_XER(0)
     85 
     86 #define SET_CR_XER_ZERO \
     87    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
     88 
     89 #define SET_FPSCR_ZERO \
     90    do { double _d = 0.0; \
     91         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
     92    } while (0)
     93 
     94 typedef unsigned char Bool;
     95 
     96 
     97 /* These functions below that construct a table of floating point
     98  * values were lifted from none/tests/ppc32/jm-insns.c.
     99  */
    100 
    101 #if defined (DEBUG_ARGS_BUILD)
    102 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    103 #else
    104 #define AB_DPRINTF(fmt, args...) do { } while (0)
    105 #endif
    106 
    107 static inline void register_farg (void *farg,
    108                                   int s, uint16_t _exp, uint64_t mant)
    109 {
    110    uint64_t tmp;
    111 
    112    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    113    *(uint64_t *)farg = tmp;
    114    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    115               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    116 }
    117 
    118 static inline void register_sp_farg (void *farg,
    119                                      int s, uint16_t _exp, uint32_t mant)
    120 {
    121    uint32_t tmp;
    122    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
    123    *(uint32_t *)farg = tmp;
    124 }
    125 
    126 
    127 typedef struct fp_test_args {
    128    int fra_idx;
    129    int frb_idx;
    130 } fp_test_args_t;
    131 
    132 static int nb_special_fargs;
    133 static double * spec_fargs;
    134 static float * spec_sp_fargs;
    135 
    136 static void build_special_fargs_table(void)
    137 {
    138    /*
    139     * Double precision:
    140     * Sign goes from zero to one               (1 bit)
    141     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    142     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    143     * + special values:
    144     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    145     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    146     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    147     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    148     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    149     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    150     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    151     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    152     * (8 values)
    153     *
    154     * Single precision
    155     * Sign:     1 bit
    156     * Exponent: 8 bits
    157     * Mantissa: 23 bits
    158     * +0.0      : 0 0x00 0x000000 => 0x00000000
    159     * -0.0      : 1 0x00 0x000000 => 0x80000000
    160     * +infinity : 0 0xFF 0x000000 => 0x7F800000
    161     * -infinity : 1 0xFF 0x000000 => 0xFF800000
    162     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    163     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    164     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    165     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    166    */
    167 
    168    uint64_t mant;
    169    uint32_t mant_sp;
    170    uint16_t _exp;
    171    int s;
    172    int j, i = 0;
    173 
    174    if (spec_fargs)
    175       return;
    176 
    177    spec_fargs = malloc( 20 * sizeof(double) );
    178    spec_sp_fargs = malloc( 20 * sizeof(float) );
    179 
    180    // #0
    181    s = 0;
    182    _exp = 0x3fd;
    183    mant = 0x8000000000000ULL;
    184    register_farg(&spec_fargs[i++], s, _exp, mant);
    185 
    186    // #1
    187    s = 0;
    188    _exp = 0x404;
    189    mant = 0xf000000000000ULL;
    190    register_farg(&spec_fargs[i++], s, _exp, mant);
    191 
    192    // #2
    193    s = 0;
    194    _exp = 0x001;
    195    mant = 0x8000000b77501ULL;
    196    register_farg(&spec_fargs[i++], s, _exp, mant);
    197 
    198    // #3
    199    s = 0;
    200    _exp = 0x7fe;
    201    mant = 0x800000000051bULL;
    202    register_farg(&spec_fargs[i++], s, _exp, mant);
    203 
    204    // #4
    205    s = 0;
    206    _exp = 0x012;
    207    mant = 0x3214569900000ULL;
    208    register_farg(&spec_fargs[i++], s, _exp, mant);
    209 
    210    /* Special values */
    211    /* +0.0      : 0 0x000 0x0000000000000 */
    212    // #5
    213    s = 0;
    214    _exp = 0x000;
    215    mant = 0x0000000000000ULL;
    216    register_farg(&spec_fargs[i++], s, _exp, mant);
    217 
    218    /* -0.0      : 1 0x000 0x0000000000000 */
    219    // #6
    220    s = 1;
    221    _exp = 0x000;
    222    mant = 0x0000000000000ULL;
    223    register_farg(&spec_fargs[i++], s, _exp, mant);
    224 
    225    /* +infinity : 0 0x7FF 0x0000000000000  */
    226    // #7
    227    s = 0;
    228    _exp = 0x7FF;
    229    mant = 0x0000000000000ULL;
    230    register_farg(&spec_fargs[i++], s, _exp, mant);
    231 
    232    /* -infinity : 1 0x7FF 0x0000000000000 */
    233    // #8
    234    s = 1;
    235    _exp = 0x7FF;
    236    mant = 0x0000000000000ULL;
    237    register_farg(&spec_fargs[i++], s, _exp, mant);
    238 
    239    /*
    240     * This comment applies to values #9 and #10 below:
    241     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    242     * so we can't just copy the double-precision value to the corresponding slot in the
    243     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    244     * have to manually set the bits using register_sp_farg().
    245     */
    246 
    247    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
    248    // #9
    249    s = 0;
    250    _exp = 0x7FF;
    251    mant = 0x7FFFFFFFFFFFFULL;
    252    register_farg(&spec_fargs[i++], s, _exp, mant);
    253    _exp = 0xff;
    254    mant_sp = 0x3FFFFF;
    255    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    256 
    257    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
    258    // #10
    259    s = 1;
    260    _exp = 0x7FF;
    261    mant = 0x7FFFFFFFFFFFFULL;
    262    register_farg(&spec_fargs[i++], s, _exp, mant);
    263    _exp = 0xff;
    264    mant_sp = 0x3FFFFF;
    265    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    266 
    267    /* +QNaN     : 0 0x7FF 0x8000000000000 */
    268    // #11
    269    s = 0;
    270    _exp = 0x7FF;
    271    mant = 0x8000000000000ULL;
    272    register_farg(&spec_fargs[i++], s, _exp, mant);
    273 
    274    /* -QNaN     : 1 0x7FF 0x8000000000000 */
    275    // #12
    276    s = 1;
    277    _exp = 0x7FF;
    278    mant = 0x8000000000000ULL;
    279    register_farg(&spec_fargs[i++], s, _exp, mant);
    280 
    281    /* denormalized value */
    282    // #13
    283    s = 1;
    284    _exp = 0x000;
    285    mant = 0x8340000078000ULL;
    286    register_farg(&spec_fargs[i++], s, _exp, mant);
    287 
    288    /* Negative finite number */
    289    // #14
    290    s = 1;
    291    _exp = 0x40d;
    292    mant = 0x0650f5a07b353ULL;
    293    register_farg(&spec_fargs[i++], s, _exp, mant);
    294 
    295    /* A few positive finite numbers ... */
    296    // #15
    297    s = 0;
    298    _exp = 0x412;
    299    mant = 0x32585a9900000ULL;
    300    register_farg(&spec_fargs[i++], s, _exp, mant);
    301 
    302    // #16
    303    s = 0;
    304    _exp = 0x413;
    305    mant = 0x82511a2000000ULL;
    306    register_farg(&spec_fargs[i++], s, _exp, mant);
    307 
    308    // #17
    309    s = 0;
    310    _exp = 0x403;
    311    mant = 0x12ef5a9300000ULL;
    312    register_farg(&spec_fargs[i++], s, _exp, mant);
    313 
    314    // #18
    315    s = 0;
    316    _exp = 0x405;
    317    mant = 0x14bf5d2300000ULL;
    318    register_farg(&spec_fargs[i++], s, _exp, mant);
    319 
    320    // #19
    321    s = 0;
    322    _exp = 0x409;
    323    mant = 0x76bf982440000ULL;
    324    register_farg(&spec_fargs[i++], s, _exp, mant);
    325 
    326 
    327    nb_special_fargs = i;
    328    for (j = 0; j < i; j++) {
    329       if (!(j == 9 || j == 10))
    330          spec_sp_fargs[j] = spec_fargs[j];
    331    }
    332 }
    333 
    334 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
    335                                                               0, 0, 0, 0 };
    336 
    337 
    338 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001,
    339                                                                 0x89abcdef,
    340                                                                 0x00112233,
    341                                                                 0x74556677,
    342                                                                 0x00001abb,
    343                                                                 0x00000001,
    344                                                                 0x31929394,
    345                                                                 0xa1a2a3a4,
    346 };
    347 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
    348 #define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
    349 
    350 
    351 static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
    352                                                                      0x0102030405060708ULL,
    353                                                                      0x090A0B0C0E0D0E0FULL,
    354                                                                      0xF1F2F3F4F5F6F7F8ULL,
    355                                                                      0xF9FAFBFCFEFDFEFFULL
    356 };
    357 #define NUM_VDARGS_INTS (sizeof vdargs/sizeof vdargs[0])
    358 #define NUM_VDARGS_VECS  (NUM_VDARGS_INTS/2)
    359 
    360 typedef void (*test_func_t)(void);
    361 
    362 struct test_table
    363 {
    364    test_func_t test_category;
    365    char * name;
    366 };
    367 
    368 
    369 typedef enum {
    370    SINGLE_TEST,
    371    SINGLE_TEST_SINGLE_RES,
    372    DOUBLE_TEST,
    373    DOUBLE_TEST_SINGLE_RES
    374 } precision_type_t;
    375 #define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))
    376 
    377 typedef enum {
    378    VX_FP_SMAS,   // multiply add single precision result
    379    VX_FP_SMSS,   // multiply sub single precision result
    380    VX_FP_SNMAS,  // negative multiply add single precision result
    381    VX_FP_SNMSS,  // negative multiply sub single precision result
    382    VX_FP_OTHER,
    383    VX_CONV_WORD,
    384    VX_ESTIMATE,
    385    VX_CONV_TO_SINGLE,
    386    VX_CONV_TO_DOUBLE,
    387    VX_SCALAR_CONV_TO_WORD,
    388    VX_SCALAR_SP_TO_VECTOR_SP,
    389    VX_DEFAULT
    390 } vx_fp_test_type;
    391 
    392 typedef enum {
    393    VSX_LOAD = 1,
    394    VSX_LOAD_SPLAT,
    395    VSX_STORE,
    396 } vsx_ldst_type;
    397 
    398 typedef enum {
    399    VSX_AND = 1,
    400    VSX_NAND,
    401    VSX_ANDC,
    402    VSX_OR,
    403    VSX_ORC,
    404    VSX_NOR,
    405    VSX_XOR,
    406    VSX_EQV,
    407 } vsx_log_op;
    408 
    409 struct vx_fp_test1
    410 {
    411    test_func_t test_func;
    412    const char *name;
    413    fp_test_args_t * targs;
    414    int num_tests;
    415     vx_fp_test_type test_type;
    416  };
    417 
    418 struct ldst_test
    419 {
    420    test_func_t test_func;
    421    const char *name;
    422    precision_type_t precision;
    423    void * base_addr;
    424    uint32_t offset;
    425    vsx_ldst_type type;
    426 };
    427 
    428 struct vx_fp_test2
    429 {
    430    test_func_t test_func;
    431    const char *name;
    432    fp_test_args_t * targs;
    433    int num_tests;
    434    precision_type_t precision;
    435    vx_fp_test_type test_type;
    436    const char * op;
    437 };
    438 
    439 struct xs_conv_test
    440 {
    441    test_func_t test_func;
    442    const char *name;
    443    int num_tests;
    444 };
    445 
    446 struct simple_test
    447 {
    448    test_func_t test_func;
    449    const char *name;
    450 };
    451 
    452 struct vsx_logic_test
    453 {
    454    test_func_t test_func;
    455    const char *name;
    456    vsx_log_op op;
    457 };
    458 
    459 typedef struct vsx_logic_test logic_test_t;
    460 typedef struct ldst_test ldst_test_t;
    461 typedef struct simple_test xs_conv_test_t;
    462 typedef struct vx_fp_test1 vx_fp_test_basic_t;
    463 typedef struct vx_fp_test2 vx_fp_test2_t;
    464 typedef struct test_table test_table_t;
    465 
    466 
    467 static vector unsigned int vec_out, vec_inA, vec_inB;
    468 
    469 static void test_xscvdpspn(void)
    470 {
    471    __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    472 }
    473 
    474 static void test_xscvspdpn(void)
    475 {
    476    __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    477 }
    478 
    479 static int do_asp;
    480 static void test_xsmadds(void)
    481 {
    482    if (do_asp)
    483       __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    484    else
    485       __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    486 }
    487 
    488 static void test_xsmsubs(void)
    489 {
    490    if (do_asp)
    491       __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    492    else
    493       __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    494 }
    495 
    496 static void test_xscvsxdsp (void)
    497 {
    498    __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    499 }
    500 
    501 static void test_xscvuxdsp (void)
    502 {
    503    __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    504 }
    505 
    506 static void test_xsnmadds(void)
    507 {
    508    if (do_asp)
    509       __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    510    else
    511       __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    512 }
    513 
    514 static void test_xsnmsubs(void)
    515 {
    516    if (do_asp)
    517       __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    518    else
    519       __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    520 }
    521 
    522 static void test_stxsspx(void)
    523 {
    524    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
    525 }
    526 
    527 static void test_stxsiwx(void)
    528 {
    529    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
    530 }
    531 
    532 static void test_lxsiwax(void)
    533 {
    534    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    535 }
    536 
    537 static void test_lxsiwzx(void)
    538 {
    539    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    540 }
    541 
    542 static void test_lxsspx(void)
    543 {
    544    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
    545 }
    546 
    547 static void test_xssqrtsp(void)
    548 {
    549    __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    550 }
    551 
    552 static void test_xsrsqrtesp(void)
    553 {
    554    __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    555 }
    556 
/* Three argument instructions */
    558 static void test_xxleqv(void)
    559 {
    560    __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    561 }
    562 
    563 static void test_xxlorc(void)
    564 {
    565    __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    566 }
    567 
    568 static void test_xxlnand(void)
    569 {
    570    __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    571 }
    572 
    573 static void test_xsaddsp(void)
    574 {
    575   __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
    576 }
    577 
    578 static void test_xssubsp(void)
    579 {
    580   __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
    581 }
    582 
    583 static void test_xsdivsp(void)
    584 {
    585   __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
    586 }
    587 
    588 static void test_xsmulsp(void)
    589 {
    590    __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    591 }
    592 
    593 static void test_xsresp(void)
    594 {
    595    __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    596 }
    597 static void test_xsrsp(void)
    598 {
    599    __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    600 }
    601 
    602 fp_test_args_t vx_math_tests[] = {
    603                                   {8, 8},
    604                                   {8, 14},
    605                                   {8, 6},
    606                                   {8, 5},
    607                                   {8, 4},
    608                                   {8, 7},
    609                                   {8, 9},
    610                                   {8, 11},
    611                                   {14, 8},
    612                                   {14, 14},
    613                                   {14, 6},
    614                                   {14, 5},
    615                                   {14, 4},
    616                                   {14, 7},
    617                                   {14, 9},
    618                                   {14, 11},
    619                                   {6, 8},
    620                                   {6, 14},
    621                                   {6, 6},
    622                                   {6, 5},
    623                                   {6, 4},
    624                                   {6, 7},
    625                                   {6, 9},
    626                                   {6, 11},
    627                                   {5, 8},
    628                                   {5, 14},
    629                                   {5, 6},
    630                                   {5, 5},
    631                                   {5, 4},
    632                                   {5, 7},
    633                                   {5, 9},
    634                                   {5, 11},
    635                                   {4, 8},
    636                                   {4, 14},
    637                                   {4, 6},
    638                                   {4, 5},
    639                                   {4, 1},
    640                                   {4, 7},
    641                                   {4, 9},
    642                                   {4, 11},
    643                                   {7, 8},
    644                                   {7, 14},
    645                                   {7, 6},
    646                                   {7, 5},
    647                                   {7, 4},
    648                                   {7, 7},
    649                                   {7, 9},
    650                                   {7, 11},
    651                                   {10, 8},
    652                                   {10, 14},
    653                                   {10, 6},
    654                                   {10, 5},
    655                                   {10, 4},
    656                                   {10, 7},
    657                                   {10, 9},
    658                                   {10, 11},
    659                                   {12, 8},
    660                                   {12, 14},
    661                                   {12, 6},
    662                                   {12, 5},
    663                                   {12, 4},
    664                                   {12, 7},
    665                                   {12, 9},
    666                                   {12, 11},
    667                                   {8, 8},
    668                                   {8, 14},
    669                                   {8, 6},
    670                                   {8, 5},
    671                                   {8, 4},
    672                                   {8, 7},
    673                                   {8, 9},
    674                                   {8, 11},
    675                                   {14, 8},
    676                                   {14, 14},
    677                                   {14, 6},
    678                                   {14, 5},
    679                                   {14, 4},
    680                                   {14, 7},
    681                                   {14, 9},
    682                                   {14, 11},
    683                                   {6, 8},
    684                                   {6, 14},
    685                                   {6, 6},
    686                                   {6, 5},
    687                                   {6, 4},
    688                                   {6, 7},
    689                                   {6, 9},
    690                                   {6, 11},
    691                                   {5, 8},
    692                                   {5, 14},
    693                                   {5, 6},
    694                                   {5, 5},
    695                                   {5, 4},
    696                                   {5, 7},
    697                                   {5, 9},
    698                                   {5, 11},
    699                                   {4, 8},
    700                                   {4, 14},
    701                                   {4, 6},
    702                                   {4, 5},
    703                                   {4, 1},
    704                                   {4, 7},
    705                                   {4, 9},
    706                                   {4, 11},
    707                                   {7, 8},
    708                                   {7, 14},
    709                                   {7, 6},
    710                                   {7, 5},
    711                                   {7, 4},
    712                                   {7, 7},
    713                                   {7, 9},
    714                                   {7, 11},
    715                                   {10, 8},
    716                                   {10, 14},
    717                                   {10, 6},
    718                                   {10, 5},
    719                                   {10, 4},
    720                                   {10, 7},
    721                                   {10, 9},
    722                                   {10, 11},
    723                                   {12, 8},
    724                                   {12, 14},
    725                                   {12, 6},
    726                                   {12, 5},
    727                                   {12, 4},
    728                                   {12, 7},
    729                                   {12, 9},
    730                                   {12, 11}
    731 };
    732 
    733 // These are all double precision inputs with double word outputs (mostly converted to single precision)
    734 static vx_fp_test_basic_t vx_fp_tests[] = {
    735                                      { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
    736                                      { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
    737                                      { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
    738                                      { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
    739                                      { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
    740                                      { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
    741                                      { NULL, NULL, NULL, 0, 0 }
    742 };
    743 
    744 static vx_fp_test2_t
    745 vsx_one_fp_arg_tests[] = {
    746                           { &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
    747                           { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
    748                           { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
    749                           { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
    750                           { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
    751                           { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
    752                           { NULL, NULL, NULL, 0, 0, 0, NULL}
    753 };
    754 
    755 // These are all double precision inputs with double word outputs (mostly converted to single precision)
    756 static vx_fp_test_basic_t
    757 vx_simple_scalar_fp_tests[] = {
    758                           { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
    759                           { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
    760                           { NULL, NULL, NULL, 0 , 0}
    761 };
    762 
    763 static ldst_test_t
    764 ldst_tests[] = {
    765                     { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
    766                     { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
    767                     { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
    768                     { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
    769                     { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
    770                     { NULL, NULL, 0, NULL, 0, 0 } };
    771 
    772 static xs_conv_test_t
    773 xs_conv_tests[] = {
    774                    { &test_xscvsxdsp, "xscvsxdsp"},
    775                    { &test_xscvuxdsp, "xscvuxdsp"},
    776                    { NULL, NULL}
    777 };
    778 
    779 static logic_test_t
    780 logic_tests[] = {
    781                  { &test_xxleqv,  "xxleqv", VSX_EQV },
    782                  { &test_xxlorc,  "xxlorc", VSX_ORC },
    783                  { &test_xxlnand, "xxlnand", VSX_NAND },
    784                  { NULL, NULL}
    785 };
    786 
    787 Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
    788 {
    789    /* NOTE:
    790     * This function has been verified only with the xsresp and xsrsqrtes instructions.
    791     *
    792     * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
    793     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    794     * does an actual reciprocal calculation versus estimation, so the answer we get back from
    795     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    796     * precision) and the estimate may still be within expected tolerances.  On top of that,
    797     * we can't count on these estimates always being the same across implementations.
    798     * For example, with the fre[s] instruction (which should be correct to within one part
    799     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    800     * one implementation could return 1.0111_1111_0000 and another implementation could return
    801     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    802     * single bit in common.
    803     *
    804     * The upshot is we can't validate the VEX output for these instructions by comparing against
    805     * stored bit patterns.  We must check that the result is within expected tolerances.
    806     */
    807 
    808    /* A mask to be used for validation as a last resort.
    809     * Only use 12 bits of precision for reasons discussed above.
    810     */
    811 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
    812 
    813 
    814    Bool result = False;
    815    double src_dp, res_dp;
    816    float calc_diff = 0;
    817    float real_diff = 0;
    818    double recip_divisor;
    819    float div_result;
    820    float calc_diff_tmp;
    821 
    822    src_dp = res_dp = 0;
    823    Bool src_is_negative = False;
    824    Bool res_is_negative = False;
    825    unsigned long long * dst_dp = NULL;
    826    unsigned long long * src_dp_ull;
    827    dst_dp = (unsigned long long *) &vec_out;
    828    src_dp = spec_fargs[idx];
    829    src_dp_ull = (unsigned long long *) &src_dp;
    830    src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
    831    res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
    832    memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
    833 
    834 
    835    // Below are common rules
    836    if (isnan(src_dp))
    837       return isnan(res_dp);
    838    if (fpclassify(src_dp) == FP_ZERO)
    839       return isinf(res_dp);
    840    if (!src_is_negative && isinf(src_dp))
    841       return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    842    if (is_rsqrte) {
    843       if (src_is_negative)
    844          return isnan(res_dp);
    845    } else {
    846       if (src_is_negative && isinf(src_dp))
    847          return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
    848    }
    849 
    850    if (is_rsqrte)
    851       recip_divisor = sqrt(src_dp);
    852    else
    853       recip_divisor = src_dp;
    854 
    855    /* The instructions handled by this function take a double precision
    856     * input, perform a reciprocal estimate in double-precision, round
    857     * the result to single precision and store into the destination
    858     * register in double precision format.  So, to check the result
    859     * for accuracy, we use float (single precision) values.
    860     */
    861    div_result = 1.0/recip_divisor;
    862    calc_diff_tmp = recip_divisor * 16384.0;
    863    if (isnormal(calc_diff_tmp)) {
    864       calc_diff = fabs(1.0/calc_diff_tmp);
    865       real_diff = fabs((float)res_dp - div_result);
    866       result = ( ( res_dp == div_result )
    867                || ( real_diff <= calc_diff ) );
    868 #if FRES_DEBUG
    869       unsigned int * dv = (unsigned int *)&div_result;
    870       unsigned int * rd = (unsigned int *)&real_diff;
    871       unsigned int * cd = (unsigned int *)&calc_diff;
    872       printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
    873              *dv, *rd, *cd);
    874 #endif
    875 
    876    } else {
    877       /* Unable to compute theoretical difference, so we fall back to masking out
    878        * un-precise bits.
    879        */
    880       unsigned int * div_result_sp = (unsigned int *)&div_result;
    881       float res_sp = (float)res_dp;
    882       unsigned int * dst_sp = (unsigned int *)&res_sp;
    883 #if FRES_DEBUG
    884       unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
    885       printf("Unable to compute theoretical difference, so we fall back to masking\n");
    886       printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
    887              *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
    888 #endif
    889       result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
    890    }
    891    return result;
    892 }
    893 
    894 static void test_vx_fp_ops(void)
    895 {
    896 
    897    test_func_t func;
    898    int k;
    899    char * test_name = (char *)malloc(20);
    900    void  * vecA_void_ptr, * vecB_void_ptr, * vecOut_void_ptr;
    901 
    902    if (isLE) {
    903       vecA_void_ptr = (void *)&vec_inA + 8;
    904       vecB_void_ptr = (void *)&vec_inB + 8;
    905       vecOut_void_ptr = (void *)&vec_out + 8;
    906    } else {
    907       vecA_void_ptr = (void *)&vec_inA;
    908       vecB_void_ptr = (void *)&vec_inB;
    909       vecOut_void_ptr = (void *)&vec_out;
    910    }
    911 
    912    k = 0;
    913    build_special_fargs_table();
    914    while ((func = vx_fp_tests[k].test_func)) {
    915       int i, repeat = 0;
    916       unsigned long long * frap, * frbp, * dst;
    917       vx_fp_test_basic_t test_group = vx_fp_tests[k];
    918       vx_fp_test_type test_type = test_group.test_type;
    919 
    920       switch (test_type) {
    921          case VX_FP_SMAS:
    922          case VX_FP_SMSS:
    923          case VX_FP_SNMAS:
    924          case VX_FP_SNMSS:
    925             if (test_type == VX_FP_SMAS)
    926                strcpy(test_name, "xsmadd");
    927             else if (test_type == VX_FP_SMSS)
    928                strcpy(test_name, "xsmsub");
    929             else if (test_type == VX_FP_SNMAS)
    930                strcpy(test_name, "xsnmadd");
    931             else
    932                strcpy(test_name, "xsnmsub");
    933 
    934             if (!repeat) {
    935                repeat = 1;
    936                strcat(test_name, "asp");
    937                do_asp = 1;
    938             }
    939             break;
    940          case VX_FP_OTHER:
    941             strcpy(test_name, test_group.name);
    942             break;
    943          default:
    944             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
    945             exit(1);
    946       }
    947 
    948 again:
    949       for (i = 0; i < test_group.num_tests; i++) {
    950          unsigned int * inA, * inB, * pv;
    951 
    952          fp_test_args_t aTest = test_group.targs[i];
    953          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
    954          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
    955          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
    956          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
    957          int idx;
    958          unsigned long long vsr_XT;
    959          pv = (unsigned int *)&vec_out;
    960 
    961          // Only need to copy one doubleword into each vector's element 0
    962          memcpy(vecA_void_ptr, inA, 8);
    963          memcpy(vecB_void_ptr, inB, 8);
    964 
    965          // clear vec_out
    966          for (idx = 0; idx < 4; idx++, pv++)
    967             *pv = 0;
    968 
    969          if (test_type != VX_FP_OTHER) {
    970             /* Then we need a third src argument, which is stored in element 0 of
    971              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
    972              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
    973              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
    974              * data (input args, result) contain only two inputs, so I arbitrarily
    975              * use spec_fargs elements 4 and 14 (alternating) for the third source
    976              * argument.  We can use the same input data for a given pair of
    977              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
    978              * the expected result should be the same.
    979              */
    980             int extra_arg_idx;
    981             if (i % 2)
    982                extra_arg_idx = 4;
    983             else
    984                extra_arg_idx = 14;
    985 
    986             if (repeat) {
    987                /* We're on the first time through of one of the VX_FP_SMx
    988                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
    989                 * we have to swap inputs as described above:
    990                 *    src2 <= VSX[XT]
    991                 *    src3 <= VSX[XB]
    992                 */
    993                memcpy(vecOut_void_ptr, inB, 8);  // src2
    994                memcpy(vecB_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
    995                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
    996             } else {
    997                // Don't need to init src2, as it's done before the switch()
    998                memcpy(vecOut_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
    999             }
   1000             memcpy(&vsr_XT, vecOut_void_ptr, 8);
   1001          }
   1002 
   1003          (*func)();
   1004          dst = (unsigned long long *) &vec_out;
   1005          if (isLE)
   1006             dst++;
   1007 
   1008          if (test_type == VX_FP_OTHER)
   1009             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
   1010                    *frap, *frbp, *dst);
   1011          else
   1012             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
   1013                     test_name, vsr_XT, *frap, *frbp, *dst );
   1014 
   1015       }
   1016       /*
   1017            {
   1018                // Debug code.  Keep this block commented out except when debugging.
   1019                double result, expected;
   1020                memcpy(&result, dst, 8);
   1021                memcpy(&expected, &aTest.dp_bin_result, 8);
   1022                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
   1023                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
   1024                        expected, result );
   1025             }
   1026        */
   1027       printf( "\n" );
   1028 
   1029       if (repeat) {
   1030          repeat = 0;
   1031          strcat(test_name, "UNKNOWN");
   1032          switch (test_type) {
   1033             case VX_FP_SMAS:
   1034             case VX_FP_SMSS:
   1035             case VX_FP_SNMAS:
   1036             case VX_FP_SNMSS:
   1037                if (test_type == VX_FP_SMAS)
   1038                   strcpy(test_name, "xsmadd");
   1039                else if (test_type == VX_FP_SMSS)
   1040                   strcpy(test_name, "xsmsub");
   1041                else if (test_type == VX_FP_SNMAS)
   1042                   strcpy(test_name, "xsnmadd");
   1043                else
   1044                   strcpy(test_name, "xsnmsub");
   1045 
   1046                do_asp = 0;
   1047                strcat(test_name, "msp");
   1048                break;
   1049             default:
   1050                break;
   1051          }
   1052          goto again;
   1053       }
   1054       k++;
   1055    }
   1056    printf( "\n" );
   1057    free(test_name);
   1058 }
   1059 
   1060 
   1061 static void test_vsx_one_fp_arg(void)
   1062 {
   1063    test_func_t func;
   1064    int k;
   1065    void  * vecB_void_ptr;
   1066 
   1067    k = 0;
   1068    build_special_fargs_table();
   1069 
   1070    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
   1071       int idx, i;
   1072       unsigned long long *dst_dp;
   1073       unsigned int * dst_sp;
   1074       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
   1075       /* size of source operands */
   1076       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
   1077 		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
   1078       /* size of result */
   1079       Bool dp_res = IS_DP_RESULT(test_group.precision);
   1080       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
   1081 
   1082       vecB_void_ptr = (void *)&vec_inB;
   1083       if (isLE) {
   1084          vecB_void_ptr += dp? 8 : 12;
   1085       }
   1086 
   1087       for (i = 0; i < test_group.num_tests; i++) {
   1088          unsigned int * pv;
   1089          void * inB;
   1090 
   1091          pv = (unsigned int *)&vec_out;
   1092          // clear vec_out
   1093          for (idx = 0; idx < 4; idx++, pv++)
   1094             *pv = 0;
   1095 
   1096          if (dp) {
   1097             int vec_out_idx;
   1098             unsigned long long * frB_dp;
   1099             if (isLE)
   1100                vec_out_idx = dp_res ? 1 : 3;
   1101             else
   1102                vec_out_idx = 0;
   1103 
   1104             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
   1105                /* Take a single-precision value stored in double word element 0
   1106                 * of src in double-precision format and convert to single-
   1107                 * precision and store in word element 0 of dst.
   1108                 */
   1109                double input = spec_sp_fargs[i];
   1110                memcpy(vecB_void_ptr, (void *)&input, 8);
   1111             } else {
   1112                inB = (void *)&spec_fargs[i];
   1113                // copy double precision FP into input vector element 0
   1114                memcpy(vecB_void_ptr, inB, 8);
   1115             }
   1116 
   1117             // execute test insn
   1118             (*func)();
   1119             if (dp_res)
   1120                dst_dp = (unsigned long long *) &vec_out;
   1121             else
   1122                dst_sp = (unsigned int *) &vec_out;
   1123 
   1124             printf("#%d: %s ", i, test_group.name);
   1125             frB_dp = (unsigned long long *)&spec_fargs[i];
   1126             printf("%s(%016llx)", test_group.op, *frB_dp);
   1127             if (test_group.test_type == VX_ESTIMATE)
   1128             {
   1129                Bool res;
   1130                res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
   1131                printf(" ==> %s)", res ? "PASS" : "FAIL");
   1132             } else if (dp_res) {
   1133                printf(" = %016llx", dst_dp[vec_out_idx]);
   1134             } else {
   1135                printf(" = %08x", dst_sp[vec_out_idx]);
   1136             }
   1137 
   1138             printf("\n");
   1139          } else {  // single precision test type
   1140             int vec_out_idx;
   1141             if (isLE)
   1142                vec_out_idx = dp_res ? 1 : 3;
   1143             else
   1144                vec_out_idx = 0;
   1145             // Clear input vector
   1146             pv = (unsigned int *)&vec_inB;
   1147             for (idx = 0; idx < 4; idx++, pv++)
   1148                *pv = 0;
   1149             inB = (void *)&spec_sp_fargs[i];
   1150             // copy single precision FP into input vector element i
   1151             memcpy(vecB_void_ptr, inB, 4);
   1152             // execute test insn
   1153             (*func)();
   1154             if (dp_res)
   1155                dst_dp = (unsigned long long *) &vec_out;
   1156             else
   1157                dst_sp = (unsigned int *) &vec_out;
   1158             // print result
   1159             printf("#%d: %s ", i, test_group.name);
   1160                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
   1161                if (dp_res)
   1162                      printf(" = %016llx", dst_dp[vec_out_idx]);
   1163                else
   1164                   printf(" = %08x", dst_sp[vec_out_idx]);
   1165 
   1166             printf("\n");
   1167          }
   1168       }
   1169       k++;
   1170       printf( "\n" );
   1171    }
   1172 }
   1173 
   1174 /* This function currently only supports two double precision input arguments. */
   1175 static void test_vsx_two_fp_arg(void)
   1176 {
   1177    test_func_t func;
   1178    int k = 0;
   1179    void  * vecA_void_ptr, * vecB_void_ptr;
   1180 
   1181    if (isLE) {
   1182       vecA_void_ptr = (void *)&vec_inA + 8;
   1183       vecB_void_ptr = (void *)&vec_inB + 8;
   1184    } else {
   1185       vecA_void_ptr = (void *)&vec_inA;
   1186       vecB_void_ptr = (void *)&vec_inB;
   1187    }
   1188 
   1189    build_special_fargs_table();
   1190    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
   1191       unsigned long long * frap, * frbp, * dst;
   1192       unsigned int * pv;
   1193       int idx;
   1194       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
   1195       pv = (unsigned int *)&vec_out;
   1196       // clear vec_out
   1197       for (idx = 0; idx < 4; idx++, pv++)
   1198          *pv = 0;
   1199 
   1200       void * inA, * inB;
   1201       int i;
   1202       for (i = 0; i < test_group.num_tests; i++) {
   1203          fp_test_args_t aTest = test_group.targs[i];
   1204          inA = (void *)&spec_fargs[aTest.fra_idx];
   1205          inB = (void *)&spec_fargs[aTest.frb_idx];
   1206          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1207          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1208          // Only need to copy one doubleword into each vector's element 0
   1209          memcpy(vecA_void_ptr, inA, 8);
   1210          memcpy(vecB_void_ptr, inB, 8);
   1211          (*func)();
   1212          dst = (unsigned long long *) &vec_out;
   1213          if (isLE)
   1214             dst++;
   1215          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
   1216                 *frap, *frbp, *dst);
   1217       }
   1218       printf( "\n" );
   1219       k++;
   1220    }
   1221 }
   1222 
   1223 /* This function handles the following cases:
   1224  *   1) Single precision value stored in double-precision
   1225  *      floating-point format in doubleword element 0 of src VSX register
   1226  *   2) Integer word value stored in word element 1 of src VSX register
   1227  */
   1228 static void _do_store_test (ldst_test_t storeTest)
   1229 {
   1230    test_func_t func;
   1231    unsigned int *dst32;
   1232    unsigned int i, idx;
   1233    unsigned int * pv = (unsigned int *) storeTest.base_addr;
   1234    void  * vecA_void_ptr;
   1235 
   1236    if (isLE) {
   1237       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
   1238          vecA_void_ptr = (void *)&vec_inA + 8;
   1239    } else {
   1240       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
   1241          vecA_void_ptr = (void *)&vec_inA + 4;
   1242       else
   1243          vecA_void_ptr = (void *)&vec_inA;
   1244    }
   1245 
   1246    func = storeTest.test_func;
   1247    r14 = (HWord_t) storeTest.base_addr;
   1248    r15 = (HWord_t) storeTest.offset;
   1249 
   1250    /* test some of the pre-defined single precision values */
   1251    for (i = 0; i < nb_special_fargs; i+=3) {
   1252       // clear out storage destination
   1253       for (idx = 0; idx < 4; idx++)
   1254          *(pv + idx) = 0;
   1255 
   1256       printf( "%s:", storeTest.name );
   1257       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
   1258       {
   1259          unsigned int * arg_ptr = (unsigned int *)&spec_sp_fargs[i];
   1260          memcpy(vecA_void_ptr, arg_ptr, sizeof(unsigned int));
   1261          printf(" %08x ==> ", *arg_ptr);
   1262       } else {
   1263          unsigned long long * dp;
   1264          double input = spec_sp_fargs[i];
   1265          dp = (unsigned long long *)&input;
   1266          memcpy(vecA_void_ptr, dp, sizeof(unsigned long long));
   1267          printf(" %016llx ==> ", *dp);
   1268       }
   1269 
   1270       // execute test insn
   1271       (*func)();
   1272       dst32 = (unsigned int*)(storeTest.base_addr);
   1273       dst32 += (storeTest.offset/sizeof(int));
   1274       printf( "%08x\n", *dst32);
   1275    }
   1276 
   1277    printf("\n");
   1278 }
   1279 
   1280 static void _do_load_test(ldst_test_t loadTest)
   1281 {
   1282    test_func_t func;
   1283    unsigned int i;
   1284    unsigned long long * dst_dp;
   1285 
   1286    func = loadTest.test_func;
   1287    r15 = (HWord_t) loadTest.offset;
   1288 
   1289    if (loadTest.base_addr == NULL) {
   1290       /* Test lxsspx: source is single precision value, so let's */
   1291       /* test some of the pre-defined single precision values. */
   1292       int num_loops = (loadTest.offset == 0) ?  nb_special_fargs : (nb_special_fargs - (loadTest.offset/sizeof(int)));
   1293       for (i = 0; i < num_loops; i+=3) {
   1294          unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + (loadTest.offset/sizeof(int))];
   1295          printf( "%s:", loadTest.name );
   1296          printf(" %08x ==> ", *sp);
   1297          r14 = (HWord_t)&spec_sp_fargs[i];
   1298 
   1299          // execute test insn
   1300          (*func)();
   1301          dst_dp = (unsigned long long *) &vec_out;
   1302          if (isLE)
   1303             dst_dp++;
   1304          printf("%016llx\n", *dst_dp);
   1305       }
   1306    } else {
   1307       // source is an integer word
   1308       int num_loops = (loadTest.offset == 0) ?  NUM_VIARGS_INTS : (NUM_VIARGS_INTS - (loadTest.offset/sizeof(int)));
   1309       for (i = 0; i < num_loops; i++) {
   1310          printf( "%s:", loadTest.name );
   1311          r14 = (HWord_t)&viargs[i];
   1312          printf(" %08x ==> ", viargs[i + (loadTest.offset/sizeof(int))]);
   1313 
   1314          // execute test insn
   1315          (*func)();
   1316          dst_dp = (unsigned long long *) &vec_out;
   1317          if (isLE)
   1318             dst_dp++;
   1319          printf("%016llx\n", *dst_dp);
   1320       }
   1321    }
   1322    printf("\n");
   1323 }
   1324 
   1325 static void test_ldst(void)
   1326 {
   1327    int k = 0;
   1328 
   1329    while (ldst_tests[k].test_func) {
   1330       if (ldst_tests[k].type == VSX_STORE)
   1331          _do_store_test(ldst_tests[k]);
   1332       else {
   1333          _do_load_test(ldst_tests[k]);
   1334       }
   1335       k++;
   1336       printf("\n");
   1337    }
   1338 }
   1339 
   1340 static void test_xs_conv_ops(void)
   1341 {
   1342 
   1343    test_func_t func;
   1344    int k = 0;
   1345    void  * vecB_void_ptr;
   1346 
   1347    if (isLE)
   1348       vecB_void_ptr = (void *)&vec_inB + 8;
   1349    else
   1350       vecB_void_ptr = (void *)&vec_inB;
   1351 
   1352    build_special_fargs_table();
   1353    while ((func = xs_conv_tests[k].test_func)) {
   1354       int i;
   1355       unsigned long long * dst;
   1356       xs_conv_test_t test_group = xs_conv_tests[k];
   1357       for (i = 0; i < NUM_VDARGS_INTS; i++) {
   1358          unsigned long long  * inB, * pv;
   1359          int idx;
   1360          inB = (unsigned long long *)&vdargs[i];
   1361          memcpy(vecB_void_ptr, inB, 8);
   1362          pv = (unsigned long long *)&vec_out;
   1363          // clear vec_out
   1364          for (idx = 0; idx < 2; idx++, pv++)
   1365             *pv = 0ULL;
   1366          (*func)();
   1367          dst = (unsigned long long *) &vec_out;
   1368          if (isLE)
   1369             dst++;
   1370          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], *dst);
   1371       }
   1372       k++;
   1373       printf("\n");
   1374    }
   1375    printf( "\n" );
   1376 }
   1377 
   1378 
   1379 static void test_vsx_logic(void)
   1380 {
   1381    logic_test_t aTest;
   1382    test_func_t func;
   1383    int k;
   1384    k = 0;
   1385 
   1386    while ((func = logic_tests[k].test_func)) {
   1387 
   1388       unsigned int * pv;
   1389       unsigned int * inA, * inB, * dst;
   1390       int idx, i;
   1391       aTest = logic_tests[k];
   1392       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
   1393          pv = (unsigned int *)&vec_out;
   1394          inA = &viargs[i];
   1395          inB = &viargs[i];
   1396          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
   1397          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
   1398          // clear vec_out
   1399          for (idx = 0; idx < 4; idx++, pv++)
   1400             *pv = 0;
   1401 
   1402          // execute test insn
   1403          (*func)();
   1404          dst = (unsigned int*) &vec_out;
   1405 
   1406          printf( "#%d: %10s ", k, aTest.name);
   1407          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
   1408          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
   1409          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
   1410       }
   1411       k++;
   1412    }
   1413    printf( "\n" );
   1414 }
   1415 
   1416 
   1417 //----------------------------------------------------------
   1418 
   1419 static test_table_t all_tests[] = {
   1420                                      { &test_vx_fp_ops,
   1421                                        "Test VSX floating point instructions"},
   1422                                      { &test_vsx_one_fp_arg,
   1423                                        "Test VSX vector and scalar single argument instructions"} ,
   1424                                      { &test_vsx_logic,
   1425                                        "Test VSX logic instructions" },
   1426                                      { &test_xs_conv_ops,
   1427                                        "Test VSX scalar integer conversion instructions" },
   1428                                      { &test_ldst,
   1429                                        "Test VSX load/store dp to sp instructions" },
   1430                                      { &test_vsx_two_fp_arg,
   1431                                        "Test VSX vector and scalar two argument instructions"} ,
   1432                                      { NULL, NULL }
   1433 };
   1434 
   1435 #endif
   1436 
   1437 int main(int argc, char *argv[])
   1438 {
   1439 
   1440 #ifdef HAS_ISA_2_07
   1441    test_table_t aTest;
   1442    test_func_t func;
   1443    int i = 0;
   1444 
   1445    while ((func = all_tests[i].test_category)) {
   1446       aTest = all_tests[i];
   1447       printf( "%s\n", aTest.name );
   1448       (*func)();
   1449       i++;
   1450    }
   1451 #else
   1452    printf("NO ISA 2.07 SUPPORT\n");
   1453 #endif
   1454    return 0;
   1455 }
   1456