Home | History | Annotate | Download | only in ppc32
      1 /*  Copyright (C) 2011 IBM
      2 
      3  Author: Maynard Johnson <maynardj (at) us.ibm.com>
      4 
      5  This program is free software; you can redistribute it and/or
      6  modify it under the terms of the GNU General Public License as
      7  published by the Free Software Foundation; either version 2 of the
      8  License, or (at your option) any later version.
      9 
     10  This program is distributed in the hope that it will be useful, but
     11  WITHOUT ANY WARRANTY; without even the implied warranty of
     12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  General Public License for more details.
     14 
     15  You should have received a copy of the GNU General Public License
     16  along with this program; if not, write to the Free Software
     17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18  02111-1307, USA.
     19 
     20  The GNU General Public License is contained in the file COPYING.
     21  */
     22 
     23 #ifdef HAS_VSX
     24 
     25 #include <stdio.h>
     26 #include <stdint.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <malloc.h>
     30 #include <altivec.h>
     31 #include <math.h>
     32 
/* HWord_t: unsigned host word.  It is 32 bits wide unless the compiler
 * defines __powerpc64__, in which case it is 64 bits wide.  It is the type
 * of the register-bound variables r14..r17 declared below.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */
     38 
/* Minimal boolean type and its two values, used for the test-mode flags
 * (do_dot, do_aXp, do_dp) further down.
 */
typedef unsigned char Bool;
#define True 1
#define False 0
/* Global register variables: each C name is tied to the hardware register
 * named in its __asm__ label (general purpose registers r14..r17 and
 * floating point registers fr14..fr17).  None of them is referenced in the
 * visible part of this file.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
     50 
/* Volatile scratch words; judging by the names they presumably hold CR flags
 * and XER contents captured around divide tests (not used in the visible
 * part of this file -- confirm against the callers).
 */
static volatile unsigned int div_flags, div_xer;
     52 
/* Clobber list naming all eight condition register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Execute "mtcr" with _arg as the source, declaring every CR field as
 * clobbered.  NOTE: the expansion already ends in ';', so "SET_CR(x);"
 * yields an extra empty statement.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Execute "mtxer" with _arg as the source, declaring XER as clobbered.
 * Like SET_CR, the expansion carries its own trailing ';'.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Execute "mfcr" and store the result in _lval (no trailing ';'). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Execute "mfxer" and store the result in _lval (no trailing ';'). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR into _lval_cr and then XER into _lval_xer. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Write zero to CR. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Write zero to XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Write zero to CR and then to XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Execute "mtfsf 0xFF" with a double holding 0.0 as the source operand,
 * i.e. with a field mask selecting all eight FPSCR fields.
 */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
     83 
     84 
/* Signature shared by every test_* routine: no arguments, no return value.
 * Operands and results are exchanged through file-scope variables.
 */
typedef void (*test_func_t)(void);
/* Forward declaration; struct test_table is defined further down. */
typedef struct test_table test_table_t;
     87 
     88 
     89 /* These functions below that construct a table of floating point
     90  * values were lifted from none/tests/ppc32/jm-insns.c.
     91  */
     92 
     93 #if defined (DEBUG_ARGS_BUILD)
     94 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
     95 #else
     96 #define AB_DPRINTF(fmt, args...) do { } while (0)
     97 #endif
     98 
     99 static inline void register_farg (void *farg,
    100                                   int s, uint16_t _exp, uint64_t mant)
    101 {
    102    uint64_t tmp;
    103 
    104    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    105    *(uint64_t *)farg = tmp;
    106    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    107               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    108 }
    109 
    110 static inline void register_sp_farg (void *farg,
    111                                      int s, uint16_t _exp, uint32_t mant)
    112 {
    113    uint32_t tmp;
    114    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
    115    *(uint32_t *)farg = tmp;
    116 }
    117 
    118 typedef struct fp_test_args {
    119    int fra_idx;
    120    int frb_idx;
    121 } fp_test_args_t;
    122 
    123 
/* Operand index pairs {fra_idx, frb_idx} (64 entries).  Going by the name,
 * these are the cases intended for the compare tests; every index is in the
 * range 1..14 and so refers to an entry of the special-value table.
 */
fp_test_args_t fp_cmp_tests[] = {
                                   {8, 8},
                                   {8, 14},
                                   {8, 6},
                                   {8, 5},
                                   {8, 4},
                                   {8, 7},
                                   {8, 9},
                                   {8, 11},
                                   {14, 8},
                                   {14, 14},
                                   {14, 6},
                                   {14, 5},
                                   {14, 4},
                                   {14, 7},
                                   {14, 9},
                                   {14, 11},
                                   {6, 8},
                                   {6, 14},
                                   {6, 6},
                                   {6, 5},
                                   {6, 4},
                                   {6, 7},
                                   {6, 9},
                                   {6, 11},
                                   {5, 8},
                                   {5, 14},
                                   {5, 6},
                                   {5, 5},
                                   {5, 4},
                                   {5, 7},
                                   {5, 9},
                                   {5, 11},
                                   {4, 8},
                                   {4, 14},
                                   {4, 6},
                                   {4, 5},
                                   {4, 1},
                                   {4, 7},
                                   {4, 9},
                                   {4, 11},
                                   {7, 8},
                                   {7, 14},
                                   {7, 6},
                                   {7, 5},
                                   {7, 4},
                                   {7, 7},
                                   {7, 9},
                                   {7, 11},
                                   {10, 8},
                                   {10, 14},
                                   {10, 6},
                                   {10, 5},
                                   {10, 4},
                                   {10, 7},
                                   {10, 9},
                                   {10, 10},
                                   {12, 8},
                                   {12, 14},
                                   {12, 6},
                                   {12, 5},
                                   {1, 1},
                                   {2, 2},
                                   {3, 3},
                                   {4, 4},
};
    190 
    191 
/* Operand index pairs {fra_idx, frb_idx} (64 entries).  Going by the name,
 * these are the cases intended for the two-operand floating point tests;
 * every index is in the range 1..16 and so refers to an entry of the
 * special-value table.
 */
fp_test_args_t two_arg_fp_tests[] = {
                                     {8, 8},
                                     {8, 14},
                                     {15, 16},
                                     {8, 5},
                                     {8, 4},
                                     {8, 7},
                                     {8, 9},
                                     {8, 11},
                                     {14, 8},
                                     {14, 14},
                                     {14, 6},
                                     {14, 5},
                                     {14, 4},
                                     {14, 7},
                                     {14, 9},
                                     {14, 11},
                                     {6, 8},
                                     {6, 14},
                                     {6, 6},
                                     {6, 5},
                                     {6, 4},
                                     {6, 7},
                                     {6, 9},
                                     {6, 11},
                                     {5, 8},
                                     {5, 14},
                                     {5, 6},
                                     {5, 5},
                                     {5, 4},
                                     {5, 7},
                                     {5, 9},
                                     {5, 11},
                                     {4, 8},
                                     {4, 14},
                                     {4, 6},
                                     {4, 5},
                                     {4, 1},
                                     {4, 7},
                                     {4, 9},
                                     {4, 11},
                                     {7, 8},
                                     {7, 14},
                                     {7, 6},
                                     {7, 5},
                                     {7, 4},
                                     {7, 7},
                                     {7, 9},
                                     {7, 11},
                                     {10, 8},
                                     {10, 14},
                                     {12, 6},
                                     {12, 5},
                                     {10, 4},
                                     {10, 7},
                                     {10, 9},
                                     {10, 11},
                                     {12, 8 },
                                     {12, 14},
                                     {12, 6},
                                     {15, 16},
                                     {15, 16},
                                     {9, 11},
                                     {11, 11},
                                     {11, 12}
};
    258 
    259 
/* Special-value tables, allocated and filled by build_special_fargs_table():
 *   nb_special_fargs  number of entries stored in each table
 *   spec_fargs        the values as doubles
 *   spec_sp_fargs     the same entries as floats
 */
static int nb_special_fargs;
static double * spec_fargs;
static float * spec_sp_fargs;
    263 
    264 static void build_special_fargs_table(void)
    265 {
    266 /*
    267   Entry  Sign Exp   fraction                  Special value
    268    0      0   3fd   0x8000000000000ULL         Positive finite number
    269    1      0   404   0xf000000000000ULL         ...
    270    2      0   001   0x8000000b77501ULL         ...
    271    3      0   7fe   0x800000000051bULL         ...
    272    4      0   012   0x3214569900000ULL         ...
    273    5      0   000   0x0000000000000ULL         +0.0 (+zero)
    274    6      1   000   0x0000000000000ULL         -0.0 (-zero)
    275    7      0   7ff   0x0000000000000ULL         +infinity
    276    8      1   7ff   0x0000000000000ULL         -infinity
    277    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
    278    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
    279    11     0   7ff   0x8000000000000ULL         +QNaN
    280    12     1   7ff   0x8000000000000ULL         -QNaN
    281    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
    282    14     1   40d   0x0650f5a07b353ULL         Negative finite number
    283    15     0   412   0x32585a9900000ULL         A couple more positive finite numbers
    284    16     0   413   0x82511a2000000ULL         ...
    285 */
    286 
    287    uint64_t mant;
    288    uint32_t mant_sp;
    289    uint16_t _exp;
    290    int s;
    291    int j, i = 0;
    292 
    293    if (spec_fargs)
    294       return;
    295 
    296    spec_fargs = malloc( 17 * sizeof(double) );
    297    spec_sp_fargs = malloc( 17 * sizeof(float) );
    298 
    299    // #0
    300    s = 0;
    301    _exp = 0x3fd;
    302    mant = 0x8000000000000ULL;
    303    register_farg(&spec_fargs[i++], s, _exp, mant);
    304 
    305    // #1
    306    s = 0;
    307    _exp = 0x404;
    308    mant = 0xf000000000000ULL;
    309    register_farg(&spec_fargs[i++], s, _exp, mant);
    310 
    311    /* None of the ftdiv tests succeed.
    312     * FRA = value #0; FRB = value #1
    313     * ea_ = -2; e_b = 5
    314     * fl_flag || fg_flag || fe_flag = 100
    315     */
    316 
    317    /*************************************************
    318     *     fe_flag tests
    319     *
    320     *************************************************/
    321 
    322    /* fe_flag <- 1 if FRA is a NaN
    323     * FRA = value #9; FRB = value #1
    324     * e_a = 1024; e_b = 5
    325     * fl_flag || fg_flag || fe_flag = 101
    326     */
    327 
    328    /* fe_flag <- 1 if FRB is a NaN
    329     * FRA = value #1; FRB = value #12
    330     * e_a = 5; e_b = 1024
    331     * fl_flag || fg_flag || fe_flag = 101
    332     */
    333 
    334    /* fe_flag <- 1 if e_b <= -1022
    335     * FRA = value #0; FRB = value #2
    336     * e_a = -2; e_b = -1022
    337     * fl_flag || fg_flag || fe_flag = 101
    338     *
    339     */
    340    // #2
    341    s = 0;
    342    _exp = 0x001;
    343    mant = 0x8000000b77501ULL;
    344    register_farg(&spec_fargs[i++], s, _exp, mant);
    345 
    346    /* fe_flag <- 1 if e_b >= 1021
    347     * FRA = value #1; FRB = value #3
    348     * e_a = 5; e_b = 1023
    349     * fl_flag || fg_flag || fe_flag = 101
    350     */
    351    // #3
    352    s = 0;
    353    _exp = 0x7fe;
    354    mant = 0x800000000051bULL;
    355    register_farg(&spec_fargs[i++], s, _exp, mant);
    356 
    357    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
    358     * Let FRA = value #3 and FRB be value #0.
    359     * e_a = 1023; e_b = -2
    360     * fl_flag || fg_flag || fe_flag = 101
    361     */
    362 
    363    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
    364     * Let FRA = value #0 above and FRB be value #3 above
    365     * e_a = -2; e_b = 1023
    366     * fl_flag || fg_flag || fe_flag = 101
    367     */
    368 
    369    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
    370     * Let FRA = value #4 and FRB be value #0
    371     * e_a = -1005; e_b = -2
    372     * fl_flag || fg_flag || fe_flag = 101
    373    */
    374    // #4
    375    s = 0;
    376    _exp = 0x012;
    377    mant = 0x3214569900000ULL;
    378    register_farg(&spec_fargs[i++], s, _exp, mant);
    379 
    380    /*************************************************
    381     *     fg_flag tests
    382     *
    383     *************************************************/
    384    /* fg_flag <- 1 if FRA is an Infinity
    385     * NOTE: FRA = Inf also sets fe_flag
    386     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
    387     * Test 1:
    388     *   Let FRA be value #7 and FRB be value #1
    389     *   e_a = 1024; e_b = 5
    390     *   fl_flag || fg_flag || fe_flag = 111
    391     *
    392     * Test 2:
    393     *   Let FRA be value #8 and FRB be value #1
    394     *   e_a = 1024; e_b = 5
    395     *   fl_flag || fg_flag || fe_flag = 111
    396     *
    397     */
    398 
    399    /* fg_flag <- 1 if FRB is an Infinity
    400     * NOTE: FRB = Inf also sets fe_flag
    401     * Let FRA be value #1 and FRB be value #7
    402     * e_a = 5; e_b = 1024
    403     * fl_flag || fg_flag || fe_flag = 111
    404     */
    405 
    406    /* fg_flag <- 1 if FRB is denormalized
    407     * NOTE: e_b < -1022 ==> fe_flag <- 1
    408     * Let FRA be value #0 and FRB be value #13
    409     * e_a = -2; e_b = -1023
    410     * fl_flag || fg_flag || fe_flag = 111
    411     */
    412 
    413    /* fg_flag <- 1 if FRB is +zero
    414     * NOTE: FRA = Inf also sets fe_flag
    415     * Let FRA = val #5; FRB = val #5
    416     * ea_ = -1023; e_b = -1023
    417     * fl_flag || fg_flag || fe_flag = 111
    418     */
    419 
    420    /* fg_flag <- 1 if FRB is -zero
    421     * NOTE: FRA = Inf also sets fe_flag
    422     * Let FRA = val #5; FRB = val #6
    423     * ea_ = -1023; e_b = -1023
    424     * fl_flag || fg_flag || fe_flag = 111
    425     */
    426 
    427    /* Special values */
    428    /* +0.0      : 0 0x000 0x0000000000000 */
    429    // #5
    430    s = 0;
    431    _exp = 0x000;
    432    mant = 0x0000000000000ULL;
    433    register_farg(&spec_fargs[i++], s, _exp, mant);
    434 
    435    /* -0.0      : 1 0x000 0x0000000000000 */
    436    // #6
    437    s = 1;
    438    _exp = 0x000;
    439    mant = 0x0000000000000ULL;
    440    register_farg(&spec_fargs[i++], s, _exp, mant);
    441 
    442    /* +infinity : 0 0x7FF 0x0000000000000  */
    443    // #7
    444    s = 0;
    445    _exp = 0x7FF;
    446    mant = 0x0000000000000ULL;
    447    register_farg(&spec_fargs[i++], s, _exp, mant);
    448 
    449    /* -infinity : 1 0x7FF 0x0000000000000 */
    450    // #8
    451    s = 1;
    452    _exp = 0x7FF;
    453    mant = 0x0000000000000ULL;
    454    register_farg(&spec_fargs[i++], s, _exp, mant);
    455 
    456    /*
    457     * This comment applies to values #9 and #10 below:
    458     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    459     * so we can't just copy the double-precision value to the corresponding slot in the
    460     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    461     * have to manually set the bits using register_sp_farg().
    462     */
    463 
    464    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
    465    // #9
    466    s = 0;
    467    _exp = 0x7FF;
    468    mant = 0x7FFFFFFFFFFFFULL;
    469    register_farg(&spec_fargs[i++], s, _exp, mant);
    470    _exp = 0xff;
    471    mant_sp = 0x3FFFFF;
    472    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    473 
    474    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
    475    // #10
    476    s = 1;
    477    _exp = 0x7FF;
    478    mant = 0x7FFFFFFFFFFFFULL;
    479    register_farg(&spec_fargs[i++], s, _exp, mant);
    480    _exp = 0xff;
    481    mant_sp = 0x3FFFFF;
    482    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    483 
    484    /* +QNaN     : 0 0x7FF 0x8000000000000 */
    485    // #11
    486    s = 0;
    487    _exp = 0x7FF;
    488    mant = 0x8000000000000ULL;
    489    register_farg(&spec_fargs[i++], s, _exp, mant);
    490 
    491    /* -QNaN     : 1 0x7FF 0x8000000000000 */
    492    // #12
    493    s = 1;
    494    _exp = 0x7FF;
    495    mant = 0x8000000000000ULL;
    496    register_farg(&spec_fargs[i++], s, _exp, mant);
    497 
    498    /* denormalized value */
    499    // #13
    500    s = 1;
    501    _exp = 0x000;
    502    mant = 0x8340000078000ULL;
    503    register_farg(&spec_fargs[i++], s, _exp, mant);
    504 
    505    /* Negative finite number */
    506    // #14
    507    s = 1;
    508    _exp = 0x40d;
    509    mant = 0x0650f5a07b353ULL;
    510    register_farg(&spec_fargs[i++], s, _exp, mant);
    511 
    512    /* A couple positive finite numbers ... */
    513    // #15
    514    s = 0;
    515    _exp = 0x412;
    516    mant = 0x32585a9900000ULL;
    517    register_farg(&spec_fargs[i++], s, _exp, mant);
    518 
    519    // #16
    520    s = 0;
    521    _exp = 0x413;
    522    mant = 0x82511a2000000ULL;
    523    register_farg(&spec_fargs[i++], s, _exp, mant);
    524 
    525    nb_special_fargs = i;
    526    for (j = 0; j < i; j++) {
    527       if (!(j == 9 || j == 10))
    528          spec_sp_fargs[j] = spec_fargs[j];
    529    }
    530 }
    531 
    532 
/* Associates a test routine with a printable name. */
struct test_table
{
   test_func_t test_category;   /* routine to call */
   char * name;                 /* name of that routine / category */
};
    538 
    539 typedef enum {
    540    SINGLE_TEST,
    541    DOUBLE_TEST
    542 } precision_type_t;
    543 
    544 typedef enum {
    545    VX_SCALAR_FP_NMSUB = 0,
    546    // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE
    547    VX_VECTOR_FP_MULT_AND_OP2 = 10,
    548    // and before this line
    549    VX_BASIC_CMP = 30,
    550    VX_CONV_WORD,
    551    VX_DEFAULT
    552 } vx_fp_test_type;
    553 
/* Descriptor of one VSX floating point test.  Tables of these are filled
 * with positional initializers, so the member order matters.
 */
typedef struct vx_fp_test
{
   test_func_t test_func;       /* routine that executes the instruction */
   const char * name;           /* instruction mnemonic, e.g. "xvresp" */
   fp_test_args_t * targs;      /* operand index pairs; may be NULL */
   int num_tests;               /* number of test cases to run */
   precision_type_t precision;  /* SINGLE_TEST or DOUBLE_TEST */
   vx_fp_test_type type;        /* test category */
   const char * op;             /* short operation label, e.g. "1/x" */
} vx_fp_test_t;
    564 
/* Operand and result vectors shared by the test_* routines: each routine
 * reads vec_inA / vec_inB / vec_inC as needed and writes vec_out.
 */
static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC;
    566 
/* When set, the vector compare tests use the "." form of the instruction. */
static Bool do_dot;
    568 static void test_xvcmpeqdp(void)
    569 {
    570    if (do_dot)
    571       __asm__ __volatile__ ("xvcmpeqdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    572    else
    573       __asm__ __volatile__ ("xvcmpeqdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    574 }
    575 
    576 static void test_xvcmpgedp(void)
    577 {
    578    if (do_dot)
    579       __asm__ __volatile__ ("xvcmpgedp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    580    else
    581       __asm__ __volatile__ ("xvcmpgedp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    582 }
    583 
    584 static void test_xvcmpgtdp(void)
    585 {
    586    if (do_dot)
    587       __asm__ __volatile__ ("xvcmpgtdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    588    else
    589       __asm__ __volatile__ ("xvcmpgtdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    590 }
    591 
    592 static void test_xvcmpeqsp(void)
    593 {
    594    if (do_dot)
    595       __asm__ __volatile__ ("xvcmpeqsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    596    else
    597       __asm__ __volatile__ ("xvcmpeqsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    598 }
    599 
    600 static void test_xvcmpgesp(void)
    601 {
    602    if (do_dot)
    603       __asm__ __volatile__ ("xvcmpgesp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    604    else
    605       __asm__ __volatile__ ("xvcmpgesp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    606 }
    607 
    608 static void test_xvcmpgtsp(void)
    609 {
    610    if (do_dot)
    611       __asm__ __volatile__ ("xvcmpgtsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    612    else
    613       __asm__ __volatile__ ("xvcmpgtsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    614 }
    615 
/* Mode flags for the multiply-add style tests below:
 *   do_aXp  set: use the "a" form of the mnemonic (e.g. xvmaddadp);
 *           clear: use the "m" form (e.g. xvmaddmdp)
 *   do_dp   set: use the double-precision ("dp") instruction;
 *           clear: use the single-precision ("sp") one
 */
static Bool do_aXp;
static Bool do_dp;
    618 static void test_xsnmsub(void)
    619 {
    620    if (do_aXp)
    621       __asm__ __volatile__ ("xsnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    622    else
    623       __asm__ __volatile__ ("xsnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    624 }
    625 
    626 static void test_xvmadd(void)
    627 {
    628    if (do_aXp)
    629       if (do_dp)
    630          __asm__ __volatile__ ("xvmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    631       else
    632          __asm__ __volatile__ ("xvmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    633    else
    634       if (do_dp)
    635          __asm__ __volatile__ ("xvmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    636       else
    637          __asm__ __volatile__ ("xvmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    638 }
    639 
    640 static void test_xvnmadd(void)
    641 {
    642    if (do_aXp)
    643       if (do_dp)
    644          __asm__ __volatile__ ("xvnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    645       else
    646          __asm__ __volatile__ ("xvnmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    647    else
    648       if (do_dp)
    649          __asm__ __volatile__ ("xvnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    650       else
    651          __asm__ __volatile__ ("xvnmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    652 }
    653 
    654 static void test_xvnmsub(void)
    655 {
    656    if (do_aXp)
    657       if (do_dp)
    658          __asm__ __volatile__ ("xvnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    659       else
    660          __asm__ __volatile__ ("xvnmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    661    else
    662       if (do_dp)
    663          __asm__ __volatile__ ("xvnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    664       else
    665          __asm__ __volatile__ ("xvnmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    666 }
    667 
    668 static void test_xvmsub(void)
    669 {
    670    if (do_aXp)
    671       if (do_dp)
    672          __asm__ __volatile__ ("xvmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    673       else
    674          __asm__ __volatile__ ("xvmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    675    else
    676       if (do_dp)
    677          __asm__ __volatile__ ("xvmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    678       else
    679          __asm__ __volatile__ ("xvmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    680 }
    681 
    682 static void test_xssqrtdp(void)
    683 {
    684    __asm__ __volatile__ ("xssqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    685 }
    686 
    687 static void test_xsrdpim(void)
    688 {
    689    __asm__ __volatile__ ("xsrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    690 }
    691 
    692 static void test_xsrdpip(void)
    693 {
    694    __asm__ __volatile__ ("xsrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    695 }
    696 
    697 static void test_xstdivdp(void)
    698 {
    699    __asm__ __volatile__ ("xstdivdp   6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
    700 }
    701 
    702 static void test_xsmaxdp(void)
    703 {
    704    __asm__ __volatile__ ("xsmaxdp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    705 }
    706 
    707 static void test_xsmindp(void)
    708 {
    709    __asm__ __volatile__ ("xsmindp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    710 }
    711 
    712 static void test_xvadddp(void)
    713 {
    714    __asm__ __volatile__ ("xvadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    715 }
    716 
    717 static void test_xvaddsp(void)
    718 {
    719    __asm__ __volatile__ ("xvaddsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    720 }
    721 
    722 static void test_xvdivdp(void)
    723 {
    724    __asm__ __volatile__ ("xvdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    725 }
    726 
    727 static void test_xvdivsp(void)
    728 {
    729    __asm__ __volatile__ ("xvdivsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    730 }
    731 
    732 static void test_xvmuldp(void)
    733 {
    734    __asm__ __volatile__ ("xvmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    735 }
    736 
    737 static void test_xvmulsp(void)
    738 {
    739    __asm__ __volatile__ ("xvmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    740 }
    741 
    742 static void test_xvsubdp(void)
    743 {
    744    __asm__ __volatile__ ("xvsubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    745 }
    746 
    747 static void test_xvmaxdp(void)
    748 {
    749    __asm__ __volatile__ ("xvmaxdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    750 }
    751 
    752 static void test_xvmindp(void)
    753 {
    754    __asm__ __volatile__ ("xvmindp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    755 }
    756 
    757 static void test_xvmaxsp(void)
    758 {
    759    __asm__ __volatile__ ("xvmaxsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    760 }
    761 
    762 static void test_xvminsp(void)
    763 {
    764    __asm__ __volatile__ ("xvminsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    765 }
    766 
    767 static void test_xvsubsp(void)
    768 {
    769    __asm__ __volatile__ ("xvsubsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    770 }
    771 
    772 static void test_xvresp(void)
    773 {
    774    __asm__ __volatile__ ("xvresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    775 }
    776 
    777 static void test_xxsel(void)
    778 {
    779    unsigned long long * dst;
    780    unsigned long long xa[] =  { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL};
    781    unsigned long long xb[] =  { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
    782    unsigned long long xc[] =  { 0xffffffff01020304ULL, 0x128934bd00000000ULL};
    783 
    784    memcpy(&vec_inA, xa, 16);
    785    memcpy(&vec_inB, xb, 16);
    786    memcpy(&vec_inC, xc, 16);
    787 
    788 
    789    __asm__ __volatile__ ("xxsel   %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC));
    790    dst = (unsigned long long *) &vec_out;
    791    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst);
    792    dst++;
    793    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst);
    794    printf("\n");
    795 }
    796 
    797 static void test_xxspltw(void)
    798 {
    799    int uim;
    800    unsigned long long * dst = NULL;
    801    unsigned long long xb[] =  { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
    802    memcpy(&vec_inB, xb, 16);
    803 
    804    for (uim = 0; uim < 4; uim++) {
    805       switch (uim) {
    806          case 0:
    807             __asm__ __volatile__ ("xxspltw   %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB));
    808             break;
    809          case 1:
    810             __asm__ __volatile__ ("xxspltw   %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB));
    811             break;
    812          case 2:
    813             __asm__ __volatile__ ("xxspltw   %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB));
    814             break;
    815          case 3:
    816             __asm__ __volatile__ ("xxspltw   %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB));
    817             break;
    818       }
    819       dst = (unsigned long long *) &vec_out;
    820       printf("xxspltw 0x%016llx%016llx %d=> 0x%016llx",  xb[0], xb[1], uim, *dst);
    821       dst++;
    822       printf("%016llx\n", *dst);
    823    }
    824    printf("\n");
    825 }
    826 
    827 static void test_xscvdpsxws(void)
    828 {
    829    __asm__ __volatile__ ("xscvdpsxws  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    830 }
    831 
    832 static void test_xscvdpuxds(void)
    833 {
    834    __asm__ __volatile__ ("xscvdpuxds  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    835 }
    836 
    837 static void test_xvcpsgndp(void)
    838 {
    839    __asm__ __volatile__  ("xvcpsgndp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    840 }
    841 
    842 static void test_xvcpsgnsp(void)
    843 {
    844    __asm__ __volatile__  ("xvcpsgnsp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    845 }
    846 
    847 static void test_xvcvdpsxws(void)
    848 {
    849    __asm__ __volatile__ ("xvcvdpsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
    850 }
    851 
    852 static void test_xvcvspsxws(void)
    853 {
    854    __asm__ __volatile__ ("xvcvspsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
    855 }
    856 
    857 static vx_fp_test_t
    858 vx_vector_one_fp_arg_tests[] = {
    859                                 { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"},
    860                                 { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"},
    861                                 { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"},
    862                                 { NULL, NULL, NULL, 0 , 0, 0, NULL}
    863 };
    864 
    865 static vx_fp_test_t
    866 vx_vector_fp_tests[] = {
    867                         { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"},
    868                         { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"},
    869                         { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"},
    870                         { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"},
    871                         { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"},
    872                         { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"},
    873                         { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" },
    874                         { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" },
    875                         { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" },
    876                         { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" },
    877                         { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" },
    878                         { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" },
    879                         { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" },
    880                         { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" },
    881                         { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" },
    882                         { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" },
    883                         { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" },
    884                         { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" },
    885                         { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"},
    886                         { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"},
    887                         { NULL, NULL, NULL, 0 , 0, 0, NULL}
    888 };
    889 
    890 
    891 static vx_fp_test_t
    892 vx_aORm_fp_tests[] = {
    893                        { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"},
    894                        { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
    895                        { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
    896                        { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
    897                        { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
    898                        { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
    899                        { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
    900                        { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
    901                        { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
    902                        { NULL, NULL, NULL, 0, 0, 0,  NULL }
    903 };
    904 
    905 static vx_fp_test_t
    906 vx_simple_scalar_fp_tests[] = {
    907                                { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
    908                                { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
    909                                { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
    910                                { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
    911                                { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
    912                                { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
    913                                { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL},
    914                                { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
    915                                { NULL, NULL, NULL, 0, 0, 0, NULL }
    916 };
    917 
    918 
    919 #ifdef __powerpc64__
    920 static void test_bpermd(void)
    921 {
    922    /* NOTE: Bit number is '0 . . . 63'
    923     *
    924     * Permuted bits are generated bit 0 -7 as follows:
    925     *    index = (r14)8*i:8*i+7
    926     *    perm[i] = (r15)index
    927     *
    928     * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB
    929     * byte of r14, 0x1b(27/base 10).  This identifies bit 27 of r15, which is '1'.
    930     * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
    931     * So the result of the first two iterations of i are:
    932     *   perm = 0b01xxxxxx
    933     *
    934     */
    935    r15 = 0xa12bc37de56f9708ULL;
    936    r14 = 0x1b2c31f030000001ULL;
    937    __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    938    printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14,
    939           (unsigned long long)r15, (unsigned long long)r17);
    940    printf("\n");
    941 }
    942 #endif
    943 
    944 static Bool do_OE;
    945 typedef enum {
    946    DIV_BASE = 1,
    947    DIV_OE = 2,
    948    DIV_DOT = 4,
    949 } div_type_t;
    950 /* Possible divde type combinations are:
    951  *   - base
    952  *   - base+dot
    953  *   - base+OE
    954  *   - base+OE+dot
    955  */
    956 #ifdef __powerpc64__
    957 static void test_divde(void)
    958 {
    959    int divde_type = DIV_BASE;
    960    if (do_OE)
    961       divde_type |= DIV_OE;
    962    if (do_dot)
    963       divde_type |= DIV_DOT;
    964 
    965    switch (divde_type) {
    966       case 1:
    967         SET_CR_XER_ZERO;
    968          __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    969          GET_CR_XER(div_flags, div_xer);
    970          break;
    971       case 3:
    972         SET_CR_XER_ZERO;
    973          __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    974          GET_CR_XER(div_flags, div_xer);
    975          break;
    976       case 5:
    977         SET_CR_XER_ZERO;
    978          __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    979          GET_CR_XER(div_flags, div_xer);
    980          break;
    981       case 7:
    982         SET_CR_XER_ZERO;
    983          __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    984          GET_CR_XER(div_flags, div_xer);
    985          break;
    986       default:
    987          fprintf(stderr, "Invalid divde type. Exiting\n");
    988          exit(1);
    989    }
    990 }
    991 #endif
    992 
    993 static void test_divweu(void)
    994 {
    995    int divweu_type = DIV_BASE;
    996    if (do_OE)
    997       divweu_type |= DIV_OE;
    998    if (do_dot)
    999       divweu_type |= DIV_DOT;
   1000 
   1001    switch (divweu_type) {
   1002       case 1:
   1003         SET_CR_XER_ZERO;
   1004          __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1005          GET_CR_XER(div_flags, div_xer);
   1006          break;
   1007       case 3:
   1008         SET_CR_XER_ZERO;
   1009          __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1010          GET_CR_XER(div_flags, div_xer);
   1011          break;
   1012       case 5:
   1013         SET_CR_XER_ZERO;
   1014          __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1015          GET_CR_XER(div_flags, div_xer);
   1016          break;
   1017       case 7:
   1018         SET_CR_XER_ZERO;
   1019          __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1020          GET_CR_XER(div_flags, div_xer);
   1021          break;
   1022       default:
   1023          fprintf(stderr, "Invalid divweu type. Exiting\n");
   1024          exit(1);
   1025    }
   1026 }
   1027 
   1028 static void test_fctiduz(void)
   1029 {
   1030    if (do_dot)
   1031       __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14));
   1032    else
   1033       __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14));
   1034 }
   1035 
   1036 static void test_fctidu(void)
   1037 {
   1038    if (do_dot)
   1039       __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14));
   1040    else
   1041       __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14));
   1042 }
   1043 
   1044 static void test_fctiwuz(void)
   1045 {
   1046    if (do_dot)
   1047       __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14));
   1048    else
   1049       __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14));
   1050 }
   1051 
   1052 static void test_fctiwu(void)
   1053 {
   1054    if (do_dot)
   1055       __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14));
   1056    else
   1057       __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14));
   1058 }
   1059 
   1060 typedef struct simple_test {
   1061    test_func_t test_func;
   1062    char * name;
   1063    precision_type_t precision;
   1064 } simple_test_t;
   1065 
   1066 static simple_test_t fct_tests[] = {
   1067                                     { &test_fctiduz, "fctiduz", DOUBLE_TEST },
   1068                                     { &test_fctidu, "fctidu", DOUBLE_TEST },
   1069                                     { &test_fctiwuz, "fctiwuz", SINGLE_TEST },
   1070                                     { &test_fctiwu, "fctiwu", SINGLE_TEST },
   1071                                    { NULL, NULL }
   1072 };
   1073 
   1074 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
   1075 {
   1076    int a_idx, b_idx, i;
   1077    void * inA, * inB;
   1078    void * vec_src = swap_inputs ? &vec_out : &vec_inB;
   1079 
   1080    for (i = 0; i < 4; i++) {
   1081       a_idx = targs->fra_idx;
   1082       b_idx = targs->frb_idx;
   1083       inA = (void *)&spec_sp_fargs[a_idx];
   1084       inB = (void *)&spec_sp_fargs[b_idx];
   1085       // copy single precision FP  into vector element i
   1086       memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
   1087       memcpy(vec_src + (i * 4), inB, 4);
   1088       targs++;
   1089    }
   1090 }
   1091 
   1092 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
   1093 {
   1094    int a_idx, b_idx, i;
   1095    void * inA, * inB;
   1096    void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
   1097 
   1098    for (i = 0; i < 2; i++) {
   1099       a_idx = targs->fra_idx;
   1100       b_idx = targs->frb_idx;
   1101       inA = (void *)&spec_fargs[a_idx];
   1102       inB = (void *)&spec_fargs[b_idx];
   1103       // copy double precision FP  into vector element i
   1104       memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
   1105       memcpy(vec_src + (i * 8), inB, 8);
   1106       targs++;
   1107    }
   1108 }
   1109 
   1110 #define VX_NOT_CMP_OP 0xffffffff
   1111 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i)
   1112 {
   1113    int a_idx, b_idx, k;
   1114    char * name = malloc(20);
   1115    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   1116    int loops = dp ? 2 : 4;
   1117    fp_test_args_t * targs = &test_group->targs[i];
   1118    unsigned long long * frA_dp, * frB_dp, * dst_dp;
   1119    unsigned int * frA_sp, *frB_sp, * dst_sp;
   1120    strcpy(name, test_group->name);
   1121    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
   1122    for (k = 0; k < loops; k++) {
   1123       a_idx = targs->fra_idx;
   1124       b_idx = targs->frb_idx;
   1125       if (k)
   1126          printf(" AND ");
   1127       if (dp) {
   1128          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
   1129          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
   1130          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
   1131       } else {
   1132          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
   1133          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
   1134          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
   1135       }
   1136       targs++;
   1137    }
   1138    if (cc != VX_NOT_CMP_OP)
   1139       printf(" ? cc=%x", cc);
   1140 
   1141    if (dp) {
   1142       dst_dp = (unsigned long long *) &vec_out;
   1143       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
   1144    } else {
   1145       dst_sp = (unsigned int *) &vec_out;
   1146       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
   1147    }
   1148    free(name);
   1149 }
   1150 
   1151 
   1152 static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg,
   1153                                     vx_fp_test_t * test_group, int i)
   1154 {
   1155    int a_idx, k;
   1156    char * name = malloc(20);
   1157    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   1158    int loops = dp ? 2 : 4;
   1159    fp_test_args_t * targs = &test_group->targs[i];
   1160    unsigned long long frA_dp, * dst_dp;
   1161    unsigned int frA_sp, * dst_sp;
   1162 
   1163    strcpy(name, test_group->name);
   1164    if (do_aXp)
   1165       if (dp)
   1166          strcat(name, "adp");
   1167       else
   1168          strcat(name, "asp");
   1169    else
   1170       if (dp)
   1171          strcat(name, "mdp");
   1172       else
   1173          strcat(name, "msp");
   1174 
   1175    printf("#%d: %s ", dp? i/2 : i/4, name);
   1176    for (k = 0; k < loops; k++) {
   1177       a_idx = targs->fra_idx;
   1178       if (k)
   1179          printf(" AND ");
   1180       if (dp) {
   1181          frA_dp = *((unsigned long long *)&spec_fargs[a_idx]);
   1182          printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]);
   1183       } else {
   1184          unsigned int * xt_sp = (unsigned int *)XT_arg;
   1185          unsigned int * xb_sp = (unsigned int *)XB_arg;
   1186          frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]);
   1187          printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]);
   1188       }
   1189       targs++;
   1190    }
   1191 
   1192    if (dp) {
   1193       dst_dp = (unsigned long long *) &vec_out;
   1194       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
   1195    } else {
   1196       dst_sp = (unsigned int *) &vec_out;
   1197       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
   1198    }
   1199    free(name);
   1200 }
   1201 
   1202 /* This function currently only supports double precision input arguments. */
   1203 static void test_vx_simple_scalar_fp_ops(void)
   1204 {
   1205    test_func_t func;
   1206    int k = 0;
   1207 
   1208    build_special_fargs_table();
   1209    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
   1210       unsigned long long * frap, * frbp, * dst;
   1211       unsigned int * pv;
   1212       int idx;
   1213       vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k];
   1214       Bool convToWord = (test_group.type == VX_CONV_WORD);
   1215       if (test_group.precision != DOUBLE_TEST) {
   1216          fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
   1217          exit(1);
   1218       }
   1219       pv = (unsigned int *)&vec_out;
   1220       // clear vec_out
   1221       for (idx = 0; idx < 4; idx++, pv++)
   1222          *pv = 0;
   1223 
   1224       /* If num_tests is exactly equal to nb_special_fargs, this implies the
   1225        * instruction being tested only requires one floating point argument
   1226        * (e.g. xssqrtdp).
   1227        */
   1228       if (test_group.num_tests == nb_special_fargs && !test_group.targs) {
   1229          void * inB;
   1230          int i;
   1231          for (i = 0; i < nb_special_fargs; i++) {
   1232             inB = (void *)&spec_fargs[i];
   1233             frbp = (unsigned long long *)&spec_fargs[i];
   1234             memcpy(&vec_inB, inB, 8);
   1235             (*func)();
   1236             dst = (unsigned long long *) &vec_out;
   1237             printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp,
   1238                    convToWord ? (*dst & 0x00000000ffffffffULL) : *dst);
   1239          }
   1240       } else {
   1241          void * inA, * inB;
   1242          unsigned int condreg, flags;
   1243          int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0;
   1244          int i;
   1245          for (i = 0; i < test_group.num_tests; i++) {
   1246             fp_test_args_t aTest = test_group.targs[i];
   1247             inA = (void *)&spec_fargs[aTest.fra_idx];
   1248             inB = (void *)&spec_fargs[aTest.frb_idx];
   1249             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1250             frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1251             // Only need to copy one doubleword into each vector's element 0
   1252             memcpy(&vec_inA, inA, 8);
   1253             memcpy(&vec_inB, inB, 8);
   1254             SET_FPSCR_ZERO;
   1255             SET_CR_XER_ZERO;
   1256             (*func)();
   1257             GET_CR(flags);
   1258             if (isTdiv) {
   1259                condreg = (flags & 0x000000f0) >> 4;
   1260                printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg);
   1261             } else {
   1262                dst = (unsigned long long *) &vec_out;
   1263                printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
   1264                       *frap, *frbp, *dst);
   1265             }
   1266          }
   1267       }
   1268       printf( "\n" );
   1269       k++;
   1270    }
   1271 }
   1272 
   1273 static void test_vx_aORm_fp_ops(void)
   1274 {
   1275    /* These ops need a third src argument, which is stored in element 0 of
   1276     * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
   1277     * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
   1278     * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
   1279     * data (input args, result) contain only two inputs, so I arbitrarily
   1280     * choose some spec_fargs elements for the third source argument.
   1281     * Note that that by using the same input data for a given pair of
   1282     * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
   1283     * arguments), the expected result should be the same.
   1284     */
   1285 
   1286    test_func_t func;
   1287    int k;
   1288    char * test_name = (char *)malloc(20);
   1289    k = 0;
   1290    do_dot = False;
   1291 
   1292    build_special_fargs_table();
   1293    while ((func = vx_aORm_fp_tests[k].test_func)) {
   1294       int i, stride;
   1295       Bool repeat = False;
   1296       Bool scalar = False;
   1297       unsigned long long * frap, * frbp, * dst;
   1298       vx_fp_test_t test_group = vx_aORm_fp_tests[k];
   1299       vx_fp_test_type test_type = test_group.type;
   1300       do_dp = test_group.precision == DOUBLE_TEST ? True : False;
   1301       frap = frbp = NULL;
   1302 
   1303       if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
   1304             scalar = True;
   1305             strcpy(test_name, test_group.name);
   1306             if (!repeat) {
   1307                repeat = 1;
   1308                stride = 1;
   1309                // Only support double precision scalar ops in this function
   1310                if (do_dp) {
   1311                   strcat(test_name, "adp");
   1312                } else {
   1313                   fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
   1314                   exit(1);
   1315                }
   1316                do_aXp = True;
   1317             }
   1318       } else if (test_type < VX_BASIC_CMP) {
   1319          // Then it must be a VX_VECTOR_xxx type
   1320             stride = do_dp ? 2 : 4;
   1321             if (!repeat) {
   1322                // No need to work up the testcase name here, since that will be done in
   1323                // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
   1324                repeat = 1;
   1325                do_aXp = True;
   1326             }
   1327       } else {
   1328             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
   1329             exit(1);
   1330       }
   1331 
   1332 again:
   1333       for (i = 0; i < test_group.num_tests; i+=stride) {
   1334          void  * inA, * inB;
   1335          int m, fp_idx[4];
   1336          unsigned long long vsr_XT[2];
   1337          unsigned long long vsr_XB[2];
   1338          fp_test_args_t aTest = test_group.targs[i];
   1339          for (m = 0; m < stride; m++)
   1340             fp_idx[m] = i % (nb_special_fargs - stride) + m;
   1341 
   1342          /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
   1343           * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
   1344           * inputs as described above:
   1345           *    src2 <= VSX[XT]
   1346           *    src3 <= VSX[XB]
   1347           */
   1348          if (scalar) {
   1349             // For scalar op, only need to copy one doubleword into each vector's element 0
   1350             inA = (void *)&spec_fargs[aTest.fra_idx];
   1351             inB = (void *)&spec_fargs[aTest.frb_idx];
   1352             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1353             memcpy(&vec_inA, inA, 8);
   1354             if (repeat) {
   1355                memcpy(&vec_out, inB, 8);  // src2
   1356                memcpy(&vec_inB, &spec_fargs[fp_idx[0]], 8);  //src3
   1357                frbp = (unsigned long long *)&spec_fargs[fp_idx[0]];
   1358             } else {
   1359                frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1360                memcpy(&vec_inB, inB, 8);  // src2
   1361                memcpy(&vec_out, &spec_fargs[fp_idx[0]], 8);  //src3
   1362             }
   1363             memcpy(vsr_XT, &vec_out, 8);
   1364          } else {
   1365             int j, loops = do_dp ? 2 : 4;
   1366             size_t len = do_dp ? 8 : 4;
   1367             void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out;
   1368             for (j = 0; j < loops; j++) {
   1369                if (do_dp)
   1370                   memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len);
   1371                else
   1372                   memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len);
   1373             }
   1374             if (do_dp)
   1375                setup_dp_fp_args(&test_group.targs[i], repeat);
   1376             else
   1377                setup_sp_fp_args(&test_group.targs[i], repeat);
   1378 
   1379             memcpy(vsr_XT, &vec_out, 16);
   1380             memcpy(vsr_XB, &vec_inB, 16);
   1381          }
   1382 
   1383          (*func)();
   1384          dst = (unsigned long long *) &vec_out;
   1385          if (test_type < VX_VECTOR_FP_MULT_AND_OP2)
   1386             printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i,
   1387                     test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst );
   1388          else
   1389             print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i);
   1390       }
   1391       printf( "\n" );
   1392 
   1393       if (repeat) {
   1394          repeat = 0;
   1395          if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
   1396                strcpy(test_name, test_group.name);
   1397                strcat(test_name, "mdp");
   1398          }
   1399          do_aXp = False;
   1400          goto again;
   1401       }
   1402       k++;
   1403    }
   1404    printf( "\n" );
   1405    free(test_name);
   1406 }
   1407 
   1408 static void test_vx_vector_one_fp_arg(void)
   1409 {
   1410    test_func_t func;
   1411    int k;
   1412    k = 0;
   1413    build_special_fargs_table();
   1414 
   1415    while ((func = vx_vector_one_fp_arg_tests[k].test_func)) {
   1416       int idx, i;
   1417       vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k];
   1418       Bool convToWord = (test_group.type == VX_CONV_WORD);
   1419       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
   1420       Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False;
   1421       int stride = dp ? 2 : 4;
   1422 
   1423       for (i = 0; i < test_group.num_tests; i+=stride) {
   1424          unsigned int * pv;
   1425          void * inB;
   1426 
   1427          pv = (unsigned int *)&vec_out;
   1428          // clear vec_out
   1429          for (idx = 0; idx < 4; idx++, pv++)
   1430             *pv = 0;
   1431 
   1432          if (dp) {
   1433             int j;
   1434             unsigned long long * frB_dp, *dst_dp;
   1435             for (j = 0; j < 2; j++) {
   1436                inB = (void *)&spec_fargs[i + j];
   1437                // copy double precision FP into vector element i
   1438                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
   1439             }
   1440             // execute test insn
   1441             (*func)();
   1442             dst_dp = (unsigned long long *) &vec_out;
   1443             printf("#%d: %s ", i/2, test_group.name);
   1444             for (j = 0; j < 2; j++) {
   1445                if (j)
   1446                   printf("; ");
   1447                frB_dp = (unsigned long long *)&spec_fargs[i + j];
   1448                printf("%s(%016llx)", test_group.op, *frB_dp);
   1449                printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]);
   1450             }
   1451             printf("\n");
   1452          } else {
   1453             int j;
   1454             unsigned int * frB_sp, * dst_sp;
   1455 
   1456             for (j = 0; j < 4; j++) {
   1457                inB = (void *)&spec_sp_fargs[i + j];
   1458                // copy single precision FP into vector element i
   1459                memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
   1460             }
   1461             // execute test insn
   1462             (*func)();
   1463             dst_sp = (unsigned int *) &vec_out;
   1464             // print result
   1465             printf("#%d: %s ", i/4, test_group.name);
   1466             for (j = 0; j < 4; j++) {
   1467                if (j)
   1468                   printf("; ");
   1469                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
   1470                printf("%s(%08x)", test_group.op, *frB_sp);
   1471                if (xvrespTest) {
   1472                   float calc_diff = fabs(spec_sp_fargs[i + j]/256);
   1473                   float sp_res;
   1474                   memcpy(&sp_res, &dst_sp[j], 4);
   1475                   float div_result = 1/spec_sp_fargs[i + j];
   1476                   float real_diff = fabs(sp_res - div_result);
   1477                   printf( " ==> %s",
   1478                           ( ( sp_res == div_result )
   1479                                    || ( isnan(sp_res) && isnan(div_result) )
   1480                                    || ( real_diff <= calc_diff ) ) ? "PASS"
   1481                                                                      : "FAIL");
   1482                } else {
   1483                   printf(" = %08x", dst_sp[j]);
   1484                }
   1485             }
   1486             printf("\n");
   1487          }
   1488       }
   1489       k++;
   1490       printf( "\n" );
   1491    }
   1492 
   1493 }
   1494 
   1495 /* This function assumes the instruction being tested requires two args. */
   1496 static void test_vx_vector_fp_ops(void)
   1497 {
   1498    test_func_t func;
   1499    int k;
   1500    k = 0;
   1501    build_special_fargs_table();
   1502 
   1503    while ((func = vx_vector_fp_tests[k].test_func)) {
   1504       int idx, i, repeat = 1;
   1505       vx_fp_test_t test_group = vx_vector_fp_tests[k];
   1506       int stride = test_group.precision == DOUBLE_TEST ? 2 : 4;
   1507       do_dot = False;
   1508 
   1509 again:
   1510       for (i = 0; i < test_group.num_tests; i+=stride) {
   1511          unsigned int * pv, condreg;
   1512          unsigned int flags;
   1513 
   1514          pv = (unsigned int *)&vec_out;
   1515          if (test_group.precision == DOUBLE_TEST)
   1516             setup_dp_fp_args(&test_group.targs[i], False);
   1517          else
   1518             setup_sp_fp_args(&test_group.targs[i], False);
   1519 
   1520          // clear vec_out
   1521          for (idx = 0; idx < 4; idx++, pv++)
   1522             *pv = 0;
   1523 
   1524          // execute test insn
   1525          SET_FPSCR_ZERO;
   1526          SET_CR_XER_ZERO;
   1527          (*func)();
   1528          GET_CR(flags);
   1529          if (test_group.type == VX_BASIC_CMP) {
   1530             condreg = (flags & 0x000000f0) >> 4;
   1531          } else {
   1532             condreg = VX_NOT_CMP_OP;
   1533          }
   1534          print_vector_fp_result(condreg, &test_group, i);
   1535       }
   1536       printf("\n");
   1537       if (repeat && test_group.type == VX_BASIC_CMP) {
   1538          repeat = 0;
   1539          do_dot = True;
   1540          goto again;
   1541       }
   1542       k++;
   1543       printf( "\n" );
   1544    }
   1545 }
   1546 
   1547 
   1548 // The div doubleword test data
   1549 signed long long div_dw_tdata[13][2] = {
   1550                                        { 4, -4 },
   1551                                        { 4, -3 },
   1552                                        { 4, 4 },
   1553                                        { 4, -5 },
   1554                                        { 3, 8 },
   1555                                        { 0x8000000000000000ULL, 0xa },
   1556                                        { 0x50c, -1 },
   1557                                        { 0x50c, -4096 },
   1558                                        { 0x1234fedc, 0x8000a873 },
   1559                                        { 0xabcd87651234fedcULL, 0xa123b893 },
   1560                                        { 0x123456789abdcULL, 0 },
   1561                                        { 0, 2 },
   1562                                        { 0x77, 0xa3499 }
   1563 };
   1564 #define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2)
   1565 
   1566 // The div word test data
   1567 unsigned int div_w_tdata[6][2] = {
   1568                               { 0, 2 },
   1569                               { 2, 0 },
   1570                               { 0x7abc1234, 0xf0000000 },
   1571                               { 0xfabc1234, 5 },
   1572                               { 77, 66 },
   1573                               { 5, 0xfabc1234 },
   1574 };
   1575 #define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2)
   1576 
/* Describes one group of extended-divide tests run by test_div_extensions(). */
typedef struct div_ext_test
{
   /* Routine that executes the instruction under test. */
   test_func_t test_func;
   /* Mnemonic printed in front of every result line. */
   const char *name;
   /* Number of operand pairs to run from the selected data table. */
   int num_tests;
   /* DIV_OE makes the driver set do_OE; any other value clears it. */
   div_type_t div_type;
   /* DOUBLE_TEST selects div_dw_tdata; otherwise div_w_tdata is used. */
   precision_type_t precision;
} div_ext_test_t;
   1585 
   1586 static div_ext_test_t div_tests[] = {
   1587 #ifdef __powerpc64__
   1588                                    { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
   1589                                    { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
   1590 #endif
   1591                                    { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST },
   1592                                    { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST },
   1593                                    { NULL, NULL, 0, 0, 0 }
   1594 };
   1595 
   1596 static void test_div_extensions(void)
   1597 {
   1598    test_func_t func;
   1599    int k;
   1600    k = 0;
   1601 
   1602    while ((func = div_tests[k].test_func)) {
   1603       int i, repeat = 1;
   1604       div_ext_test_t test_group = div_tests[k];
   1605       do_dot = False;
   1606 
   1607 again:
   1608       for (i = 0; i < test_group.num_tests; i++) {
   1609          unsigned int condreg;
   1610 
   1611          if (test_group.div_type == DIV_OE)
   1612             do_OE = True;
   1613          else
   1614             do_OE = False;
   1615 
   1616          if (test_group.precision == DOUBLE_TEST) {
   1617             r14 = div_dw_tdata[i][0];
   1618             r15 = div_dw_tdata[i][1];
   1619          } else {
   1620             r14 = div_w_tdata[i][0];
   1621             r15 = div_w_tdata[i][1];
   1622          }
   1623          // execute test insn
   1624          (*func)();
   1625          condreg = (div_flags & 0xf0000000) >> 28;
   1626          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
   1627          if (test_group.precision == DOUBLE_TEST) {
   1628             printf("0x%016llx / 0x%016llx = 0x%016llx;",
   1629                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
   1630          } else {
   1631             printf("0x%08x / 0x%08x = 0x%08x;",
   1632                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
   1633          }
   1634          printf(" CR=%x; XER=%x\n", condreg, div_xer);
   1635       }
   1636       printf("\n");
   1637       if (repeat) {
   1638          repeat = 0;
   1639          do_dot = True;
   1640          goto again;
   1641       }
   1642       k++;
   1643       printf( "\n" );
   1644    }
   1645 
   1646 }
   1647 
   1648 static void test_fct_ops(void)
   1649 {
   1650    test_func_t func;
   1651    int k;
   1652    k = 0;
   1653 
   1654    while ((func = fct_tests[k].test_func)) {
   1655       int i, repeat = 1;
   1656       simple_test_t test_group = fct_tests[k];
   1657       do_dot = False;
   1658 
   1659 again:
   1660       for (i = 0; i < nb_special_fargs; i++) {
   1661          double result;
   1662 #define SINGLE_MASK 0x00000000FFFFFFFFULL
   1663 
   1664          f14 = spec_fargs[i];
   1665          // execute test insn
   1666          SET_FPSCR_ZERO;
   1667          (*func)();
   1668          result = f17;
   1669          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
   1670          printf("0x%016llx (%e) ==> 0x%016llx\n",
   1671                 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i],
   1672                 test_group.precision == SINGLE_TEST ? (SINGLE_MASK &
   1673                          *((unsigned long long *)(&result))) :
   1674                          *((unsigned long long *)(&result)));
   1675       }
   1676       printf("\n");
   1677       if (repeat) {
   1678          repeat = 0;
   1679          do_dot = True;
   1680          goto again;
   1681       }
   1682       k++;
   1683       printf( "\n" );
   1684    }
   1685 }
   1686 
   1687 #ifdef __powerpc64__
   1688 void test_stdbrx(void)
   1689 {
   1690    unsigned long long store, val = 0xdeadbacf12345678ULL;
   1691    printf("stdbrx: 0x%llx ==> ", val);
   1692    r17 = (HWord_t)val;
   1693    r14 = (HWord_t)&store;
   1694    __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14));
   1695    printf("0x%llx\n", store);
   1696    printf( "\n" );
   1697 }
   1698 #endif
   1699 
   1700 static test_table_t
   1701          all_tests[] =
   1702 {
   1703                     { &test_vx_vector_one_fp_arg,
   1704                       "Test VSX vector single arg instructions"},
   1705                     { &test_vx_vector_fp_ops,
   1706                       "Test VSX floating point compare and basic arithmetic instructions" },
   1707 #ifdef __powerpc64__
   1708                      { &test_bpermd,
   1709                        "Test bit permute double"},
   1710 #endif
   1711                      { &test_xxsel,
   1712                          "Test xxsel instruction" },
   1713                      { &test_xxspltw,
   1714                          "Test xxspltw instruction" },
   1715                      { &test_div_extensions,
   1716                        "Test div extensions" },
   1717                      { &test_fct_ops,
   1718                        "Test floating point convert [word | doubleword] unsigned, with round toward zero" },
   1719 #ifdef __powerpc64__
   1720                      { &test_stdbrx,
   1721                       "Test stdbrx instruction"},
   1722 #endif
   1723                      { &test_vx_aORm_fp_ops,
   1724                       "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p"},
   1725                      { &test_vx_simple_scalar_fp_ops,
   1726                       "Test scalar floating point arithmetic instructions"},
   1727                      { NULL, NULL }
   1728 };
   1729 #endif // HAS_VSX
   1730 
   1731 int main(int argc, char *argv[])
   1732 {
   1733 #ifdef HAS_VSX
   1734 
   1735    test_table_t aTest;
   1736    test_func_t func;
   1737    int i = 0;
   1738 
   1739    while ((func = all_tests[i].test_category)) {
   1740       aTest = all_tests[i];
   1741       printf( "%s\n", aTest.name );
   1742       (*func)();
   1743       i++;
   1744    }
   1745    if (spec_fargs)
   1746      free(spec_fargs);
   1747    if (spec_sp_fargs)
   1748      free(spec_sp_fargs);
   1749 
   1750 #endif // HAS _VSX
   1751 
   1752    return 0;
   1753 }
   1754