Home | History | Annotate | Download | only in ppc32
      1 /*  Copyright (C) 2011 IBM
      2 
      3  Author: Maynard Johnson <maynardj (at) us.ibm.com>
      4 
      5  This program is free software; you can redistribute it and/or
      6  modify it under the terms of the GNU General Public License as
      7  published by the Free Software Foundation; either version 2 of the
      8  License, or (at your option) any later version.
      9 
     10  This program is distributed in the hope that it will be useful, but
     11  WITHOUT ANY WARRANTY; without even the implied warranty of
     12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  General Public License for more details.
     14 
     15  You should have received a copy of the GNU General Public License
     16  along with this program; if not, write to the Free Software
     17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     18  02111-1307, USA.
     19 
     20  The GNU General Public License is contained in the file COPYING.
     21  */
     22 
     23 #ifdef HAS_VSX
     24 
     25 #include <stdio.h>
     26 #include <stdint.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <malloc.h>
     30 #include <altivec.h>
     31 #include <math.h>
     32 
     33 #ifndef __powerpc64__
     34 typedef uint32_t HWord_t;
     35 #else
     36 typedef uint64_t HWord_t;
     37 #endif /* __powerpc64__ */
     38 
     39 typedef unsigned char Bool;
     40 #define True 1
     41 #define False 0
     42 
     43 #ifdef VGP_ppc64le_linux
     44 #define isLE 1
     45 #else
     46 #define isLE 0
     47 #endif
     48 
     49 register HWord_t r14 __asm__ ("r14");
     50 register HWord_t r15 __asm__ ("r15");
     51 register HWord_t r16 __asm__ ("r16");
     52 register HWord_t r17 __asm__ ("r17");
     53 register double f14 __asm__ ("fr14");
     54 register double f15 __asm__ ("fr15");
     55 register double f16 __asm__ ("fr16");
     56 register double f17 __asm__ ("fr17");
     57 
     58 static volatile unsigned int div_flags, div_xer;
     59 
     60 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
     61 
     62 #define SET_CR(_arg) \
     63       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
     64 
     65 #define SET_XER(_arg) \
     66       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
     67 
     68 #define GET_CR(_lval) \
     69       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
     70 
     71 #define GET_XER(_lval) \
     72       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
     73 
     74 #define GET_CR_XER(_lval_cr,_lval_xer) \
     75    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
     76 
     77 #define SET_CR_ZERO \
     78       SET_CR(0)
     79 
     80 #define SET_XER_ZERO \
     81       SET_XER(0)
     82 
     83 #define SET_CR_XER_ZERO \
     84    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
     85 
     86 #define SET_FPSCR_ZERO \
     87    do { double _d = 0.0; \
     88         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
     89    } while (0)
     90 
     91 
     92 typedef void (*test_func_t)(void);
     93 typedef struct test_table test_table_t;
     94 
     95 
     96 /* These functions below that construct a table of floating point
     97  * values were lifted from none/tests/ppc32/jm-insns.c.
     98  */
     99 
    100 #if defined (DEBUG_ARGS_BUILD)
    101 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    102 #else
    103 #define AB_DPRINTF(fmt, args...) do { } while (0)
    104 #endif
    105 
    106 static inline void register_farg (void *farg,
    107                                   int s, uint16_t _exp, uint64_t mant)
    108 {
    109    uint64_t tmp;
    110 
    111    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    112    *(uint64_t *)farg = tmp;
    113    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    114               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    115 }
    116 
    117 static inline void register_sp_farg (void *farg,
    118                                      int s, uint16_t _exp, uint32_t mant)
    119 {
    120    uint32_t tmp;
    121    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
    122    *(uint32_t *)farg = tmp;
    123 }
    124 
    125 typedef struct fp_test_args {
    126    int fra_idx;
    127    int frb_idx;
    128 } fp_test_args_t;
    129 
    130 
    131 fp_test_args_t fp_cmp_tests[] = {
    132                                    {8, 8},
    133                                    {8, 14},
    134                                    {8, 6},
    135                                    {8, 5},
    136                                    {8, 4},
    137                                    {8, 7},
    138                                    {8, 9},
    139                                    {8, 11},
    140                                    {14, 8},
    141                                    {14, 14},
    142                                    {14, 6},
    143                                    {14, 5},
    144                                    {14, 4},
    145                                    {14, 7},
    146                                    {14, 9},
    147                                    {14, 11},
    148                                    {6, 8},
    149                                    {6, 14},
    150                                    {6, 6},
    151                                    {6, 5},
    152                                    {6, 4},
    153                                    {6, 7},
    154                                    {6, 9},
    155                                    {6, 11},
    156                                    {5, 8},
    157                                    {5, 14},
    158                                    {5, 6},
    159                                    {5, 5},
    160                                    {5, 4},
    161                                    {5, 7},
    162                                    {5, 9},
    163                                    {5, 11},
    164                                    {4, 8},
    165                                    {4, 14},
    166                                    {4, 6},
    167                                    {4, 5},
    168                                    {4, 1},
    169                                    {4, 7},
    170                                    {4, 9},
    171                                    {4, 11},
    172                                    {7, 8},
    173                                    {7, 14},
    174                                    {7, 6},
    175                                    {7, 5},
    176                                    {7, 4},
    177                                    {7, 7},
    178                                    {7, 9},
    179                                    {7, 11},
    180                                    {10, 8},
    181                                    {10, 14},
    182                                    {10, 6},
    183                                    {10, 5},
    184                                    {10, 4},
    185                                    {10, 7},
    186                                    {10, 9},
    187                                    {10, 10},
    188                                    {12, 8},
    189                                    {12, 14},
    190                                    {12, 6},
    191                                    {12, 5},
    192                                    {1, 1},
    193                                    {2, 2},
    194                                    {3, 3},
    195                                    {4, 4},
    196 };
    197 
    198 
    199 fp_test_args_t two_arg_fp_tests[] = {
    200                                      {8, 8},
    201                                      {8, 14},
    202                                      {15, 16},
    203                                      {8, 5},
    204                                      {8, 4},
    205                                      {8, 7},
    206                                      {8, 9},
    207                                      {8, 11},
    208                                      {14, 8},
    209                                      {14, 14},
    210                                      {14, 6},
    211                                      {14, 5},
    212                                      {14, 4},
    213                                      {14, 7},
    214                                      {14, 9},
    215                                      {14, 11},
    216                                      {6, 8},
    217                                      {6, 14},
    218                                      {6, 6},
    219                                      {6, 5},
    220                                      {6, 4},
    221                                      {6, 7},
    222                                      {6, 9},
    223                                      {6, 11},
    224                                      {5, 8},
    225                                      {5, 14},
    226                                      {5, 6},
    227                                      {5, 5},
    228                                      {5, 4},
    229                                      {5, 7},
    230                                      {5, 9},
    231                                      {5, 11},
    232                                      {4, 8},
    233                                      {4, 14},
    234                                      {4, 6},
    235                                      {4, 5},
    236                                      {4, 1},
    237                                      {4, 7},
    238                                      {4, 9},
    239                                      {4, 11},
    240                                      {7, 8},
    241                                      {7, 14},
    242                                      {7, 6},
    243                                      {7, 5},
    244                                      {7, 4},
    245                                      {7, 7},
    246                                      {7, 9},
    247                                      {7, 11},
    248                                      {10, 8},
    249                                      {10, 14},
    250                                      {12, 6},
    251                                      {12, 5},
    252                                      {10, 4},
    253                                      {10, 7},
    254                                      {10, 9},
    255                                      {10, 11},
    256                                      {12, 8 },
    257                                      {12, 14},
    258                                      {12, 6},
    259                                      {15, 16},
    260                                      {15, 16},
    261                                      {9, 11},
    262                                      {11, 11},
    263                                      {11, 12}
    264 };
    265 
    266 
    267 static int nb_special_fargs;
    268 static double * spec_fargs;
    269 static float * spec_sp_fargs;
    270 
    271 static void build_special_fargs_table(void)
    272 {
    273 /*
    274   Entry  Sign Exp   fraction                  Special value
    275    0      0   3fd   0x8000000000000ULL         Positive finite number
    276    1      0   404   0xf000000000000ULL         ...
    277    2      0   001   0x8000000b77501ULL         ...
    278    3      0   7fe   0x800000000051bULL         ...
    279    4      0   012   0x3214569900000ULL         ...
    280    5      0   000   0x0000000000000ULL         +0.0 (+zero)
    281    6      1   000   0x0000000000000ULL         -0.0 (-zero)
    282    7      0   7ff   0x0000000000000ULL         +infinity
    283    8      1   7ff   0x0000000000000ULL         -infinity
    284    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
    285    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
    286    11     0   7ff   0x8000000000000ULL         +QNaN
    287    12     1   7ff   0x8000000000000ULL         -QNaN
    288    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
    289    14     1   40d   0x0650f5a07b353ULL         Negative finite number
    290    15     0   412   0x32585a9900000ULL         A couple more positive finite numbers
    291    16     0   413   0x82511a2000000ULL         ...
    292 */
    293 
    294    uint64_t mant;
    295    uint32_t mant_sp;
    296    uint16_t _exp;
    297    int s;
    298    int j, i = 0;
    299 
    300    if (spec_fargs)
    301       return;
    302 
    303    spec_fargs = malloc( 17 * sizeof(double) );
    304    spec_sp_fargs = malloc( 17 * sizeof(float) );
    305 
    306    // #0
    307    s = 0;
    308    _exp = 0x3fd;
    309    mant = 0x8000000000000ULL;
    310    register_farg(&spec_fargs[i++], s, _exp, mant);
    311 
    312    // #1
    313    s = 0;
    314    _exp = 0x404;
    315    mant = 0xf000000000000ULL;
    316    register_farg(&spec_fargs[i++], s, _exp, mant);
    317 
    318    /* None of the ftdiv tests succeed.
    319     * FRA = value #0; FRB = value #1
    320     * ea_ = -2; e_b = 5
    321     * fl_flag || fg_flag || fe_flag = 100
    322     */
    323 
    324    /*************************************************
    325     *     fe_flag tests
    326     *
    327     *************************************************/
    328 
    329    /* fe_flag <- 1 if FRA is a NaN
    330     * FRA = value #9; FRB = value #1
    331     * e_a = 1024; e_b = 5
    332     * fl_flag || fg_flag || fe_flag = 101
    333     */
    334 
    335    /* fe_flag <- 1 if FRB is a NaN
    336     * FRA = value #1; FRB = value #12
    337     * e_a = 5; e_b = 1024
    338     * fl_flag || fg_flag || fe_flag = 101
    339     */
    340 
    341    /* fe_flag <- 1 if e_b <= -1022
    342     * FRA = value #0; FRB = value #2
    343     * e_a = -2; e_b = -1022
    344     * fl_flag || fg_flag || fe_flag = 101
    345     *
    346     */
    347    // #2
    348    s = 0;
    349    _exp = 0x001;
    350    mant = 0x8000000b77501ULL;
    351    register_farg(&spec_fargs[i++], s, _exp, mant);
    352 
    353    /* fe_flag <- 1 if e_b >= 1021
    354     * FRA = value #1; FRB = value #3
    355     * e_a = 5; e_b = 1023
    356     * fl_flag || fg_flag || fe_flag = 101
    357     */
    358    // #3
    359    s = 0;
    360    _exp = 0x7fe;
    361    mant = 0x800000000051bULL;
    362    register_farg(&spec_fargs[i++], s, _exp, mant);
    363 
    364    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
    365     * Let FRA = value #3 and FRB be value #0.
    366     * e_a = 1023; e_b = -2
    367     * fl_flag || fg_flag || fe_flag = 101
    368     */
    369 
    370    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
    371     * Let FRA = value #0 above and FRB be value #3 above
    372     * e_a = -2; e_b = 1023
    373     * fl_flag || fg_flag || fe_flag = 101
    374     */
    375 
    376    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
    377     * Let FRA = value #4 and FRB be value #0
    378     * e_a = -1005; e_b = -2
    379     * fl_flag || fg_flag || fe_flag = 101
    380    */
    381    // #4
    382    s = 0;
    383    _exp = 0x012;
    384    mant = 0x3214569900000ULL;
    385    register_farg(&spec_fargs[i++], s, _exp, mant);
    386 
    387    /*************************************************
    388     *     fg_flag tests
    389     *
    390     *************************************************/
    391    /* fg_flag <- 1 if FRA is an Infinity
    392     * NOTE: FRA = Inf also sets fe_flag
    393     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
    394     * Test 1:
    395     *   Let FRA be value #7 and FRB be value #1
    396     *   e_a = 1024; e_b = 5
    397     *   fl_flag || fg_flag || fe_flag = 111
    398     *
    399     * Test 2:
    400     *   Let FRA be value #8 and FRB be value #1
    401     *   e_a = 1024; e_b = 5
    402     *   fl_flag || fg_flag || fe_flag = 111
    403     *
    404     */
    405 
    406    /* fg_flag <- 1 if FRB is an Infinity
    407     * NOTE: FRB = Inf also sets fe_flag
    408     * Let FRA be value #1 and FRB be value #7
    409     * e_a = 5; e_b = 1024
    410     * fl_flag || fg_flag || fe_flag = 111
    411     */
    412 
    413    /* fg_flag <- 1 if FRB is denormalized
    414     * NOTE: e_b < -1022 ==> fe_flag <- 1
    415     * Let FRA be value #0 and FRB be value #13
    416     * e_a = -2; e_b = -1023
    417     * fl_flag || fg_flag || fe_flag = 111
    418     */
    419 
    420    /* fg_flag <- 1 if FRB is +zero
    421     * NOTE: FRA = Inf also sets fe_flag
    422     * Let FRA = val #5; FRB = val #5
    423     * ea_ = -1023; e_b = -1023
    424     * fl_flag || fg_flag || fe_flag = 111
    425     */
    426 
    427    /* fg_flag <- 1 if FRB is -zero
    428     * NOTE: FRA = Inf also sets fe_flag
    429     * Let FRA = val #5; FRB = val #6
    430     * ea_ = -1023; e_b = -1023
    431     * fl_flag || fg_flag || fe_flag = 111
    432     */
    433 
    434    /* Special values */
    435    /* +0.0      : 0 0x000 0x0000000000000 */
    436    // #5
    437    s = 0;
    438    _exp = 0x000;
    439    mant = 0x0000000000000ULL;
    440    register_farg(&spec_fargs[i++], s, _exp, mant);
    441 
    442    /* -0.0      : 1 0x000 0x0000000000000 */
    443    // #6
    444    s = 1;
    445    _exp = 0x000;
    446    mant = 0x0000000000000ULL;
    447    register_farg(&spec_fargs[i++], s, _exp, mant);
    448 
    449    /* +infinity : 0 0x7FF 0x0000000000000  */
    450    // #7
    451    s = 0;
    452    _exp = 0x7FF;
    453    mant = 0x0000000000000ULL;
    454    register_farg(&spec_fargs[i++], s, _exp, mant);
    455 
    456    /* -infinity : 1 0x7FF 0x0000000000000 */
    457    // #8
    458    s = 1;
    459    _exp = 0x7FF;
    460    mant = 0x0000000000000ULL;
    461    register_farg(&spec_fargs[i++], s, _exp, mant);
    462 
    463    /*
    464     * This comment applies to values #9 and #10 below:
    465     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    466     * so we can't just copy the double-precision value to the corresponding slot in the
    467     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    468     * have to manually set the bits using register_sp_farg().
    469     */
    470 
    471    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
    472    // #9
    473    s = 0;
    474    _exp = 0x7FF;
    475    mant = 0x7FFFFFFFFFFFFULL;
    476    register_farg(&spec_fargs[i++], s, _exp, mant);
    477    _exp = 0xff;
    478    mant_sp = 0x3FFFFF;
    479    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    480 
    481    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
    482    // #10
    483    s = 1;
    484    _exp = 0x7FF;
    485    mant = 0x7FFFFFFFFFFFFULL;
    486    register_farg(&spec_fargs[i++], s, _exp, mant);
    487    _exp = 0xff;
    488    mant_sp = 0x3FFFFF;
    489    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
    490 
    491    /* +QNaN     : 0 0x7FF 0x8000000000000 */
    492    // #11
    493    s = 0;
    494    _exp = 0x7FF;
    495    mant = 0x8000000000000ULL;
    496    register_farg(&spec_fargs[i++], s, _exp, mant);
    497 
    498    /* -QNaN     : 1 0x7FF 0x8000000000000 */
    499    // #12
    500    s = 1;
    501    _exp = 0x7FF;
    502    mant = 0x8000000000000ULL;
    503    register_farg(&spec_fargs[i++], s, _exp, mant);
    504 
    505    /* denormalized value */
    506    // #13
    507    s = 1;
    508    _exp = 0x000;
    509    mant = 0x8340000078000ULL;
    510    register_farg(&spec_fargs[i++], s, _exp, mant);
    511 
    512    /* Negative finite number */
    513    // #14
    514    s = 1;
    515    _exp = 0x40d;
    516    mant = 0x0650f5a07b353ULL;
    517    register_farg(&spec_fargs[i++], s, _exp, mant);
    518 
    519    /* A couple positive finite numbers ... */
    520    // #15
    521    s = 0;
    522    _exp = 0x412;
    523    mant = 0x32585a9900000ULL;
    524    register_farg(&spec_fargs[i++], s, _exp, mant);
    525 
    526    // #16
    527    s = 0;
    528    _exp = 0x413;
    529    mant = 0x82511a2000000ULL;
    530    register_farg(&spec_fargs[i++], s, _exp, mant);
    531 
    532    nb_special_fargs = i;
    533    for (j = 0; j < i; j++) {
    534       if (!(j == 9 || j == 10))
    535          spec_sp_fargs[j] = spec_fargs[j];
    536    }
    537 }
    538 
    539 
    540 struct test_table
    541 {
    542    test_func_t test_category;
    543    char * name;
    544 };
    545 
    546 typedef enum {
    547    SINGLE_TEST,
    548    DOUBLE_TEST
    549 } precision_type_t;
    550 
    551 typedef enum {
    552    VX_SCALAR_FP_NMSUB = 0,
    553    // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE
    554    VX_VECTOR_FP_MULT_AND_OP2 = 10,
    555    // and before this line
    556    VX_BASIC_CMP = 30,
    557    VX_CONV_WORD,
    558    VX_DEFAULT
    559 } vx_fp_test_type;
    560 
    561 typedef struct vx_fp_test
    562 {
    563    test_func_t test_func;
    564    const char * name;
    565    fp_test_args_t * targs;
    566    int num_tests;
    567    precision_type_t precision;
    568    vx_fp_test_type type;
    569    const char * op;
    570 } vx_fp_test_t;
    571 
    572 static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC;
    573 
    574 static Bool do_dot;
    575 static void test_xvcmpeqdp(void)
    576 {
    577    if (do_dot)
    578       __asm__ __volatile__ ("xvcmpeqdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    579    else
    580       __asm__ __volatile__ ("xvcmpeqdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    581 }
    582 
    583 static void test_xvcmpgedp(void)
    584 {
    585    if (do_dot)
    586       __asm__ __volatile__ ("xvcmpgedp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    587    else
    588       __asm__ __volatile__ ("xvcmpgedp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    589 }
    590 
    591 static void test_xvcmpgtdp(void)
    592 {
    593    if (do_dot)
    594       __asm__ __volatile__ ("xvcmpgtdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    595    else
    596       __asm__ __volatile__ ("xvcmpgtdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    597 }
    598 
    599 static void test_xvcmpeqsp(void)
    600 {
    601    if (do_dot)
    602       __asm__ __volatile__ ("xvcmpeqsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    603    else
    604       __asm__ __volatile__ ("xvcmpeqsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    605 }
    606 
    607 static void test_xvcmpgesp(void)
    608 {
    609    if (do_dot)
    610       __asm__ __volatile__ ("xvcmpgesp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    611    else
    612       __asm__ __volatile__ ("xvcmpgesp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    613 }
    614 
    615 static void test_xvcmpgtsp(void)
    616 {
    617    if (do_dot)
    618       __asm__ __volatile__ ("xvcmpgtsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    619    else
    620       __asm__ __volatile__ ("xvcmpgtsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    621 }
    622 
    623 static Bool do_aXp;
    624 static Bool do_dp;
    625 static void test_xsnmsub(void)
    626 {
    627    if (do_aXp)
    628       __asm__ __volatile__ ("xsnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    629    else
    630       __asm__ __volatile__ ("xsnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    631 }
    632 
    633 static void test_xvmadd(void)
    634 {
    635    if (do_aXp)
    636       if (do_dp)
    637          __asm__ __volatile__ ("xvmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    638       else
    639          __asm__ __volatile__ ("xvmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    640    else
    641       if (do_dp)
    642          __asm__ __volatile__ ("xvmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    643       else
    644          __asm__ __volatile__ ("xvmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    645 }
    646 
    647 static void test_xvnmadd(void)
    648 {
    649    if (do_aXp)
    650       if (do_dp)
    651          __asm__ __volatile__ ("xvnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    652       else
    653          __asm__ __volatile__ ("xvnmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    654    else
    655       if (do_dp)
    656          __asm__ __volatile__ ("xvnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    657       else
    658          __asm__ __volatile__ ("xvnmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    659 }
    660 
    661 static void test_xvnmsub(void)
    662 {
    663    if (do_aXp)
    664       if (do_dp)
    665          __asm__ __volatile__ ("xvnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    666       else
    667          __asm__ __volatile__ ("xvnmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    668    else
    669       if (do_dp)
    670          __asm__ __volatile__ ("xvnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    671       else
    672          __asm__ __volatile__ ("xvnmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    673 }
    674 
    675 static void test_xvmsub(void)
    676 {
    677    if (do_aXp)
    678       if (do_dp)
    679          __asm__ __volatile__ ("xvmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    680       else
    681          __asm__ __volatile__ ("xvmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    682    else
    683       if (do_dp)
    684          __asm__ __volatile__ ("xvmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    685       else
    686          __asm__ __volatile__ ("xvmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    687 }
    688 
    689 static void test_xssqrtdp(void)
    690 {
    691    __asm__ __volatile__ ("xssqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    692 }
    693 
    694 static void test_xsrdpim(void)
    695 {
    696    __asm__ __volatile__ ("xsrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    697 }
    698 
    699 static void test_xsrdpip(void)
    700 {
    701    __asm__ __volatile__ ("xsrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    702 }
    703 
    704 static void test_xstdivdp(void)
    705 {
    706    __asm__ __volatile__ ("xstdivdp   6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
    707 }
    708 
    709 static void test_xsmaxdp(void)
    710 {
    711    __asm__ __volatile__ ("xsmaxdp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    712 }
    713 
    714 static void test_xsmindp(void)
    715 {
    716    __asm__ __volatile__ ("xsmindp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    717 }
    718 
    719 static void test_xvadddp(void)
    720 {
    721    __asm__ __volatile__ ("xvadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    722 }
    723 
    724 static void test_xvaddsp(void)
    725 {
    726    __asm__ __volatile__ ("xvaddsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    727 }
    728 
    729 static void test_xvdivdp(void)
    730 {
    731    __asm__ __volatile__ ("xvdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    732 }
    733 
    734 static void test_xvdivsp(void)
    735 {
    736    __asm__ __volatile__ ("xvdivsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    737 }
    738 
    739 static void test_xvmuldp(void)
    740 {
    741    __asm__ __volatile__ ("xvmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    742 }
    743 
    744 static void test_xvmulsp(void)
    745 {
    746    __asm__ __volatile__ ("xvmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    747 }
    748 
    749 static void test_xvsubdp(void)
    750 {
    751    __asm__ __volatile__ ("xvsubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    752 }
    753 
    754 static void test_xvmaxdp(void)
    755 {
    756    __asm__ __volatile__ ("xvmaxdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    757 }
    758 
    759 static void test_xvmindp(void)
    760 {
    761    __asm__ __volatile__ ("xvmindp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    762 }
    763 
    764 static void test_xvmaxsp(void)
    765 {
    766    __asm__ __volatile__ ("xvmaxsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    767 }
    768 
    769 static void test_xvminsp(void)
    770 {
    771    __asm__ __volatile__ ("xvminsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    772 }
    773 
    774 static void test_xvsubsp(void)
    775 {
    776    __asm__ __volatile__ ("xvsubsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
    777 }
    778 
    779 static void test_xvresp(void)
    780 {
    781    __asm__ __volatile__ ("xvresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
    782 }
    783 
    784 static void test_xxsel(void)
    785 {
    786    unsigned long long * dst;
    787    unsigned long long xa[] =  { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL};
    788    unsigned long long xb[] =  { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
    789    unsigned long long xc[] =  { 0xffffffff01020304ULL, 0x128934bd00000000ULL};
    790 
    791    memcpy(&vec_inA, xa, 16);
    792    memcpy(&vec_inB, xb, 16);
    793    memcpy(&vec_inC, xc, 16);
    794 
    795 
    796    __asm__ __volatile__ ("xxsel   %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC));
    797    dst = (unsigned long long *) &vec_out;
    798    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst);
    799    dst++;
    800    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst);
    801    printf("\n");
    802 }
    803 
    804 static void test_xxspltw(void)
    805 {
    806    int uim;
    807    unsigned long long * dst = NULL;
    808    unsigned int xb[] =  { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708};
    809    int i;
    810    void * vecB_ptr = &vec_inB;
    811    if (isLE) {
    812       for (i = 3; i >=0; i--) {
    813          memcpy(vecB_ptr, &xb[i], 4);
    814          vecB_ptr+=4;
    815       }
    816    } else {
    817       for (i = 0; i < 4; i++) {
    818          memcpy(vecB_ptr, &xb[i], 4);
    819          vecB_ptr+=4;
    820       }
    821    }
    822 
    823    for (uim = 0; uim < 4; uim++) {
    824       switch (uim) {
    825          case 0:
    826             __asm__ __volatile__ ("xxspltw   %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB));
    827             break;
    828          case 1:
    829             __asm__ __volatile__ ("xxspltw   %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB));
    830             break;
    831          case 2:
    832             __asm__ __volatile__ ("xxspltw   %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB));
    833             break;
    834          case 3:
    835             __asm__ __volatile__ ("xxspltw   %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB));
    836             break;
    837       }
    838       dst = (unsigned long long *) &vec_out;
    839       printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx",  xb[0], xb[1],
    840              xb[2], xb[3], uim, *dst);
    841       dst++;
    842       printf("%016llx\n", *dst);
    843    }
    844    printf("\n");
    845 }
    846 
/* Execute xscvdpsxws on vec_inB and leave the result in vec_out.  Registered
 * in vx_simple_scalar_fp_tests as a double-precision VX_CONV_WORD test.
 */
static void test_xscvdpsxws(void)
{
   __asm__ __volatile__ ("xscvdpsxws  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
    851 
/* Execute xscvdpuxds on vec_inB and leave the result in vec_out.  Registered
 * in vx_simple_scalar_fp_tests as a double-precision test.
 */
static void test_xscvdpuxds(void)
{
   __asm__ __volatile__ ("xscvdpuxds  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
    856 
/* Execute xvcpsgndp with vec_inA and vec_inB as the source operands and leave
 * the result in vec_out.  Registered in vx_vector_fp_tests as a
 * double-precision test with the "+-cp" operator label.
 */
static void test_xvcpsgndp(void)
{
   __asm__ __volatile__  ("xvcpsgndp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
    861 
/* Execute xvcpsgnsp with vec_inA and vec_inB as the source operands and leave
 * the result in vec_out.  Registered in vx_vector_fp_tests as a
 * single-precision test with the "+-cp" operator label.
 */
static void test_xvcpsgnsp(void)
{
   __asm__ __volatile__  ("xvcpsgnsp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}
    866 
/* Execute xvcvdpsxws on vec_inB and leave the result in vec_out.  Registered
 * in vx_vector_one_fp_arg_tests as a double-precision VX_CONV_WORD test.
 */
static void test_xvcvdpsxws(void)
{
   __asm__ __volatile__ ("xvcvdpsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
}
    871 
/* Execute xvcvspsxws on vec_inB and leave the result in vec_out.  Registered
 * in vx_vector_one_fp_arg_tests as a single-precision VX_CONV_WORD test.
 */
static void test_xvcvspsxws(void)
{
   __asm__ __volatile__ ("xvcvspsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
}
    876 
/* Vector instructions that take a single FP source operand; consumed by
 * test_vx_vector_one_fp_arg().  Each entry appears to hold, in order: test
 * function, instruction name, argument table (NULL here), number of input
 * values, precision, test type and the operator label printed with results
 * -- NOTE(review): field order is inferred from usage; confirm against the
 * vx_fp_test_t definition.  The list ends at the entry whose test function
 * is NULL.
 */
static vx_fp_test_t
vx_vector_one_fp_arg_tests[] = {
                                { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"},
                                { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"},
                                { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"},
                                { NULL, NULL, NULL, 0 , 0, 0, NULL}
};
    884 
/* Two-operand vector FP instructions; consumed by test_vx_vector_fp_ops().
 * Compare instructions use fp_cmp_tests and the VX_BASIC_CMP type; the
 * arithmetic, min/max and copy-sign instructions use two_arg_fp_tests and
 * VX_DEFAULT.  The last string is the operator label printed between the two
 * inputs.  The list ends at the entry whose test function is NULL.
 */
static vx_fp_test_t
vx_vector_fp_tests[] = {
                        { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"},
                        { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"},
                        { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"},
                        { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"},
                        { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"},
                        { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"},
                        { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" },
                        { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" },
                        { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" },
                        { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" },
                        { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" },
                        { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" },
                        { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" },
                        { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" },
                        { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" },
                        { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" },
                        { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" },
                        { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" },
                        { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"},
                        { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"},
                        { NULL, NULL, NULL, 0 , 0, 0, NULL}
};
    909 
    910 
/* Multiply-add/subtract style instructions; consumed by
 * test_vx_aORm_fp_ops().  The names are stems: the test driver and
 * print_vx_aORm_fp_result() append "adp"/"asp"/"mdp"/"msp" when printing.
 * The vector stems appear twice, once per precision.  The list ends at the
 * entry whose test function is NULL.
 */
static vx_fp_test_t
vx_aORm_fp_tests[] = {
                       { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"},
                       { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
                       { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
                       { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
                       { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
                       { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
                       { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
                       { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
                       { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
                       { NULL, NULL, NULL, 0, 0, 0,  NULL }
};
    924 
/* Scalar double-precision instructions; consumed by
 * test_vx_simple_scalar_fp_ops().  Entries with a NULL argument table are
 * treated there as one-operand instructions when their test count equals
 * nb_special_fargs; the others take their operand pairs from
 * two_arg_fp_tests.  The list ends at the entry whose test function is NULL.
 */
static vx_fp_test_t
vx_simple_scalar_fp_tests[] = {
                               { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL},
                               { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
                               { NULL, NULL, NULL, 0, 0, 0, NULL }
};
    937 
    938 
#ifdef __powerpc64__
/* Run bpermd once on a fixed pair of 64-bit operands held in r14 and r15 and
 * print both inputs and the result left in r17.  Only built for 64-bit
 * targets.
 */
static void test_bpermd(void)
{
   /* NOTE: Bit number is '0 . . . 63'
    *
    * Permuted bits 0 - 7 are generated as follows:
    *    index = (r14)8*i:8*i+7
    *    perm[i] = (r15)index
    *
    * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB
    * byte of r14, 0x1b(27/base 10).  This identifies bit 27 of r15, which is '1'.
    * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
    * So the result of the first two iterations of i are:
    *   perm = 0b11xxxxxx
    *
    */
   r15 = 0xa12bc37de56f9708ULL;
   r14 = 0x1b2c31f030000001ULL;
   __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14,
          (unsigned long long)r15, (unsigned long long)r17);
   printf("\n");
}
#endif
    963 
/* When set, the divide tests run the "o" (OE) form of the instruction. */
static Bool do_OE;
/* Bit flags OR-ed together by the divide tests to pick an instruction form. */
typedef enum {
   DIV_BASE = 1,   /* always present */
   DIV_OE = 2,     /* added when do_OE is set   -> "...o" mnemonic */
   DIV_DOT = 4,    /* added when do_dot is set  -> "...." (dot) mnemonic */
} div_type_t;
/* Possible divde type combinations are:
 *   - base
 *   - base+dot
 *   - base+OE
 *   - base+OE+dot
 */
    976 #ifdef __powerpc64__
    977 static void test_divde(void)
    978 {
    979    int divde_type = DIV_BASE;
    980    if (do_OE)
    981       divde_type |= DIV_OE;
    982    if (do_dot)
    983       divde_type |= DIV_DOT;
    984 
    985    switch (divde_type) {
    986       case 1:
    987         SET_CR_XER_ZERO;
    988          __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    989          GET_CR_XER(div_flags, div_xer);
    990          break;
    991       case 3:
    992         SET_CR_XER_ZERO;
    993          __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    994          GET_CR_XER(div_flags, div_xer);
    995          break;
    996       case 5:
    997         SET_CR_XER_ZERO;
    998          __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
    999          GET_CR_XER(div_flags, div_xer);
   1000          break;
   1001       case 7:
   1002         SET_CR_XER_ZERO;
   1003          __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1004          GET_CR_XER(div_flags, div_xer);
   1005          break;
   1006       default:
   1007          fprintf(stderr, "Invalid divde type. Exiting\n");
   1008          exit(1);
   1009    }
   1010 }
   1011 #endif
   1012 
   1013 static void test_divweu(void)
   1014 {
   1015    int divweu_type = DIV_BASE;
   1016    if (do_OE)
   1017       divweu_type |= DIV_OE;
   1018    if (do_dot)
   1019       divweu_type |= DIV_DOT;
   1020 
   1021    switch (divweu_type) {
   1022       case 1:
   1023         SET_CR_XER_ZERO;
   1024          __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1025          GET_CR_XER(div_flags, div_xer);
   1026          break;
   1027       case 3:
   1028         SET_CR_XER_ZERO;
   1029          __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1030          GET_CR_XER(div_flags, div_xer);
   1031          break;
   1032       case 5:
   1033         SET_CR_XER_ZERO;
   1034          __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1035          GET_CR_XER(div_flags, div_xer);
   1036          break;
   1037       case 7:
   1038         SET_CR_XER_ZERO;
   1039          __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
   1040          GET_CR_XER(div_flags, div_xer);
   1041          break;
   1042       default:
   1043          fprintf(stderr, "Invalid divweu type. Exiting\n");
   1044          exit(1);
   1045    }
   1046 }
   1047 
   1048 static void test_fctiduz(void)
   1049 {
   1050    if (do_dot)
   1051       __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14));
   1052    else
   1053       __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14));
   1054 }
   1055 
   1056 static void test_fctidu(void)
   1057 {
   1058    if (do_dot)
   1059       __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14));
   1060    else
   1061       __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14));
   1062 }
   1063 
   1064 static void test_fctiwuz(void)
   1065 {
   1066    if (do_dot)
   1067       __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14));
   1068    else
   1069       __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14));
   1070 }
   1071 
   1072 static void test_fctiwu(void)
   1073 {
   1074    if (do_dot)
   1075       __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14));
   1076    else
   1077       __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14));
   1078 }
   1079 
/* Minimal test descriptor: a test routine, its printable name and the
 * precision it is tagged with.  Used for the fct_tests table.
 */
typedef struct simple_test {
   test_func_t test_func;        /* routine that executes the instruction */
   char * name;                  /* instruction mnemonic */
   precision_type_t precision;   /* SINGLE_TEST or DOUBLE_TEST in fct_tests */
} simple_test_t;
   1085 
   1086 static simple_test_t fct_tests[] = {
   1087                                     { &test_fctiduz, "fctiduz", DOUBLE_TEST },
   1088                                     { &test_fctidu, "fctidu", DOUBLE_TEST },
   1089                                     { &test_fctiwuz, "fctiwuz", SINGLE_TEST },
   1090                                     { &test_fctiwu, "fctiwu", SINGLE_TEST },
   1091                                    { NULL, NULL }
   1092 };
   1093 
   1094 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
   1095 {
   1096    int a_idx, b_idx, i;
   1097    void * inA, * inB;
   1098    void * vec_src = swap_inputs ? &vec_out : &vec_inB;
   1099 
   1100    for (i = 0; i < 4; i++) {
   1101       a_idx = targs->fra_idx;
   1102       b_idx = targs->frb_idx;
   1103       inA = (void *)&spec_sp_fargs[a_idx];
   1104       inB = (void *)&spec_sp_fargs[b_idx];
   1105       // copy single precision FP  into vector element i
   1106       memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
   1107       memcpy(vec_src + (i * 4), inB, 4);
   1108       targs++;
   1109    }
   1110 }
   1111 
   1112 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
   1113 {
   1114    int a_idx, b_idx, i;
   1115    void * inA, * inB;
   1116    void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
   1117 
   1118    for (i = 0; i < 2; i++) {
   1119       a_idx = targs->fra_idx;
   1120       b_idx = targs->frb_idx;
   1121       inA = (void *)&spec_fargs[a_idx];
   1122       inB = (void *)&spec_fargs[b_idx];
   1123       // copy double precision FP  into vector element i
   1124       memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
   1125       memcpy(vec_src + (i * 8), inB, 8);
   1126       targs++;
   1127    }
   1128 }
   1129 
   1130 #define VX_NOT_CMP_OP 0xffffffff
   1131 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i)
   1132 {
   1133    int a_idx, b_idx, k;
   1134    char * name = malloc(20);
   1135    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   1136    int loops = dp ? 2 : 4;
   1137    fp_test_args_t * targs = &test_group->targs[i];
   1138    unsigned long long * frA_dp, * frB_dp, * dst_dp;
   1139    unsigned int * frA_sp, *frB_sp, * dst_sp;
   1140    strcpy(name, test_group->name);
   1141    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
   1142    for (k = 0; k < loops; k++) {
   1143       a_idx = targs->fra_idx;
   1144       b_idx = targs->frb_idx;
   1145       if (k)
   1146          printf(" AND ");
   1147       if (dp) {
   1148          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
   1149          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
   1150          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
   1151       } else {
   1152          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
   1153          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
   1154          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
   1155       }
   1156       targs++;
   1157    }
   1158    if (cc != VX_NOT_CMP_OP)
   1159       printf(" ? cc=%x", cc);
   1160 
   1161    if (dp) {
   1162       dst_dp = (unsigned long long *) &vec_out;
   1163       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
   1164    } else {
   1165       dst_sp = (unsigned int *) &vec_out;
   1166       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
   1167    }
   1168    free(name);
   1169 }
   1170 
   1171 
   1172 static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg,
   1173                                     vx_fp_test_t * test_group, int i)
   1174 {
   1175    int a_idx, k;
   1176    char * name = malloc(20);
   1177    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   1178    int loops = dp ? 2 : 4;
   1179    fp_test_args_t * targs = &test_group->targs[i];
   1180    unsigned long long frA_dp, * dst_dp;
   1181    unsigned int frA_sp, * dst_sp;
   1182 
   1183    strcpy(name, test_group->name);
   1184    if (do_aXp)
   1185       if (dp)
   1186          strcat(name, "adp");
   1187       else
   1188          strcat(name, "asp");
   1189    else
   1190       if (dp)
   1191          strcat(name, "mdp");
   1192       else
   1193          strcat(name, "msp");
   1194 
   1195    printf("#%d: %s ", dp? i/2 : i/4, name);
   1196    for (k = 0; k < loops; k++) {
   1197       a_idx = targs->fra_idx;
   1198       if (k)
   1199          printf(" AND ");
   1200       if (dp) {
   1201          frA_dp = *((unsigned long long *)&spec_fargs[a_idx]);
   1202          printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]);
   1203       } else {
   1204          unsigned int * xt_sp = (unsigned int *)XT_arg;
   1205          unsigned int * xb_sp = (unsigned int *)XB_arg;
   1206          frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]);
   1207          printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]);
   1208       }
   1209       targs++;
   1210    }
   1211 
   1212    if (dp) {
   1213       dst_dp = (unsigned long long *) &vec_out;
   1214       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
   1215    } else {
   1216       dst_sp = (unsigned int *) &vec_out;
   1217       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
   1218    }
   1219    free(name);
   1220 }
   1221 
   1222 /* This function currently only supports double precision input arguments. */
   1223 static void test_vx_simple_scalar_fp_ops(void)
   1224 {
   1225    test_func_t func;
   1226    int k = 0;
   1227 
   1228    build_special_fargs_table();
   1229    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
   1230       unsigned long long * frap, * frbp, * dst;
   1231       unsigned int * pv;
   1232       int idx;
   1233       vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k];
   1234       Bool convToWord = (test_group.type == VX_CONV_WORD);
   1235       if (test_group.precision != DOUBLE_TEST) {
   1236          fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
   1237          exit(1);
   1238       }
   1239       pv = (unsigned int *)&vec_out;
   1240       // clear vec_out
   1241       for (idx = 0; idx < 4; idx++, pv++)
   1242          *pv = 0;
   1243 
   1244       /* If num_tests is exactly equal to nb_special_fargs, this implies the
   1245        * instruction being tested only requires one floating point argument
   1246        * (e.g. xssqrtdp).
   1247        */
   1248       if (test_group.num_tests == nb_special_fargs && !test_group.targs) {
   1249          void * inB, * vec_void_ptr = (void *)&vec_inB;
   1250          int i;
   1251          if (isLE)
   1252             vec_void_ptr += 8;
   1253          for (i = 0; i < nb_special_fargs; i++) {
   1254             inB = (void *)&spec_fargs[i];
   1255             frbp = (unsigned long long *)&spec_fargs[i];
   1256             memcpy(vec_void_ptr, inB, 8);
   1257             (*func)();
   1258             dst = (unsigned long long *) &vec_out;
   1259             if (isLE)
   1260                dst++;
   1261             printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp,
   1262                    convToWord ? (*dst & 0x00000000ffffffffULL) : *dst);
   1263          }
   1264       } else {
   1265          void * inA, * inB, * vecA_void_ptr, * vecB_void_ptr;
   1266          unsigned int condreg, flags;
   1267          int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0;
   1268          int i;
   1269          if (isLE) {
   1270             vecA_void_ptr = (void *)&vec_inA + 8;
   1271             vecB_void_ptr = (void *)&vec_inB + 8;
   1272          } else {
   1273             vecA_void_ptr = (void *)&vec_inA;
   1274             vecB_void_ptr = (void *)&vec_inB;
   1275          }
   1276          for (i = 0; i < test_group.num_tests; i++) {
   1277             fp_test_args_t aTest = test_group.targs[i];
   1278             inA = (void *)&spec_fargs[aTest.fra_idx];
   1279             inB = (void *)&spec_fargs[aTest.frb_idx];
   1280             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1281             frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1282             // Only need to copy one doubleword into each vector's element 0
   1283             memcpy(vecA_void_ptr, inA, 8);
   1284             memcpy(vecB_void_ptr, inB, 8);
   1285             SET_FPSCR_ZERO;
   1286             SET_CR_XER_ZERO;
   1287             (*func)();
   1288             GET_CR(flags);
   1289             if (isTdiv) {
   1290                condreg = (flags & 0x000000f0) >> 4;
   1291                printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg);
   1292             } else {
   1293                dst = (unsigned long long *) &vec_out;
   1294                if (isLE)
   1295                   dst++;
   1296                printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
   1297                       *frap, *frbp, *dst);
   1298             }
   1299          }
   1300       }
   1301       printf( "\n" );
   1302       k++;
   1303    }
   1304 }
   1305 
   1306 static void test_vx_aORm_fp_ops(void)
   1307 {
   1308    /* These ops need a third src argument, which is stored in element 0 of
   1309     * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
   1310     * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
   1311     * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
   1312     * data (input args, result) contain only two inputs, so I arbitrarily
   1313     * choose some spec_fargs elements for the third source argument.
   1314     * Note that that by using the same input data for a given pair of
   1315     * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
   1316     * arguments), the expected result should be the same.
   1317     */
   1318 
   1319    test_func_t func;
   1320    int k;
   1321    char * test_name = (char *)malloc(20);
   1322    k = 0;
   1323    do_dot = False;
   1324 
   1325    build_special_fargs_table();
   1326    while ((func = vx_aORm_fp_tests[k].test_func)) {
   1327       int i, stride;
   1328       Bool repeat = False;
   1329       Bool scalar = False;
   1330       unsigned long long * frap, * frbp, * dst;
   1331       vx_fp_test_t test_group = vx_aORm_fp_tests[k];
   1332       vx_fp_test_type test_type = test_group.type;
   1333       do_dp = test_group.precision == DOUBLE_TEST ? True : False;
   1334       frap = frbp = NULL;
   1335 
   1336       if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
   1337             scalar = True;
   1338             strcpy(test_name, test_group.name);
   1339             if (!repeat) {
   1340                repeat = 1;
   1341                stride = 1;
   1342                // Only support double precision scalar ops in this function
   1343                if (do_dp) {
   1344                   strcat(test_name, "adp");
   1345                } else {
   1346                   fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
   1347                   exit(1);
   1348                }
   1349                do_aXp = True;
   1350             }
   1351       } else if (test_type < VX_BASIC_CMP) {
   1352          // Then it must be a VX_VECTOR_xxx type
   1353             stride = do_dp ? 2 : 4;
   1354             if (!repeat) {
   1355                // No need to work up the testcase name here, since that will be done in
   1356                // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
   1357                repeat = 1;
   1358                do_aXp = True;
   1359             }
   1360       } else {
   1361             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
   1362             exit(1);
   1363       }
   1364 
   1365 again:
   1366       for (i = 0; i < test_group.num_tests; i+=stride) {
   1367          void  * inA, * inB;
   1368          int m, fp_idx[4];
   1369          unsigned long long vsr_XT[2];
   1370          unsigned long long vsr_XB[2];
   1371          fp_test_args_t aTest = test_group.targs[i];
   1372          for (m = 0; m < stride; m++)
   1373             fp_idx[m] = i % (nb_special_fargs - stride) + m;
   1374 
   1375          /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
   1376           * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
   1377           * inputs as described above:
   1378           *    src2 <= VSX[XT]
   1379           *    src3 <= VSX[XB]
   1380           */
   1381          if (scalar) {
   1382 #ifdef VGP_ppc64le_linux
   1383 #define VECTOR_ADDR(_v) ((void *)&_v) + 8
   1384 #else
   1385 #define VECTOR_ADDR(_v) ((void *)&_v)
   1386 #endif
   1387             // For scalar op, only need to copy one doubleword into each vector's element 0
   1388             inA = (void *)&spec_fargs[aTest.fra_idx];
   1389             inB = (void *)&spec_fargs[aTest.frb_idx];
   1390             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
   1391             memcpy(VECTOR_ADDR(vec_inA), inA, 8);
   1392             if (repeat) {
   1393                memcpy(VECTOR_ADDR(vec_out), inB, 8);  // src2
   1394                memcpy(VECTOR_ADDR(vec_inB), &spec_fargs[fp_idx[0]], 8);  //src3
   1395                frbp = (unsigned long long *)&spec_fargs[fp_idx[0]];
   1396             } else {
   1397                frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
   1398                memcpy(VECTOR_ADDR(vec_inB), inB, 8);  // src2
   1399                memcpy(VECTOR_ADDR(vec_out), &spec_fargs[fp_idx[0]], 8);  //src3
   1400             }
   1401             memcpy(vsr_XT, VECTOR_ADDR(vec_out), 8);
   1402          } else {
   1403             int j, loops = do_dp ? 2 : 4;
   1404             size_t len = do_dp ? 8 : 4;
   1405             void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out;
   1406             for (j = 0; j < loops; j++) {
   1407                if (do_dp)
   1408                   memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len);
   1409                else
   1410                   memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len);
   1411             }
   1412             if (do_dp)
   1413                setup_dp_fp_args(&test_group.targs[i], repeat);
   1414             else
   1415                setup_sp_fp_args(&test_group.targs[i], repeat);
   1416 
   1417             memcpy(vsr_XT, &vec_out, 16);
   1418             memcpy(vsr_XB, &vec_inB, 16);
   1419          }
   1420 
   1421          (*func)();
   1422          dst = (unsigned long long *) &vec_out;
   1423          if (isLE)
   1424             dst++;
   1425          if (test_type < VX_VECTOR_FP_MULT_AND_OP2)
   1426             printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i,
   1427                     test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst );
   1428          else
   1429             print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i);
   1430       }
   1431       printf( "\n" );
   1432 
   1433       if (repeat) {
   1434          repeat = 0;
   1435          if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
   1436                strcpy(test_name, test_group.name);
   1437                strcat(test_name, "mdp");
   1438          }
   1439          do_aXp = False;
   1440          goto again;
   1441       }
   1442       k++;
   1443    }
   1444    printf( "\n" );
   1445    free(test_name);
   1446 }
   1447 
   1448 static void test_vx_vector_one_fp_arg(void)
   1449 {
   1450    test_func_t func;
   1451    int k;
   1452    k = 0;
   1453    build_special_fargs_table();
   1454 
   1455    while ((func = vx_vector_one_fp_arg_tests[k].test_func)) {
   1456       int idx, i;
   1457       vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k];
   1458       Bool convToWord = (test_group.type == VX_CONV_WORD);
   1459       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
   1460       Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False;
   1461       int stride = dp ? 2 : 4;
   1462 
   1463       for (i = 0; i < test_group.num_tests; i+=stride) {
   1464          unsigned int * pv;
   1465          void * inB;
   1466 
   1467          pv = (unsigned int *)&vec_out;
   1468          // clear vec_out
   1469          for (idx = 0; idx < 4; idx++, pv++)
   1470             *pv = 0;
   1471 
   1472          if (dp) {
   1473             int j;
   1474             unsigned long long * frB_dp, *dst_dp;
   1475             for (j = 0; j < 2; j++) {
   1476                inB = (void *)&spec_fargs[i + j];
   1477                // copy double precision FP into vector element i
   1478                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
   1479             }
   1480             // execute test insn
   1481             (*func)();
   1482             dst_dp = (unsigned long long *) &vec_out;
   1483             printf("#%d: %s ", i/2, test_group.name);
   1484             for (j = 0; j < 2; j++) {
   1485                if (j)
   1486                   printf("; ");
   1487                frB_dp = (unsigned long long *)&spec_fargs[i + j];
   1488                printf("%s(%016llx)", test_group.op, *frB_dp);
   1489                printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]);
   1490             }
   1491             printf("\n");
   1492          } else {
   1493             int j;
   1494             unsigned int * frB_sp, * dst_sp;
   1495 
   1496             for (j = 0; j < 4; j++) {
   1497                inB = (void *)&spec_sp_fargs[i + j];
   1498                // copy single precision FP into vector element i
   1499                memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
   1500             }
   1501             // execute test insn
   1502             (*func)();
   1503             dst_sp = (unsigned int *) &vec_out;
   1504             // print result
   1505             printf("#%d: %s ", i/4, test_group.name);
   1506             for (j = 0; j < 4; j++) {
   1507                if (j)
   1508                   printf("; ");
   1509                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
   1510                printf("%s(%08x)", test_group.op, *frB_sp);
   1511                if (xvrespTest) {
   1512                   float calc_diff = fabs(spec_sp_fargs[i + j]/256);
   1513                   float sp_res;
   1514                   memcpy(&sp_res, &dst_sp[j], 4);
   1515                   float div_result = 1/spec_sp_fargs[i + j];
   1516                   float real_diff = fabs(sp_res - div_result);
   1517                   printf( " ==> %s",
   1518                           ( ( sp_res == div_result )
   1519                                    || ( isnan(sp_res) && isnan(div_result) )
   1520                                    || ( real_diff <= calc_diff ) ) ? "PASS"
   1521                                                                      : "FAIL");
   1522                } else {
   1523                   printf(" = %08x", dst_sp[j]);
   1524                }
   1525             }
   1526             printf("\n");
   1527          }
   1528       }
   1529       k++;
   1530       printf( "\n" );
   1531    }
   1532 
   1533 }
   1534 
   1535 /* This function assumes the instruction being tested requires two args. */
   1536 static void test_vx_vector_fp_ops(void)
   1537 {
   1538    test_func_t func;
   1539    int k;
   1540    k = 0;
   1541    build_special_fargs_table();
   1542 
   1543    while ((func = vx_vector_fp_tests[k].test_func)) {
   1544       int idx, i, repeat = 1;
   1545       vx_fp_test_t test_group = vx_vector_fp_tests[k];
   1546       int stride = test_group.precision == DOUBLE_TEST ? 2 : 4;
   1547       do_dot = False;
   1548 
   1549 again:
   1550       for (i = 0; i < test_group.num_tests; i+=stride) {
   1551          unsigned int * pv, condreg;
   1552          unsigned int flags;
   1553 
   1554          pv = (unsigned int *)&vec_out;
   1555          if (test_group.precision == DOUBLE_TEST)
   1556             setup_dp_fp_args(&test_group.targs[i], False);
   1557          else
   1558             setup_sp_fp_args(&test_group.targs[i], False);
   1559 
   1560          // clear vec_out
   1561          for (idx = 0; idx < 4; idx++, pv++)
   1562             *pv = 0;
   1563 
   1564          // execute test insn
   1565          SET_FPSCR_ZERO;
   1566          SET_CR_XER_ZERO;
   1567          (*func)();
   1568          GET_CR(flags);
   1569          if (test_group.type == VX_BASIC_CMP) {
   1570             condreg = (flags & 0x000000f0) >> 4;
   1571          } else {
   1572             condreg = VX_NOT_CMP_OP;
   1573          }
   1574          print_vector_fp_result(condreg, &test_group, i);
   1575       }
   1576       printf("\n");
   1577       if (repeat && test_group.type == VX_BASIC_CMP) {
   1578          repeat = 0;
   1579          do_dot = True;
   1580          goto again;
   1581       }
   1582       k++;
   1583       printf( "\n" );
   1584    }
   1585 }
   1586 
   1587 
   1588 // The div doubleword test data
   1589 signed long long div_dw_tdata[13][2] = {
   1590                                        { 4, -4 },
   1591                                        { 4, -3 },
   1592                                        { 4, 4 },
   1593                                        { 4, -5 },
   1594                                        { 3, 8 },
   1595                                        { 0x8000000000000000ULL, 0xa },
   1596                                        { 0x50c, -1 },
   1597                                        { 0x50c, -4096 },
   1598                                        { 0x1234fedc, 0x8000a873 },
   1599                                        { 0xabcd87651234fedcULL, 0xa123b893 },
   1600                                        { 0x123456789abdcULL, 0 },
   1601                                        { 0, 2 },
   1602                                        { 0x77, 0xa3499 }
   1603 };
   1604 #define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2)
   1605 
   1606 // The div word test data
   1607 unsigned int div_w_tdata[6][2] = {
   1608                               { 0, 2 },
   1609                               { 2, 0 },
   1610                               { 0x7abc1234, 0xf0000000 },
   1611                               { 0xfabc1234, 5 },
   1612                               { 77, 66 },
   1613                               { 5, 0xfabc1234 },
   1614 };
   1615 #define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2)
   1616 
/*
 * One entry of the div_tests table consumed by test_div_extensions.
 * Field order matters: the table uses positional initializers.
 */
typedef struct div_ext_test
{
   test_func_t test_func;       /* routine that executes the instruction; NULL ends the table */
   const char *name;            /* mnemonic printed in front of each result line */
   int num_tests;               /* number of operand pairs to run */
   div_type_t div_type;         /* DIV_OE makes the driver set do_OE; otherwise it is cleared */
   precision_type_t precision;  /* DOUBLE_TEST selects div_dw_tdata, anything else div_w_tdata */
} div_ext_test_t;
   1625 
   1626 static div_ext_test_t div_tests[] = {
   1627 #ifdef __powerpc64__
   1628                                    { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
   1629                                    { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
   1630 #endif
   1631                                    { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST },
   1632                                    { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST },
   1633                                    { NULL, NULL, 0, 0, 0 }
   1634 };
   1635 
   1636 static void test_div_extensions(void)
   1637 {
   1638    test_func_t func;
   1639    int k;
   1640    k = 0;
   1641 
   1642    while ((func = div_tests[k].test_func)) {
   1643       int i, repeat = 1;
   1644       div_ext_test_t test_group = div_tests[k];
   1645       do_dot = False;
   1646 
   1647 again:
   1648       for (i = 0; i < test_group.num_tests; i++) {
   1649          unsigned int condreg;
   1650 
   1651          if (test_group.div_type == DIV_OE)
   1652             do_OE = True;
   1653          else
   1654             do_OE = False;
   1655 
   1656          if (test_group.precision == DOUBLE_TEST) {
   1657             r14 = div_dw_tdata[i][0];
   1658             r15 = div_dw_tdata[i][1];
   1659          } else {
   1660             r14 = div_w_tdata[i][0];
   1661             r15 = div_w_tdata[i][1];
   1662          }
   1663          // execute test insn
   1664          (*func)();
   1665          condreg = (div_flags & 0xf0000000) >> 28;
   1666          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
   1667          if (test_group.precision == DOUBLE_TEST) {
   1668             printf("0x%016llx / 0x%016llx = 0x%016llx;",
   1669                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
   1670          } else {
   1671             printf("0x%08x / 0x%08x = 0x%08x;",
   1672                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
   1673          }
   1674          printf(" CR=%x; XER=%x\n", condreg, div_xer);
   1675       }
   1676       printf("\n");
   1677       if (repeat) {
   1678          repeat = 0;
   1679          do_dot = True;
   1680          goto again;
   1681       }
   1682       k++;
   1683       printf( "\n" );
   1684    }
   1685 
   1686 }
   1687 
   1688 static void test_fct_ops(void)
   1689 {
   1690    test_func_t func;
   1691    int k;
   1692    k = 0;
   1693 
   1694    while ((func = fct_tests[k].test_func)) {
   1695       int i, repeat = 1;
   1696       simple_test_t test_group = fct_tests[k];
   1697       do_dot = False;
   1698 
   1699 again:
   1700       for (i = 0; i < nb_special_fargs; i++) {
   1701          double result;
   1702 #define SINGLE_MASK 0x00000000FFFFFFFFULL
   1703 
   1704          f14 = spec_fargs[i];
   1705          // execute test insn
   1706          SET_FPSCR_ZERO;
   1707          (*func)();
   1708          result = f17;
   1709          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
   1710          printf("0x%016llx (%e) ==> 0x%016llx\n",
   1711                 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i],
   1712                 test_group.precision == SINGLE_TEST ? (SINGLE_MASK &
   1713                          *((unsigned long long *)(&result))) :
   1714                          *((unsigned long long *)(&result)));
   1715       }
   1716       printf("\n");
   1717       if (repeat) {
   1718          repeat = 0;
   1719          do_dot = True;
   1720          goto again;
   1721       }
   1722       k++;
   1723       printf( "\n" );
   1724    }
   1725 }
   1726 
   1727 #ifdef __powerpc64__
   1728 void test_stdbrx(void)
   1729 {
   1730    unsigned long long store, val = 0xdeadbacf12345678ULL;
   1731    printf("stdbrx: 0x%llx ==> ", val);
   1732    r17 = (HWord_t)val;
   1733    r14 = (HWord_t)&store;
   1734    __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14));
   1735    printf("0x%llx\n", store);
   1736    printf( "\n" );
   1737 }
   1738 #endif
   1739 
   1740 static test_table_t
   1741          all_tests[] =
   1742 {
   1743                     { &test_vx_vector_one_fp_arg,
   1744                       "Test VSX vector single arg instructions"},
   1745                     { &test_vx_vector_fp_ops,
   1746                       "Test VSX floating point compare and basic arithmetic instructions" },
   1747 #ifdef __powerpc64__
   1748                      { &test_bpermd,
   1749                        "Test bit permute double"},
   1750 #endif
   1751                      { &test_xxsel,
   1752                          "Test xxsel instruction" },
   1753                      { &test_xxspltw,
   1754                          "Test xxspltw instruction" },
   1755                      { &test_div_extensions,
   1756                        "Test div extensions" },
   1757                      { &test_fct_ops,
   1758                        "Test floating point convert [word | doubleword] unsigned, with round toward zero" },
   1759 #ifdef __powerpc64__
   1760                      { &test_stdbrx,
   1761                       "Test stdbrx instruction"},
   1762 #endif
   1763                      { &test_vx_aORm_fp_ops,
   1764                       "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p"},
   1765                      { &test_vx_simple_scalar_fp_ops,
   1766                       "Test scalar floating point arithmetic instructions"},
   1767                      { NULL, NULL }
   1768 };
   1769 #endif // HAS_VSX
   1770 
   1771 int main(int argc, char *argv[])
   1772 {
   1773 #ifdef HAS_VSX
   1774 
   1775    test_table_t aTest;
   1776    test_func_t func;
   1777    int i = 0;
   1778 
   1779    while ((func = all_tests[i].test_category)) {
   1780       aTest = all_tests[i];
   1781       printf( "%s\n", aTest.name );
   1782       (*func)();
   1783       i++;
   1784    }
   1785    if (spec_fargs)
   1786      free(spec_fargs);
   1787    if (spec_sp_fargs)
   1788      free(spec_sp_fargs);
   1789 
   1790 #endif // HAS _VSX
   1791 
   1792    return 0;
   1793 }
   1794