Home | History | Annotate | Download | only in ppc32
      1 
      2 /* HOW TO COMPILE:
      3 
      4  * 32bit build:
      5    gcc -Winline -Wall -g -O -mregnames -maltivec -m32
      6  * 64bit build:
      7    gcc -Winline -Wall -g -O -mregnames -maltivec -m64
      8 
      9 
     10  * test_isa_2_07_part1.c:
     11  * PPC tests for the ISA 2.07.  This file is based on the
     12  * jm-insns.c file for the new instructions in the ISA 2.07.  The
     13  * test structure has been kept the same as the original file to
     14  * the extent possible.
     15  *
     16  * Copyright (C) 2013 IBM
     17  *
     18  *   Authors: Carl Love <carll (at) us.ibm.com>
     19  *            Maynard Johnson <maynardj (at) us.ibm.com>
     20  *
     21  *   This program is free software; you can redistribute it and/or
     22  *   modify it under the terms of the GNU General Public License as
     23  *   published by the Free Software Foundation; either version 2 of the
     24  *   License, or (at your option) any later version.
     25  *
     26  *   This program is distributed in the hope that it will be useful,
     27  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
     28  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     29  *   GNU General Public License for more details.
     30  *
     31  *   You should have received a copy of the GNU General Public License
     32  *   along with this program; if not, write to the Free Software
     33  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     34  *
     35  */
     36 
     37 /*
     38  * Operation details
     39  * -----------------
     40  *
     41  * The 'loops' (e.g. int_loops) do the actual work:
     42  *  - loops over as many arguments as the insn needs (regs | imms)
     43  *     - sets up the environment (reset cr,xer, assign src regs...)
     44  *     - maybe modifies the asm instn to test different imm args
     45  *     - calls the test function
     46  *     - retrieves relevant register data (rD,cr,xer,...)
     47  *     - prints argument and result data.
     48  *
     49  * More specifically...
     50  *
     51  * all_tests[i] holds insn tests
     52  *  - of which each holds: {instn_test_arr[], description, flags}
     53  *
     54  * flags hold 3 instn classifiers: {family, type, arg_type}
     55  *
     56  * // The main test loop:
     57  * do_tests( user_ctl_flags ) {
 *    foreach(curr_test = all_tests[i]) {
     59  *
     60  *       // flags are used to control what tests are run:
     61  *       if (curr_test->flags && !user_ctl_flags)
     62  *          continue;
     63  *
     64  *       // a 'loop_family_arr' is chosen based on the 'family' flag...
     65  *       switch(curr_test->flags->family) {
     66  *       case x: loop_family_arr = int_loops;
     67  *      ...
     68  *       }
     69  *
     70  *       // ...and the actual test_loop to run is found by indexing into
     71  *       // the loop_family_arr with the 'arg_type' flag:
 *       test_loop = loop_family_arr[curr_test->flags->arg_type]
     73  *
     74  *       // finally, loop over all instn tests for this test:
     75  *       foreach (instn_test = curr_test->instn_test_arr[i]) {
     76  *
 *          // and call the test_loop with the current instn_test name,function
 *          test_loop( instn_test->name, instn_test->func, curr_test->flags )
     79  *       }
     80  *    }
     81  * }
     82  *
     83  */
     84 
     85 
     86 /**********************************************************************/
     87 
     88 /* Uncomment to enable output of CR flags for float tests */
     89 //#define TEST_FLOAT_FLAGS
     90 
     91 /* Uncomment to enable debug output */
     92 //#define DEBUG_ARGS_BUILD
     93 //#define DEBUG_FILTER
     94 
     95 /**********************************************************************/
     96 #include <stdio.h>
     97 
     98 #ifdef HAS_ISA_2_07
     99 
    100 #include "config.h"
    101 #include <altivec.h>
    102 #include <stdint.h>
    103 
    104 #include <assert.h>
    105 #include <ctype.h>     // isspace
    106 #include <stdlib.h>
    107 #include <string.h>
    108 #include <unistd.h>    // getopt
    109 
    110 #if !defined (__TEST_PPC_H__)
    111 #define __TEST_PPC_H__
    112 
    113 #include "tests/sys_mman.h"
    114 #include "tests/malloc.h"       // memalign16
    115 
/* Compile-time assertion usable inside a constant expression.
 * When (e) is false, -!(e) evaluates to -1 and the negative bit-field
 * width makes the struct ill-formed, so compilation fails; when (e) is
 * true the width is 0 and the whole thing is just a sizeof expression. */
#define STATIC_ASSERT(e) sizeof(struct { int:-!(e); })
    117 
/* Something of the same size as void*, so it can safely be coerced
    119  * to/from a pointer type. Also same size as the host's gp registers.
    120  * According to the AltiVec section of the GCC manual, the syntax does
    121  * not allow the use of a typedef name as a type specifier in conjunction
    122  * with the vector keyword, so typedefs uint[32|64]_t are #undef'ed here
    123  * and redefined using #define.
    124  */
/* Remove any macro definition of these two names, then define them as
 * macros expanding to basic types (rationale: the AltiVec note in the
 * comment directly above). */
#undef uint32_t
#undef uint64_t
#define uint32_t unsigned int
#define uint64_t unsigned long long int
    129 
/* HWord_t is the host word type: uint32_t when __powerpc64__ is not
 * defined, uint64_t when it is.  ZERO is a zero literal written with the
 * matching suffix (0 or 0ULL). */
#ifndef __powerpc64__
typedef uint32_t  HWord_t;
#define ZERO 0
#else
typedef uint64_t  HWord_t;
#define ZERO 0ULL
#endif /* __powerpc64__ */
    137 
/* isLE is 1 when VGP_ppc64le_linux is defined at build time, 0 otherwise. */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
    143 
/* Word_t is always the 64-bit type, independent of the host word size. */
typedef uint64_t Word_t;

/* Build-time checks that the uint32_t / uint64_t replacements defined
 * above really are 4 and 8 bytes wide; the enumerator values themselves
 * are not meaningful. */
enum {
    compile_time_test1 = STATIC_ASSERT(sizeof(uint32_t) == 4),
    compile_time_test2 = STATIC_ASSERT(sizeof(uint64_t) == 8),
};
    150 
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register with mtcr.
 * NOTE(review): unlike GET_CR/GET_XER, SET_CR and SET_XER expand to a
 * statement that already ends in ';', so "SET_CR(x);" yields an extra
 * empty statement and the macro cannot sit unbraced before an 'else'. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg to XER with mtxer (same trailing-';' caveat as SET_CR). */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into the lvalue _lval with mfcr. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into the lvalue _lval with mfxer. */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR first, then XER, into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Convenience forms that write 0 to CR, to XER, or to both (CR first). */
#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF and a 0.0 source operand. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
    181 
/* NOTE(review): DEFAULT_VSCR is not referenced in the visible part of the
 * file; presumably the value loaded into VSCR before a test - confirm. */
#define DEFAULT_VSCR 0x0

/* File-scope operands shared by the vector test functions below:
 * vec_out is the asm output operand, vec_inA/vec_inB/vec_inC are inputs
 * viewed as doublewords, vec_inA_wd/vec_inB_wd are inputs viewed as words. */
static vector unsigned long long vec_out, vec_inA, vec_inB, vec_inC;
static vector unsigned int vec_inA_wd, vec_inB_wd;
    186 
/* XXXX these must all be callee-save regs! */
/* Global register variables: each C name is bound to the hardware
 * register given in its __asm__ label.  The test functions below use them
 * as operands (some asm templates even name registers 14/15/17 literally),
 * so the bindings must not change. */
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
    196 
/* A test function takes no arguments and returns nothing; operands and
 * results travel through file-scope variables and registers. */
typedef void (*test_func_t) (void);
typedef struct _test test_t;
typedef struct _test_table test_table_t;
/* One instruction test: the function that executes it and its printable
 * name.  Arrays of test_t in this file end with a { NULL, NULL } entry. */
struct _test {
    test_func_t func;
    const char *name;
};

/* A named group of tests plus the flag word classifying the group
 * (see enum test_flags). */
struct _test_table {
    test_t *tests;
    const char *name;
    uint32_t flags;
};

/* Signature of a test loop: receives the test's name, its function and
 * the flag word. */
typedef void (*test_loop_t) (const char *name, test_func_t func,
                             uint32_t flags);
    213 
/* Classification bits stored in a test table's flag word.
 * Three multi-bit fields occupy disjoint positions: argument kind in bits
 * 0-3, type in bits 8-11, family in bits 16-19.  PPC_NB_ARGS, PPC_TYPE and
 * PPC_FAMILY have every bit of their field set - presumably they are the
 * extraction masks; confirm against the code that decodes the flags.
 * The final group holds single-bit flags that may be OR-ed together. */
enum test_flags {
    /* Nb arguments */
    PPC_ONE_ARG    = 0x00000001,
    PPC_TWO_ARGS   = 0x00000002,
    PPC_THREE_ARGS = 0x00000003,
    PPC_CMP_ARGS   = 0x00000004,  // family: compare
    PPC_CMPI_ARGS  = 0x00000005,  // family: compare
    PPC_TWO_I16    = 0x00000006,  // family: arith/logical
    PPC_SPECIAL    = 0x00000007,  // family: logical
    PPC_LD_ARGS    = 0x00000008,  // family: ldst
    PPC_LDX_ARGS   = 0x00000009,  // family: ldst
    PPC_ST_ARGS    = 0x0000000A,  // family: ldst
    PPC_STX_ARGS   = 0x0000000B,  // family: ldst
    PPC_STQ_ARGS   = 0x0000000C,  // family: ldst, two args, imm
    PPC_LDQ_ARGS   = 0x0000000D,  // family: ldst, two args, imm
    PPC_STQX_ARGS  = 0x0000000E,  // family: ldst, three args
    PPC_LDQX_ARGS  = 0x0000000F,  // family: ldst, three_args
    PPC_NB_ARGS    = 0x0000000F,
    /* Type */
    PPC_ARITH      = 0x00000100,
    PPC_LOGICAL    = 0x00000200,
    PPC_COMPARE    = 0x00000300,
    PPC_CROP       = 0x00000400,
    PPC_LDST       = 0x00000500,
    PPC_POPCNT     = 0x00000600,
    PPC_ARITH_DRES = 0x00000700,
    PPC_DOUBLE_IN_IRES = 0x00000800,
    /* note: 0x00000900 is not assigned */
    PPC_MOV        = 0x00000A00,
    PPC_SHA_OR_BCD = 0x00000B00,
    PPC_TYPE       = 0x00000F00,
    /* Family */
    PPC_INTEGER    = 0x00010000,
    PPC_FLOAT      = 0x00020000,
    PPC_405        = 0x00030000,  // Leave so we keep numbering consistent
    PPC_ALTIVEC    = 0x00040000,
    PPC_FALTIVEC   = 0x00050000,
    PPC_ALTIVECD   = 0x00060000,    /* double word Altivec tests */
    PPC_ALTIVECQ   = 0x00070000,
    PPC_FAMILY     = 0x000F0000,
    /* Flags: these may be combined, so use separate bitfields. */
    PPC_CR         = 0x01000000,
    PPC_XER_CA     = 0x02000000,
};
    257 
    258 #endif /* !defined (__TEST_PPC_H__) */
    259 
    260 /* -------------- END #include "test-ppc.h" -------------- */
    261 
    262 
/* AB_DPRINTF: printf-style debug output to stderr, active only when
 * DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty statement
 * and its arguments are not evaluated.  Uses the GNU named-variadic macro
 * form (args... / ##args). */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif


/* FDPRINTF: same as AB_DPRINTF but controlled by DEBUG_FILTER. */
#if defined (DEBUG_FILTER)
#define FDPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define FDPRINTF(fmt, args...) do { } while (0)
#endif
    275 
/* Shorthand for GCC's 'unused' attribute. */
#define unused __attribute__ (( unused ))

/* Pairs a name with a callback taking the same arguments as a test_loop_t
 * (name, test function, flag word); the flag word parameter is marked as
 * possibly unused. */
typedef struct special {
   const char *name;
   void (*test_cb)(const char* name, test_func_t func,
                   unused uint32_t test_flags);
} special_t;
    283 
    284 static void test_stq(void)
    285 {
    286   __asm__ __volatile__ ("stq  %0, 0(%1)" : :"r" (r14), "r" (r16));
    287 }
    288 
    289 static test_t tests_istq_ops_two_i16[] = {
    290     { &test_stq             , "stq", },
    291     { NULL,                   NULL,           },
    292 };
    293 
    294 static void test_lq(void)
    295 {
    296   __asm__ __volatile__ ("lq  %0, 0(%1)" : :"r" (r14), "r" (r16));
    297 }
    298 
    299 static test_t tests_ildq_ops_two_i16[] = {
    300     { &test_lq              , "lq", },
    301     { NULL,                   NULL,          },
    302 };
    303 
    304 #ifdef HAS_ISA_2_07
/* NOTE(review): not referenced in the visible part of the file; presumably
 * the memory used for the load-and-reserve / store-conditional tests -
 * confirm against the test loop.  Has external linkage (no 'static'). */
Word_t * mem_resv;
    306 static void test_stbcx(void)
    307 {
    308   /* Have to do the lbarx to the memory address to create the reservation
    309    * or the store will not occur.
    310    */
    311   __asm__ __volatile__ ("lbarx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    312   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    313   r15 = (HWord_t) 0x1155337744226688ULL;
    314   __asm__ __volatile__ ("stbcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    315 }
    316 
    317 static void test_sthcx(void)
    318 {
    319   /* Have to do the lharx to the memory address to create the reservation
    320    * or the store will not occur.
    321    */
    322   __asm__ __volatile__ ("lharx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    323   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    324   r15 = (HWord_t) 0x1155337744226688ULL;
    325   __asm__ __volatile__ ("sthcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    326 }
    327 #endif
    328 
    329 static void test_stqcx(void)
    330 {
    331   /* Have to do the lqarx to the memory address to create the reservation
    332    * or the store will not occur.
    333    */
    334   __asm__ __volatile__ ("lqarx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    335   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    336   r15 = (HWord_t) 0x1155337744226688ULL;
    337   __asm__ __volatile__ ("stqcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    338 }
    339 
    340 static test_t tests_stq_ops_three[] = {
    341 #ifdef HAS_ISA_2_07
    342     { &test_stbcx           , "stbcx.", },
    343     { &test_sthcx           , "sthcx.", },
    344 #endif
    345     { &test_stqcx           , "stqcx.", },
    346     { NULL,                   NULL,           },
    347 };
    348 
    349 #ifdef HAS_ISA_2_07
    350 static void test_lbarx(void)
    351 {
    352   __asm__ __volatile__ ("lbarx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    353 }
    354 static void test_lharx(void)
    355 {
    356   __asm__ __volatile__ ("lharx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    357 }
    358 #endif
    359 static void test_lqarx(void)
    360 {
    361   __asm__ __volatile__ ("lqarx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    362 }
    363 
    364 static test_t tests_ldq_ops_three[] = {
    365 #ifdef HAS_ISA_2_07
    366     { &test_lbarx           , "lbarx", },
    367     { &test_lharx           , "lharx", },
    368 #endif
    369     { &test_lqarx           , "lqarx", },
    370     { NULL,                   NULL,           },
    371 };
    372 
    373 static void test_fmrgew (void)
    374 {
    375     __asm__ __volatile__ ("fmrgew        17,14,15");
    376 };
    377 
    378 static void test_fmrgow (void)
    379 {
    380     __asm__ __volatile__ ("fmrgow        17,14,15");
    381 };
    382 
    383 
    384 
    385 // VSX move instructions
    386 static void test_mfvsrd (void)
    387 {
    388    __asm__ __volatile__ ("mfvsrd %0,%x1" : "=r" (r14) : "ws" (vec_inA));
    389 };
    390 
    391 static void test_mfvsrwz (void)
    392 {
    393    __asm__ __volatile__ ("mfvsrwz %0,%x1" : "=r" (r14) : "ws" (vec_inA));
    394 };
    395 
    396 static void test_mtvsrd (void)
    397 {
    398    __asm__ __volatile__ ("mtvsrd %x0,%1" : "=ws" (vec_out) : "r" (r14));
    399 };
    400 
    401 static void test_mtvsrwz (void)
    402 {
    403    __asm__ __volatile__ ("mtvsrwz %x0,%1" : "=ws" (vec_out) : "r" (r14));
    404 };
    405 
    406 
    407 static void test_mtfprwa (void)
    408 {
    409    __asm__ __volatile__ ("mtfprwa %x0,%1" : "=ws" (vec_out) : "r" (r14));
    410 };
    411 
    412 static test_t tests_move_ops_spe[] = {
    413   { &test_mfvsrd          , "mfvsrd" },
    414   { &test_mfvsrwz         , "mfvsrwz" },
    415   { &test_mtvsrd          , "mtvsrd" },
    416   { &test_mtvsrwz         , "mtvsrwz" },
    417   { &test_mtfprwa         , "mtfprwa" },
    418   { NULL,                   NULL }
    419 };
    420 
    421 /* NOTE: Since these are "vector" instructions versus VSX, we must use
    422  * vector constraints.
    423  *
    424  * Vector Double Word tests.
    425  */
    426 static void test_vpkudum (void)
    427 {
    428    __asm__ __volatile__ ("vpkudum %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    429 }
    430 
    431 static void test_vaddudm (void)
    432 {
    433    __asm__ __volatile__ ("vaddudm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    434 }
    435 
    436 static void test_vsubudm (void)
    437 {
    438    __asm__ __volatile__ ("vsubudm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    439 }
    440 
    441 static void test_vmaxud (void)
    442 {
    443    __asm__ __volatile__ ("vmaxud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    444 }
    445 
    446 static void test_vmaxsd (void)
    447 {
    448    __asm__ __volatile__ ("vmaxsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    449 }
    450 
    451 static void test_vminud (void)
    452 {
    453    __asm__ __volatile__ ("vminud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    454 }
    455 
    456 static void test_vminsd (void)
    457 {
    458    __asm__ __volatile__ ("vminsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    459 }
    460 
    461 static void test_vcmpequd (void)
    462 {
    463    __asm__ __volatile__ ("vcmpequd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    464 }
    465 
    466 static void test_vcmpgtud (void)
    467 {
    468    __asm__ __volatile__ ("vcmpgtud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    469 }
    470 
    471 static void test_vcmpgtsd (void)
    472 {
    473    __asm__ __volatile__ ("vcmpgtsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    474 }
    475 
    476 static void test_vrld (void)
    477 {
    478    __asm__ __volatile__ ("vrld %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    479 }
    480 
    481 static void test_vsld (void)
    482 {
    483    __asm__ __volatile__ ("vsld %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    484 }
    485 
    486 static void test_vsrad (void)
    487 {
    488    __asm__ __volatile__ ("vsrad %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    489 }
    490 
    491 static void test_vsrd (void)
    492 {
    493    __asm__ __volatile__ ("vsrd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    494 }
    495 
    496 /* Vector Double Word saturate tests.*/
    497 
    498 static void test_vpkudus (void)
    499 {
    500    __asm__ __volatile__ ("vpkudus %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    501 }
    502 
    503 static void test_vpksdus (void)
    504 {
    505    __asm__ __volatile__ ("vpksdus %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    506 }
    507 
    508 static void test_vpksdss (void)
    509 {
    510    __asm__ __volatile__ ("vpksdss %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    511 }
    512 
    513 
    514 /* Vector unpack two words from one vector arg */
    515 static void test_vupkhsw (void)
    516 {
    517     __asm__ __volatile__ ("vupkhsw %0, %1" : "=v" (vec_out): "v" (vec_inB_wd));
    518 }
    519 
    520 static void test_vupklsw (void)
    521 {
    522     __asm__ __volatile__ ("vupklsw %0, %1" : "=v" (vec_out): "v" (vec_inB_wd));
    523 }
    524 
    525 
    526 /* Vector Integer Word tests.*/
    527 static void test_vmulouw (void)
    528 {
    529   __asm__ __volatile__ ("vmulouw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    530 }
    531 
    532 static void test_vmuluwm (void)
    533 {
    534     __asm__ __volatile__ ("vmuluwm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    535 }
    536 
    537 static void test_vmulosw (void)
    538 {
    539     __asm__ __volatile__ ("vmulosw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    540 }
    541 
    542 static void test_vmuleuw (void)
    543 {
    544     __asm__ __volatile__ ("vmuleuw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    545 }
    546 
    547 static void test_vmulesw (void)
    548 {
    549     __asm__ __volatile__ ("vmulesw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    550 }
    551 
    552 static void test_vmrgew (void)
    553 {
    554     __asm__ __volatile__ ("vmrgew %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    555 }
    556 
    557 static void test_vmrgow (void)
    558 {
    559     __asm__ __volatile__ ("vmrgow %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    560 }
    561 
    562 static void test_vpmsumb (void)
    563 {
    564     __asm__ __volatile__ ("vpmsumb %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    565 }
    566 
    567 static void test_vpmsumh (void)
    568 {
    569     __asm__ __volatile__ ("vpmsumh %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    570 }
    571 
    572 static void test_vpmsumw (void)
    573 {
    574     __asm__ __volatile__ ("vpmsumw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    575 }
    576 
    577 static void test_vpermxor (void)
    578 {
    579   __asm__ __volatile__ ("vpermxor %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    580 }
    581 
    582 static void test_vpmsumd (void)
    583 {
    584     __asm__ __volatile__ ("vpmsumd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    585 }
    586 
    587 static void test_vnand (void)
    588 {
    589     __asm__ __volatile__ ("vnand %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    590 }
    591 
    592 static void test_vorc (void)
    593 {
    594     __asm__ __volatile__ ("vorc %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    595 }
    596 
    597 static void test_veqv (void)
    598 {
    599     __asm__ __volatile__ ("veqv %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    600 }
    601 
    602 static void test_vcipher (void)
    603 {
    604     __asm__ __volatile__ ("vcipher %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    605 }
    606 
    607 static void test_vcipherlast (void)
    608 {
    609     __asm__ __volatile__ ("vcipherlast %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    610 }
    611 
    612 static void test_vncipher (void)
    613 {
    614     __asm__ __volatile__ ("vncipher %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    615 }
    616 
    617 static void test_vncipherlast (void)
    618 {
    619     __asm__ __volatile__ ("vncipherlast %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    620 }
    621 
    622 static void test_vclzb (void)
    623 {
    624     __asm__ __volatile__ ("vclzb %0, %1" : "=v" (vec_out): "v" (vec_inB));
    625 }
    626 
    627 static void test_vclzw (void)
    628 {
    629     __asm__ __volatile__ ("vclzw %0, %1" : "=v" (vec_out): "v" (vec_inB));
    630 }
    631 
    632 static void test_vclzh (void)
    633 {
    634     __asm__ __volatile__ ("vclzh %0, %1" : "=v" (vec_out): "v" (vec_inB));
    635 }
    636 
    637 static void test_vclzd (void)
    638 {
    639     __asm__ __volatile__ ("vclzd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    640 }
    641 
    642 static void test_vpopcntb (void)
    643 {
    644     __asm__ __volatile__ ("vpopcntb %0, %1" : "=v" (vec_out): "v" (vec_inB));
    645 }
    646 
    647 static void test_vpopcnth (void)
    648 {
    649     __asm__ __volatile__ ("vpopcnth %0, %1" : "=v" (vec_out): "v" (vec_inB));
    650 }
    651 
    652 static void test_vpopcntw (void)
    653 {
    654     __asm__ __volatile__ ("vpopcntw %0, %1" : "=v" (vec_out): "v" (vec_inB));
    655 }
    656 
    657 static void test_vpopcntd (void)
    658 {
    659     __asm__ __volatile__ ("vpopcntd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    660 }
    661 
    662 static void test_vsbox (void)
    663 {
    664     __asm__ __volatile__ ("vsbox %0, %1" : "=v" (vec_out): "v" (vec_inB));
    665 }
    666 
    667 static int st_six;
    668 static void test_vshasigmad (void)
    669 {
    670    switch (st_six) {
    671    case 0x00:
    672       __asm__ __volatile__ ("vshasigmad %0, %1, 0, 0" : "=v" (vec_out): "v" (vec_inA));
    673       break;
    674    case 0x0f:
    675       __asm__ __volatile__ ("vshasigmad %0, %1, 0, 15" : "=v" (vec_out): "v" (vec_inA));
    676       break;
    677    case 0x10:
    678       __asm__ __volatile__ ("vshasigmad %0, %1, 1, 0" : "=v" (vec_out): "v" (vec_inA));
    679       break;
    680    case 0x1f:
    681       __asm__ __volatile__ ("vshasigmad %0, %1, 1, 15" : "=v" (vec_out): "v" (vec_inA));
    682       break;
    683    }
    684 }
    685 
    686 static void test_vshasigmaw (void)
    687 {
    688    switch (st_six) {
    689    case 0x00:
    690       __asm__ __volatile__ ("vshasigmaw %0, %1, 0, 0" : "=v" (vec_out): "v" (vec_inA));
    691       break;
    692    case 0x0f:
    693       __asm__ __volatile__ ("vshasigmaw %0, %1, 0, 15" : "=v" (vec_out): "v" (vec_inA));
    694       break;
    695    case 0x10:
    696       __asm__ __volatile__ ("vshasigmaw %0, %1, 1, 0" : "=v" (vec_out): "v" (vec_inA));
    697       break;
    698    case 0x1f:
    699       __asm__ __volatile__ ("vshasigmaw %0, %1, 1, 15" : "=v" (vec_out): "v" (vec_inA));
    700       break;
    701    }
    702 }
    703 
    704 static int PS_bit;
    705 static void test_bcdadd (void)
    706 {
    707    if (PS_bit)
    708       __asm__ __volatile__ ("bcdadd. %0, %1, %2, 1" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    709    else
    710       __asm__ __volatile__ ("bcdadd. %0, %1, %2, 0" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    711 }
    712 
    713 static void test_bcdsub (void)
    714 {
    715    if (PS_bit)
    716       __asm__ __volatile__ ("bcdsub. %0, %1, %2, 1" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    717    else
    718       __asm__ __volatile__ ("bcdsub. %0, %1, %2, 0" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    719 }
    720 
    721 static void test_vaddcuq (void)
    722 {
    723    __asm__ __volatile__ ("vaddcuq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    724 }
    725 
    726 static void test_vadduqm (void)
    727 {
    728    __asm__ __volatile__ ("vadduqm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    729 }
    730 
    731 static void test_vaddecuq (void)
    732 {
    733   __asm__ __volatile__ ("vaddecuq %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    734 }
    735 
    736 static void test_vaddeuqm (void)
    737 {
    738   __asm__ __volatile__ ("vaddeuqm %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    739 }
    740 
    741 static void test_vsubcuq (void)
    742 {
    743    __asm__ __volatile__ ("vsubcuq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    744 }
    745 
    746 static void test_vsubuqm (void)
    747 {
    748    __asm__ __volatile__ ("vsubuqm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    749 }
    750 
    751 static void test_vsubecuq (void)
    752 {
    753   __asm__ __volatile__ ("vsubecuq %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    754 }
    755 
    756 static void test_vsubeuqm (void)
    757 {
    758   __asm__ __volatile__ ("vsubeuqm %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    759 }
    760 
    761 static void test_vbpermq (void)
    762 {
    763    __asm__ __volatile__ ("vbpermq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    764 }
    765 
    766 static void test_vgbbd (void)
    767 {
    768     __asm__ __volatile__ ("vgbbd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    769 }
    770 
    771 
    772 static test_t tests_aa_quadword_two_args[] = {
    773   { &test_vaddcuq       , "vaddcuq" },
    774   { &test_vadduqm       , "vadduqm" },
    775   { &test_vsubcuq       , "vsubcuq" },
    776   { &test_vsubuqm       , "vsubuqm" },
    777   { &test_vbpermq       , "vbpermq" },
    778   { NULL                , NULL      },
    779 };
    780 
    781 static test_t tests_aa_quadword_three_args[] = {
    782   { &test_vaddecuq      , "vaddecuq" },
    783   { &test_vaddeuqm      , "vaddeuqm" },
    784   { &test_vsubecuq      , "vsubecuq" },
    785   { &test_vsubeuqm      , "vsubeuqm" },
    786   { NULL                , NULL      },
    787 };
    788 
    789 static test_t tests_aa_bcd_ops[] = {
    790   { &test_bcdadd        , "bcdadd." },
    791   { &test_bcdsub        , "bcdsub." },
    792   { NULL                , NULL      },
    793 };
    794 
    795 static test_t tests_aa_SHA_ops[] = {
    796   { &test_vshasigmad    , "vshasigmad" },
    797   { &test_vshasigmaw    , "vshasigmaw" },
    798   { NULL                , NULL         },
    799 };
    800 
    801 static test_t tests_aa_ops_three[] = {
    802   { &test_vpermxor        , "vpermxor" },
    803   { NULL                  , NULL       },
    804 };
    805 
    806 static test_t tests_aa_word_ops_one_arg_dres[] = {
    807   { &test_vupkhsw         , "vupkhsw" },
    808   { &test_vupklsw         , "vupklsw" },
    809   { NULL                  , NULL      }
    810 };
    811 
    812 static test_t tests_aa_word_ops_two_args_dres[] = {
    813   { &test_vmulouw         , "vmulouw" },
    814   { &test_vmuluwm         , "vmuluwm" },
    815   { &test_vmulosw         , "vmulosw" },
    816   { &test_vmuleuw         , "vmuleuw" },
    817   { &test_vmulesw         , "vmulesw" },
    818   { &test_vmrgew          , "vmrgew" },
    819   { &test_vmrgow          , "vmrgow" },
    820   { &test_vpmsumb         , "vpmsumb" },
    821   { &test_vpmsumh         , "vpmsumh" },
    822   { &test_vpmsumw         , "vpmsumw" },
    823   { NULL                  , NULL      }
    824 };
    825 
    826 static test_t tests_aa_dbl_ops_two_args[] = {
    827   { &test_vaddudm         , "vaddudm", },
    828   { &test_vsubudm         , "vsubudm", },
    829   { &test_vmaxud          , "vmaxud", },
    830   { &test_vmaxsd          , "vmaxsd", },
    831   { &test_vminud          , "vminud", },
    832   { &test_vminsd          , "vminsd", },
    833   { &test_vcmpequd        , "vcmpequd", },
    834   { &test_vcmpgtud        , "vcmpgtud", },
    835   { &test_vcmpgtsd        , "vcmpgtsd", },
    836   { &test_vrld            , "vrld", },
    837   { &test_vsld            , "vsld", },
    838   { &test_vsrad           , "vsrad", },
    839   { &test_vsrd            , "vsrd", },
    840   { &test_vpkudum         , "vpkudum", },
    841   { &test_vpmsumd         , "vpmsumd", },
    842   { &test_vnand           , "vnand", },
    843   { &test_vorc            , "vorc", },
    844   { &test_veqv            , "veqv", },
    845   { &test_vcipher         , "vcipher" },
    846   { &test_vcipherlast     , "vcipherlast" },
    847   { &test_vncipher        , "vncipher" },
    848   { &test_vncipherlast    , "vncipherlast" },
    849   { NULL                  , NULL,      },
    850 };
    851 
    852 static test_t tests_aa_dbl_ops_one_arg[] = {
    853   { &test_vclzb           , "vclzb" },
    854   { &test_vclzw           , "vclzw" },
    855   { &test_vclzh           , "vclzh" },
    856   { &test_vclzd           , "vclzd" },
    857   { &test_vpopcntb        , "vpopcntb" },
    858   { &test_vpopcnth        , "vpopcnth" },
    859   { &test_vpopcntw        , "vpopcntw" },
    860   { &test_vpopcntd        , "vpopcntd" },
    861   { &test_vsbox           , "vsbox" },
    862   { &test_vgbbd           , "vgbbd" },
    863   { NULL                  , NULL,      }
    864 };
    865 
    866 static test_t tests_aa_dbl_to_int_two_args[] = {
    867   { &test_vpkudus         , "vpkudus", },
    868   { &test_vpksdus         , "vpksdus", },
    869   { &test_vpksdss         , "vpksdss", },
    870   { NULL                  , NULL,      },
    871 };
    872 
/* NOTE(review): verbose and arg_list_size are not referenced in the
 * visible part of the file; presumably set by option handling further
 * down - confirm. */
static int verbose = 0;
static int arg_list_size = 0;
/* Doubleword argument tables, allocated and filled by
 * build_vdargs_table(); each holds NB_VDARGS entries. */
static unsigned long long * vdargs = NULL;
static unsigned long long * vdargs_x = NULL;
#define NB_VDARGS 4
    878 
    879 static void build_vdargs_table (void)
    880 {
    881    // Each VSX register holds two doubleword integer values
    882    vdargs = memalign16(NB_VDARGS * sizeof(unsigned long long));
    883    vdargs[0] = 0x0102030405060708ULL;
    884    vdargs[1] = 0x090A0B0C0E0D0E0FULL;
    885    vdargs[2] = 0xF1F2F3F4F5F6F7F8ULL;
    886    vdargs[3] = 0xF9FAFBFCFEFDFEFFULL;
    887 
    888    vdargs_x = memalign16(NB_VDARGS * sizeof(unsigned long long));
    889    vdargs_x[0] = 0x000000007c118a2bULL;
    890    vdargs_x[1] = 0x00000000f1112345ULL;
    891    vdargs_x[2] = 0x01F2F3F4F5F6F7F8ULL;
    892    vdargs_x[3] = 0xF9FAFBFCFEFDFEFFULL;
    893 }
    894 
    895 static unsigned int * vwargs = NULL;
    896 #define NB_VWARGS 8
    897 
    898 static void build_vwargs_table (void)
    899 {
    900    // Each VSX register holds 4 integer word values
    901    size_t i = 0;
    902    vwargs = memalign(8, 8 * sizeof(int));
    903    assert(vwargs);
    904    assert(0 == ((8-1) & (unsigned long)vwargs));
    905    vwargs[i++] = 0x01020304;
    906    vwargs[i++] = 0x05060708;
    907    vwargs[i++] = 0x090A0B0C;
    908    vwargs[i++] = 0x0E0D0E0F;
    909    vwargs[i++] = 0xF1F2F3F4;
    910    vwargs[i++] = 0xF5F6F7F8;
    911    vwargs[i++] = 0xF9FAFBFC;
    912    vwargs[i++] = 0xFEFDFEFF;
    913 }
    914 
    915 static unsigned long long vbcd_args[] __attribute__ ((aligned (16))) = {
    916    0x8045090189321003ULL, // Negative BCD value
    917    0x001122334556677dULL,
    918    0x0000107600000001ULL, // Positive BCD value
    919    0x319293945142031aULL,
    920    0x0ULL,                // Valid BCD zero
    921    0xaULL,
    922    0x0ULL,                // Invalid BCD zero (no sign code)
    923    0x0ULL
    924 };
    925 #define NUM_VBCD_VALS (sizeof vbcd_args/sizeof vbcd_args[0])
    926 
    927 static void build_vargs_table (void)
    928 {
    929    build_vdargs_table();
    930    build_vwargs_table();
    931 }
    932 
    933 static double *fargs = NULL;
    934 static int nb_fargs = 0;
    935 
    936 static inline void register_farg (void *farg,
    937                                   int s, uint16_t _exp, uint64_t mant)
    938 {
    939    uint64_t tmp;
    940 
    941    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    942    *(uint64_t *)farg = tmp;
    943    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    944               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    945 }
    946 
    947 static void build_fargs_table (void)
    948 {
    949    /* Double precision:
    950     * Sign goes from zero to one               (1 bit)
    951     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    952     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    953     * + special values:
    954     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    955     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    956     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    957     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    958     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    959     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    960     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    961     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    962     * (8 values)
    963 
    964     * Ref only:
    965     * Single precision
    966     * Sign:     1 bit
    967     * Exponent: 8 bits
    968     * Mantissa: 23 bits
    969     * +0.0      : 0 0x00 0x000000 => 0x00000000
    970     * -0.0      : 1 0x00 0x000000 => 0x80000000
    971     * +infinity : 0 0xFF 0x000000 => 0x7F800000
    972     * -infinity : 1 0xFF 0x000000 => 0xFF800000
    973     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    974     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    975     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    976     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    977     */
    978    uint64_t mant;
    979    uint16_t _exp, e0, e1;
    980    int s;
    981    int i=0;
    982 
    983    /* Note: VEX isn't so hot with denormals, so don't bother
    984       testing them: set _exp > 0
    985    */
    986 
    987    if ( arg_list_size == 1 ) {   // Large
    988       fargs = malloc(200 * sizeof(double));
    989       for (s=0; s<2; s++) {
    990          for (e0=0; e0<2; e0++) {
    991             for (e1=0x001; ; e1 = ((e1 + 1) << 2) + 6) {
    992                if (e1 >= 0x400)
    993                   e1 = 0x3fe;
    994                _exp = (e0 << 10) | e1;
    995                for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
    996                     /* Add 'random' bits */
    997                     mant = ((mant + 0x4A6) << 13) + 0x359) {
    998                   register_farg(&fargs[i++], s, _exp, mant);
    999                }
   1000                if (e1 == 0x3fe)
   1001                   break;
   1002             }
   1003          }
   1004       }
   1005    } else {                      // Default
   1006       fargs = malloc(16 * sizeof(double));
   1007       for (s=0; s<2; s++) {                                // x2
   1008             for (e1=0x001; ; e1 = ((e1 + 1) << 13) + 7) {  // x2
   1009                if (e1 >= 0x400)
   1010                   e1 = 0x3fe;
   1011                _exp = e1;
   1012                for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
   1013                     /* Add 'random' bits */
   1014                     mant = ((mant + 0x4A6) << 29) + 0x359) {  // x2
   1015                   register_farg(&fargs[i++], s, _exp, mant);
   1016                }
   1017                if (e1 == 0x3fe)
   1018                   break;
   1019             }
   1020       }
   1021    }
   1022 
   1023    /* Special values */
   1024    /* +0.0      : 0 0x000 0x0000000000000 */
   1025    s = 0;
   1026    _exp = 0x000;
   1027    mant = 0x0000000000000ULL;
   1028    register_farg(&fargs[i++], s, _exp, mant);
   1029    /* -0.0      : 1 0x000 0x0000000000000 */
   1030    s = 1;
   1031    _exp = 0x000;
   1032    mant = 0x0000000000000ULL;
   1033    register_farg(&fargs[i++], s, _exp, mant);
   1034    /* +infinity : 0 0x7FF 0x0000000000000  */
   1035    s = 0;
   1036    _exp = 0x7FF;
   1037    mant = 0x0000000000000ULL;
   1038    register_farg(&fargs[i++], s, _exp, mant);
   1039    /* -infinity : 1 0x7FF 0x0000000000000 */
   1040    s = 1;
   1041    _exp = 0x7FF;
   1042    mant = 0x0000000000000ULL;
   1043    register_farg(&fargs[i++], s, _exp, mant);
   1044    /* +QNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
   1045    s = 0;
   1046    _exp = 0x7FF;
   1047    mant = 0x7FFFFFFFFFFFFULL;
   1048    register_farg(&fargs[i++], s, _exp, mant);
   1049    /* -QNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
   1050    s = 1;
   1051    _exp = 0x7FF;
   1052    mant = 0x7FFFFFFFFFFFFULL;
   1053    register_farg(&fargs[i++], s, _exp, mant);
   1054    /* +SNaN     : 0 0x7FF 0x8000000000000 */
   1055    s = 0;
   1056    _exp = 0x7FF;
   1057    mant = 0x8000000000000ULL;
   1058    register_farg(&fargs[i++], s, _exp, mant);
   1059    /* -SNaN     : 1 0x7FF 0x8000000000000 */
   1060    s = 1;
   1061    _exp = 0x7FF;
   1062    mant = 0x8000000000000ULL;
   1063    register_farg(&fargs[i++], s, _exp, mant);
   1064    AB_DPRINTF("Registered %d fargs values\n", i);
   1065 
   1066    nb_fargs = i;
   1067 }
   1068 
   1069 
   1070 
   1071 static int check_filter (char *filter)
   1072 {
   1073    char *c;
   1074    int ret = 1;
   1075 
   1076    if (filter != NULL) {
   1077       c = strchr(filter, '*');
   1078       if (c != NULL) {
   1079          *c = '\0';
   1080          ret = 0;
   1081       }
   1082    }
   1083    return ret;
   1084 }
   1085 
   1086 static int check_name (const char* name, const char *filter,
   1087                        int exact)
   1088 {
   1089    int nlen, flen;
   1090    int ret = 0;
   1091 
   1092    if (filter != NULL) {
   1093       for (; isspace(*name); name++)
   1094          continue;
   1095       FDPRINTF("Check '%s' againt '%s' (%s match)\n",
   1096                name, filter, exact ? "exact" : "starting");
   1097       nlen = strlen(name);
   1098       flen = strlen(filter);
   1099       if (exact) {
   1100          if (nlen == flen && memcmp(name, filter, flen) == 0)
   1101             ret = 1;
   1102       } else {
   1103          if (flen <= nlen && memcmp(name, filter, flen) == 0)
   1104             ret = 1;
   1105       }
   1106    } else {
   1107       ret = 1;
   1108    }
   1109    return ret;
   1110 }
   1111 
   1112 
   1113 typedef struct insn_sel_flags_t_struct {
   1114    int one_arg, two_args, three_args;
   1115    int arith, logical, compare, ldst;
   1116    int integer, floats, altivec, faltivec;
   1117    int cr;
   1118 } insn_sel_flags_t;
   1119 
   1120 static void test_float_two_args (const char* name, test_func_t func,
   1121                                  unused uint32_t test_flags)
   1122 {
   1123    double res;
   1124    Word_t u0, u1, ur;
   1125    volatile uint32_t flags;
   1126    int i, j;
   1127 
   1128    for (i=0; i<nb_fargs; i+=3) {
   1129       for (j=0; j<nb_fargs; j+=5) {
   1130          u0 = *(Word_t *)(&fargs[i]);
   1131          u1 = *(Word_t *)(&fargs[j]);
   1132          f14 = fargs[i];
   1133          f15 = fargs[j];
   1134 
   1135          SET_FPSCR_ZERO;
   1136          SET_CR_XER_ZERO;
   1137          (*func)();
   1138          GET_CR(flags);
   1139          res = f17;
   1140          ur = *(uint64_t *)(&res);
   1141 
   1142          printf("%s %016llx, %016llx => %016llx",
   1143                 name, u0, u1, ur);
   1144 #if defined TEST_FLOAT_FLAGS
   1145          printf(" (%08x)", flags);
   1146 #endif
   1147          printf("\n");
   1148       }
   1149       if (verbose) printf("\n");
   1150    }
   1151 }
   1152 
   1153 
   1154 static void mfvs(const char* name, test_func_t func,
   1155                  unused uint32_t test_flags)
   1156 {
   1157    /* This test is for move instructions where the input is a scalar register
   1158     * and the destination is a vector register.
   1159     */
   1160    int i;
   1161    volatile Word_t result;
   1162    result = 0ULL;
   1163 
   1164    for (i=0; i < NB_VDARGS; i++) {
   1165       r14 = ZERO;
   1166       if (isLE)
   1167          vec_inA = (vector unsigned long long){ 0ULL, vdargs[i] };
   1168       else
   1169          vec_inA = (vector unsigned long long){ vdargs[i], 0ULL };
   1170 
   1171       (*func)();
   1172       result = r14;
   1173       printf("%s: %016llx => %016llx\n", name, vdargs[i], result);
   1174    }
   1175 }
   1176 
   1177 static void mtvs(const char* name, test_func_t func,
   1178                  unused uint32_t test_flags)
   1179 {
   1180    /* This test is for move instructions where the input is a scalar register
   1181     * and the destination is a vector register.
   1182     */
   1183    unsigned long long *dst;
   1184    int i;
   1185 
   1186    for (i=0; i < NB_VDARGS; i++) {
   1187       r14  = vdargs[i];
   1188       vec_out = (vector unsigned long long){ 0ULL, 0ULL };
   1189 
   1190       (*func)();
   1191       dst = (unsigned long long *) &vec_out;
   1192       if (isLE)
   1193          dst++;
   1194       printf("%s: %016llx => %016llx\n", name, vdargs[i], *dst);
   1195    }
   1196 }
   1197 
   1198 static void mtvs2s(const char* name, test_func_t func,
   1199                  unused uint32_t test_flags)
   1200 {
   1201    /* This test is the mtvsrwa instruction.
   1202     */
   1203    unsigned long long *dst;
   1204    int i;
   1205 
   1206    for (i=0; i < NB_VDARGS; i++) {
   1207       // Only the lower half of the vdarg doubleword arg will be used as input by mtvsrwa
   1208       unsigned int * src = (unsigned int *)&vdargs[i];
   1209       if (!isLE)
   1210          src++;
   1211       r14  = vdargs[i];
   1212       vec_out = (vector unsigned long long){ 0ULL, 0ULL };
   1213 
   1214       (*func)();
   1215       // Only doubleword 0 is used in output
   1216       dst = (unsigned long long *) &vec_out;
   1217       if (isLE)
   1218          dst++;
   1219       printf("%s: %08x => %016llx\n", name, *src, *dst);
   1220    }
   1221 }
   1222 
   1223 static void test_special (special_t *table,
   1224                           const char* name, test_func_t func,
   1225                           unused uint32_t test_flags)
   1226 {
   1227    const char *tmp;
   1228    int i;
   1229 
   1230    for (tmp = name; isspace(*tmp); tmp++)
   1231       continue;
   1232    for (i=0; table[i].name != NULL; i++) {
   1233       if (strcmp(table[i].name, tmp) == 0) {
   1234          (*table[i].test_cb)(name, func, test_flags);
   1235          return;
   1236       }
   1237    }
   1238    fprintf(stderr, "ERROR: no test found for op '%s'\n", name);
   1239 }
   1240 
   1241 static special_t special_move_ops[] = {
   1242    {
   1243       "mfvsrd",  /* move from vector to scalar reg doubleword */
   1244       &mfvs,
   1245    },
   1246    {
   1247       "mtvsrd",  /* move from scalar to vector reg doubleword */
   1248       &mtvs,
   1249    },
   1250    {
   1251       "mtfprwa", /* (extended mnemonic for mtvsrwa) move from scalar to vector reg with twos-complement */
   1252       &mtvs2s,
   1253    },
   1254    {
   1255       "mfvsrwz", /* move from vector to scalar reg word */
   1256       &mfvs,
   1257    },
   1258    {
   1259       "mtvsrwz", /* move from scalar to vector reg word */
   1260       &mtvs2s,
   1261    }
   1262 };
   1263 
   1264 static void test_move_special(const char* name, test_func_t func,
   1265                                 uint32_t test_flags)
   1266 {
   1267    test_special(special_move_ops, name, func, test_flags);
   1268 }
   1269 
   1270 /* Vector Double Word tests */
   1271 
   1272 static void test_av_dint_two_args (const char* name, test_func_t func,
   1273                                    unused uint32_t test_flags)
   1274 {
   1275 
   1276    unsigned long long * dst;
   1277    unsigned int * dst_int;
   1278    int i,j;
   1279    int family = test_flags & PPC_FAMILY;
   1280    int is_vpkudum, is_vpmsumd;
   1281    if (strcmp(name, "vpkudum") == 0)
   1282       is_vpkudum = 1;
   1283    else
   1284       is_vpkudum = 0;
   1285 
   1286    if (strcmp(name, "vpmsumd") == 0)
   1287       is_vpmsumd = 1;
   1288    else
   1289       is_vpmsumd = 0;
   1290 
   1291    for (i = 0; i < NB_VDARGS; i+=2) {
   1292       if (isLE && family == PPC_ALTIVECQ)
   1293          vec_inA = (vector unsigned long long){ vdargs[i+1], vdargs[i] };
   1294       else
   1295          vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1296       for (j = 0; j < NB_VDARGS; j+=2) {
   1297          if (isLE && family == PPC_ALTIVECQ)
   1298             vec_inB = (vector unsigned long long){ vdargs[j+1], vdargs[j] };
   1299          else
   1300             vec_inB = (vector unsigned long long){ vdargs[j], vdargs[j+1] };
   1301          vec_out = (vector unsigned long long){ 0,0 };
   1302 
   1303          (*func)();
   1304          dst_int = (unsigned int *)&vec_out;
   1305          dst  = (unsigned long long*)&vec_out;
   1306 
   1307          printf("%s: ", name);
   1308 
   1309          if (is_vpkudum) {
   1310             printf("Inputs: %08llx %08llx %08llx %08llx\n", vdargs[i] & 0x00000000ffffffffULL,
   1311                    vdargs[i+1] & 0x00000000ffffffffULL, vdargs[j] & 0x00000000ffffffffULL,
   1312                    vdargs[j+1] & 0x00000000ffffffffULL);
   1313             if (isLE)
   1314                printf("         Output: %08x %08x %08x %08x\n", dst_int[2], dst_int[3],
   1315                       dst_int[0], dst_int[1]);
   1316             else
   1317                printf("         Output: %08x %08x %08x %08x\n", dst_int[0], dst_int[1],
   1318                       dst_int[2], dst_int[3]);
   1319          } else if (is_vpmsumd) {
   1320             printf("%016llx @@ %016llx ", vdargs[i], vdargs[j]);
   1321             if (isLE)
   1322                printf(" ==> %016llx\n", dst[1]);
   1323             else
   1324                printf(" ==> %016llx\n", dst[0]);
   1325             printf("\t%016llx @@ %016llx ", vdargs[i+1], vdargs[j+1]);
   1326             if (isLE)
   1327                printf(" ==> %016llx\n", dst[0]);
   1328             else
   1329                printf(" ==> %016llx\n", dst[1]);
   1330          } else if (family == PPC_ALTIVECQ) {
   1331             if (isLE)
   1332                printf("%016llx%016llx @@ %016llx%016llx ==> %016llx%016llx\n",
   1333                       vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1],
   1334                       dst[1], dst[0]);
   1335             else
   1336                printf("%016llx%016llx @@ %016llx%016llx ==> %016llx%016llx\n",
   1337                       vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1],
   1338                       dst[0], dst[1]);
   1339          } else {
   1340             printf("%016llx @@ %016llx ", vdargs[i], vdargs[j]);
   1341             printf(" ==> %016llx\n", dst[0]);
   1342             printf("\t%016llx @@ %016llx ", vdargs[i+1], vdargs[j+1]);
   1343             printf(" ==> %016llx\n", dst[1]);
   1344          }
   1345       }
   1346    }
   1347 }
   1348 
   1349 static void test_av_dint_one_arg (const char* name, test_func_t func,
   1350                                   unused uint32_t test_flags)
   1351 {
   1352 
   1353    unsigned long long * dst;
   1354    int i;
   1355 
   1356    for (i = 0; i < NB_VDARGS; i+=2) {
   1357       vec_inB = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1358       vec_out = (vector unsigned long long){ 0,0 };
   1359 
   1360       (*func)();
   1361       dst  = (unsigned long long*)&vec_out;
   1362 
   1363       printf("%s: ", name);
   1364       printf("%016llx @@ %016llx ", vdargs[i], vdargs[i + 1]);
   1365       printf(" ==> %016llx%016llx\n", dst[0], dst[1]);
   1366    }
   1367 }
   1368 
   1369 static void test_av_dint_one_arg_SHA (const char* name, test_func_t func,
   1370                                       unused uint32_t test_flags)
   1371 {
   1372    unsigned long long * dst;
   1373    int i, st, six;
   1374 
   1375    for (i = 0; i < NB_VDARGS; i+=2) {
   1376       vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1377       vec_out = (vector unsigned long long){ 0,0 };
   1378 
   1379       for (st = 0; st < 2; st++) {
   1380          for (six = 0; six < 16; six+=15) {
   1381             st_six = (st << 4) | six;
   1382             (*func)();
   1383             dst  = (unsigned long long*)&vec_out;
   1384 
   1385             printf("%s: ", name);
   1386             printf("%016llx @@ %016llx ", vdargs[i], vdargs[i + 1]);
   1387             printf(" ==> %016llx || %016llx\n", dst[0], dst[1]);
   1388          }
   1389       }
   1390    }
   1391 }
   1392 
   1393 static void test_av_bcd (const char* name, test_func_t func,
   1394                          unused uint32_t test_flags)
   1395 {
   1396    unsigned long long * dst;
   1397    int i, j;
   1398 
   1399    for (i = 0; i < NUM_VBCD_VALS; i+=2) {
   1400       if (isLE)
   1401          vec_inA = (vector unsigned long long){ vbcd_args[i+1], vbcd_args[i] };
   1402       else
   1403          vec_inA = (vector unsigned long long){ vbcd_args[i], vbcd_args[i+1] };
   1404       for (j = 0; j < NUM_VBCD_VALS; j+=2) {
   1405          if (isLE)
   1406             vec_inB = (vector unsigned long long){ vbcd_args[j+1], vbcd_args[j] };
   1407          else
   1408             vec_inB = (vector unsigned long long){ vbcd_args[j], vbcd_args[j+1] };
   1409          vec_out = (vector unsigned long long){ 0, 0 };
   1410 
   1411          for (PS_bit = 0; PS_bit < 2; PS_bit++) {
   1412             (*func)();
   1413             dst  = (unsigned long long*)&vec_out;
   1414             printf("%s: ", name);
   1415             printf("%016llx || %016llx @@ %016llx || %016llx",
   1416                    vbcd_args[i], vbcd_args[i + 1],
   1417                    vbcd_args[j], vbcd_args[j + 1]);
   1418             if (isLE)
   1419                printf(" ==> %016llx || %016llx\n", dst[1], dst[0]);
   1420             else
   1421                printf(" ==> %016llx || %016llx\n", dst[0], dst[1]);
   1422          }
   1423       }
   1424    }
   1425 }
   1426 
   1427 /* Vector doubleword-to-int tests, two input args, integer result */
   1428 static void test_av_dint_to_int_two_args (const char* name, test_func_t func,
   1429                                           unused uint32_t test_flags)
   1430 {
   1431 
   1432    unsigned int * dst_int;
   1433    int i,j;
   1434    for (i = 0; i < NB_VDARGS; i+=2) {
   1435       vec_inA = (vector unsigned long long){ vdargs_x[i], vdargs_x[i+1] };
   1436       for (j = 0; j < NB_VDARGS; j+=2) {
   1437          vec_inB = (vector unsigned long long){ vdargs_x[j], vdargs_x[j+1] };
   1438          vec_out = (vector unsigned long long){ 0,0 };
   1439 
   1440          (*func)();
   1441          dst_int = (unsigned int *)&vec_out;
   1442 
   1443          printf("%s: ", name);
   1444          printf("%016llx, %016llx @@ %016llx, %016llx ",
   1445                 vdargs_x[i], vdargs_x[i+1],
   1446                 vdargs_x[j], vdargs_x[j+1]);
   1447          if (isLE)
   1448             printf(" ==> %08x %08x %08x %08x\n", dst_int[2], dst_int[3],
   1449                    dst_int[0], dst_int[1]);
   1450          else
   1451             printf(" ==> %08x %08x %08x %08x\n", dst_int[0], dst_int[1],
   1452                    dst_int[2], dst_int[3]);
   1453       }
   1454    }
   1455 }
   1456 
   1457 /* Vector Word tests; two integer args, with double word result */
   1458 
   1459 static void test_av_wint_two_args_dres (const char* name, test_func_t func,
   1460                                         unused uint32_t test_flags)
   1461 {
   1462 
   1463    unsigned long long * dst;
   1464    int i,j;
   1465 
   1466    for (i = 0; i < NB_VWARGS; i+=4) {
   1467       if (isLE)
   1468          vec_inA_wd = (vector unsigned int){ vwargs[i+3], vwargs[i+2], vwargs[i+1], vwargs[i] };
   1469       else
   1470          vec_inA_wd = (vector unsigned int){ vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3] };
   1471       for (j = 0; j < NB_VWARGS; j+=4) {
   1472          if (isLE)
   1473             vec_inB_wd = (vector unsigned int){ vwargs[j+3], vwargs[j+2], vwargs[j+1], vwargs[j] };
   1474          else
   1475             vec_inB_wd = (vector unsigned int){ vwargs[j], vwargs[j+1], vwargs[j+2], vwargs[j+3] };
   1476          vec_out = (vector unsigned long long){ 0, 0 };
   1477 
   1478          (*func)();
   1479          dst  = (unsigned long long *)&vec_out;
   1480          printf("%s: ", name);
   1481          if (isLE)
   1482             printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1483                    vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[1], dst[0]);
   1484          else
   1485             printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1486                    vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[0], dst[1]);
   1487       }
   1488    }
   1489 }
   1490 
   1491 /* Vector Word tests; one input arg, with double word result */
   1492 
   1493 static void test_av_wint_one_arg_dres (const char* name, test_func_t func,
   1494                                        unused uint32_t test_flags)
   1495 {
   1496    unsigned long long * dst;
   1497    int i;
   1498    for (i = 0; i < NB_VWARGS; i+=4) {
   1499       if (isLE)
   1500          vec_inB_wd = (vector unsigned int){ vwargs[i+3], vwargs[i+2], vwargs[i+1], vwargs[i] };
   1501       else
   1502          vec_inB_wd = (vector unsigned int){ vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3] };
   1503       vec_out = (vector unsigned long long){ 0, 0 };
   1504 
   1505       (*func)();
   1506       dst  = (unsigned long long *)&vec_out;
   1507       printf("%s: ", name);
   1508       if (isLE)
   1509          printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1510                 vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[1], dst[0]);
   1511       else
   1512          printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1513                 vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[0], dst[1]);
   1514    }
   1515 }
   1516 
   1517 
   1518 static void test_int_stq_two_regs_imm16 (const char* name,
   1519                                         test_func_t func_IN,
   1520                                         unused uint32_t test_flags)
   1521 {
   1522    /* Store quad word from register pair */
   1523    int offs, k;
   1524    HWord_t base;
   1525    Word_t *iargs_priv;
   1526 
   1527    // private iargs table to store to, note storing pair of regs
   1528    iargs_priv = memalign16(2 * sizeof(Word_t));
   1529 
   1530    base = (HWord_t)&iargs_priv[0];
   1531    for (k = 0; k < 2; k++)  // clear array
   1532       iargs_priv[k] = 0;
   1533 
   1534    offs = 0;
   1535 
   1536    /* setup source register pair */
   1537    r14 = (HWord_t) 0xABCDEF0123456789ULL;
   1538    r15 = (HWord_t) 0x1133557722446688ULL;
   1539 
   1540    r16 = base;                 // store to r16 + offs
   1541 
   1542    (*func_IN)();
   1543 
   1544 #ifndef __powerpc64__
   1545    printf("%s %08x,%08x, %2d => "
   1546 #else
   1547    printf("%s %016llx,%016llx, %3d => "
   1548 #endif
   1549             "%016llx,%016llx)\n",
   1550             name, r14, r15, offs, iargs_priv[0], iargs_priv[1]);
   1551 
   1552    if (verbose) printf("\n");
   1553    free(iargs_priv);
   1554 }
   1555 
   1556 
   1557 static void test_int_stq_three_regs (const char* name,
   1558                                      test_func_t func_IN,
   1559                                      unused uint32_t test_flags)
   1560 {
   1561    /* Store quad word from register pair */
   1562    volatile uint32_t flags, xer;
   1563    int k;
   1564    HWord_t base;
   1565 
   1566    base = (HWord_t)&mem_resv[0];
   1567    for (k = 0; k < 2; k++)  // setup array for lqarx inst
   1568       mem_resv[k] = k;
   1569 
   1570    /* setup source register pair for store */
   1571    r14 = ZERO;
   1572    r15 = ZERO;
   1573    r16 = base;                 // store to r16 + r17
   1574    r17 = ZERO;
   1575 
   1576    /* In order for the store to occur, the lqarx instruction must first
   1577     * be used to load from the address thus creating a reservation at the
   1578     * memory address.  The lqarx instruction is done in the test_stqcx(),
   1579     * then registers 14, r15 are changed to the data to be stored in memory
   1580     * by the stqcx instruction.
   1581     */
   1582    SET_CR_XER_ZERO;
   1583    (*func_IN)();
   1584    GET_CR_XER(flags,xer);
   1585 #ifndef __powerpc64__
   1586    printf("%s %08x,%08x, =>  "
   1587 #else
   1588    printf("%s %016llx,%016llx => "
   1589 #endif
   1590             "%016llx,%016llx; CR=%08x\n",
   1591             name, r14, r15, mem_resv[0], mem_resv[1], flags);
   1592 
   1593    if (verbose) printf("\n");
   1594 }
   1595 
   1596 static void test_int_ldq_two_regs_imm16 (const char* name,
   1597                                         test_func_t func_IN,
   1598                                         unused uint32_t test_flags)
   1599 {
   1600    /* load quad word from register pair */
   1601    volatile uint32_t flags, xer;
   1602    Word_t * mem_priv;
   1603    HWord_t base;
   1604 
   1605    // private iargs table to store to, note storing pair of regs
   1606    mem_priv = memalign16(2 * sizeof(Word_t));  // want 128-bits
   1607 
   1608    base = (HWord_t)&mem_priv[0];
   1609 
   1610    mem_priv[0] = 0xAACCEE0011335577ULL;
   1611    mem_priv[1] = 0xABCDEF0123456789ULL;
   1612 
   1613    r14 = 0;
   1614    r15 = 0;
   1615    r16 = base;                 // fetch from r16 + offs
   1616    SET_CR_XER_ZERO;
   1617    (*func_IN)();
   1618    GET_CR_XER(flags,xer);
   1619 
   1620 #ifndef __powerpc64__
   1621    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = %08x,%08x)\n",
   1622 #else
   1623    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%016llx, 0x%016llx)\n",
   1624 #endif
   1625           name, mem_priv[0], mem_priv[1], r14, r15);
   1626 
   1627    if (verbose) printf("\n");
   1628 
   1629    free(mem_priv);
   1630 }
   1631 
   1632 static void test_int_ldq_three_regs (const char* name,
   1633                                      test_func_t func_IN,
   1634                                      unused uint32_t test_flags)
   1635 {
   1636    /* load quad word from register pair */
   1637    HWord_t base;
   1638 
   1639    base = (HWord_t)&mem_resv[0];
   1640 
   1641    mem_resv[0] = 0xAACCEE0011335577ULL;
   1642    mem_resv[1] = 0xABCDEF0123456789ULL;
   1643 
   1644    r14 = 0;
   1645    r15 = 0;
   1646    r16 = base;                 // fetch from r16 + r17
   1647    r17 = 0;
   1648 
   1649    (*func_IN)();
   1650 
   1651 #ifndef __powerpc64__
   1652    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%08x, 0x%08x)\n",
   1653 #else
   1654    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%016llx, 0x%016llx)\n",
   1655 #endif
   1656           name, mem_resv[0], mem_resv[1], r14, r15);
   1657    if (verbose) printf("\n");
   1658 
   1659 }
   1660 
   1661 static void test_av_dint_three_args (const char* name, test_func_t func,
   1662                                      unused uint32_t test_flags)
   1663 {
   1664 
   1665    unsigned long long * dst;
   1666    int i,j, k;
   1667    int family = test_flags & PPC_FAMILY;
   1668    unsigned long long cin_vals[] = {
   1669                                     // First pair of ULLs have LSB=0, so cin is '0'.
   1670                                     // Second pair of ULLs have LSB=1, so cin is '1'.
   1671                                     0xf000000000000000ULL, 0xf000000000000000ULL,
   1672                                     0xf000000000000000ULL, 0xf000000000000001ULL
   1673    };
   1674    for (i = 0; i < NB_VDARGS; i+=2) {
   1675       if (isLE)
   1676          vec_inA = (vector unsigned long long){ vdargs[i+1], vdargs[i] };
   1677       else
   1678          vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1679       for (j = 0; j < NB_VDARGS; j+=2) {
   1680          if (isLE)
   1681             vec_inB = (vector unsigned long long){ vdargs[j+1], vdargs[j] };
   1682          else
   1683             vec_inB = (vector unsigned long long){ vdargs[j], vdargs[j+1] };
   1684          for (k = 0; k < 4; k+=2) {
   1685             if (family == PPC_ALTIVECQ) {
   1686                if (isLE)
   1687                   vec_inC = (vector unsigned long long){ cin_vals[k+1], cin_vals[k] };
   1688                else
   1689                   vec_inC = (vector unsigned long long){ cin_vals[k], cin_vals[k+1] };
   1690             } else {
   1691                if (isLE)
   1692                   vec_inC = (vector unsigned long long){ vdargs[k+1], vdargs[k] };
   1693                else
   1694                   vec_inC = (vector unsigned long long){ vdargs[k], vdargs[k+1] };
   1695             }
   1696             vec_out = (vector unsigned long long){ 0,0 };
   1697 
   1698             (*func)();
   1699             dst  = (unsigned long long*)&vec_out;
   1700             printf("%s: ", name);
   1701             if (family == PPC_ALTIVECQ) {
   1702                if (isLE)
   1703                   printf("%016llx%016llx @@ %016llx%016llx @@ %llx ==> %016llx%016llx\n",
   1704                          vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1], cin_vals[k+1],
   1705                          dst[1], dst[0]);
   1706                else
   1707                   printf("%016llx%016llx @@ %016llx%016llx @@ %llx ==> %016llx%016llx\n",
   1708                          vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1], cin_vals[k+1],
   1709                          dst[0], dst[1]);
   1710             } else {
   1711                printf("%016llx @@ %016llx @@ %016llx ", vdargs[i], vdargs[j], vdargs[k]);
   1712                if (isLE)
   1713                   printf(" ==> %016llx\n", dst[1]);
   1714                else
   1715                   printf(" ==> %016llx\n", dst[0]);
   1716                printf("\t%016llx @@ %016llx @@ %016llx ", vdargs[i+1], vdargs[j+1], vdargs[k+1]);
   1717                if (isLE)
   1718                   printf(" ==> %016llx\n", dst[0]);
   1719                else
   1720                   printf(" ==> %016llx\n", dst[1]);
   1721             }
   1722          }
   1723       }
   1724    }
   1725 }
   1726 
   1727 
   1728 /* The ALTIVEC_LOOPS and altive_loops defined below are used in do_tests.
   1729  * Add new values to the end; do not change order, since the altivec_loops
   1730  * array is indexed using the enumerated values defined by ALTIVEC_LOOPS.
   1731  */
   1732 enum ALTIVEC_LOOPS {
   1733    ALTV_MOV,
   1734    ALTV_DINT,
   1735    ALTV_INT_DRES,
   1736    ALTV_DINT_IRES,
   1737    ALTV_ONE_INT_DRES,
   1738    ALTV_DINT_THREE_ARGS,
   1739    ALTV_DINT_ONE_ARG,
   1740    ALTV_SHA,
   1741    ATLV_BCD
   1742 };
   1743 
   1744 static test_loop_t altivec_loops[] = {
   1745    &test_move_special,
   1746    &test_av_dint_two_args,
   1747    &test_av_wint_two_args_dres,
   1748    &test_av_dint_to_int_two_args,
   1749    &test_av_wint_one_arg_dres,
   1750    &test_av_dint_three_args,
   1751    &test_av_dint_one_arg,
   1752    &test_av_dint_one_arg_SHA,
   1753    &test_av_bcd,
   1754    NULL
   1755 };
   1756 
   1757 /* Used in do_tests, indexed by flags->nb_args
   1758    Elements correspond to enum test_flags::num args
   1759 */
   1760 static test_loop_t int_loops[] = {
   1761   /* The #defines for the family, number registers need the array
   1762    * to be properly indexed.  This test is for the new ISA 2.0.7
   1763    * instructions.  The infrastructure has been left for the momemnt
   1764    */
   1765    NULL, //&test_int_one_arg,
   1766    NULL, //&test_int_two_args,
   1767    NULL, //&test_int_three_args,
   1768    NULL, //&test_int_two_args,
   1769    NULL, //&test_int_one_reg_imm16,
   1770    NULL, //&test_int_one_reg_imm16,
   1771    NULL, //&test_int_special,
   1772    NULL, //&test_int_ld_one_reg_imm16,
   1773    NULL, //&test_int_ld_two_regs,
   1774    NULL, //&test_int_st_two_regs_imm16,
   1775    NULL, //&test_int_st_three_regs,
   1776    &test_int_stq_two_regs_imm16,
   1777    &test_int_ldq_two_regs_imm16,
   1778    &test_int_stq_three_regs,
   1779    &test_int_ldq_three_regs,
   1780 };
   1781 
   1782 /* Used in do_tests, indexed by flags->nb_args
   1783    Elements correspond to enum test_flags::num args
   1784    Must have NULL for last entry.
   1785  */
   1786 static test_loop_t float_loops[] = {
   1787    NULL,
   1788    &test_float_two_args,
   1789 };
   1790 
   1791 
   1792 static test_t tests_fa_ops_two[] = {
   1793     { &test_fmrgew          , "fmrgew", },
   1794     { &test_fmrgow          , "fmrgow", },
   1795     { NULL,                   NULL,           },
   1796 };
   1797 
   1798 static test_table_t all_tests[] = {
   1799    {
   1800        tests_move_ops_spe,
   1801        "PPC VSR special move insns",
   1802        PPC_ALTIVECD | PPC_MOV | PPC_ONE_ARG,
   1803    },
   1804    {
   1805        tests_aa_dbl_ops_two_args,
   1806        "PPC altivec double word integer insns (arith, compare) with two args",
   1807        PPC_ALTIVECD | PPC_ARITH | PPC_TWO_ARGS,
   1808    },
   1809    {
   1810        tests_aa_word_ops_two_args_dres,
   1811        "PPC altivec integer word instructions with two input args, double word result",
   1812        PPC_ALTIVEC | PPC_ARITH_DRES | PPC_TWO_ARGS,
   1813    },
   1814    {
   1815        tests_aa_dbl_to_int_two_args,
   1816        "PPC altivec doubleword-to-integer instructions with two input args, saturated integer result",
   1817        PPC_ALTIVECD | PPC_DOUBLE_IN_IRES | PPC_TWO_ARGS,
   1818    },
   1819    {
   1820        tests_aa_word_ops_one_arg_dres,
   1821        "PPC altivec integer word instructions with one input arg, double word result",
   1822        PPC_ALTIVEC | PPC_ARITH_DRES | PPC_ONE_ARG,
   1823    },
   1824    {
   1825       tests_istq_ops_two_i16,
   1826       "PPC store quadword insns\n    with one register + one 16 bits immediate args with flags update",
   1827       0x0001050c,
   1828    },
   1829    {
   1830       tests_ildq_ops_two_i16,
   1831       "PPC load quadword insns\n    with one register + one 16 bits immediate args with flags update",
   1832       0x0001050d,
   1833    },
   1834    {
   1835        tests_ldq_ops_three,
   1836        "PPC load quadword insns\n    with three register args",
   1837        0x0001050f,
   1838    },
   1839    {
   1840        tests_stq_ops_three,
   1841        "PPC store quadword insns\n    with three register args",
   1842        0x0001050e,
   1843    },
   1844    {
   1845        tests_fa_ops_two,
   1846        "PPC floating point arith insns with two args",
   1847        0x00020102,
   1848    },
   1849    {
   1850        tests_aa_ops_three    ,
   1851        "PPC altivec integer logical insns with three args",
   1852        0x00060203,
   1853    },
   1854    {
   1855        tests_aa_dbl_ops_one_arg,
   1856        "PPC altivec one vector input arg, hex result",
   1857        0x00060201,
   1858    },
   1859    {
   1860        tests_aa_SHA_ops,
   1861        "PPC altivec SSH insns",
   1862        0x00040B01,
   1863    },
   1864    {
   1865        tests_aa_bcd_ops,
   1866        "PPC altivec BCD insns",
   1867        0x00040B02,
   1868    },
   1869    {
   1870        tests_aa_quadword_two_args,
   1871        "PPC altivec quadword insns, two input args",
   1872        0x00070102,
   1873    },
   1874    {
   1875        tests_aa_quadword_three_args,
   1876        "PPC altivec quadword insns, three input args",
   1877        0x00070103
   1878    },
   1879    { NULL,                   NULL,               0x00000000, },
   1880 };
   1881 
   1882 static void do_tests ( insn_sel_flags_t seln_flags,
   1883                        char *filter)
   1884 {
   1885    test_loop_t *loop;
   1886    test_t *tests;
   1887    int nb_args, type, family;
   1888    int i, j, n;
   1889    int exact;
   1890 
   1891    exact = check_filter(filter);
   1892    n = 0;
   1893    for (i=0; all_tests[i].name != NULL; i++) {
   1894       nb_args = all_tests[i].flags & PPC_NB_ARGS;
   1895 
   1896       /* Check number of arguments */
   1897       if ((nb_args == 1 && !seln_flags.one_arg) ||
   1898           (nb_args == 2 && !seln_flags.two_args) ||
   1899           (nb_args == 3 && !seln_flags.three_args)){
   1900          continue;
   1901       }
   1902       /* Check instruction type */
   1903       type = all_tests[i].flags & PPC_TYPE;
   1904       if ((type == PPC_ARITH   && !seln_flags.arith)   ||
   1905           (type == PPC_LOGICAL && !seln_flags.logical) ||
   1906           (type == PPC_COMPARE && !seln_flags.compare) ||
   1907           (type == PPC_LDST && !seln_flags.ldst)       ||
   1908           (type == PPC_MOV && !seln_flags.ldst)       ||
   1909           (type == PPC_POPCNT && !seln_flags.arith)) {
   1910          continue;
   1911       }
   1912 
   1913       /* Check instruction family */
   1914       family = all_tests[i].flags & PPC_FAMILY;
   1915       if ((family == PPC_INTEGER  && !seln_flags.integer) ||
   1916           (family == PPC_FLOAT    && !seln_flags.floats)  ||
   1917           (family == PPC_ALTIVEC && !seln_flags.altivec)  ||
   1918           (family == PPC_ALTIVECD && !seln_flags.altivec)  ||
   1919           (family == PPC_ALTIVECQ && !seln_flags.altivec)  ||
   1920           (family == PPC_FALTIVEC && !seln_flags.faltivec)) {
   1921          continue;
   1922       }
   1923       /* Check flags update */
   1924       if (((all_tests[i].flags & PPC_CR)  && seln_flags.cr == 0) ||
   1925           (!(all_tests[i].flags & PPC_CR) && seln_flags.cr == 1))
   1926          continue;
   1927 
   1928       /* All passed, do the tests */
   1929       tests = all_tests[i].tests;
   1930 
   1931       loop = NULL;
   1932 
   1933       /* Select the test loop */
   1934       switch (family) {
   1935       case PPC_INTEGER:
   1936          mem_resv = memalign16(2 * sizeof(HWord_t));  // want 128-bits
   1937          loop = &int_loops[nb_args - 1];
   1938          break;
   1939 
   1940       case PPC_FLOAT:
   1941          loop = &float_loops[nb_args - 1];
   1942          break;
   1943 
   1944       case PPC_ALTIVECQ:
   1945          if (nb_args == 2)
   1946             loop = &altivec_loops[ALTV_DINT];
   1947          else if (nb_args == 3)
   1948             loop = &altivec_loops[ALTV_DINT_THREE_ARGS];
   1949          break;
   1950       case PPC_ALTIVECD:
   1951          switch (type) {
   1952          case PPC_MOV:
   1953             loop = &altivec_loops[ALTV_MOV];
   1954             break;
   1955          case PPC_ARITH:
   1956             loop = &altivec_loops[ALTV_DINT];
   1957             break;
   1958          case PPC_DOUBLE_IN_IRES:
   1959             loop = &altivec_loops[ALTV_DINT_IRES];
   1960             break;
   1961          case PPC_LOGICAL:
   1962             if (nb_args == 3)
   1963                loop = &altivec_loops[ALTV_DINT_THREE_ARGS];
   1964             else if (nb_args ==1)
   1965                loop = &altivec_loops[ALTV_DINT_ONE_ARG];
   1966             break;
   1967          default:
   1968             printf("No altivec test defined for type %x\n", type);
   1969          }
   1970          break;
   1971 
   1972       case PPC_FALTIVEC:
   1973          printf("Currently there are no floating altivec tests in this testsuite.\n");
   1974          break;
   1975 
   1976       case PPC_ALTIVEC:
   1977          switch (type) {
   1978          case PPC_ARITH_DRES:
   1979          {
   1980             switch (nb_args) {
   1981             case 1:
   1982                loop = &altivec_loops[ALTV_ONE_INT_DRES];
   1983                break;
   1984             case 2:
   1985                loop = &altivec_loops[ALTV_INT_DRES];
   1986                break;
   1987             default:
   1988                printf("No altivec test defined for number args %d\n", nb_args);
   1989             }
   1990             break;
   1991          }
   1992          case PPC_SHA_OR_BCD:
   1993             if (nb_args == 1)
   1994                loop = &altivec_loops[ALTV_SHA];
   1995             else
   1996                loop = &altivec_loops[ATLV_BCD];
   1997             break;
   1998          default:
   1999             printf("No altivec test defined for type %x\n", type);
   2000          }
   2001          break;
   2002 
   2003       default:
   2004          printf("ERROR: unknown insn family %08x\n", family);
   2005          continue;
   2006       }
   2007       if (1 || verbose > 0)
   2008       for (j=0; tests[j].name != NULL; j++) {
   2009          if (check_name(tests[j].name, filter, exact)) {
   2010             if (verbose > 1)
   2011                printf("Test instruction %s\n", tests[j].name);
   2012             if (loop != NULL)
   2013                (*loop)(tests[j].name, tests[j].func, all_tests[i].flags);
   2014             printf("\n");
   2015             n++;
   2016          }
   2017         }
   2018       if (verbose) printf("\n");
   2019    }
   2020    printf("All done. Tested %d different instructions\n", n);
   2021 }
   2022 
   2023 
   2024 static void usage (void)
   2025 {
   2026    fprintf(stderr,
   2027            "Usage: jm-insns [OPTION]\n"
   2028            "\t-i: test integer instructions (default)\n"
   2029            "\t-f: test floating point instructions\n"
   2030            "\t-a: test altivec instructions\n"
   2031            "\t-A: test all (int, fp, altivec) instructions\n"
   2032            "\t-v: be verbose\n"
   2033            "\t-h: display this help and exit\n"
   2034            );
   2035 }
   2036 
   2037 #endif
   2038 
   2039 int main (int argc, char **argv)
   2040 {
   2041 #ifdef HAS_ISA_2_07
   2042    /* Simple usage:
   2043       ./jm-insns -i   => int insns
   2044       ./jm-insns -f   => fp  insns
   2045       ./jm-insns -a   => av  insns
   2046       ./jm-insns -A   => int, fp and avinsns
   2047    */
   2048    char *filter = NULL;
   2049    insn_sel_flags_t flags;
   2050    int c;
   2051 
   2052    // Args
   2053    flags.one_arg    = 1;
   2054    flags.two_args   = 1;
   2055    flags.three_args = 1;
   2056    // Type
   2057    flags.arith      = 1;
   2058    flags.logical    = 1;
   2059    flags.compare    = 1;
   2060    flags.ldst       = 1;
   2061    // Family
   2062    flags.integer    = 0;
   2063    flags.floats     = 0;
   2064    flags.altivec    = 0;
   2065    flags.faltivec   = 0;
   2066    // Flags
   2067    flags.cr         = 2;
   2068 
   2069    while ((c = getopt(argc, argv, "ifahvA")) != -1) {
   2070       switch (c) {
   2071       case 'i':
   2072          flags.integer  = 1;
   2073          break;
   2074       case 'f':
   2075          build_fargs_table();
   2076          flags.floats   = 1;
   2077          break;
   2078       case 'a':
   2079          flags.altivec  = 1;
   2080          flags.faltivec = 1;
   2081          break;
   2082       case 'A':
   2083          flags.integer  = 1;
   2084          flags.floats   = 1;
   2085          flags.altivec  = 1;
   2086          flags.faltivec = 1;
   2087          break;
   2088       case 'h':
   2089          usage();
   2090          return 0;
   2091       case 'v':
   2092          verbose++;
   2093          break;
   2094       default:
   2095          usage();
   2096          fprintf(stderr, "Unknown argument: '%c'\n", c);
   2097          return 1;
   2098       }
   2099    }
   2100 
   2101    arg_list_size = 0;
   2102 
   2103    build_vargs_table();
   2104    if (verbose > 1) {
   2105       printf("\nInstruction Selection:\n");
   2106       printf("  n_args: \n");
   2107       printf("    one_arg    = %d\n", flags.one_arg);
   2108       printf("    two_args   = %d\n", flags.two_args);
   2109       printf("    three_args = %d\n", flags.three_args);
   2110       printf("  type: \n");
   2111       printf("    arith      = %d\n", flags.arith);
   2112       printf("    logical    = %d\n", flags.logical);
   2113       printf("    compare    = %d\n", flags.compare);
   2114       printf("    ldst       = %d\n", flags.ldst);
   2115       printf("  family: \n");
   2116       printf("    integer    = %d\n", flags.integer);
   2117       printf("    floats     = %d\n", flags.floats);
   2118       printf("    altivec    = %d\n", flags.altivec);
   2119       printf("    faltivec   = %d\n", flags.faltivec);
   2120       printf("  cr update: \n");
   2121       printf("    cr         = %d\n", flags.cr);
   2122       printf("\n");
   2123    }
   2124 
   2125    do_tests( flags, filter );
   2126 #else
   2127    printf("NO ISA 2.07 SUPPORT\n");
   2128 #endif
   2129    return 0;
   2130 }
   2131