Home | History | Annotate | Download | only in ppc32
      1 
      2 /* HOW TO COMPILE:
      3 
      4  * 32bit build:
      5    gcc -Winline -Wall -g -O -mregnames -maltivec -m32
      6  * 64bit build:
      7    gcc -Winline -Wall -g -O -mregnames -maltivec -m64
      8 
      9 
     10  * test_isa_2_07_part1.c:
     11  * PPC tests for the ISA 2.07.  This file is based on the
     12  * jm-insns.c file for the new instructions in the ISA 2.07.  The
     13  * test structure has been kept the same as the original file to
     14  * the extent possible.
     15  *
     16  * Copyright (C) 2013 IBM
     17  *
     18  *   Authors: Carl Love <carll (at) us.ibm.com>
     19  *            Maynard Johnson <maynardj (at) us.ibm.com>
     20  *
     21  *   This program is free software; you can redistribute it and/or
     22  *   modify it under the terms of the GNU General Public License as
     23  *   published by the Free Software Foundation; either version 2 of the
     24  *   License, or (at your option) any later version.
     25  *
     26  *   This program is distributed in the hope that it will be useful,
     27  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
     28  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     29  *   GNU General Public License for more details.
     30  *
     31  *   You should have received a copy of the GNU General Public License
     32  *   along with this program; if not, write to the Free Software
     33  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     34  *
     35  */
     36 
     37 /*
     38  * Operation details
     39  * -----------------
     40  *
     41  * The 'loops' (e.g. int_loops) do the actual work:
     42  *  - loops over as many arguments as the insn needs (regs | imms)
     43  *     - sets up the environment (reset cr,xer, assign src regs...)
     44  *     - maybe modifies the asm instn to test different imm args
     45  *     - calls the test function
     46  *     - retrieves relevant register data (rD,cr,xer,...)
     47  *     - prints argument and result data.
     48  *
     49  * More specifically...
     50  *
     51  * all_tests[i] holds insn tests
     52  *  - of which each holds: {instn_test_arr[], description, flags}
     53  *
     54  * flags hold 3 instn classifiers: {family, type, arg_type}
     55  *
     56  * // The main test loop:
     57  * do_tests( user_ctl_flags ) {
     58  *    foreach(curr_test = all_tests[i]) {
     59  *
     60  *       // flags are used to control what tests are run:
     61  *       if (curr_test->flags && !user_ctl_flags)
     62  *          continue;
     63  *
     64  *       // a 'loop_family_arr' is chosen based on the 'family' flag...
     65  *       switch(curr_test->flags->family) {
     66  *       case x: loop_family_arr = int_loops;
     67  *      ...
     68  *       }
     69  *
     70  *       // ...and the actual test_loop to run is found by indexing into
     71  *       // the loop_family_arr with the 'arg_type' flag:
     72  *       test_loop = loop_family_arr[curr_test->flags->arg_type]
     73  *
     74  *       // finally, loop over all instn tests for this test:
     75  *       foreach (instn_test = curr_test->instn_test_arr[i]) {
     76  *
     77  *          // and call the test_loop with the current instn_test function,name
     78  *          test_loop( instn_test->func, instn_test->name )
     79  *       }
     80  *    }
     81  * }
     82  *
     83  */
     84 
     85 
     86 /**********************************************************************/
     87 
     88 /* Uncomment to enable output of CR flags for float tests */
     89 //#define TEST_FLOAT_FLAGS
     90 
     91 /* Uncomment to enable debug output */
     92 //#define DEBUG_ARGS_BUILD
     93 //#define DEBUG_FILTER
     94 
     95 /**********************************************************************/
     96 #include <stdio.h>
     97 
     98 #ifdef HAS_ISA_2_07
     99 
    100 #include "config.h"
    101 #include <altivec.h>
    102 #include <stdint.h>
    103 
    104 #include <assert.h>
    105 #include <ctype.h>     // isspace
    106 #include <stdlib.h>
    107 #include <string.h>
    108 #include <unistd.h>    // getopt
    109 
    110 #if !defined (__TEST_PPC_H__)
    111 #define __TEST_PPC_H__
    112 
    113 #include "tests/sys_mman.h"
    114 #include "tests/malloc.h"       // memalign16
    115 
    116 #define STATIC_ASSERT(e) sizeof(struct { int:-!(e); })
    117 
    118 /* Something of the same size as void*, so can be safely be coerced
    119  * to/from a pointer type. Also same size as the host's gp registers.
    120  * According to the AltiVec section of the GCC manual, the syntax does
    121  * not allow the use of a typedef name as a type specifier in conjunction
    122  * with the vector keyword, so typedefs uint[32|64]_t are #undef'ed here
    123  * and redefined using #define.
    124  */
    125 #undef uint32_t
    126 #undef uint64_t
    127 #define uint32_t unsigned int
    128 #define uint64_t unsigned long long int
    129 
    130 #ifndef __powerpc64__
    131 typedef uint32_t  HWord_t;
    132 #define ZERO 0
    133 #else
    134 typedef uint64_t  HWord_t;
    135 #define ZERO 0ULL
    136 #endif /* __powerpc64__ */
    137 
    138 #ifdef VGP_ppc64le_linux
    139 #define isLE 1
    140 #else
    141 #define isLE 0
    142 #endif
    143 
    144 typedef uint64_t Word_t;
    145 
    146 enum {
    147     compile_time_test1 = STATIC_ASSERT(sizeof(uint32_t) == 4),
    148     compile_time_test2 = STATIC_ASSERT(sizeof(uint64_t) == 8),
    149 };
    150 
    151 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
    152 
    153 #define SET_CR(_arg) \
    154       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
    155 
    156 #define SET_XER(_arg) \
    157       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
    158 
    159 #define GET_CR(_lval) \
    160       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
    161 
    162 #define GET_XER(_lval) \
    163       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
    164 
    165 #define GET_CR_XER(_lval_cr,_lval_xer) \
    166    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
    167 
    168 #define SET_CR_ZERO \
    169       SET_CR(0)
    170 
    171 #define SET_XER_ZERO \
    172       SET_XER(0)
    173 
    174 #define SET_CR_XER_ZERO \
    175    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
    176 
    177 #define SET_FPSCR_ZERO \
    178    do { double _d = 0.0; \
    179         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
    180    } while (0)
    181 
    182 #define DEFAULT_VSCR 0x0
    183 
    184 static vector unsigned long long vec_out, vec_inA, vec_inB, vec_inC;
    185 static vector unsigned int vec_inA_wd, vec_inB_wd;
    186 
    187 /* XXXX these must all be callee-save regs! */
    188 register double f14 __asm__ ("fr14");
    189 register double f15 __asm__ ("fr15");
    190 register double f16 __asm__ ("fr16");
    191 register double f17 __asm__ ("fr17");
    192 register HWord_t r14 __asm__ ("r14");
    193 register HWord_t r15 __asm__ ("r15");
    194 register HWord_t r16 __asm__ ("r16");
    195 register HWord_t r17 __asm__ ("r17");
    196 
    197 typedef void (*test_func_t) (void);
    198 typedef struct _test test_t;
    199 typedef struct _test_table test_table_t;
    200 struct _test {
    201     test_func_t func;
    202     const char *name;
    203 };
    204 
    205 struct _test_table {
    206     test_t *tests;
    207     const char *name;
    208     uint32_t flags;
    209 };
    210 
    211 typedef void (*test_loop_t) (const char *name, test_func_t func,
    212                              uint32_t flags);
    213 
    214 enum test_flags {
    215     /* Nb arguments */
    216     PPC_ONE_ARG    = 0x00000001,
    217     PPC_TWO_ARGS   = 0x00000002,
    218     PPC_THREE_ARGS = 0x00000003,
    219     PPC_CMP_ARGS   = 0x00000004,  // family: compare
    220     PPC_CMPI_ARGS  = 0x00000005,  // family: compare
    221     PPC_TWO_I16    = 0x00000006,  // family: arith/logical
    222     PPC_SPECIAL    = 0x00000007,  // family: logical
    223     PPC_LD_ARGS    = 0x00000008,  // family: ldst
    224     PPC_LDX_ARGS   = 0x00000009,  // family: ldst
    225     PPC_ST_ARGS    = 0x0000000A,  // family: ldst
    226     PPC_STX_ARGS   = 0x0000000B,  // family: ldst
    227     PPC_STQ_ARGS   = 0x0000000C,  // family: ldst, two args, imm
    228     PPC_LDQ_ARGS   = 0x0000000D,  // family: ldst, two args, imm
    229     PPC_STQX_ARGS  = 0x0000000E,  // family: ldst, three args
    230     PPC_LDQX_ARGS  = 0x0000000F,  // family: ldst, three_args
    231     PPC_NB_ARGS    = 0x0000000F,
    232     /* Type */
    233     PPC_ARITH      = 0x00000100,
    234     PPC_LOGICAL    = 0x00000200,
    235     PPC_COMPARE    = 0x00000300,
    236     PPC_CROP       = 0x00000400,
    237     PPC_LDST       = 0x00000500,
    238     PPC_POPCNT     = 0x00000600,
    239     PPC_ARITH_DRES = 0x00000700,
    240     PPC_DOUBLE_IN_IRES = 0x00000800,
    241     PPC_MOV        = 0x00000A00,
    242     PPC_SHA_OR_BCD = 0x00000B00,
    243     PPC_TYPE       = 0x00000F00,
    244     /* Family */
    245     PPC_INTEGER    = 0x00010000,
    246     PPC_FLOAT      = 0x00020000,
    247     PPC_405        = 0x00030000,  // Leave so we keep numbering consistent
    248     PPC_ALTIVEC    = 0x00040000,
    249     PPC_FALTIVEC   = 0x00050000,
    250     PPC_ALTIVECD   = 0x00060000,    /* double word Altivec tests */
    251     PPC_ALTIVECQ   = 0x00070000,
    252     PPC_FAMILY     = 0x000F0000,
    253     /* Flags: these may be combined, so use separate bitfields. */
    254     PPC_CR         = 0x01000000,
    255     PPC_XER_CA     = 0x02000000,
    256 };
    257 
    258 #endif /* !defined (__TEST_PPC_H__) */
    259 
    260 /* -------------- END #include "test-ppc.h" -------------- */
    261 
    262 
    263 #if defined (DEBUG_ARGS_BUILD)
    264 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    265 #else
    266 #define AB_DPRINTF(fmt, args...) do { } while (0)
    267 #endif
    268 
    269 
    270 #if defined (DEBUG_FILTER)
    271 #define FDPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
    272 #else
    273 #define FDPRINTF(fmt, args...) do { } while (0)
    274 #endif
    275 
    276 #define unused __attribute__ (( unused ))
    277 
    278 typedef struct special {
    279    const char *name;
    280    void (*test_cb)(const char* name, test_func_t func,
    281                    unused uint32_t test_flags);
    282 } special_t;
    283 
    284 static void test_stq(void)
    285 {
    286   __asm__ __volatile__ ("stq  %0, 0(%1)" : :"r" (r14), "r" (r16));
    287 }
    288 
    289 static test_t tests_istq_ops_two_i16[] = {
    290     { &test_stq             , "stq", },
    291     { NULL,                   NULL,           },
    292 };
    293 
    294 static void test_lq(void)
    295 {
    296   __asm__ __volatile__ ("lq  %0, 0(%1)" : :"r" (r14), "r" (r16));
    297 }
    298 
    299 static test_t tests_ildq_ops_two_i16[] = {
    300     { &test_lq              , "lq", },
    301     { NULL,                   NULL,          },
    302 };
    303 
    304 #ifdef HAS_ISA_2_07
    305 Word_t * mem_resv;
    306 static void test_stbcx(void)
    307 {
    308   /* Have to do the lbarx to the memory address to create the reservation
    309    * or the store will not occur.
    310    */
    311   __asm__ __volatile__ ("lbarx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    312   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    313   r15 = (HWord_t) 0x1155337744226688ULL;
    314   __asm__ __volatile__ ("stbcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    315 }
    316 
    317 static void test_sthcx(void)
    318 {
    319   /* Have to do the lharx to the memory address to create the reservation
    320    * or the store will not occur.
    321    */
    322   __asm__ __volatile__ ("lharx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    323   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    324   r15 = (HWord_t) 0x1155337744226688ULL;
    325   __asm__ __volatile__ ("sthcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    326 }
    327 #endif
    328 
    329 static void test_stqcx(void)
    330 {
    331   /* Have to do the lqarx to the memory address to create the reservation
    332    * or the store will not occur.
    333    */
    334   __asm__ __volatile__ ("lqarx  %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    335   r14 = (HWord_t) 0xABEFCD0145236789ULL;
    336   r15 = (HWord_t) 0x1155337744226688ULL;
    337   __asm__ __volatile__ ("stqcx. %0, %1, %2" : :"r" (r14), "r" (r16),"r" (r17));
    338 }
    339 
    340 static test_t tests_stq_ops_three[] = {
    341 #ifdef HAS_ISA_2_07
    342     { &test_stbcx           , "stbcx.", },
    343     { &test_sthcx           , "sthcx.", },
    344 #endif
    345     { &test_stqcx           , "stqcx.", },
    346     { NULL,                   NULL,           },
    347 };
    348 
    349 #ifdef HAS_ISA_2_07
    350 static void test_lbarx(void)
    351 {
    352   __asm__ __volatile__ ("lbarx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    353 }
    354 static void test_lharx(void)
    355 {
    356   __asm__ __volatile__ ("lharx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    357 }
    358 #endif
    359 static void test_lqarx(void)
    360 {
    361   __asm__ __volatile__ ("lqarx  %0, %1, %2, 0" : :"r" (r14), "r" (r16),"r" (r17));
    362 }
    363 
    364 static test_t tests_ldq_ops_three[] = {
    365 #ifdef HAS_ISA_2_07
    366     { &test_lbarx           , "lbarx", },
    367     { &test_lharx           , "lharx", },
    368 #endif
    369     { &test_lqarx           , "lqarx", },
    370     { NULL,                   NULL,           },
    371 };
    372 
    373 static void test_fmrgew (void)
    374 {
    375     __asm__ __volatile__ ("fmrgew        17,14,15");
    376 };
    377 
    378 static void test_fmrgow (void)
    379 {
    380     __asm__ __volatile__ ("fmrgow        17,14,15");
    381 };
    382 
    383 
    384 
    385 // VSX move instructions
    386 static void test_mfvsrd (void)
    387 {
    388    __asm__ __volatile__ ("mfvsrd %0,%x1" : "=r" (r14) : "ws" (vec_inA));
    389 };
    390 
    391 static void test_mfvsrwz (void)
    392 {
    393    __asm__ __volatile__ ("mfvsrwz %0,%x1" : "=r" (r14) : "ws" (vec_inA));
    394 };
    395 
    396 static void test_mtvsrd (void)
    397 {
    398    __asm__ __volatile__ ("mtvsrd %x0,%1" : "=ws" (vec_out) : "r" (r14));
    399 };
    400 
    401 static void test_mtvsrwz (void)
    402 {
    403    __asm__ __volatile__ ("mtvsrwz %x0,%1" : "=ws" (vec_out) : "r" (r14));
    404 };
    405 
    406 
    407 static void test_mtfprwa (void)
    408 {
    409    __asm__ __volatile__ ("mtfprwa %x0,%1" : "=ws" (vec_out) : "r" (r14));
    410 };
    411 
    412 static test_t tests_move_ops_spe[] = {
    413   { &test_mfvsrd          , "mfvsrd" },
    414   { &test_mfvsrwz         , "mfvsrwz" },
    415   { &test_mtvsrd          , "mtvsrd" },
    416   { &test_mtvsrwz         , "mtvsrwz" },
    417   { &test_mtfprwa         , "mtfprwa" },
    418   { NULL,                   NULL }
    419 };
    420 
    421 /* NOTE: Since these are "vector" instructions versus VSX, we must use
    422  * vector constraints.
    423  *
    424  * Vector Double Word tests.
    425  */
    426 static void test_vpkudum (void)
    427 {
    428    __asm__ __volatile__ ("vpkudum %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    429 }
    430 
    431 static void test_vaddudm (void)
    432 {
    433    __asm__ __volatile__ ("vaddudm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    434 }
    435 
    436 static void test_vsubudm (void)
    437 {
    438    __asm__ __volatile__ ("vsubudm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    439 }
    440 
    441 static void test_vmaxud (void)
    442 {
    443    __asm__ __volatile__ ("vmaxud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    444 }
    445 
    446 static void test_vmaxsd (void)
    447 {
    448    __asm__ __volatile__ ("vmaxsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    449 }
    450 
    451 static void test_vminud (void)
    452 {
    453    __asm__ __volatile__ ("vminud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    454 }
    455 
    456 static void test_vminsd (void)
    457 {
    458    __asm__ __volatile__ ("vminsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    459 }
    460 
    461 static void test_vcmpequd (void)
    462 {
    463    __asm__ __volatile__ ("vcmpequd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    464 }
    465 
    466 static void test_vcmpgtud (void)
    467 {
    468    __asm__ __volatile__ ("vcmpgtud %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    469 }
    470 
    471 static void test_vcmpgtsd (void)
    472 {
    473    __asm__ __volatile__ ("vcmpgtsd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    474 }
    475 
    476 static void test_vrld (void)
    477 {
    478    __asm__ __volatile__ ("vrld %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    479 }
    480 
    481 static void test_vsld (void)
    482 {
    483    __asm__ __volatile__ ("vsld %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    484 }
    485 
    486 static void test_vsrad (void)
    487 {
    488    __asm__ __volatile__ ("vsrad %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    489 }
    490 
    491 static void test_vsrd (void)
    492 {
    493    __asm__ __volatile__ ("vsrd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    494 }
    495 
    496 /* Vector Double Word saturate tests.*/
    497 
    498 static void test_vpkudus (void)
    499 {
    500    __asm__ __volatile__ ("vpkudus %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    501 }
    502 
    503 static void test_vpksdus (void)
    504 {
    505    __asm__ __volatile__ ("vpksdus %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    506 }
    507 
    508 static void test_vpksdss (void)
    509 {
    510    __asm__ __volatile__ ("vpksdss %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    511 }
    512 
    513 
    514 /* Vector unpack two words from one vector arg */
    515 static void test_vupkhsw (void)
    516 {
    517     __asm__ __volatile__ ("vupkhsw %0, %1" : "=v" (vec_out): "v" (vec_inB_wd));
    518 }
    519 
    520 static void test_vupklsw (void)
    521 {
    522     __asm__ __volatile__ ("vupklsw %0, %1" : "=v" (vec_out): "v" (vec_inB_wd));
    523 }
    524 
    525 
    526 /* Vector Integer Word tests.*/
    527 static void test_vmulouw (void)
    528 {
    529   __asm__ __volatile__ ("vmulouw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    530 }
    531 
    532 static void test_vmuluwm (void)
    533 {
    534     __asm__ __volatile__ ("vmuluwm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    535 }
    536 
    537 static void test_vmulosw (void)
    538 {
    539     __asm__ __volatile__ ("vmulosw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    540 }
    541 
    542 static void test_vmuleuw (void)
    543 {
    544     __asm__ __volatile__ ("vmuleuw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    545 }
    546 
    547 static void test_vmulesw (void)
    548 {
    549     __asm__ __volatile__ ("vmulesw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    550 }
    551 
    552 static void test_vmrgew (void)
    553 {
    554     __asm__ __volatile__ ("vmrgew %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    555 }
    556 
    557 static void test_vmrgow (void)
    558 {
    559     __asm__ __volatile__ ("vmrgow %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    560 }
    561 
    562 static void test_vpmsumb (void)
    563 {
    564     __asm__ __volatile__ ("vpmsumb %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    565 }
    566 
    567 static void test_vpmsumh (void)
    568 {
    569     __asm__ __volatile__ ("vpmsumh %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    570 }
    571 
    572 static void test_vpmsumw (void)
    573 {
    574     __asm__ __volatile__ ("vpmsumw %0, %1, %2" : "=v" (vec_out): "v" (vec_inA_wd),"v" (vec_inB_wd));
    575 }
    576 
    577 static void test_vpermxor (void)
    578 {
    579   __asm__ __volatile__ ("vpermxor %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    580 }
    581 
    582 static void test_vpmsumd (void)
    583 {
    584     __asm__ __volatile__ ("vpmsumd %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    585 }
    586 
    587 static void test_vnand (void)
    588 {
    589     __asm__ __volatile__ ("vnand %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    590 }
    591 
    592 static void test_vorc (void)
    593 {
    594     __asm__ __volatile__ ("vorc %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    595 }
    596 
    597 static void test_veqv (void)
    598 {
    599     __asm__ __volatile__ ("veqv %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    600 }
    601 
    602 static void test_vcipher (void)
    603 {
    604     __asm__ __volatile__ ("vcipher %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    605 }
    606 
    607 static void test_vcipherlast (void)
    608 {
    609     __asm__ __volatile__ ("vcipherlast %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    610 }
    611 
    612 static void test_vncipher (void)
    613 {
    614     __asm__ __volatile__ ("vncipher %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    615 }
    616 
    617 static void test_vncipherlast (void)
    618 {
    619     __asm__ __volatile__ ("vncipherlast %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    620 }
    621 
    622 static void test_vclzb (void)
    623 {
    624     __asm__ __volatile__ ("vclzb %0, %1" : "=v" (vec_out): "v" (vec_inB));
    625 }
    626 
    627 static void test_vclzw (void)
    628 {
    629     __asm__ __volatile__ ("vclzw %0, %1" : "=v" (vec_out): "v" (vec_inB));
    630 }
    631 
    632 static void test_vclzh (void)
    633 {
    634     __asm__ __volatile__ ("vclzh %0, %1" : "=v" (vec_out): "v" (vec_inB));
    635 }
    636 
    637 static void test_vclzd (void)
    638 {
    639     __asm__ __volatile__ ("vclzd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    640 }
    641 
    642 static void test_vpopcntb (void)
    643 {
    644     __asm__ __volatile__ ("vpopcntb %0, %1" : "=v" (vec_out): "v" (vec_inB));
    645 }
    646 
    647 static void test_vpopcnth (void)
    648 {
    649     __asm__ __volatile__ ("vpopcnth %0, %1" : "=v" (vec_out): "v" (vec_inB));
    650 }
    651 
    652 static void test_vpopcntw (void)
    653 {
    654     __asm__ __volatile__ ("vpopcntw %0, %1" : "=v" (vec_out): "v" (vec_inB));
    655 }
    656 
    657 static void test_vpopcntd (void)
    658 {
    659     __asm__ __volatile__ ("vpopcntd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    660 }
    661 
    662 static void test_vsbox (void)
    663 {
    664     __asm__ __volatile__ ("vsbox %0, %1" : "=v" (vec_out): "v" (vec_inB));
    665 }
    666 
    667 static int st_six;
    668 static void test_vshasigmad (void)
    669 {
    670    switch (st_six) {
    671    case 0x00:
    672       __asm__ __volatile__ ("vshasigmad %0, %1, 0, 0" : "=v" (vec_out): "v" (vec_inA));
    673       break;
    674    case 0x0f:
    675       __asm__ __volatile__ ("vshasigmad %0, %1, 0, 15" : "=v" (vec_out): "v" (vec_inA));
    676       break;
    677    case 0x10:
    678       __asm__ __volatile__ ("vshasigmad %0, %1, 1, 0" : "=v" (vec_out): "v" (vec_inA));
    679       break;
    680    case 0x1f:
    681       __asm__ __volatile__ ("vshasigmad %0, %1, 1, 15" : "=v" (vec_out): "v" (vec_inA));
    682       break;
    683    }
    684 }
    685 
    686 static void test_vshasigmaw (void)
    687 {
    688    switch (st_six) {
    689    case 0x00:
    690       __asm__ __volatile__ ("vshasigmaw %0, %1, 0, 0" : "=v" (vec_out): "v" (vec_inA));
    691       break;
    692    case 0x0f:
    693       __asm__ __volatile__ ("vshasigmaw %0, %1, 0, 15" : "=v" (vec_out): "v" (vec_inA));
    694       break;
    695    case 0x10:
    696       __asm__ __volatile__ ("vshasigmaw %0, %1, 1, 0" : "=v" (vec_out): "v" (vec_inA));
    697       break;
    698    case 0x1f:
    699       __asm__ __volatile__ ("vshasigmaw %0, %1, 1, 15" : "=v" (vec_out): "v" (vec_inA));
    700       break;
    701    }
    702 }
    703 
    704 static int PS_bit;
    705 static void test_bcdadd (void)
    706 {
    707    if (PS_bit)
    708       __asm__ __volatile__ ("bcdadd. %0, %1, %2, 1" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    709    else
    710       __asm__ __volatile__ ("bcdadd. %0, %1, %2, 0" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    711 }
    712 
    713 static void test_bcdsub (void)
    714 {
    715    if (PS_bit)
    716       __asm__ __volatile__ ("bcdsub. %0, %1, %2, 1" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    717    else
    718       __asm__ __volatile__ ("bcdsub. %0, %1, %2, 0" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    719 }
    720 
    721 static void test_vaddcuq (void)
    722 {
    723    __asm__ __volatile__ ("vaddcuq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    724 }
    725 
    726 static void test_vadduqm (void)
    727 {
    728    __asm__ __volatile__ ("vadduqm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    729 }
    730 
    731 static void test_vaddecuq (void)
    732 {
    733   __asm__ __volatile__ ("vaddecuq %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    734 }
    735 
    736 static void test_vaddeuqm (void)
    737 {
    738   __asm__ __volatile__ ("vaddeuqm %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    739 }
    740 
    741 static void test_vsubcuq (void)
    742 {
    743    __asm__ __volatile__ ("vsubcuq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    744 }
    745 
    746 static void test_vsubuqm (void)
    747 {
    748    __asm__ __volatile__ ("vsubuqm %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    749 }
    750 
    751 static void test_vsubecuq (void)
    752 {
    753   __asm__ __volatile__ ("vsubecuq %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    754 }
    755 
    756 static void test_vsubeuqm (void)
    757 {
    758   __asm__ __volatile__ ("vsubeuqm %0, %1, %2, %3" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB),"v" (vec_inC));
    759 }
    760 
    761 static void test_vbpermq (void)
    762 {
    763    __asm__ __volatile__ ("vbpermq %0, %1, %2" : "=v" (vec_out): "v" (vec_inA),"v" (vec_inB));
    764 }
    765 
    766 static void test_vgbbd (void)
    767 {
    768     __asm__ __volatile__ ("vgbbd %0, %1" : "=v" (vec_out): "v" (vec_inB));
    769 }
    770 
    771 
    772 static test_t tests_aa_quadword_two_args[] = {
    773   { &test_vaddcuq       , "vaddcuq" },
    774   { &test_vadduqm       , "vadduqm" },
    775   { &test_vsubcuq       , "vsubcuq" },
    776   { &test_vsubuqm       , "vsubuqm" },
    777   { &test_vbpermq       , "vbpermq" },
    778   { NULL                , NULL      },
    779 };
    780 
    781 static test_t tests_aa_quadword_three_args[] = {
    782   { &test_vaddecuq      , "vaddecuq" },
    783   { &test_vaddeuqm      , "vaddeuqm" },
    784   { &test_vsubecuq      , "vsubecuq" },
    785   { &test_vsubeuqm      , "vsubeuqm" },
    786   { NULL                , NULL      },
    787 };
    788 
    789 static test_t tests_aa_bcd_ops[] = {
    790   { &test_bcdadd        , "bcdadd." },
    791   { &test_bcdsub        , "bcdsub." },
    792   { NULL                , NULL      },
    793 };
    794 
    795 static test_t tests_aa_SHA_ops[] = {
    796   { &test_vshasigmad    , "vshasigmad" },
    797   { &test_vshasigmaw    , "vshasigmaw" },
    798   { NULL                , NULL         },
    799 };
    800 
    801 static test_t tests_aa_ops_three[] = {
    802   { &test_vpermxor        , "vpermxor" },
    803   { NULL                  , NULL       },
    804 };
    805 
    806 static test_t tests_aa_word_ops_one_arg_dres[] = {
    807   { &test_vupkhsw         , "vupkhsw" },
    808   { &test_vupklsw         , "vupklsw" },
    809   { NULL                  , NULL      }
    810 };
    811 
    812 static test_t tests_aa_word_ops_two_args_dres[] = {
    813   { &test_vmulouw         , "vmulouw" },
    814   { &test_vmuluwm         , "vmuluwm" },
    815   { &test_vmulosw         , "vmulosw" },
    816   { &test_vmuleuw         , "vmuleuw" },
    817   { &test_vmulesw         , "vmulesw" },
    818   { &test_vmrgew          , "vmrgew" },
    819   { &test_vmrgow          , "vmrgow" },
    820   { &test_vpmsumb         , "vpmsumb" },
    821   { &test_vpmsumh         , "vpmsumh" },
    822   { &test_vpmsumw         , "vpmsumw" },
    823   { NULL                  , NULL      }
    824 };
    825 
    826 static test_t tests_aa_dbl_ops_two_args[] = {
    827   { &test_vaddudm         , "vaddudm", },
    828   { &test_vsubudm         , "vsubudm", },
    829   { &test_vmaxud          , "vmaxud", },
    830   { &test_vmaxsd          , "vmaxsd", },
    831   { &test_vminud          , "vminud", },
    832   { &test_vminsd          , "vminsd", },
    833   { &test_vcmpequd        , "vcmpequd", },
    834   { &test_vcmpgtud        , "vcmpgtud", },
    835   { &test_vcmpgtsd        , "vcmpgtsd", },
    836   { &test_vrld            , "vrld", },
    837   { &test_vsld            , "vsld", },
    838   { &test_vsrad           , "vsrad", },
    839   { &test_vsrd            , "vsrd", },
    840   { &test_vpkudum         , "vpkudum", },
    841   { &test_vpmsumd         , "vpmsumd", },
    842   { &test_vnand           , "vnand", },
    843   { &test_vorc            , "vorc", },
    844   { &test_veqv            , "veqv", },
    845   { &test_vcipher         , "vcipher" },
    846   { &test_vcipherlast     , "vcipherlast" },
    847   { &test_vncipher        , "vncipher" },
    848   { &test_vncipherlast    , "vncipherlast" },
    849   { NULL                  , NULL,      },
    850 };
    851 
    852 static test_t tests_aa_dbl_ops_one_arg[] = {
    853   { &test_vclzb           , "vclzb" },
    854   { &test_vclzw           , "vclzw" },
    855   { &test_vclzh           , "vclzh" },
    856   { &test_vclzd           , "vclzd" },
    857   { &test_vpopcntb        , "vpopcntb" },
    858   { &test_vpopcnth        , "vpopcnth" },
    859   { &test_vpopcntw        , "vpopcntw" },
    860   { &test_vpopcntd        , "vpopcntd" },
    861   { &test_vsbox           , "vsbox" },
    862   { &test_vgbbd           , "vgbbd" },
    863   { NULL                  , NULL,      }
    864 };
    865 
    866 static test_t tests_aa_dbl_to_int_two_args[] = {
    867   { &test_vpkudus         , "vpkudus", },
    868   { &test_vpksdus         , "vpksdus", },
    869   { &test_vpksdss         , "vpksdss", },
    870   { NULL                  , NULL,      },
    871 };
    872 
    873 static int verbose = 0;
    874 static int arg_list_size = 0;
    875 static unsigned long long * vdargs = NULL;
    876 static unsigned long long * vdargs_x = NULL;
    877 #define NB_VDARGS 9
    878 #define NB_VDARGS_X 4
    879 
    880 static void build_vdargs_table (void)
    881 {
    882    // Each VSX register holds two doubleword integer values
    883    vdargs = memalign16(NB_VDARGS * sizeof(unsigned long long));
    884    vdargs[0] = 0x0102030405060708ULL;
    885    vdargs[1] = 0x090A0B0C0E0D0E0FULL;
    886    vdargs[2] = 0xF1F2F3F4F5F6F7F8ULL;
    887    vdargs[3] = 0xF9FAFBFCFEFDFEFFULL;
    888    vdargs[4] = 0x00007FFFFFFFFFFFULL;
    889    vdargs[5] = 0xFFFF000000000000ULL;
    890    vdargs[6] = 0x0000800000000000ULL;
    891    vdargs[7] = 0x0000000000000000ULL;
    892    vdargs[8] = 0xFFFFFFFFFFFFFFFFULL;
    893 
    894    vdargs_x = memalign16(NB_VDARGS_X * sizeof(unsigned long long));
    895    vdargs_x[0] = 0x000000007c118a2bULL;
    896    vdargs_x[1] = 0x00000000f1112345ULL;
    897    vdargs_x[2] = 0x01F2F3F4F5F6F7F8ULL;
    898    vdargs_x[3] = 0xF9FAFBFCFEFDFEFFULL;
    899 }
    900 
    901 static unsigned int * vwargs = NULL;
    902 #define NB_VWARGS 8
    903 
    904 static void build_vwargs_table (void)
    905 {
    906    // Each VSX register holds 4 integer word values
    907    size_t i = 0;
    908    vwargs = memalign(8, 8 * sizeof(int));
    909    assert(vwargs);
    910    assert(0 == ((8-1) & (unsigned long)vwargs));
    911    vwargs[i++] = 0x01020304;
    912    vwargs[i++] = 0x05060708;
    913    vwargs[i++] = 0x090A0B0C;
    914    vwargs[i++] = 0x0E0D0E0F;
    915    vwargs[i++] = 0xF1F2F3F4;
    916    vwargs[i++] = 0xF5F6F7F8;
    917    vwargs[i++] = 0xF9FAFBFC;
    918    vwargs[i++] = 0xFEFDFEFF;
    919 }
    920 
    921 static unsigned long long vbcd_args[] __attribute__ ((aligned (16))) = {
    922    0x8045090189321003ULL, // Negative BCD value
    923    0x001122334556677dULL,
    924    0x0000107600000001ULL, // Positive BCD value
    925    0x319293945142031aULL,
    926    0x0ULL,                // Valid BCD zero
    927    0xaULL,
    928    0x0ULL,                // Invalid BCD zero (no sign code)
    929    0x0ULL
    930 };
    931 //#define NUM_VBCD_VALS (sizeof vbcd_args/sizeof vbcd_args[0])
    932 #define NUM_VBCD_VALS 8
    933 
    934 static void build_vargs_table (void)
    935 {
    936    build_vdargs_table();
    937    build_vwargs_table();
    938 }
    939 
    940 static double *fargs = NULL;
    941 static int nb_fargs = 0;
    942 
    943 static inline void register_farg (void *farg,
    944                                   int s, uint16_t _exp, uint64_t mant)
    945 {
    946    uint64_t tmp;
    947 
    948    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
    949    *(uint64_t *)farg = tmp;
    950    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
    951               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
    952 }
    953 
    954 static void build_fargs_table (void)
    955 {
    956    /* Double precision:
    957     * Sign goes from zero to one               (1 bit)
    958     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    959     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    960     * + special values:
    961     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    962     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    963     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    964     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    965     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    966     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    967     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    968     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    969     * (8 values)
    970 
    971     * Ref only:
    972     * Single precision
    973     * Sign:     1 bit
    974     * Exponent: 8 bits
    975     * Mantissa: 23 bits
    976     * +0.0      : 0 0x00 0x000000 => 0x00000000
    977     * -0.0      : 1 0x00 0x000000 => 0x80000000
    978     * +infinity : 0 0xFF 0x000000 => 0x7F800000
    979     * -infinity : 1 0xFF 0x000000 => 0xFF800000
    980     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    981     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
    982     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    983     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    984     */
    985    uint64_t mant;
    986    uint16_t _exp, e0, e1;
    987    int s;
    988    int i=0;
    989 
    990    /* Note: VEX isn't so hot with denormals, so don't bother
    991       testing them: set _exp > 0
    992    */
    993 
    994    if ( arg_list_size == 1 ) {   // Large
    995       fargs = malloc(200 * sizeof(double));
    996       for (s=0; s<2; s++) {
    997          for (e0=0; e0<2; e0++) {
    998             for (e1=0x001; ; e1 = ((e1 + 1) << 2) + 6) {
    999                if (e1 >= 0x400)
   1000                   e1 = 0x3fe;
   1001                _exp = (e0 << 10) | e1;
   1002                for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
   1003                     /* Add 'random' bits */
   1004                     mant = ((mant + 0x4A6) << 13) + 0x359) {
   1005                   register_farg(&fargs[i++], s, _exp, mant);
   1006                }
   1007                if (e1 == 0x3fe)
   1008                   break;
   1009             }
   1010          }
   1011       }
   1012    } else {                      // Default
   1013       fargs = malloc(16 * sizeof(double));
   1014       for (s=0; s<2; s++) {                                // x2
   1015             for (e1=0x001; ; e1 = ((e1 + 1) << 13) + 7) {  // x2
   1016                if (e1 >= 0x400)
   1017                   e1 = 0x3fe;
   1018                _exp = e1;
   1019                for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
   1020                     /* Add 'random' bits */
   1021                     mant = ((mant + 0x4A6) << 29) + 0x359) {  // x2
   1022                   register_farg(&fargs[i++], s, _exp, mant);
   1023                }
   1024                if (e1 == 0x3fe)
   1025                   break;
   1026             }
   1027       }
   1028    }
   1029 
   1030    /* Special values */
   1031    /* +0.0      : 0 0x000 0x0000000000000 */
   1032    s = 0;
   1033    _exp = 0x000;
   1034    mant = 0x0000000000000ULL;
   1035    register_farg(&fargs[i++], s, _exp, mant);
   1036    /* -0.0      : 1 0x000 0x0000000000000 */
   1037    s = 1;
   1038    _exp = 0x000;
   1039    mant = 0x0000000000000ULL;
   1040    register_farg(&fargs[i++], s, _exp, mant);
   1041    /* +infinity : 0 0x7FF 0x0000000000000  */
   1042    s = 0;
   1043    _exp = 0x7FF;
   1044    mant = 0x0000000000000ULL;
   1045    register_farg(&fargs[i++], s, _exp, mant);
   1046    /* -infinity : 1 0x7FF 0x0000000000000 */
   1047    s = 1;
   1048    _exp = 0x7FF;
   1049    mant = 0x0000000000000ULL;
   1050    register_farg(&fargs[i++], s, _exp, mant);
   1051    /* +QNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
   1052    s = 0;
   1053    _exp = 0x7FF;
   1054    mant = 0x7FFFFFFFFFFFFULL;
   1055    register_farg(&fargs[i++], s, _exp, mant);
   1056    /* -QNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
   1057    s = 1;
   1058    _exp = 0x7FF;
   1059    mant = 0x7FFFFFFFFFFFFULL;
   1060    register_farg(&fargs[i++], s, _exp, mant);
   1061    /* +SNaN     : 0 0x7FF 0x8000000000000 */
   1062    s = 0;
   1063    _exp = 0x7FF;
   1064    mant = 0x8000000000000ULL;
   1065    register_farg(&fargs[i++], s, _exp, mant);
   1066    /* -SNaN     : 1 0x7FF 0x8000000000000 */
   1067    s = 1;
   1068    _exp = 0x7FF;
   1069    mant = 0x8000000000000ULL;
   1070    register_farg(&fargs[i++], s, _exp, mant);
   1071    AB_DPRINTF("Registered %d fargs values\n", i);
   1072 
   1073    nb_fargs = i;
   1074 }
   1075 
   1076 
   1077 
   1078 static int check_filter (char *filter)
   1079 {
   1080    char *c;
   1081    int ret = 1;
   1082 
   1083    if (filter != NULL) {
   1084       c = strchr(filter, '*');
   1085       if (c != NULL) {
   1086          *c = '\0';
   1087          ret = 0;
   1088       }
   1089    }
   1090    return ret;
   1091 }
   1092 
   1093 static int check_name (const char* name, const char *filter,
   1094                        int exact)
   1095 {
   1096    int nlen, flen;
   1097    int ret = 0;
   1098 
   1099    if (filter != NULL) {
   1100       for (; isspace(*name); name++)
   1101          continue;
   1102       FDPRINTF("Check '%s' againt '%s' (%s match)\n",
   1103                name, filter, exact ? "exact" : "starting");
   1104       nlen = strlen(name);
   1105       flen = strlen(filter);
   1106       if (exact) {
   1107          if (nlen == flen && memcmp(name, filter, flen) == 0)
   1108             ret = 1;
   1109       } else {
   1110          if (flen <= nlen && memcmp(name, filter, flen) == 0)
   1111             ret = 1;
   1112       }
   1113    } else {
   1114       ret = 1;
   1115    }
   1116    return ret;
   1117 }
   1118 
   1119 
   1120 typedef struct insn_sel_flags_t_struct {
   1121    int one_arg, two_args, three_args;
   1122    int arith, logical, compare, ldst;
   1123    int integer, floats, altivec, faltivec;
   1124    int cr;
   1125 } insn_sel_flags_t;
   1126 
   1127 static void test_float_two_args (const char* name, test_func_t func,
   1128                                  unused uint32_t test_flags)
   1129 {
   1130    double res;
   1131    Word_t u0, u1, ur;
   1132    volatile uint32_t flags;
   1133    int i, j;
   1134 
   1135    for (i=0; i<nb_fargs; i+=3) {
   1136       for (j=0; j<nb_fargs; j+=5) {
   1137          u0 = *(Word_t *)(&fargs[i]);
   1138          u1 = *(Word_t *)(&fargs[j]);
   1139          f14 = fargs[i];
   1140          f15 = fargs[j];
   1141 
   1142          SET_FPSCR_ZERO;
   1143          SET_CR_XER_ZERO;
   1144          (*func)();
   1145          GET_CR(flags);
   1146          res = f17;
   1147          ur = *(uint64_t *)(&res);
   1148 
   1149          printf("%s %016llx, %016llx => %016llx",
   1150                 name, u0, u1, ur);
   1151 #if defined TEST_FLOAT_FLAGS
   1152          printf(" (%08x)", flags);
   1153 #endif
   1154          printf("\n");
   1155       }
   1156       if (verbose) printf("\n");
   1157    }
   1158 }
   1159 
   1160 
   1161 static void mfvs(const char* name, test_func_t func,
   1162                  unused uint32_t test_flags)
   1163 {
   1164    /* This test is for move instructions where the input is a scalar register
   1165     * and the destination is a vector register.
   1166     */
   1167    int i;
   1168    volatile Word_t result;
   1169    result = 0ULL;
   1170 
   1171    for (i=0; i < NB_VDARGS; i++) {
   1172       r14 = ZERO;
   1173       if (isLE)
   1174          vec_inA = (vector unsigned long long){ 0ULL, vdargs[i] };
   1175       else
   1176          vec_inA = (vector unsigned long long){ vdargs[i], 0ULL };
   1177 
   1178       (*func)();
   1179       result = r14;
   1180       printf("%s: %016llx => %016llx\n", name, vdargs[i], result);
   1181    }
   1182 }
   1183 
   1184 static void mtvs(const char* name, test_func_t func,
   1185                  unused uint32_t test_flags)
   1186 {
   1187    /* This test is for move instructions where the input is a scalar register
   1188     * and the destination is a vector register.
   1189     */
   1190    unsigned long long *dst;
   1191    int i;
   1192 
   1193    for (i=0; i < NB_VDARGS; i++) {
   1194       r14  = vdargs[i];
   1195       vec_out = (vector unsigned long long){ 0ULL, 0ULL };
   1196 
   1197       (*func)();
   1198       dst = (unsigned long long *) &vec_out;
   1199       if (isLE)
   1200          dst++;
   1201       printf("%s: %016llx => %016llx\n", name, vdargs[i], *dst);
   1202    }
   1203 }
   1204 
   1205 static void mtvs2s(const char* name, test_func_t func,
   1206                  unused uint32_t test_flags)
   1207 {
   1208    /* This test is the mtvsrwa instruction.
   1209     */
   1210    unsigned long long *dst;
   1211    int i;
   1212 
   1213    for (i=0; i < NB_VDARGS; i++) {
   1214       // Only the lower half of the vdarg doubleword arg will be used as input by mtvsrwa
   1215       unsigned int * src = (unsigned int *)&vdargs[i];
   1216       if (!isLE)
   1217          src++;
   1218       r14  = vdargs[i];
   1219       vec_out = (vector unsigned long long){ 0ULL, 0ULL };
   1220 
   1221       (*func)();
   1222       // Only doubleword 0 is used in output
   1223       dst = (unsigned long long *) &vec_out;
   1224       if (isLE)
   1225          dst++;
   1226       printf("%s: %08x => %016llx\n", name, *src, *dst);
   1227    }
   1228 }
   1229 
   1230 static void test_special (special_t *table,
   1231                           const char* name, test_func_t func,
   1232                           unused uint32_t test_flags)
   1233 {
   1234    const char *tmp;
   1235    int i;
   1236 
   1237    for (tmp = name; isspace(*tmp); tmp++)
   1238       continue;
   1239    for (i=0; table[i].name != NULL; i++) {
   1240       if (strcmp(table[i].name, tmp) == 0) {
   1241          (*table[i].test_cb)(name, func, test_flags);
   1242          return;
   1243       }
   1244    }
   1245    fprintf(stderr, "ERROR: no test found for op '%s'\n", name);
   1246 }
   1247 
   1248 static special_t special_move_ops[] = {
   1249    {
   1250       "mfvsrd",  /* move from vector to scalar reg doubleword */
   1251       &mfvs,
   1252    },
   1253    {
   1254       "mtvsrd",  /* move from scalar to vector reg doubleword */
   1255       &mtvs,
   1256    },
   1257    {
   1258       "mtfprwa", /* (extended mnemonic for mtvsrwa) move from scalar to vector reg with twos-complement */
   1259       &mtvs2s,
   1260    },
   1261    {
   1262       "mfvsrwz", /* move from vector to scalar reg word */
   1263       &mfvs,
   1264    },
   1265    {
   1266       "mtvsrwz", /* move from scalar to vector reg word */
   1267       &mtvs2s,
   1268    }
   1269 };
   1270 
   1271 static void test_move_special(const char* name, test_func_t func,
   1272                                 uint32_t test_flags)
   1273 {
   1274    test_special(special_move_ops, name, func, test_flags);
   1275 }
   1276 
   1277 /* Vector Double Word tests */
   1278 
   1279 static void test_av_dint_two_args (const char* name, test_func_t func,
   1280                                    unused uint32_t test_flags)
   1281 {
   1282 
   1283    unsigned long long * dst;
   1284    unsigned int * dst_int;
   1285    int i,j;
   1286    int family = test_flags & PPC_FAMILY;
   1287    int is_vpkudum, is_vpmsumd;
   1288    if (strcmp(name, "vpkudum") == 0)
   1289       is_vpkudum = 1;
   1290    else
   1291       is_vpkudum = 0;
   1292 
   1293    if (strcmp(name, "vpmsumd") == 0)
   1294       is_vpmsumd = 1;
   1295    else
   1296       is_vpmsumd = 0;
   1297 
   1298    for (i = 0; i < NB_VDARGS - 1; i+=2) {
   1299       if (isLE && family == PPC_ALTIVECQ)
   1300          vec_inA = (vector unsigned long long){ vdargs[i+1], vdargs[i] };
   1301       else
   1302          vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1303       for (j = 0; j < NB_VDARGS - 1; j+=2) {
   1304          if (isLE && family == PPC_ALTIVECQ)
   1305             vec_inB = (vector unsigned long long){ vdargs[j+1], vdargs[j] };
   1306          else
   1307             vec_inB = (vector unsigned long long){ vdargs[j], vdargs[j+1] };
   1308          vec_out = (vector unsigned long long){ 0,0 };
   1309 
   1310          (*func)();
   1311          dst_int = (unsigned int *)&vec_out;
   1312          dst  = (unsigned long long*)&vec_out;
   1313 
   1314          printf("%s: ", name);
   1315 
   1316          if (is_vpkudum) {
   1317             printf("Inputs: %08llx %08llx %08llx %08llx\n", vdargs[i] & 0x00000000ffffffffULL,
   1318                    vdargs[i+1] & 0x00000000ffffffffULL, vdargs[j] & 0x00000000ffffffffULL,
   1319                    vdargs[j+1] & 0x00000000ffffffffULL);
   1320             if (isLE)
   1321                printf("         Output: %08x %08x %08x %08x\n", dst_int[2], dst_int[3],
   1322                       dst_int[0], dst_int[1]);
   1323             else
   1324                printf("         Output: %08x %08x %08x %08x\n", dst_int[0], dst_int[1],
   1325                       dst_int[2], dst_int[3]);
   1326          } else if (is_vpmsumd) {
   1327             printf("%016llx @@ %016llx ", vdargs[i], vdargs[j]);
   1328             if (isLE)
   1329                printf(" ==> %016llx\n", dst[1]);
   1330             else
   1331                printf(" ==> %016llx\n", dst[0]);
   1332             printf("\t%016llx @@ %016llx ", vdargs[i+1], vdargs[j+1]);
   1333             if (isLE)
   1334                printf(" ==> %016llx\n", dst[0]);
   1335             else
   1336                printf(" ==> %016llx\n", dst[1]);
   1337          } else if (family == PPC_ALTIVECQ) {
   1338             if (isLE)
   1339                printf("%016llx%016llx @@ %016llx%016llx ==> %016llx%016llx\n",
   1340                       vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1],
   1341                       dst[1], dst[0]);
   1342             else
   1343                printf("%016llx%016llx @@ %016llx%016llx ==> %016llx%016llx\n",
   1344                       vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1],
   1345                       dst[0], dst[1]);
   1346          } else {
   1347             printf("%016llx @@ %016llx ", vdargs[i], vdargs[j]);
   1348             printf(" ==> %016llx\n", dst[0]);
   1349             printf("\t%016llx @@ %016llx ", vdargs[i+1], vdargs[j+1]);
   1350             printf(" ==> %016llx\n", dst[1]);
   1351          }
   1352       }
   1353    }
   1354 }
   1355 
   1356 static void test_av_dint_one_arg (const char* name, test_func_t func,
   1357                                   unused uint32_t test_flags)
   1358 {
   1359 
   1360    unsigned long long * dst;
   1361    int i;
   1362 
   1363    for (i = 0; i < NB_VDARGS - 1; i+=2) {
   1364       vec_inB = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1365       vec_out = (vector unsigned long long){ 0,0 };
   1366 
   1367       (*func)();
   1368       dst  = (unsigned long long*)&vec_out;
   1369 
   1370       printf("%s: ", name);
   1371       printf("%016llx @@ %016llx ", vdargs[i], vdargs[i + 1]);
   1372       printf(" ==> %016llx%016llx\n", dst[0], dst[1]);
   1373    }
   1374 }
   1375 
   1376 static void test_av_dint_one_arg_SHA (const char* name, test_func_t func,
   1377                                       unused uint32_t test_flags)
   1378 {
   1379    unsigned long long * dst;
   1380    int i, st, six;
   1381 
   1382    for (i = 0; i < NB_VDARGS - 1; i+=2) {
   1383       vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1384       vec_out = (vector unsigned long long){ 0,0 };
   1385 
   1386       for (st = 0; st < 2; st++) {
   1387          for (six = 0; six < 16; six+=15) {
   1388             st_six = (st << 4) | six;
   1389             (*func)();
   1390             dst  = (unsigned long long*)&vec_out;
   1391 
   1392             printf("%s: ", name);
   1393             printf("%016llx @@ %016llx ", vdargs[i], vdargs[i + 1]);
   1394             printf(" ==> %016llx || %016llx\n", dst[0], dst[1]);
   1395          }
   1396       }
   1397    }
   1398 }
   1399 
   1400 static void test_av_bcd (const char* name, test_func_t func,
   1401                          unused uint32_t test_flags)
   1402 {
   1403    unsigned long long * dst;
   1404    int i, j;
   1405 
   1406    for (i = 0; i < NUM_VBCD_VALS - 1; i+=2) {
   1407       if (isLE)
   1408          vec_inA = (vector unsigned long long){ vbcd_args[i+1], vbcd_args[i]};
   1409       else
   1410          vec_inA = (vector unsigned long long){ vbcd_args[i], vbcd_args[i+1] };
   1411       for (j = 0; j < NUM_VBCD_VALS - 1; j+=2) {
   1412          if (isLE)
   1413             vec_inB = (vector unsigned long long){ vbcd_args[j+1] , vbcd_args[j] };
   1414          else
   1415             vec_inB = (vector unsigned long long){ vbcd_args[j], vbcd_args[j+1] };
   1416          vec_out = (vector unsigned long long){ 0, 0 };
   1417 
   1418          for (PS_bit = 0; PS_bit < 2; PS_bit++) {
   1419             (*func)();
   1420             dst  = (unsigned long long*)&vec_out;
   1421             printf("%s: ", name);
   1422             printf("%016llx || %016llx @@ %016llx || %016llx",
   1423                    vbcd_args[i], vbcd_args[i + 1],
   1424                    vbcd_args[j], vbcd_args[j + 1]);
   1425             if (isLE)
   1426                printf(" ==> %016llx || %016llx\n", dst[1], dst[0]);
   1427             else
   1428                printf(" ==> %016llx || %016llx\n", dst[0], dst[1]);
   1429          }
   1430       }
   1431    }
   1432 }
   1433 
   1434 /* Vector doubleword-to-int tests, two input args, integer result */
   1435 static void test_av_dint_to_int_two_args (const char* name, test_func_t func,
   1436                                           unused uint32_t test_flags)
   1437 {
   1438 
   1439    unsigned int * dst_int;
   1440    int i,j;
   1441    for (i = 0; i < NB_VDARGS_X - 1; i+=2) {
   1442       vec_inA = (vector unsigned long long){ vdargs_x[i], vdargs_x[i+1] };
   1443       for (j = 0; j < NB_VDARGS_X - 1; j+=2) {
   1444          vec_inB = (vector unsigned long long){ vdargs_x[j], vdargs_x[j+1] };
   1445          vec_out = (vector unsigned long long){ 0,0 };
   1446 
   1447          (*func)();
   1448          dst_int = (unsigned int *)&vec_out;
   1449 
   1450          printf("%s: ", name);
   1451          printf("%016llx, %016llx @@ %016llx, %016llx ",
   1452                 vdargs_x[i], vdargs_x[i+1],
   1453                 vdargs_x[j], vdargs_x[j+1]);
   1454          if (isLE)
   1455             printf(" ==> %08x %08x %08x %08x\n", dst_int[2], dst_int[3],
   1456                    dst_int[0], dst_int[1]);
   1457          else
   1458             printf(" ==> %08x %08x %08x %08x\n", dst_int[0], dst_int[1],
   1459                    dst_int[2], dst_int[3]);
   1460       }
   1461    }
   1462 }
   1463 
   1464 /* Vector Word tests; two integer args, with double word result */
   1465 
   1466 static void test_av_wint_two_args_dres (const char* name, test_func_t func,
   1467                                         unused uint32_t test_flags)
   1468 {
   1469 
   1470    unsigned long long * dst;
   1471    int i,j;
   1472 
   1473    for (i = 0; i < NB_VWARGS; i+=4) {
   1474       if (isLE)
   1475          vec_inA_wd = (vector unsigned int){ vwargs[i+3], vwargs[i+2], vwargs[i+1], vwargs[i] };
   1476       else
   1477          vec_inA_wd = (vector unsigned int){ vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3] };
   1478       for (j = 0; j < NB_VWARGS; j+=4) {
   1479          if (isLE)
   1480             vec_inB_wd = (vector unsigned int){ vwargs[j+3], vwargs[j+2], vwargs[j+1], vwargs[j] };
   1481          else
   1482             vec_inB_wd = (vector unsigned int){ vwargs[j], vwargs[j+1], vwargs[j+2], vwargs[j+3] };
   1483          vec_out = (vector unsigned long long){ 0, 0 };
   1484 
   1485          (*func)();
   1486          dst  = (unsigned long long *)&vec_out;
   1487          printf("%s: ", name);
   1488          if (isLE)
   1489             printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1490                    vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[1], dst[0]);
   1491          else
   1492             printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1493                    vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[0], dst[1]);
   1494       }
   1495    }
   1496 }
   1497 
   1498 /* Vector Word tests; one input arg, with double word result */
   1499 
   1500 static void test_av_wint_one_arg_dres (const char* name, test_func_t func,
   1501                                        unused uint32_t test_flags)
   1502 {
   1503    unsigned long long * dst;
   1504    int i;
   1505    for (i = 0; i < NB_VWARGS; i+=4) {
   1506       if (isLE)
   1507          vec_inB_wd = (vector unsigned int){ vwargs[i+3], vwargs[i+2], vwargs[i+1], vwargs[i] };
   1508       else
   1509          vec_inB_wd = (vector unsigned int){ vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3] };
   1510       vec_out = (vector unsigned long long){ 0, 0 };
   1511 
   1512       (*func)();
   1513       dst  = (unsigned long long *)&vec_out;
   1514       printf("%s: ", name);
   1515       if (isLE)
   1516          printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1517                 vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[1], dst[0]);
   1518       else
   1519          printf("%08x %08x %08x %08x ==> %016llx %016llx\n",
   1520                 vwargs[i], vwargs[i+1], vwargs[i+2], vwargs[i+3], dst[0], dst[1]);
   1521    }
   1522 }
   1523 
   1524 
   1525 static void test_int_stq_two_regs_imm16 (const char* name,
   1526                                         test_func_t func_IN,
   1527                                         unused uint32_t test_flags)
   1528 {
   1529    /* Store quad word from register pair */
   1530    int offs, k;
   1531    HWord_t base;
   1532    Word_t *iargs_priv;
   1533 
   1534    // private iargs table to store to, note storing pair of regs
   1535    iargs_priv = memalign16(2 * sizeof(Word_t));
   1536 
   1537    base = (HWord_t)&iargs_priv[0];
   1538    for (k = 0; k < 2; k++)  // clear array
   1539       iargs_priv[k] = 0;
   1540 
   1541    offs = 0;
   1542 
   1543    /* setup source register pair */
   1544    r14 = (HWord_t) 0xABCDEF0123456789ULL;
   1545    r15 = (HWord_t) 0x1133557722446688ULL;
   1546 
   1547    r16 = base;                 // store to r16 + offs
   1548 
   1549    (*func_IN)();
   1550 
   1551 #ifndef __powerpc64__
   1552    printf("%s %08x,%08x, %2d => "
   1553 #else
   1554    printf("%s %016llx,%016llx, %3d => "
   1555 #endif
   1556             "%016llx,%016llx)\n",
   1557             name, r14, r15, offs, iargs_priv[0], iargs_priv[1]);
   1558 
   1559    if (verbose) printf("\n");
   1560    free(iargs_priv);
   1561 }
   1562 
   1563 
   1564 static void test_int_stq_three_regs (const char* name,
   1565                                      test_func_t func_IN,
   1566                                      unused uint32_t test_flags)
   1567 {
   1568    /* Store quad word from register pair */
   1569    volatile uint32_t flags, xer;
   1570    int k;
   1571    HWord_t base;
   1572 
   1573    base = (HWord_t)&mem_resv[0];
   1574    for (k = 0; k < 2; k++)  // setup array for lqarx inst
   1575       mem_resv[k] = k;
   1576 
   1577    /* setup source register pair for store */
   1578    r14 = ZERO;
   1579    r15 = ZERO;
   1580    r16 = base;                 // store to r16 + r17
   1581    r17 = ZERO;
   1582 
   1583    /* In order for the store to occur, the lqarx instruction must first
   1584     * be used to load from the address thus creating a reservation at the
   1585     * memory address.  The lqarx instruction is done in the test_stqcx(),
   1586     * then registers 14, r15 are changed to the data to be stored in memory
   1587     * by the stqcx instruction.
   1588     */
   1589    SET_CR_XER_ZERO;
   1590    (*func_IN)();
   1591    GET_CR_XER(flags,xer);
   1592 #ifndef __powerpc64__
   1593    printf("%s %08x,%08x, =>  "
   1594 #else
   1595    printf("%s %016llx,%016llx => "
   1596 #endif
   1597             "%016llx,%016llx; CR=%08x\n",
   1598             name, r14, r15, mem_resv[0], mem_resv[1], flags);
   1599 
   1600    if (verbose) printf("\n");
   1601 }
   1602 
   1603 static void test_int_ldq_two_regs_imm16 (const char* name,
   1604                                         test_func_t func_IN,
   1605                                         unused uint32_t test_flags)
   1606 {
   1607    /* load quad word from register pair */
   1608    volatile uint32_t flags, xer;
   1609    Word_t * mem_priv;
   1610    HWord_t base;
   1611 
   1612    // private iargs table to store to, note storing pair of regs
   1613    mem_priv = memalign16(2 * sizeof(Word_t));  // want 128-bits
   1614 
   1615    base = (HWord_t)&mem_priv[0];
   1616 
   1617    mem_priv[0] = 0xAACCEE0011335577ULL;
   1618    mem_priv[1] = 0xABCDEF0123456789ULL;
   1619 
   1620    r14 = 0;
   1621    r15 = 0;
   1622    r16 = base;                 // fetch from r16 + offs
   1623    SET_CR_XER_ZERO;
   1624    (*func_IN)();
   1625    GET_CR_XER(flags,xer);
   1626 
   1627 #ifndef __powerpc64__
   1628    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = %08x,%08x)\n",
   1629 #else
   1630    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%016llx, 0x%016llx)\n",
   1631 #endif
   1632           name, mem_priv[0], mem_priv[1], r14, r15);
   1633 
   1634    if (verbose) printf("\n");
   1635 
   1636    free(mem_priv);
   1637 }
   1638 
   1639 static void test_int_ldq_three_regs (const char* name,
   1640                                      test_func_t func_IN,
   1641                                      unused uint32_t test_flags)
   1642 {
   1643    /* load quad word from register pair */
   1644    HWord_t base;
   1645 
   1646    base = (HWord_t)&mem_resv[0];
   1647 
   1648    mem_resv[0] = 0xAACCEE0011335577ULL;
   1649    mem_resv[1] = 0xABCDEF0123456789ULL;
   1650 
   1651    r14 = 0;
   1652    r15 = 0;
   1653    r16 = base;                 // fetch from r16 + r17
   1654    r17 = 0;
   1655 
   1656    (*func_IN)();
   1657 
   1658 #ifndef __powerpc64__
   1659    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%08x, 0x%08x)\n",
   1660 #else
   1661    printf("%s (0x%016llx, 0x%016llx) =>  (reg_pair = 0x%016llx, 0x%016llx)\n",
   1662 #endif
   1663           name, mem_resv[0], mem_resv[1], r14, r15);
   1664    if (verbose) printf("\n");
   1665 
   1666 }
   1667 
   1668 static void test_av_dint_three_args (const char* name, test_func_t func,
   1669                                      unused uint32_t test_flags)
   1670 {
   1671 
   1672    unsigned long long * dst;
   1673    int i,j, k;
   1674    int family = test_flags & PPC_FAMILY;
   1675    unsigned long long cin_vals[] = {
   1676                                     // First pair of ULLs have LSB=0, so cin is '0'.
   1677                                     // Second pair of ULLs have LSB=1, so cin is '1'.
   1678                                     0xf000000000000000ULL, 0xf000000000000000ULL,
   1679                                     0xf000000000000000ULL, 0xf000000000000001ULL
   1680    };
   1681    for (i = 0; i < NB_VDARGS - 1; i+=2) {
   1682       if (isLE)
   1683          vec_inA = (vector unsigned long long){ vdargs[i+1], vdargs[i] };
   1684       else
   1685          vec_inA = (vector unsigned long long){ vdargs[i], vdargs[i+1] };
   1686       for (j = 0; j < NB_VDARGS - 1; j+=2) {
   1687          if (isLE)
   1688             vec_inB = (vector unsigned long long){ vdargs[j+1], vdargs[j] };
   1689          else
   1690             vec_inB = (vector unsigned long long){ vdargs[j], vdargs[j+1] };
   1691          for (k = 0; k < 4 - 1; k+=2) {
   1692             if (family == PPC_ALTIVECQ) {
   1693                if (isLE)
   1694                   vec_inC = (vector unsigned long long){ cin_vals[k+1], cin_vals[k] };
   1695                else
   1696                   vec_inC = (vector unsigned long long){ cin_vals[k], cin_vals[k+1] };
   1697             } else {
   1698                if (isLE)
   1699                   vec_inC = (vector unsigned long long){ vdargs[k+1], vdargs[k] };
   1700                else
   1701                   vec_inC = (vector unsigned long long){ vdargs[k], vdargs[k+1] };
   1702             }
   1703             vec_out = (vector unsigned long long){ 0,0 };
   1704 
   1705             (*func)();
   1706             dst  = (unsigned long long*)&vec_out;
   1707             printf("%s: ", name);
   1708             if (family == PPC_ALTIVECQ) {
   1709                if (isLE)
   1710                   printf("%016llx%016llx @@ %016llx%016llx @@ %llx ==> %016llx%016llx\n",
   1711                          vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1], cin_vals[k+1],
   1712                          dst[1], dst[0]);
   1713                else
   1714                   printf("%016llx%016llx @@ %016llx%016llx @@ %llx ==> %016llx%016llx\n",
   1715                          vdargs[i], vdargs[i+1], vdargs[j], vdargs[j+1], cin_vals[k+1],
   1716                          dst[0], dst[1]);
   1717             } else {
   1718                printf("%016llx @@ %016llx @@ %016llx ", vdargs[i], vdargs[j], vdargs[k]);
   1719                if (isLE)
   1720                   printf(" ==> %016llx\n", dst[1]);
   1721                else
   1722                   printf(" ==> %016llx\n", dst[0]);
   1723                printf("\t%016llx @@ %016llx @@ %016llx ", vdargs[i+1], vdargs[j+1], vdargs[k+1]);
   1724                if (isLE)
   1725                   printf(" ==> %016llx\n", dst[0]);
   1726                else
   1727                   printf(" ==> %016llx\n", dst[1]);
   1728             }
   1729          }
   1730       }
   1731    }
   1732 }
   1733 
   1734 
   1735 /* The ALTIVEC_LOOPS and altive_loops defined below are used in do_tests.
   1736  * Add new values to the end; do not change order, since the altivec_loops
   1737  * array is indexed using the enumerated values defined by ALTIVEC_LOOPS.
   1738  */
   1739 enum ALTIVEC_LOOPS {
   1740    ALTV_MOV,
   1741    ALTV_DINT,
   1742    ALTV_INT_DRES,
   1743    ALTV_DINT_IRES,
   1744    ALTV_ONE_INT_DRES,
   1745    ALTV_DINT_THREE_ARGS,
   1746    ALTV_DINT_ONE_ARG,
   1747    ALTV_SHA,
   1748    ATLV_BCD
   1749 };
   1750 
   1751 static test_loop_t altivec_loops[] = {
   1752    &test_move_special,
   1753    &test_av_dint_two_args,
   1754    &test_av_wint_two_args_dres,
   1755    &test_av_dint_to_int_two_args,
   1756    &test_av_wint_one_arg_dres,
   1757    &test_av_dint_three_args,
   1758    &test_av_dint_one_arg,
   1759    &test_av_dint_one_arg_SHA,
   1760    &test_av_bcd,
   1761    NULL
   1762 };
   1763 
   1764 /* Used in do_tests, indexed by flags->nb_args
   1765    Elements correspond to enum test_flags::num args
   1766 */
   1767 static test_loop_t int_loops[] = {
   1768   /* The #defines for the family, number registers need the array
   1769    * to be properly indexed.  This test is for the new ISA 2.0.7
   1770    * instructions.  The infrastructure has been left for the momemnt
   1771    */
   1772    NULL, //&test_int_one_arg,
   1773    NULL, //&test_int_two_args,
   1774    NULL, //&test_int_three_args,
   1775    NULL, //&test_int_two_args,
   1776    NULL, //&test_int_one_reg_imm16,
   1777    NULL, //&test_int_one_reg_imm16,
   1778    NULL, //&test_int_special,
   1779    NULL, //&test_int_ld_one_reg_imm16,
   1780    NULL, //&test_int_ld_two_regs,
   1781    NULL, //&test_int_st_two_regs_imm16,
   1782    NULL, //&test_int_st_three_regs,
   1783    &test_int_stq_two_regs_imm16,
   1784    &test_int_ldq_two_regs_imm16,
   1785    &test_int_stq_three_regs,
   1786    &test_int_ldq_three_regs,
   1787 };
   1788 
   1789 /* Used in do_tests, indexed by flags->nb_args
   1790    Elements correspond to enum test_flags::num args
   1791    Must have NULL for last entry.
   1792  */
   1793 static test_loop_t float_loops[] = {
   1794    NULL,
   1795    &test_float_two_args,
   1796 };
   1797 
   1798 
   1799 static test_t tests_fa_ops_two[] = {
   1800     { &test_fmrgew          , "fmrgew", },
   1801     { &test_fmrgow          , "fmrgow", },
   1802     { NULL,                   NULL,           },
   1803 };
   1804 
   1805 static test_table_t all_tests[] = {
   1806    {
   1807        tests_move_ops_spe,
   1808        "PPC VSR special move insns",
   1809        PPC_ALTIVECD | PPC_MOV | PPC_ONE_ARG,
   1810    },
   1811    {
   1812        tests_aa_dbl_ops_two_args,
   1813        "PPC altivec double word integer insns (arith, compare) with two args",
   1814        PPC_ALTIVECD | PPC_ARITH | PPC_TWO_ARGS,
   1815    },
   1816    {
   1817        tests_aa_word_ops_two_args_dres,
   1818        "PPC altivec integer word instructions with two input args, double word result",
   1819        PPC_ALTIVEC | PPC_ARITH_DRES | PPC_TWO_ARGS,
   1820    },
   1821    {
   1822        tests_aa_dbl_to_int_two_args,
   1823        "PPC altivec doubleword-to-integer instructions with two input args, saturated integer result",
   1824        PPC_ALTIVECD | PPC_DOUBLE_IN_IRES | PPC_TWO_ARGS,
   1825    },
   1826    {
   1827        tests_aa_word_ops_one_arg_dres,
   1828        "PPC altivec integer word instructions with one input arg, double word result",
   1829        PPC_ALTIVEC | PPC_ARITH_DRES | PPC_ONE_ARG,
   1830    },
   1831    {
   1832       tests_istq_ops_two_i16,
   1833       "PPC store quadword insns\n    with one register + one 16 bits immediate args with flags update",
   1834       0x0001050c,
   1835    },
   1836    {
   1837       tests_ildq_ops_two_i16,
   1838       "PPC load quadword insns\n    with one register + one 16 bits immediate args with flags update",
   1839       0x0001050d,
   1840    },
   1841    {
   1842        tests_ldq_ops_three,
   1843        "PPC load quadword insns\n    with three register args",
   1844        0x0001050f,
   1845    },
   1846    {
   1847        tests_stq_ops_three,
   1848        "PPC store quadword insns\n    with three register args",
   1849        0x0001050e,
   1850    },
   1851    {
   1852        tests_fa_ops_two,
   1853        "PPC floating point arith insns with two args",
   1854        0x00020102,
   1855    },
   1856    {
   1857        tests_aa_ops_three    ,
   1858        "PPC altivec integer logical insns with three args",
   1859        0x00060203,
   1860    },
   1861    {
   1862        tests_aa_dbl_ops_one_arg,
   1863        "PPC altivec one vector input arg, hex result",
   1864        0x00060201,
   1865    },
   1866    {
   1867        tests_aa_SHA_ops,
   1868        "PPC altivec SSH insns",
   1869        0x00040B01,
   1870    },
   1871    {
   1872        tests_aa_bcd_ops,
   1873        "PPC altivec BCD insns",
   1874        0x00040B02,
   1875    },
   1876    {
   1877        tests_aa_quadword_two_args,
   1878        "PPC altivec quadword insns, two input args",
   1879        0x00070102,
   1880    },
   1881    {
   1882        tests_aa_quadword_three_args,
   1883        "PPC altivec quadword insns, three input args",
   1884        0x00070103
   1885    },
   1886    { NULL,                   NULL,               0x00000000, },
   1887 };
   1888 
   1889 static void do_tests ( insn_sel_flags_t seln_flags,
   1890                        char *filter)
   1891 {
   1892    test_loop_t *loop;
   1893    test_t *tests;
   1894    int nb_args, type, family;
   1895    int i, j, n;
   1896    int exact;
   1897 
   1898    exact = check_filter(filter);
   1899    n = 0;
   1900    for (i=0; all_tests[i].name != NULL; i++) {
   1901       nb_args = all_tests[i].flags & PPC_NB_ARGS;
   1902 
   1903       /* Check number of arguments */
   1904       if ((nb_args == 1 && !seln_flags.one_arg) ||
   1905           (nb_args == 2 && !seln_flags.two_args) ||
   1906           (nb_args == 3 && !seln_flags.three_args)){
   1907          continue;
   1908       }
   1909       /* Check instruction type */
   1910       type = all_tests[i].flags & PPC_TYPE;
   1911       if ((type == PPC_ARITH   && !seln_flags.arith)   ||
   1912           (type == PPC_LOGICAL && !seln_flags.logical) ||
   1913           (type == PPC_COMPARE && !seln_flags.compare) ||
   1914           (type == PPC_LDST && !seln_flags.ldst)       ||
   1915           (type == PPC_MOV && !seln_flags.ldst)       ||
   1916           (type == PPC_POPCNT && !seln_flags.arith)) {
   1917          continue;
   1918       }
   1919 
   1920       /* Check instruction family */
   1921       family = all_tests[i].flags & PPC_FAMILY;
   1922       if ((family == PPC_INTEGER  && !seln_flags.integer) ||
   1923           (family == PPC_FLOAT    && !seln_flags.floats)  ||
   1924           (family == PPC_ALTIVEC && !seln_flags.altivec)  ||
   1925           (family == PPC_ALTIVECD && !seln_flags.altivec)  ||
   1926           (family == PPC_ALTIVECQ && !seln_flags.altivec)  ||
   1927           (family == PPC_FALTIVEC && !seln_flags.faltivec)) {
   1928          continue;
   1929       }
   1930       /* Check flags update */
   1931       if (((all_tests[i].flags & PPC_CR)  && seln_flags.cr == 0) ||
   1932           (!(all_tests[i].flags & PPC_CR) && seln_flags.cr == 1))
   1933          continue;
   1934 
   1935       /* All passed, do the tests */
   1936       tests = all_tests[i].tests;
   1937 
   1938       loop = NULL;
   1939 
   1940       /* Select the test loop */
   1941       switch (family) {
   1942       case PPC_INTEGER:
   1943          mem_resv = memalign16(2 * sizeof(HWord_t));  // want 128-bits
   1944          loop = &int_loops[nb_args - 1];
   1945          break;
   1946 
   1947       case PPC_FLOAT:
   1948          loop = &float_loops[nb_args - 1];
   1949          break;
   1950 
   1951       case PPC_ALTIVECQ:
   1952          if (nb_args == 2)
   1953             loop = &altivec_loops[ALTV_DINT];
   1954          else if (nb_args == 3)
   1955             loop = &altivec_loops[ALTV_DINT_THREE_ARGS];
   1956          break;
   1957       case PPC_ALTIVECD:
   1958          switch (type) {
   1959          case PPC_MOV:
   1960             loop = &altivec_loops[ALTV_MOV];
   1961             break;
   1962          case PPC_ARITH:
   1963             loop = &altivec_loops[ALTV_DINT];
   1964             break;
   1965          case PPC_DOUBLE_IN_IRES:
   1966             loop = &altivec_loops[ALTV_DINT_IRES];
   1967             break;
   1968          case PPC_LOGICAL:
   1969             if (nb_args == 3)
   1970                loop = &altivec_loops[ALTV_DINT_THREE_ARGS];
   1971             else if (nb_args ==1)
   1972                loop = &altivec_loops[ALTV_DINT_ONE_ARG];
   1973             break;
   1974          default:
   1975             printf("No altivec test defined for type %x\n", type);
   1976          }
   1977          break;
   1978 
   1979       case PPC_FALTIVEC:
   1980          printf("Currently there are no floating altivec tests in this testsuite.\n");
   1981          break;
   1982 
   1983       case PPC_ALTIVEC:
   1984          switch (type) {
   1985          case PPC_ARITH_DRES:
   1986          {
   1987             switch (nb_args) {
   1988             case 1:
   1989                loop = &altivec_loops[ALTV_ONE_INT_DRES];
   1990                break;
   1991             case 2:
   1992                loop = &altivec_loops[ALTV_INT_DRES];
   1993                break;
   1994             default:
   1995                printf("No altivec test defined for number args %d\n", nb_args);
   1996             }
   1997             break;
   1998          }
   1999          case PPC_SHA_OR_BCD:
   2000             if (nb_args == 1)
   2001                loop = &altivec_loops[ALTV_SHA];
   2002             else
   2003                loop = &altivec_loops[ATLV_BCD];
   2004             break;
   2005          default:
   2006             printf("No altivec test defined for type %x\n", type);
   2007          }
   2008          break;
   2009 
   2010       default:
   2011          printf("ERROR: unknown insn family %08x\n", family);
   2012          continue;
   2013       }
   2014       if (1 || verbose > 0)
   2015       for (j=0; tests[j].name != NULL; j++) {
   2016          if (check_name(tests[j].name, filter, exact)) {
   2017             if (verbose > 1)
   2018                printf("Test instruction %s\n", tests[j].name);
   2019             if (loop != NULL)
   2020                (*loop)(tests[j].name, tests[j].func, all_tests[i].flags);
   2021             printf("\n");
   2022             n++;
   2023          }
   2024         }
   2025       if (verbose) printf("\n");
   2026    }
   2027    printf("All done. Tested %d different instructions\n", n);
   2028 }
   2029 
   2030 
   2031 static void usage (void)
   2032 {
   2033    fprintf(stderr,
   2034            "Usage: jm-insns [OPTION]\n"
   2035            "\t-i: test integer instructions (default)\n"
   2036            "\t-f: test floating point instructions\n"
   2037            "\t-a: test altivec instructions\n"
   2038            "\t-A: test all (int, fp, altivec) instructions\n"
   2039            "\t-v: be verbose\n"
   2040            "\t-h: display this help and exit\n"
   2041            );
   2042 }
   2043 
   2044 #endif
   2045 
   2046 int main (int argc, char **argv)
   2047 {
   2048 #ifdef HAS_ISA_2_07
   2049    /* Simple usage:
   2050       ./jm-insns -i   => int insns
   2051       ./jm-insns -f   => fp  insns
   2052       ./jm-insns -a   => av  insns
   2053       ./jm-insns -A   => int, fp and avinsns
   2054    */
   2055    char *filter = NULL;
   2056    insn_sel_flags_t flags;
   2057    int c;
   2058 
   2059    // Args
   2060    flags.one_arg    = 1;
   2061    flags.two_args   = 1;
   2062    flags.three_args = 1;
   2063    // Type
   2064    flags.arith      = 1;
   2065    flags.logical    = 1;
   2066    flags.compare    = 1;
   2067    flags.ldst       = 1;
   2068    // Family
   2069    flags.integer    = 0;
   2070    flags.floats     = 0;
   2071    flags.altivec    = 0;
   2072    flags.faltivec   = 0;
   2073    // Flags
   2074    flags.cr         = 2;
   2075 
   2076    while ((c = getopt(argc, argv, "ifahvA")) != -1) {
   2077       switch (c) {
   2078       case 'i':
   2079          flags.integer  = 1;
   2080          break;
   2081       case 'f':
   2082          build_fargs_table();
   2083          flags.floats   = 1;
   2084          break;
   2085       case 'a':
   2086          flags.altivec  = 1;
   2087          flags.faltivec = 1;
   2088          break;
   2089       case 'A':
   2090          flags.integer  = 1;
   2091          flags.floats   = 1;
   2092          flags.altivec  = 1;
   2093          flags.faltivec = 1;
   2094          break;
   2095       case 'h':
   2096          usage();
   2097          return 0;
   2098       case 'v':
   2099          verbose++;
   2100          break;
   2101       default:
   2102          usage();
   2103          fprintf(stderr, "Unknown argument: '%c'\n", c);
   2104          return 1;
   2105       }
   2106    }
   2107 
   2108    arg_list_size = 0;
   2109 
   2110    build_vargs_table();
   2111    if (verbose > 1) {
   2112       printf("\nInstruction Selection:\n");
   2113       printf("  n_args: \n");
   2114       printf("    one_arg    = %d\n", flags.one_arg);
   2115       printf("    two_args   = %d\n", flags.two_args);
   2116       printf("    three_args = %d\n", flags.three_args);
   2117       printf("  type: \n");
   2118       printf("    arith      = %d\n", flags.arith);
   2119       printf("    logical    = %d\n", flags.logical);
   2120       printf("    compare    = %d\n", flags.compare);
   2121       printf("    ldst       = %d\n", flags.ldst);
   2122       printf("  family: \n");
   2123       printf("    integer    = %d\n", flags.integer);
   2124       printf("    floats     = %d\n", flags.floats);
   2125       printf("    altivec    = %d\n", flags.altivec);
   2126       printf("    faltivec   = %d\n", flags.faltivec);
   2127       printf("  cr update: \n");
   2128       printf("    cr         = %d\n", flags.cr);
   2129       printf("\n");
   2130    }
   2131 
   2132    do_tests( flags, filter );
   2133 #else
   2134    printf("NO ISA 2.07 SUPPORT\n");
   2135 #endif
   2136    return 0;
   2137 }
   2138