tests/ppc64/test_isa_2_07_part2.c

/*  Copyright (C) 2013 IBM

 Authors: Carl Love  <carll (at) us.ibm.com>
          Maynard Johnson <maynardj (at) us.ibm.com>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; either version 2 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 02111-1307, USA.

 The GNU General Public License is contained in the file COPYING.

 This program is based heavily on the test_isa_2_06_part*.c source files.
 */

#include <stdio.h>

#ifdef HAS_ISA_2_07

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <altivec.h>
#include <math.h>

#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

static volatile unsigned int cond_reg;

#define True  1
#define False 0

#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)

typedef unsigned char Bool;


/* These functions below that construct a table of floating point
 * values were lifted from none/tests/ppc32/jm-insns.c.
 */

#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t tmp;

   tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   *(uint64_t *)farg = tmp;
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
}

static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t tmp;
   tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   *(uint32_t *)farg = tmp;
}


typedef struct fp_test_args {
   int fra_idx;
   int frb_idx;
} fp_test_args_t;

static int nb_special_fargs;
static double * spec_fargs;
static float * spec_sp_fargs;

static void build_special_fargs_table(void)
{
   /*
    * Double precision:
    * Sign goes from zero to one               (1 bit)
    * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
    * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
    * + special values:
    * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
    * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
    * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
    * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
    * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
    * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
    * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
    * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
    * (8 values)
    *
    * Single precision
    * Sign:     1 bit
    * Exponent: 8 bits
    * Mantissa: 23 bits
    * +0.0      : 0 0x00 0x000000 => 0x00000000
    * -0.0      : 1 0x00 0x000000 => 0x80000000
    * +infinity : 0 0xFF 0x000000 => 0x7F800000
    * -infinity : 1 0xFF 0x000000 => 0xFF800000
    * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
    * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
    * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
    * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
   */

   uint64_t mant;
   uint32_t mant_sp;
   uint16_t _exp;
   int s;
   int j, i = 0;

   if (spec_fargs)
      return;

   spec_fargs = malloc( 20 * sizeof(double) );
   spec_sp_fargs = malloc( 20 * sizeof(float) );

   // #0
   s = 0;
   _exp = 0x3fd;
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #1
   s = 0;
   _exp = 0x404;
   mant = 0xf000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #2
   s = 0;
   _exp = 0x001;
   mant = 0x8000000b77501ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #3
   s = 0;
   _exp = 0x7fe;
   mant = 0x800000000051bULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #4
   s = 0;
   _exp = 0x012;
   mant = 0x3214569900000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* Special values */
   /* +0.0      : 0 0x000 0x0000000000000 */
   // #5
   s = 0;
   _exp = 0x000;
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -0.0      : 1 0x000 0x0000000000000 */
   // #6
   s = 1;
   _exp = 0x000;
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* +infinity : 0 0x7FF 0x0000000000000  */
   // #7
   s = 0;
   _exp = 0x7FF;
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -infinity : 1 0x7FF 0x0000000000000 */
   // #8
   s = 1;
   _exp = 0x7FF;
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /*
    * This comment applies to values #9 and #10 below:
    * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    * so we can't just copy the double-precision value to the corresponding slot in the
    * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    * have to manually set the bits using register_sp_farg().
    */

   /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
   // #9
   s = 0;
   _exp = 0x7FF;
   mant = 0x7FFFFFFFFFFFFULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);
   _exp = 0xff;
   mant_sp = 0x3FFFFF;
   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);

   /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
   // #10
   s = 1;
   _exp = 0x7FF;
   mant = 0x7FFFFFFFFFFFFULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);
   _exp = 0xff;
   mant_sp = 0x3FFFFF;
   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);

   /* +QNaN     : 0 0x7FF 0x8000000000000 */
   // #11
   s = 0;
   _exp = 0x7FF;
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -QNaN     : 1 0x7FF 0x8000000000000 */
   // #12
   s = 1;
   _exp = 0x7FF;
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* denormalized value */
   // #13
   s = 1;
   _exp = 0x000;
   mant = 0x8340000078000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* Negative finite number */
   // #14
   s = 1;
   _exp = 0x40d;
   mant = 0x0650f5a07b353ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* A few positive finite numbers ... */
   // #15
   s = 0;
   _exp = 0x412;
   mant = 0x32585a9900000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #16
   s = 0;
   _exp = 0x413;
   mant = 0x82511a2000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #17
   s = 0;
   _exp = 0x403;
   mant = 0x12ef5a9300000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #18
   s = 0;
   _exp = 0x405;
   mant = 0x14bf5d2300000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   // #19
   s = 0;
   _exp = 0x409;
   mant = 0x76bf982440000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);


   nb_special_fargs = i;
   for (j = 0; j < i; j++) {
      if (!(j == 9 || j == 10))
         spec_sp_fargs[j] = spec_fargs[j];
   }
}

static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
                                                              0, 0, 0, 0 };


static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001,
                                                                0x89abcdef,
                                                                0x00112233,
                                                                0x74556677,
                                                                0x00001abb,
                                                                0x00000001,
                                                                0x31929394,
                                                                0xa1a2a3a4,
};
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)

typedef void (*test_func_t)(void);

struct test_table
{
   test_func_t test_category;
   char * name;
};


typedef enum {
   SINGLE_TEST,
   SINGLE_TEST_SINGLE_RES,
   DOUBLE_TEST,
   DOUBLE_TEST_SINGLE_RES
} precision_type_t;
#define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))

typedef enum {
   VX_FP_SMAS,   // multiply add single precision result
   VX_FP_SMSS,   // multiply sub single precision result
   VX_FP_SNMAS,  // negative multiply add single precision result
   VX_FP_SNMSS,  // negative multiply sub single precision result
   VX_FP_OTHER,
   VX_CONV_WORD,
   VX_ESTIMATE,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_SCALAR_CONV_TO_WORD,
   VX_SCALAR_SP_TO_VECTOR_SP,
   VX_DEFAULT
} vx_fp_test_type;

typedef enum {
   VSX_LOAD = 1,
   VSX_LOAD_SPLAT,
   VSX_STORE,
} vsx_ldst_type;

typedef enum {
   VSX_AND = 1,
   VSX_NAND,
   VSX_ANDC,
   VSX_OR,
   VSX_ORC,
   VSX_NOR,
   VSX_XOR,
   VSX_EQV,
} vsx_log_op;

struct vx_fp_test1
{
   test_func_t test_func;
   const char *name;
   fp_test_args_t * targs;
   int num_tests;
    vx_fp_test_type test_type;
 };

struct ldst_test
{
   test_func_t test_func;
   const char *name;
   precision_type_t precision;
   void * base_addr;
   uint32_t offset;
   vsx_ldst_type type;
};

struct vx_fp_test2
{
   test_func_t test_func;
   const char *name;
   fp_test_args_t * targs;
   int num_tests;
   precision_type_t precision;
   vx_fp_test_type test_type;
   const char * op;
};

struct xs_conv_test
{
   test_func_t test_func;
   const char *name;
   int num_tests;
};

struct simple_test
{
   test_func_t test_func;
   const char *name;
};

struct vsx_logic_test
{
   test_func_t test_func;
   const char *name;
   vsx_log_op op;
};

typedef struct vsx_logic_test logic_test_t;
typedef struct ldst_test ldst_test_t;
typedef struct simple_test xs_conv_test_t;
typedef struct vx_fp_test1 vx_fp_test_basic_t;
typedef struct vx_fp_test2 vx_fp_test2_t;
typedef struct test_table test_table_t;


static vector unsigned int vec_out, vec_inA, vec_inB;

static void test_xscvdpspn(void)
{
   __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

static void test_xscvspdpn(void)
{
   __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

static int do_asp;
static void test_xsmadds(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xsmsubs(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xscvsxdsp (void)
{
   __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

static void test_xscvuxdsp (void)
{
   __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

static void test_xsnmadds(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xsnmsubs(void)
{
   if (do_asp)
      __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
   else
      __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_stxsspx(void)
{
   __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
}

static void test_stxsiwx(void)
{
   __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
}

static void test_lxsiwax(void)
{
   __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}

static void test_lxsiwzx(void)
{
   __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}

static void test_lxsspx(void)
{
   __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
}

static void test_xssqrtsp(void)
{
   __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

static void test_xsrsqrtesp(void)
{
   __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

/* Three argument instuctions */
static void test_xxleqv(void)
{
   __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xxlorc(void)
{
   __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xxlnand(void)
{
   __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xsaddsp(void)
{
  __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

static void test_xssubsp(void)
{
  __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

static void test_xsdivsp(void)
{
  __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
}

static void test_xsmulsp(void)
{
   __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
}

static void test_xsresp(void)
{
   __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
static void test_xsrsp(void)
{
   __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}

fp_test_args_t vx_math_tests[] = {
                                  {8, 8},
                                  {8, 14},
                                  {8, 6},
                                  {8, 5},
                                  {8, 4},
                                  {8, 7},
                                  {8, 9},
                                  {8, 11},
                                  {14, 8},
                                  {14, 14},
                                  {14, 6},
                                  {14, 5},
                                  {14, 4},
                                  {14, 7},
                                  {14, 9},
                                  {14, 11},
                                  {6, 8},
                                  {6, 14},
                                  {6, 6},
                                  {6, 5},
                                  {6, 4},
                                  {6, 7},
                                  {6, 9},
                                  {6, 11},
                                  {5, 8},
                                  {5, 14},
                                  {5, 6},
                                  {5, 5},
                                  {5, 4},
                                  {5, 7},
                                  {5, 9},
                                  {5, 11},
                                  {4, 8},
                                  {4, 14},
                                  {4, 6},
                                  {4, 5},
                                  {4, 1},
                                  {4, 7},
                                  {4, 9},
                                  {4, 11},
                                  {7, 8},
                                  {7, 14},
                                  {7, 6},
                                  {7, 5},
                                  {7, 4},
                                  {7, 7},
                                  {7, 9},
                                  {7, 11},
                                  {10, 8},
                                  {10, 14},
                                  {10, 6},
                                  {10, 5},
                                  {10, 4},
                                  {10, 7},
                                  {10, 9},
                                  {10, 11},
                                  {12, 8},
                                  {12, 14},
                                  {12, 6},
                                  {12, 5},
                                  {12, 4},
                                  {12, 7},
                                  {12, 9},
                                  {12, 11},
                                  {8, 8},
                                  {8, 14},
                                  {8, 6},
                                  {8, 5},
                                  {8, 4},
                                  {8, 7},
                                  {8, 9},
                                  {8, 11},
                                  {14, 8},
                                  {14, 14},
                                  {14, 6},
                                  {14, 5},
                                  {14, 4},
                                  {14, 7},
                                  {14, 9},
                                  {14, 11},
                                  {6, 8},
                                  {6, 14},
                                  {6, 6},
                                  {6, 5},
                                  {6, 4},
                                  {6, 7},
                                  {6, 9},
                                  {6, 11},
                                  {5, 8},
                                  {5, 14},
                                  {5, 6},
                                  {5, 5},
                                  {5, 4},
                                  {5, 7},
                                  {5, 9},
                                  {5, 11},
                                  {4, 8},
                                  {4, 14},
                                  {4, 6},
                                  {4, 5},
                                  {4, 1},
                                  {4, 7},
                                  {4, 9},
                                  {4, 11},
                                  {7, 8},
                                  {7, 14},
                                  {7, 6},
                                  {7, 5},
                                  {7, 4},
                                  {7, 7},
                                  {7, 9},
                                  {7, 11},
                                  {10, 8},
                                  {10, 14},
                                  {10, 6},
                                  {10, 5},
                                  {10, 4},
                                  {10, 7},
                                  {10, 9},
                                  {10, 11},
                                  {12, 8},
                                  {12, 14},
                                  {12, 6},
                                  {12, 5},
                                  {12, 4},
                                  {12, 7},
                                  {12, 9},
                                  {12, 11}
};

// These are all double precision inputs with double word outputs (mostly converted to single precision)
static vx_fp_test_basic_t vx_fp_tests[] = {
                                     { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
                                     { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
                                     { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
                                     { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
                                     { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
                                     { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
                                     { NULL, NULL, NULL, 0, 0 }
};

static vx_fp_test2_t
vsx_one_fp_arg_tests[] = {
                          { &test_xscvdpspn, "xscvdpspn", NULL, 20, SINGLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
                          { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
                          { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                          { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
                          { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
                          { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
                          { NULL, NULL, NULL, 0, 0, 0, NULL}
};

// These are all double precision inputs with double word outputs (mostly converted to single precision)
static vx_fp_test_basic_t
vx_simple_scalar_fp_tests[] = {
                          { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
                          { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
                          { NULL, NULL, NULL, 0 , 0}
};

static ldst_test_t
ldst_tests[] = {
                    { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
                    { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
                    { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
                    { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 1, VSX_LOAD },
                    { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
                    { NULL, NULL, 0, NULL, 0, 0 } };

static xs_conv_test_t
xs_conv_tests[] = {
                   { &test_xscvsxdsp, "xscvsxdsp"},
                   { &test_xscvuxdsp, "xscvuxdsp"},
                   { NULL, NULL}
};

static logic_test_t
logic_tests[] = {
                 { &test_xxleqv,  "xxleqv", VSX_EQV },
                 { &test_xxlorc,  "xxlorc", VSX_ORC },
                 { &test_xxlnand, "xxlnand", VSX_NAND },
                 { NULL, NULL}
};

Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
{
   /* NOTE:
    * This function has been verified only with the xsresp and xsrsqrtes instructions.
    *
    * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    * does an actual reciprocal calculation versus estimation, so the answer we get back from
    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    * precision) and the estimate may still be within expected tolerances.  On top of that,
    * we can't count on these estimates always being the same across implementations.
    * For example, with the fre[s] instruction (which should be correct to within one part
    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    * one implementation could return 1.0111_1111_0000 and another implementation could return
    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    * single bit in common.
    *
    * The upshot is we can't validate the VEX output for these instructions by comparing against
    * stored bit patterns.  We must check that the result is within expected tolerances.
    */

   /* A mask to be used for validation as a last resort.
    * Only use 12 bits of precision for reasons discussed above.
    */
#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000


   Bool result = False;
   double src_dp, res_dp;
   float calc_diff = 0;
   float real_diff = 0;
   double recip_divisor;
   float div_result;
   float calc_diff_tmp;

   src_dp = res_dp = 0;
   Bool src_is_negative = False;
   Bool res_is_negative = False;
   unsigned long long * dst_dp = NULL;
   unsigned long long * src_dp_ull;
   dst_dp = (unsigned long long *) &vec_out;
   src_dp = spec_fargs[idx];
   src_dp_ull = (unsigned long long *) &src_dp;
   src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
   res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
   memcpy(&res_dp, &dst_dp[output_vec_idx], 8);


   // Below are common rules
   if (isnan(src_dp))
      return isnan(res_dp);
   if (fpclassify(src_dp) == FP_ZERO)
      return isinf(res_dp);
   if (!src_is_negative && isinf(src_dp))
      return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
   if (is_rsqrte) {
      if (src_is_negative)
         return isnan(res_dp);
   } else {
      if (src_is_negative && isinf(src_dp))
         return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
   }

   if (is_rsqrte)
      recip_divisor = sqrt(src_dp);
   else
      recip_divisor = src_dp;

   /* The instructions handled by this function take a double precision
    * input, perform a reciprocal estimate in double-precision, round
    * the result to single precision and store into the destination
    * register in double precision format.  So, to check the result
    * for accuracy, we use float (single precision) values.
    */
   div_result = 1.0/recip_divisor;
   calc_diff_tmp = recip_divisor * 16384.0;
   if (isnormal(calc_diff_tmp)) {
      calc_diff = fabs(1.0/calc_diff_tmp);
      real_diff = fabs((float)res_dp - div_result);
      result = ( ( res_dp == div_result )
               || ( real_diff <= calc_diff ) );
#if FRES_DEBUG
      unsigned int * dv = (unsigned int *)&div_result;
      unsigned int * rd = (unsigned int *)&real_diff;
      unsigned int * cd = (unsigned int *)&calc_diff;
      printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
             *dv, *rd, *cd);
#endif

   } else {
      /* Unable to compute theoretical difference, so we fall back to masking out
       * un-precise bits.
       */
      unsigned int * div_result_sp = (unsigned int *)&div_result;
      float res_sp = (float)res_dp;
      unsigned int * dst_sp = (unsigned int *)&res_sp;
#if FRES_DEBUG
      unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
      printf("Unable to compute theoretical difference, so we fall back to masking\n");
      printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
             *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
#endif
      result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
   }
   return result;
}

static void test_vx_fp_ops(void)
{

   test_func_t func;
   int k;
   char * test_name = (char *)malloc(20);
   k = 0;

   build_special_fargs_table();
   while ((func = vx_fp_tests[k].test_func)) {
      int i, repeat = 0;
      unsigned long long * frap, * frbp, * dst;
      vx_fp_test_basic_t test_group = vx_fp_tests[k];
      vx_fp_test_type test_type = test_group.test_type;

      switch (test_type) {
         case VX_FP_SMAS:
         case VX_FP_SMSS:
         case VX_FP_SNMAS:
         case VX_FP_SNMSS:
            if (test_type == VX_FP_SMAS)
               strcpy(test_name, "xsmadd");
            else if (test_type == VX_FP_SMSS)
               strcpy(test_name, "xsmsub");
            else if (test_type == VX_FP_SNMAS)
               strcpy(test_name, "xsnmadd");
            else
               strcpy(test_name, "xsnmsub");

            if (!repeat) {
               repeat = 1;
               strcat(test_name, "asp");
               do_asp = 1;
            }
            break;
         case VX_FP_OTHER:
            strcpy(test_name, test_group.name);
            break;
         default:
            printf("ERROR:  Invalid VX FP test type %d\n", test_type);
            exit(1);
      }

again:
      for (i = 0; i < test_group.num_tests; i++) {
         unsigned int * inA, * inB, * pv;

         fp_test_args_t aTest = test_group.targs[i];
         inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
         inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
         int idx;
         unsigned long long vsr_XT;
         pv = (unsigned int *)&vec_out;

         // Only need to copy one doubleword into each vector's element 0
         memcpy(&vec_inA, inA, 8);
         memcpy(&vec_inB, inB, 8);

         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (test_type != VX_FP_OTHER) {
            /* Then we need a third src argument, which is stored in element 0 of
             * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
             * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
             * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
             * data (input args, result) contain only two inputs, so I arbitrarily
             * use spec_fargs elements 4 and 14 (alternating) for the third source
             * argument.  We can use the same input data for a given pair of
             * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
             * the expected result should be the same.
             */
            int extra_arg_idx;
            if (i % 2)
               extra_arg_idx = 4;
            else
               extra_arg_idx = 14;

            if (repeat) {
               /* We're on the first time through of one of the VX_FP_SMx
                * test types, meaning we're testing a xs<ZZZ>adp case, thus
                * we have to swap inputs as described above:
                *    src2 <= VSX[XT]
                *    src3 <= VSX[XB]
                */
               memcpy(&vec_out, inB, 8);  // src2
               memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8);  //src3
               frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
            } else {
               // Don't need to init src2, as it's done before the switch()
               memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8);  //src3
            }
            memcpy(&vsr_XT, &vec_out, 8);
         }

         (*func)();
         dst = (unsigned long long *) &vec_out;

         if (test_type == VX_FP_OTHER)
            printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
                   *frap, *frbp, *dst);
         else
            printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
                    test_name, vsr_XT, *frap, *frbp, *dst );

      }
      /*
           {
               // Debug code.  Keep this block commented out except when debugging.
               double result, expected;
               memcpy(&result, dst, 8);
               memcpy(&expected, &aTest.dp_bin_result, 8);
               printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
                       spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
                       expected, result );
            }
       */
      printf( "\n" );

      if (repeat) {
         repeat = 0;
         strcat(test_name, "UNKNOWN");
         switch (test_type) {
            case VX_FP_SMAS:
            case VX_FP_SMSS:
            case VX_FP_SNMAS:
            case VX_FP_SNMSS:
               if (test_type == VX_FP_SMAS)
                  strcpy(test_name, "xsmadd");
               else if (test_type == VX_FP_SMSS)
                  strcpy(test_name, "xsmsub");
               else if (test_type == VX_FP_SNMAS)
                  strcpy(test_name, "xsnmadd");
               else
                  strcpy(test_name, "xsnmsub");

               do_asp = 0;
               strcat(test_name, "msp");
               break;
            default:
               break;
         }
         goto again;
      }
      k++;
   }
   printf( "\n" );
   free(test_name);
}


static void test_vsx_one_fp_arg(void)
{
   test_func_t func;
   int k;
   k = 0;
   build_special_fargs_table();

   while ((func = vsx_one_fp_arg_tests[k].test_func)) {
      int idx, i;
      unsigned long long *dst_dp;
      unsigned int * dst_sp;
      vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
      /* size of source operands */
      Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
      /* size of result */
      Bool dp_res = IS_DP_RESULT(test_group.precision);
      Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
      Bool sparse_sp = False;
      int stride = dp ? 2 : 4;
      int loops = is_scalar ? 1 : stride;
      stride = is_scalar ? 1: stride;

      /* For conversions of single to double, the 128-bit input register is sparsely populated:
       *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
       *                     or
       *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
       *
       * For the vector op case, we need to adjust stride from '4' to '2', since
       * we'll only be loading two values per loop into the input register.
       */
      if (!dp && !is_scalar && test_group.test_type == VX_CONV_TO_DOUBLE) {
         sparse_sp = True;
         stride = 2;
      }

      for (i = 0; i < test_group.num_tests; i+=stride) {
         unsigned int * pv;
         void * inB;

         pv = (unsigned int *)&vec_out;
         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            int j;
            unsigned long long * frB_dp;
            for (j = 0; j < loops; j++) {
               inB = (void *)&spec_fargs[i + j];
               // copy double precision FP into vector element i
               memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
            }
            // execute test insn
            (*func)();
            if (dp_res)
               dst_dp = (unsigned long long *) &vec_out;
            else
               dst_sp = (unsigned int *) &vec_out;

            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               if (j)
                  printf("; ");
               frB_dp = (unsigned long long *)&spec_fargs[i + j];
               printf("%s(%016llx)", test_group.op, *frB_dp);
               if (test_group.test_type == VX_ESTIMATE)
               {
                  Bool res;
                  res = check_reciprocal_estimate(is_sqrt, i + j, j);
                  printf(" ==> %s)", res ? "PASS" : "FAIL");
               } else if (dp_res) {
                  printf(" = %016llx", dst_dp[j]);
               } else {
                  printf(" = %08x", dst_sp[j]);
               }
            }
            printf("\n");
         } else {  // single precision test type
            int j;
            // Clear input vector
            pv = (unsigned int *)&vec_inB;
            for (idx = 0; idx < 4; idx++, pv++)
               *pv = 0;

            if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
               /* Take a single-precision value stored in double word element 0
                * of src in double-precision format and convert to single-
                * precision and store in word element 0 of dst.
                */
               double input = spec_sp_fargs[i];
               memcpy(((void *)&vec_inB), (void *)&input, 8);
            } else {
               int skip_slot;
               if (sparse_sp) {
                  skip_slot = 1;
                  loops = 2;
               } else {
                  skip_slot = 0;
               }
               for (j = 0; j < loops; j++) {
                  inB = (void *)&spec_sp_fargs[i + j];
                  // copy single precision FP into vector element i

                  if (skip_slot && j > 0)
                     memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
                  else
                     memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
               }
            }
            // execute test insn
            (*func)();
            if (dp_res)
               dst_dp = (unsigned long long *) &vec_out;
            else
               dst_sp = (unsigned int *) &vec_out;
            // print result
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               if (j)
                  printf("; ");
               printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i + j]));
               if (dp_res)
                     printf(" = %016llx", dst_dp[j]);
               else
                  printf(" = %08x", dst_sp[j]);
            }
            printf("\n");
         }
      }
      k++;
      printf( "\n" );
   }
}

/* This function currently only supports two double precision input arguments. */
static void test_vsx_two_fp_arg(void)
{
   test_func_t func;
   int k = 0;

   build_special_fargs_table();
   while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
      unsigned long long * frap, * frbp, * dst;
      unsigned int * pv;
      int idx;
      vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
      pv = (unsigned int *)&vec_out;
      // clear vec_out
      for (idx = 0; idx < 4; idx++, pv++)
         *pv = 0;

      void * inA, * inB;
      int i;
      for (i = 0; i < test_group.num_tests; i++) {
         fp_test_args_t aTest = test_group.targs[i];
         inA = (void *)&spec_fargs[aTest.fra_idx];
         inB = (void *)&spec_fargs[aTest.frb_idx];
         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
         // Only need to copy one doubleword into each vector's element 0
         memcpy(&vec_inA, inA, 8);
         memcpy(&vec_inB, inB, 8);
         (*func)();
         dst = (unsigned long long *) &vec_out;
         printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
                *frap, *frbp, *dst);
      }
      printf( "\n" );
      k++;
   }
}

/* This function handles the following cases:
 *   1) Single precision value stored in double-precision
 *      floating-point format in doubleword element 0 of src VSX register
 *   2) Integer word value stored in word element 1 of src VSX register
 */
static void _do_store_test (ldst_test_t storeTest)
{
   test_func_t func;
   unsigned int *dst32;
   unsigned int i, idx;
   unsigned int * pv = (unsigned int *) storeTest.base_addr;

   func = storeTest.test_func;
   r14 = (HWord_t) storeTest.base_addr;
   r15 = (HWord_t) storeTest.offset;

   if (storeTest.precision == DOUBLE_TEST_SINGLE_RES) {
      /* source is single precision stored in double precision format */
      /* test some of the pre-defined single precision values */
      for (i = 0; i < nb_special_fargs; i+=3) {
         // clear out storage destination
         for (idx = 0; idx < 4; idx++)
            *(pv + idx) = 0;

         printf( "%s:", storeTest.name );
         unsigned long long * dp;
         double input = spec_sp_fargs[i];
         dp = (unsigned long long *)&input;
         memcpy(&vec_inA, dp, sizeof(unsigned long long));
         printf(" %016llx ==> ", *dp);

         // execute test insn
         (*func)();
         dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
         printf( "%08x\n", *dst32);
      }
   } else {
      // source is an integer word
      for (i = 0; i < NUM_VIARGS_INTS; i++) {
         // clear out storage destination
         for (idx = 0; idx < 4; idx++)
            *(pv + idx) = 0;
         printf( "%s:", storeTest.name );
         unsigned int * pi = (unsigned int *)&vec_inA;
         memcpy(pi + 1, &viargs[i], sizeof(unsigned int));
         printf(" %08x ==> ", *(pi + 1));

         // execute test insn
         (*func)();
         dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
         printf( "%08x\n", *dst32);
      }
   }
   printf("\n");
}

static void _do_load_test(ldst_test_t storeTest)
{
   test_func_t func;
   unsigned int i;
   unsigned long long * dst_dp;

   func = storeTest.test_func;
   r15 = (HWord_t) storeTest.offset;

   if (storeTest.base_addr == NULL) {
      /* Test lxsspx: source is single precision value, so let's */
      /* test some of the pre-defined single precision values. */
      for (i = 0; i + storeTest.offset < nb_special_fargs; i+=3) {
         unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + storeTest.offset];
         printf( "%s:", storeTest.name );
         printf(" %08x ==> ", *sp);
         r14 = (HWord_t)&spec_sp_fargs[i];

         // execute test insn
         (*func)();
         dst_dp = (unsigned long long *) &vec_out;
         printf("%016llx\n", *dst_dp);
      }
   } else {
      // source is an integer word
      for (i = 0; i < NUM_VIARGS_INTS; i++) {
         printf( "%s:", storeTest.name );
         r14 = (HWord_t)&viargs[i + storeTest.offset];
         printf(" %08x ==> ", viargs[i + storeTest.offset]);

         // execute test insn
         (*func)();
         dst_dp = (unsigned long long *) &vec_out;
         printf("%016llx\n", *dst_dp);
      }
   }
   printf("\n");
}

static void test_ldst(void)
{
   int k = 0;

   while (ldst_tests[k].test_func) {
      if (ldst_tests[k].type == VSX_STORE)
         _do_store_test(ldst_tests[k]);
      else {
         _do_load_test(ldst_tests[k]);
      }
      k++;
      printf("\n");
   }
}

static void test_xs_conv_ops(void)
{

   test_func_t func;
   int k = 0;

   build_special_fargs_table();
   while ((func = xs_conv_tests[k].test_func)) {
      int i;
      unsigned long long * dst;
      xs_conv_test_t test_group = xs_conv_tests[k];
      for (i = 0; i < NUM_VIARGS_INTS; i++) {
         unsigned int * inB, * pv;
         int idx;
         inB = (unsigned int *)&viargs[i];
         memcpy(&vec_inB, inB, 4);
         pv = (unsigned int *)&vec_out;
         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;
         (*func)();
         dst = (unsigned long long *) &vec_out;
         printf("#%d: %s %08x => %016llx\n", i, test_group.name, viargs[i], *dst);
      }
      k++;
      printf("\n");
   }
   printf( "\n" );
}


static void test_vsx_logic(void)
{
   logic_test_t aTest;
   test_func_t func;
   int k;
   k = 0;

   while ((func = logic_tests[k].test_func)) {

      unsigned int * pv;
      unsigned int * inA, * inB, * dst;
      int idx, i;
      aTest = logic_tests[k];
      for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
         pv = (unsigned int *)&vec_out;
         inA = &viargs[i];
         inB = &viargs[i];
         memcpy(&vec_inA, inA, sizeof(vector unsigned int));
         memcpy(&vec_inB, inB, sizeof(vector unsigned int));
         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         // execute test insn
         (*func)();
         dst = (unsigned int*) &vec_out;

         printf( "#%d: %10s ", k, aTest.name);
         printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
         printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
         printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
      }
      k++;
   }
   printf( "\n" );
}


//----------------------------------------------------------

static test_table_t all_tests[] = {
                                     { &test_vx_fp_ops,
                                       "Test VSX floating point instructions"},
                                     { &test_vsx_one_fp_arg,
                                       "Test VSX vector and scalar single argument instructions"} ,
                                     { &test_vsx_logic,
                                       "Test VSX logic instructions" },
                                     { &test_xs_conv_ops,
                                       "Test VSX scalar integer conversion instructions" },
                                     { &test_ldst,
                                       "Test VSX load/store dp to sp instructions" },
                                     { &test_vsx_two_fp_arg,
                                       "Test VSX vector and scalar two argument instructions"} ,
                                     { NULL, NULL }
};

#endif

int main(int argc, char *argv[])
{

#ifdef HAS_ISA_2_07
   test_table_t aTest;
   test_func_t func;
   int i = 0;

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];
      printf( "%s\n", aTest.name );
      (*func)();
      i++;
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}