/* (source-browser navigation residue, not part of the program:
   Home | History | Annotate | Download | only in amd64) */
      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <assert.h>
      4 #include <math.h>
      5 #include "tests/malloc.h"
      6 
/* Short aliases for the scalar types used by this test. */
typedef  unsigned char           UChar;   /* raw byte */
typedef  unsigned int            UInt;    /* 32-bit lane view (u32[]) and PRNG state */
/* Used to cast pointers to integers in IS_32_ALIGNED.
   NOTE(review): assumes unsigned long is pointer-sized on the target -- confirm. */
typedef  unsigned long int       UWord;
typedef  unsigned long long int  ULong;
typedef  double                  Double;
typedef  float                   Float;
     13 
     14 #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))
     15 
/* A 128-bit vector, viewable as 16 bytes, four 32-bit words,
   four floats or two doubles. */
typedef  union { UChar u8[16]; UInt u32[4]; Float f32[4]; Double f64[2]; } XMM;
/* A 256-bit vector, viewable as bytes, 32-bit words, or two XMM halves
   (xmm[0] is the half at the lower address). */
typedef  union { UChar u8[32]; UInt u32[8]; XMM xmm[2]; }  YMM;
/* Working set for one test.  GEN_test addresses the fields by byte
   offset (0, 32, 64, 96 for r1..r4 and 128 for m), so the field order
   and the 32-byte size of YMM must not change. */
typedef  struct {  YMM r1; YMM r2; YMM r3; YMM r4; YMM m; }  Block;
     19 
     20 void showFloat ( XMM* vec, int idx )
     21 {
     22    Float f = vec->f32[idx];
     23    int neg = signbit (f);
     24    char sign = neg != 0 ? '-' : ' ';
     25    switch (fpclassify (f)) {
     26       case FP_NORMAL: {
     27          for (int i = idx * 4 + 3; i >= idx * 4; i--)
     28             printf("%02x", (UInt)vec->u8[i]);
     29          break;
     30       }
     31       case FP_INFINITE: {
     32          printf ("[ %cINF ]", sign);
     33          break;
     34       }
     35       case FP_ZERO: {
     36          printf ("[%cZERO ]", sign);
     37          break;
     38       }
     39       case FP_NAN: {
     40          printf ("[  NAN ]");
     41          break;
     42       }
     43       default: {
     44          printf ("[%cSUBNR]", sign);
     45          break;
     46       }
     47    }
     48 }
     49 
     50 void showDouble ( XMM* vec, int idx )
     51 {
     52    Double d = vec->f64[idx];
     53    int neg = signbit (d);
     54    char sign = neg != 0 ? '-' : ' ';
     55    switch (fpclassify (d)) {
     56       case FP_NORMAL: {
     57          for (int i = idx * 8 + 7; i >= idx * 8; i--)
     58             printf("%02x", (UInt)vec->u8[i]);
     59          break;
     60       }
     61       case FP_INFINITE: {
     62          printf ("[     %cINF     ]", sign);
     63          break;
     64       }
     65       case FP_ZERO: {
     66          printf ("[    %cZERO     ]", sign);
     67          break;
     68       }
     69       case FP_NAN: {
     70          printf ("[      NAN     ]");
     71          break;
     72       }
     73       default: {
     74          printf ("[  %cSUBNORMAL  ]", sign);
     75          break;
     76       }
     77    }
     78 }
     79 
     80 void showXMM ( XMM* vec, int isDouble )
     81 {
     82    if (isDouble) {
     83      showDouble ( vec, 1 );
     84      printf (".");
     85      showDouble ( vec, 0 );
     86    } else {
     87      showFloat ( vec, 3 );
     88      printf (".");
     89      showFloat ( vec, 2 );
     90      printf (".");
     91      showFloat ( vec, 1 );
     92      printf (".");
     93      showFloat ( vec, 0 );
     94    }
     95 }
     96 
     97 void showYMM ( YMM* vec, int isDouble )
     98 {
     99    assert(IS_32_ALIGNED(vec));
    100    showXMM ( &vec->xmm[1], isDouble );
    101    printf(".");
    102    showXMM ( &vec->xmm[0], isDouble );
    103 }
    104 
    105 void showBlock ( char* msg, Block* block, int isDouble )
    106 {
    107    printf("  %s\n", msg);
    108    printf("r1: "); showYMM(&block->r1, isDouble); printf("\n");
    109    printf("r2: "); showYMM(&block->r2, isDouble); printf("\n");
    110    printf("r3: "); showYMM(&block->r3, isDouble); printf("\n");
    111    printf("r4: "); showYMM(&block->r4, isDouble); printf("\n");
    112    printf(" m: "); showYMM(&block->m, isDouble); printf("\n");
    113 }
    114 
/* Ten special floating-point values (+-0, +-1, +-inf, +-NaN, +-denormal).
   Filled in by init_special_values(); specialFBlock() and
   specialDBlock() read it with an index taken modulo 10. */
static Double special_values[10];
    116 
    117 static __attribute__((noinline))
    118 Double negate ( Double d ) { return -d; }
    119 static __attribute__((noinline))
    120 Double divf64 ( Double x, Double y ) { return x/y; }
    121 
    122 static __attribute__((noinline))
    123 Double plusZero  ( void ) { return 0.0; }
    124 static __attribute__((noinline))
    125 Double minusZero ( void ) { return negate(plusZero()); }
    126 
    127 static __attribute__((noinline))
    128 Double plusOne  ( void ) { return 1.0; }
    129 static __attribute__((noinline))
    130 Double minusOne ( void ) { return negate(plusOne()); }
    131 
    132 static __attribute__((noinline))
    133 Double plusInf   ( void ) { return 1.0 / 0.0; }
    134 static __attribute__((noinline))
    135 Double minusInf  ( void ) { return negate(plusInf()); }
    136 
    137 static __attribute__((noinline))
    138 Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
    139 static __attribute__((noinline))
    140 Double minusNaN ( void ) { return negate(plusNaN()); }
    141 
    142 static __attribute__((noinline))
    143 Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
    144 static __attribute__((noinline))
    145 Double minusDenorm ( void ) { return negate(plusDenorm()); }
    146 
    147 static void init_special_values ( void )
    148 {
    149    special_values[0] = plusZero();
    150    special_values[1] = minusZero();
    151    special_values[2] = plusOne();
    152    special_values[3] = minusOne();
    153    special_values[4] = plusInf();
    154    special_values[5] = minusInf();
    155    special_values[6] = plusNaN();
    156    special_values[7] = minusNaN();
    157    special_values[8] = plusDenorm();
    158    special_values[9] = minusDenorm();
    159 }
    160 
    161 void specialFBlock ( Block* b )
    162 {
    163    int i;
    164    Float* p = (Float*)b;
    165    for (i = 0; i < sizeof(Block) / sizeof(Float); i++)
    166       p[i] = (Float) special_values[i % 10];
    167 }
    168 
    169 void specialDBlock ( Block* b )
    170 {
    171    int i;
    172    Double* p = (Double*)b;
    173    for (i = 0; i < sizeof(Block) / sizeof(Double); i++)
    174       p[i] = special_values[i % 10];
    175 }
    176 
    177 UChar randUChar ( void )
    178 {
    179    static UInt seed = 80021;
    180    seed = 1103515245 * seed + 12345;
    181    return (seed >> 17) & 0xFF;
    182 }
    183 
    184 void randBlock ( Block* b )
    185 {
    186    int i;
    187    UChar* p = (UChar*)b;
    188    for (i = 0; i < sizeof(Block); i++)
    189       p[i] = randUChar();
    190 }
    191 
    192 void oneBlock ( Block* b )
    193 {
    194    int i;
    195    UChar* p = (UChar*)b;
    196    for (i = 0; i < sizeof(Block); i++)
    197       p[i] = 1;
    198 }
    199 
/* Define the non-inlined function

      void test_<_name> ( const char *n, Block* b )

   which runs one instruction on the contents of *b:

     1. prints "<_name> <n>" and the block contents ("before");
     2. loads b->r1, r2, r3, r4 (byte offsets 0, 32, 64, 96) into
        ymm7, ymm8, ymm6, ymm9 and points r14 at offset 128 (field m);
     3. executes _instr, which may use those registers and (%r14);
     4. stores ymm7, ymm8, ymm6, ymm9 back to offsets 0, 32, 64, 96;
     5. prints the block contents again ("after") and a blank line.

   _name   suffix of the generated function name, also printed as text
   _instr  string literal holding the instruction under test
   _isD    passed to showBlock: nonzero prints lanes as doubles,
           zero as floats

   b must be 32-byte aligned: showYMM asserts it, and the block is
   accessed with vmovdqa.
   NOTE(review): the clobber list names xmm6..xmm9 although the code
   uses the ymm registers; presumably the compiler treats that as
   covering the whole registers -- confirm. */
#define GEN_test(_name, _instr, _isD) \
   __attribute__ ((noinline)) void \
   test_##_name ( const char *n, Block* b) \
   { \
      printf("%s %s\n", #_name, n); \
      showBlock("before", b, _isD); \
      __asm__ __volatile__( \
          "vmovdqa   0(%0),%%ymm7"  "\n\t" \
          "vmovdqa  32(%0),%%ymm8"  "\n\t" \
          "vmovdqa  64(%0),%%ymm6"  "\n\t" \
          "vmovdqa  96(%0),%%ymm9"  "\n\t" \
          "leaq    128(%0),%%r14"   "\n\t" \
          _instr "\n\t" \
          "vmovdqa %%ymm7,  0(%0)"  "\n\t" \
          "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
          "vmovdqa %%ymm6, 64(%0)"  "\n\t" \
          "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b, _isD); \
       printf("\n"); \
    }
    224 
    225 /* All these defines do the same thing (and someone with stronger
    226    preprocessor foo could probably express things much smaller).
    227    They generate 4 different functions to test 4 variants of an
    228    fma4 instruction. One with as input 4 registers, one where
    229    the output register is also one of the input registers and
    230    two versions where different inputs are a memory location.
    231    The xmm variants create 128 versions, the ymm variants 256. */
    232 
    233 #define GEN_test_VFMADDPD_xmm(_name) \
    234    GEN_test(_name##_xmm, \
    235             "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    236    GEN_test(_name##_xmm_src_dst, \
    237             "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    238    GEN_test(_name##_xmm_mem1, \
    239             "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    240    GEN_test(_name##_xmm_mem2, \
    241             "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    242 GEN_test_VFMADDPD_xmm(VFMADDPD)
    243 
    244 #define GEN_test_VFMADDPD_ymm(_name) \
    245    GEN_test(_name##_ymm, \
    246             "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    247    GEN_test(_name##_ymm_src_dst, \
    248             "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    249    GEN_test(_name##_ymm_mem1, \
    250             "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    251    GEN_test(_name##_ymm_mem2, \
    252             "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    253 GEN_test_VFMADDPD_ymm(VFMADDPD)
    254 
    255 #define GEN_test_VFMADDPS_xmm(_name) \
    256    GEN_test(_name##_xmm, \
    257             "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    258    GEN_test(_name##_xmm_src_dst, \
    259             "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    260    GEN_test(_name##_xmm_mem1, \
    261             "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    262    GEN_test(_name##_xmm_mem2, \
    263             "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    264 GEN_test_VFMADDPS_xmm(VFMADDPS)
    265 
    266 #define GEN_test_VFMADDPS_ymm(_name) \
    267    GEN_test(_name##_ymm, \
    268             "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    269    GEN_test(_name##_ymm_src_dst, \
    270             "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    271    GEN_test(_name##_ymm_mem1, \
    272             "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    273    GEN_test(_name##_ymm_mem2, \
    274             "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    275 GEN_test_VFMADDPS_ymm(VFMADDPS)
    276 
    277 #define GEN_test_VFMADDSD_xmm(_name) \
    278    GEN_test(_name##_xmm, \
    279             "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    280    GEN_test(_name##_xmm_src_dst, \
    281             "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    282    GEN_test(_name##_xmm_mem1, \
    283             "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    284    GEN_test(_name##_xmm_mem2, \
    285             "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    286 GEN_test_VFMADDSD_xmm(VFMADDSD)
    287 
    288 #define GEN_test_VFMADDSS_xmm(_name) \
    289    GEN_test(_name##_xmm, \
    290             "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    291    GEN_test(_name##_xmm_src_dst, \
    292             "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    293    GEN_test(_name##_xmm_mem1, \
    294             "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    295    GEN_test(_name##_xmm_mem2, \
    296             "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    297 GEN_test_VFMADDSS_xmm(VFMADDSS)
    298 
    299 #define GEN_test_VFMADDSUBPD_xmm(_name) \
    300    GEN_test(_name##_xmm, \
    301             "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    302    GEN_test(_name##_xmm_src_dst, \
    303             "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    304    GEN_test(_name##_xmm_mem1, \
    305             "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    306    GEN_test(_name##_xmm_mem2, \
    307             "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    308 GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)
    309 
    310 #define GEN_test_VFMADDSUBPD_ymm(_name) \
    311    GEN_test(_name##_ymm, \
    312             "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    313    GEN_test(_name##_ymm_src_dst, \
    314             "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    315    GEN_test(_name##_ymm_mem1, \
    316             "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    317    GEN_test(_name##_ymm_mem2, \
    318             "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    319 GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)
    320 
    321 #define GEN_test_VFMADDSUBPS_xmm(_name) \
    322    GEN_test(_name##_xmm, \
    323             "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    324    GEN_test(_name##_xmm_src_dst, \
    325             "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    326    GEN_test(_name##_xmm_mem1, \
    327             "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    328    GEN_test(_name##_xmm_mem2, \
    329             "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    330 GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)
    331 
    332 #define GEN_test_VFMADDSUBPS_ymm(_name) \
    333    GEN_test(_name##_ymm, \
    334             "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    335    GEN_test(_name##_ymm_src_dst, \
    336             "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    337    GEN_test(_name##_ymm_mem1, \
    338             "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    339    GEN_test(_name##_ymm_mem2, \
    340             "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    341 GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)
    342 
    343 #define GEN_test_VFMSUBADDPD_xmm(_name) \
    344    GEN_test(_name##_xmm, \
    345             "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    346    GEN_test(_name##_xmm_src_dst, \
    347             "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    348    GEN_test(_name##_xmm_mem1, \
    349             "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    350    GEN_test(_name##_xmm_mem2, \
    351             "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    352 GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)
    353 
    354 #define GEN_test_VFMSUBADDPD_ymm(_name) \
    355    GEN_test(_name##_ymm, \
    356             "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    357    GEN_test(_name##_ymm_src_dst, \
    358             "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    359    GEN_test(_name##_ymm_mem1, \
    360             "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    361    GEN_test(_name##_ymm_mem2, \
    362             "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    363 GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)
    364 
    365 #define GEN_test_VFMSUBADDPS_xmm(_name) \
    366    GEN_test(_name##_xmm, \
    367             "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    368    GEN_test(_name##_xmm_src_dst, \
    369             "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    370    GEN_test(_name##_xmm_mem1, \
    371             "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    372    GEN_test(_name##_xmm_mem2, \
    373             "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    374 GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)
    375 
    376 #define GEN_test_VFMSUBADDPS_ymm(_name) \
    377    GEN_test(_name##_ymm, \
    378             "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    379    GEN_test(_name##_ymm_src_dst, \
    380             "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    381    GEN_test(_name##_ymm_mem1, \
    382             "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    383    GEN_test(_name##_ymm_mem2, \
    384             "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    385 GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
    386 
    387 #define GEN_test_VFMSUBPD_xmm(_name) \
    388    GEN_test(_name##_xmm, \
    389             "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    390    GEN_test(_name##_xmm_src_dst, \
    391             "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    392    GEN_test(_name##_xmm_mem1, \
    393             "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    394    GEN_test(_name##_xmm_mem2, \
    395             "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    396 GEN_test_VFMSUBPD_xmm(VFMSUBPD)
    397 
    398 #define GEN_test_VFMSUBPD_ymm(_name) \
    399    GEN_test(_name##_ymm, \
    400             "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    401    GEN_test(_name##_ymm_src_dst, \
    402             "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    403    GEN_test(_name##_ymm_mem1, \
    404             "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    405    GEN_test(_name##_ymm_mem2, \
    406             "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    407 GEN_test_VFMSUBPD_ymm(VFMSUBPD)
    408 
    409 #define GEN_test_VFMSUBPS_xmm(_name) \
    410    GEN_test(_name##_xmm, \
    411             "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    412    GEN_test(_name##_xmm_src_dst, \
    413             "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    414    GEN_test(_name##_xmm_mem1, \
    415             "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    416    GEN_test(_name##_xmm_mem2, \
    417             "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    418 GEN_test_VFMSUBPS_xmm(VFMSUBPS)
    419 
    420 #define GEN_test_VFMSUBPS_ymm(_name) \
    421    GEN_test(_name##_ymm, \
    422             "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    423    GEN_test(_name##_ymm_src_dst, \
    424             "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    425    GEN_test(_name##_ymm_mem1, \
    426             "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    427    GEN_test(_name##_ymm_mem2, \
    428             "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    429 GEN_test_VFMSUBPS_ymm(VFMSUBPS)
    430 
    431 #define GEN_test_VFMSUBSD_xmm(_name) \
    432    GEN_test(_name##_xmm, \
    433             "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    434    GEN_test(_name##_xmm_src_dst, \
    435             "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    436    GEN_test(_name##_xmm_mem1, \
    437             "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    438    GEN_test(_name##_xmm_mem2, \
    439             "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    440 GEN_test_VFMSUBSD_xmm(VFMSUBSD)
    441 
    442 #define GEN_test_VFMSUBSS_xmm(_name) \
    443    GEN_test(_name##_xmm, \
    444             "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    445    GEN_test(_name##_xmm_src_dst, \
    446             "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    447    GEN_test(_name##_xmm_mem1, \
    448             "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    449    GEN_test(_name##_xmm_mem2, \
    450             "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    451 GEN_test_VFMSUBSS_xmm(VFMSUBSS)
    452 
    453 #define GEN_test_VFNMADDPD_xmm(_name) \
    454    GEN_test(_name##_xmm, \
    455             "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    456    GEN_test(_name##_xmm_src_dst, \
    457             "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    458    GEN_test(_name##_xmm_mem1, \
    459             "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    460    GEN_test(_name##_xmm_mem2, \
    461             "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    462 GEN_test_VFNMADDPD_xmm(VFNMADDPD)
    463 
    464 #define GEN_test_VFNMADDPD_ymm(_name) \
    465    GEN_test(_name##_ymm, \
    466             "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    467    GEN_test(_name##_ymm_src_dst, \
    468             "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    469    GEN_test(_name##_ymm_mem1, \
    470             "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    471    GEN_test(_name##_ymm_mem2, \
    472             "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    473 GEN_test_VFNMADDPD_ymm(VFNMADDPD)
    474 
    475 #define GEN_test_VFNMADDPS_xmm(_name) \
    476    GEN_test(_name##_xmm, \
    477             "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    478    GEN_test(_name##_xmm_src_dst, \
    479             "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    480    GEN_test(_name##_xmm_mem1, \
    481             "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    482    GEN_test(_name##_xmm_mem2, \
    483             "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    484 GEN_test_VFNMADDPS_xmm(VFNMADDPS)
    485 
    486 #define GEN_test_VFNMADDPS_ymm(_name) \
    487    GEN_test(_name##_ymm, \
    488             "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    489    GEN_test(_name##_ymm_src_dst, \
    490             "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    491    GEN_test(_name##_ymm_mem1, \
    492             "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    493    GEN_test(_name##_ymm_mem2, \
    494             "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    495 GEN_test_VFNMADDPS_ymm(VFNMADDPS)
    496 
    497 #define GEN_test_VFNMADDSD_xmm(_name) \
    498    GEN_test(_name##_xmm, \
    499             "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    500    GEN_test(_name##_xmm_src_dst, \
    501             "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    502    GEN_test(_name##_xmm_mem1, \
    503             "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    504    GEN_test(_name##_xmm_mem2, \
    505             "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    506 GEN_test_VFNMADDSD_xmm(VFNMADDSD)
    507 
    508 #define GEN_test_VFNMADDSS_xmm(_name) \
    509    GEN_test(_name##_xmm, \
    510             "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    511    GEN_test(_name##_xmm_src_dst, \
    512             "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    513    GEN_test(_name##_xmm_mem1, \
    514             "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    515    GEN_test(_name##_xmm_mem2, \
    516             "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    517 GEN_test_VFNMADDSS_xmm(VFNMADDSS)
    518 
    519 #define GEN_test_VFNMSUBPD_xmm(_name) \
    520    GEN_test(_name##_xmm, \
    521             "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    522    GEN_test(_name##_xmm_src_dst, \
    523             "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    524    GEN_test(_name##_xmm_mem1, \
    525             "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    526    GEN_test(_name##_xmm_mem2, \
    527             "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    528 GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)
    529 
    530 #define GEN_test_VFNMSUBPD_ymm(_name) \
    531    GEN_test(_name##_ymm, \
    532             "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
    533    GEN_test(_name##_ymm_src_dst, \
    534             "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
    535    GEN_test(_name##_ymm_mem1, \
    536             "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
    537    GEN_test(_name##_ymm_mem2, \
    538             "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
    539 GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)
    540 
    541 #define GEN_test_VFNMSUBPS_xmm(_name) \
    542    GEN_test(_name##_xmm, \
    543             "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    544    GEN_test(_name##_xmm_src_dst, \
    545             "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    546    GEN_test(_name##_xmm_mem1, \
    547             "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    548    GEN_test(_name##_xmm_mem2, \
    549             "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    550 GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)
    551 
    552 #define GEN_test_VFNMSUBPS_ymm(_name) \
    553    GEN_test(_name##_ymm, \
    554             "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
    555    GEN_test(_name##_ymm_src_dst, \
    556             "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
    557    GEN_test(_name##_ymm_mem1, \
    558             "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
    559    GEN_test(_name##_ymm_mem2, \
    560             "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
    561 GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)
    562 
    563 #define GEN_test_VFNMSUBSD_xmm(_name) \
    564    GEN_test(_name##_xmm, \
    565             "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
    566    GEN_test(_name##_xmm_src_dst, \
    567             "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
    568    GEN_test(_name##_xmm_mem1, \
    569             "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
    570    GEN_test(_name##_xmm_mem2, \
    571             "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
    572 GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)
    573 
    574 #define GEN_test_VFNMSUBSS_xmm(_name) \
    575    GEN_test(_name##_xmm, \
    576             "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
    577    GEN_test(_name##_xmm_src_dst, \
    578             "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
    579    GEN_test(_name##_xmm_mem1, \
    580             "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
    581    GEN_test(_name##_xmm_mem2, \
    582             "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
    583 GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
    584 
    585 #define DO_test_block(_name, _sub, _bname, _block) \
    586    test_##_name##_##_sub(_bname,_block);
    587 
    588 #define DO_test(_name, _sub, _isD) { \
    589    Block* b = memalign32(sizeof(Block)); \
    590    oneBlock(b); \
    591    DO_test_block(_name, _sub, "ones", b); \
    592    if (_isD) { \
    593       specialDBlock(b); \
    594       DO_test_block(_name, _sub, "specialD", b); \
    595    } else { \
    596       specialFBlock(b); \
    597       DO_test_block(_name, _sub, "specialF", b); \
    598    } \
    599    randBlock(b); \
    600    DO_test_block(_name, _sub, "rand", b); \
    601    free(b); \
    602 }
    603 
    604 #define DO_tests_xmm(_name,_isD) \
    605    DO_test(_name, xmm, _isD); \
    606    DO_test(_name, xmm_src_dst, _isD); \
    607    DO_test(_name, xmm_mem1, _isD); \
    608    DO_test(_name, xmm_mem2, _isD);
    609 
    610 #define DO_tests_ymm(_name,_isD) \
    611    DO_test(_name, ymm, _isD); \
    612    DO_test(_name, ymm_src_dst, _isD); \
    613    DO_test(_name, ymm_mem1, _isD); \
    614    DO_test(_name, ymm_mem2, _isD);
    615 
/* Test driver: builds the table of special values, then runs every
   xmm test in a fixed order.  The second argument of each DO_tests_*
   call selects how lanes are printed: 1 = doubles, 0 = floats.
   Always returns 0; the results are the text written to stdout. */
int main ( void )
{
  init_special_values();

  // 128-bit (xmm) forms
  DO_tests_xmm(VFMADDPD, 1);
  DO_tests_xmm(VFMADDPS, 0);
  DO_tests_xmm(VFMADDSD, 1);
  DO_tests_xmm(VFMADDSS, 0);
  DO_tests_xmm(VFMADDSUBPD, 1);
  DO_tests_xmm(VFMADDSUBPS, 0);
  DO_tests_xmm(VFMSUBADDPD, 1);
  DO_tests_xmm(VFMSUBADDPS, 0);
  DO_tests_xmm(VFMSUBPD, 1);
  DO_tests_xmm(VFMSUBPS, 0);
  DO_tests_xmm(VFMSUBSD, 1);
  DO_tests_xmm(VFMSUBSS, 0);
  DO_tests_xmm(VFNMADDPD, 1);
  DO_tests_xmm(VFNMADDPS, 0);
  DO_tests_xmm(VFNMADDSD, 1);
  DO_tests_xmm(VFNMADDSS, 0);
  DO_tests_xmm(VFNMSUBPD, 1);
  DO_tests_xmm(VFNMSUBPS, 0);
  DO_tests_xmm(VFNMSUBSD, 1);
  DO_tests_xmm(VFNMSUBSS, 0);

  // 256-bit (ymm) forms
  // NOTE(review): the ymm test functions are generated but the calls
  // below are disabled; the reason is not recorded in this file --
  // confirm before re-enabling.
  /*
  DO_tests_ymm(VFMADDPD, 1);
  DO_tests_ymm(VFMADDPS, 0);
  DO_tests_ymm(VFMADDSUBPD, 1);
  DO_tests_ymm(VFMADDSUBPS, 0);
  DO_tests_ymm(VFMSUBADDPD, 1);
  DO_tests_ymm(VFMSUBADDPS, 0);
  DO_tests_ymm(VFMSUBPD, 1);
  DO_tests_ymm(VFMSUBPS, 0);
  DO_tests_ymm(VFNMADDPD, 1);
  DO_tests_ymm(VFNMADDPS, 0);
  DO_tests_ymm(VFNMSUBPD, 1);
  DO_tests_ymm(VFNMSUBPS, 0);
  */

  return 0;
}
    660