Home | History | Annotate | Download | only in AssemblerX8664
      1 //===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===//
      2 //
      3 //                        The Subzero Code Generator
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include "AssemblerX8664/TestUtil.h"
     10 
     11 namespace Ice {
     12 namespace X8664 {
     13 namespace Test {
     14 namespace {
     15 
     16 TEST_F(AssemblerX8664Test, ArithSS) {
     17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
     18   do {                                                                         \
     19     static_assert(FloatSize == 32 || FloatSize == 64,                          \
     20                   "Invalid fp size " #FloatSize);                              \
     21     static constexpr char TestString[] =                                       \
     22         "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
     23         ", " #Inst ", " #Op ")";                                               \
     24     static constexpr bool IsDouble = FloatSize == 64;                          \
     25     using Type = std::conditional<IsDouble, double, float>::type;              \
     26     const uint32_t T0 = allocateQword();                                       \
     27     const Type V0 = Value0;                                                    \
     28     const uint32_t T1 = allocateQword();                                       \
     29     const Type V1 = Value1;                                                    \
     30                                                                                \
     31     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
     32     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1));     \
     33     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
     34                                                                                \
     35     AssembledTest test = assemble();                                           \
     36     if (IsDouble) {                                                            \
     37       test.setQwordTo(T0, static_cast<double>(V0));                            \
     38       test.setQwordTo(T1, static_cast<double>(V1));                            \
     39     } else {                                                                   \
     40       test.setDwordTo(T0, static_cast<float>(V0));                             \
     41       test.setDwordTo(T1, static_cast<float>(V1));                             \
     42     }                                                                          \
     43                                                                                \
     44     test.run();                                                                \
     45                                                                                \
     46     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
     47     reset();                                                                   \
     48   } while (0)
     49 
     50 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
     51   do {                                                                         \
     52     static_assert(FloatSize == 32 || FloatSize == 64,                          \
     53                   "Invalid fp size " #FloatSize);                              \
     54     static constexpr char TestString[] =                                       \
     55         "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
     56         ", " #Op ")";                                                          \
     57     static constexpr bool IsDouble = FloatSize == 64;                          \
     58     using Type = std::conditional<IsDouble, double, float>::type;              \
     59     const uint32_t T0 = allocateQword();                                       \
     60     const Type V0 = Value0;                                                    \
     61     const uint32_t T1 = allocateQword();                                       \
     62     const Type V1 = Value1;                                                    \
     63                                                                                \
     64     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
     65     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
     66                                                                                \
     67     AssembledTest test = assemble();                                           \
     68     if (IsDouble) {                                                            \
     69       test.setQwordTo(T0, static_cast<double>(V0));                            \
     70       test.setQwordTo(T1, static_cast<double>(V1));                            \
     71     } else {                                                                   \
     72       test.setDwordTo(T0, static_cast<float>(V0));                             \
     73       test.setDwordTo(T1, static_cast<float>(V1));                             \
     74     }                                                                          \
     75                                                                                \
     76     test.run();                                                                \
     77                                                                                \
     78     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
     79     reset();                                                                   \
     80   } while (0)
     81 
     82 #define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
     83   do {                                                                         \
     84     TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
     85     TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
     86     TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
     87     TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
     88     TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
     89     TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
     90     TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / );             \
     91     TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / );                 \
     92   } while (0)
     93 
     94 #define TestImpl(Src, Dst0, Dst1)                                              \
     95   do {                                                                         \
     96     TestArithSS(32, Src, Dst0, Dst1);                                          \
     97     TestArithSS(64, Src, Dst0, Dst1);                                          \
     98   } while (0)
     99 
    100   TestImpl(xmm0, xmm1, xmm2);
    101   TestImpl(xmm1, xmm2, xmm3);
    102   TestImpl(xmm2, xmm3, xmm4);
    103   TestImpl(xmm3, xmm4, xmm5);
    104   TestImpl(xmm4, xmm5, xmm6);
    105   TestImpl(xmm5, xmm6, xmm7);
    106   TestImpl(xmm6, xmm7, xmm8);
    107   TestImpl(xmm7, xmm8, xmm9);
    108   TestImpl(xmm8, xmm9, xmm10);
    109   TestImpl(xmm9, xmm10, xmm11);
    110   TestImpl(xmm10, xmm11, xmm12);
    111   TestImpl(xmm11, xmm12, xmm13);
    112   TestImpl(xmm12, xmm13, xmm14);
    113   TestImpl(xmm13, xmm14, xmm15);
    114   TestImpl(xmm14, xmm15, xmm0);
    115   TestImpl(xmm15, xmm0, xmm1);
    116 
    117 #undef TestImpl
    118 #undef TestArithSS
    119 #undef TestArithSSXmmAddr
    120 #undef TestArithSSXmmXmm
    121 }
    122 
    123 TEST_F(AssemblerX8664Test, PArith) {
    124 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
    125   do {                                                                         \
    126     static constexpr char TestString[] =                                       \
    127         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    128         ", " #Type ", " #Size ")";                                             \
    129     const uint32_t T0 = allocateDqword();                                      \
    130     const Dqword V0 Value0;                                                    \
    131                                                                                \
    132     const uint32_t T1 = allocateDqword();                                      \
    133     const Dqword V1 Value1;                                                    \
    134                                                                                \
    135     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    136     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    137     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
    138                                                                                \
    139     AssembledTest test = assemble();                                           \
    140     test.setDqwordTo(T0, V0);                                                  \
    141     test.setDqwordTo(T1, V1);                                                  \
    142     test.run();                                                                \
    143                                                                                \
    144     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
    145         << TestString;                                                         \
    146     reset();                                                                   \
    147   } while (0)
    148 
    149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
    150   do {                                                                         \
    151     static constexpr char TestString[] =                                       \
    152         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    153         ", " #Type ", " #Size ")";                                             \
    154     const uint32_t T0 = allocateDqword();                                      \
    155     const Dqword V0 Value0;                                                    \
    156                                                                                \
    157     const uint32_t T1 = allocateDqword();                                      \
    158     const Dqword V1 Value1;                                                    \
    159                                                                                \
    160     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    161     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
    162                                                                                \
    163     AssembledTest test = assemble();                                           \
    164     test.setDqwordTo(T0, V0);                                                  \
    165     test.setDqwordTo(T1, V1);                                                  \
    166     test.run();                                                                \
    167                                                                                \
    168     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
    169         << TestString;                                                         \
    170     reset();                                                                   \
    171   } while (0)
    172 
    173 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
    174   do {                                                                         \
    175     static constexpr char TestString[] =                                       \
    176         "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
    177         ", " #Size ")";                                                        \
    178     const uint32_t T0 = allocateDqword();                                      \
    179     const Dqword V0 Value0;                                                    \
    180                                                                                \
    181     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    182     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm));             \
    183                                                                                \
    184     AssembledTest test = assemble();                                           \
    185     test.setDqwordTo(T0, V0);                                                  \
    186     test.run();                                                                \
    187                                                                                \
    188     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
    189         << TestString;                                                         \
    190     reset();                                                                   \
    191   } while (0)
    192 
    193 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
    194   do {                                                                         \
    195     static constexpr char TestString[] =                                       \
    196         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
    197         ", " #Size ")";                                                        \
    198     const uint32_t T0 = allocateDqword();                                      \
    199     const Dqword V0 Value0;                                                    \
    200                                                                                \
    201     const uint32_t T1 = allocateDqword();                                      \
    202     const Dqword V1 Value1;                                                    \
    203                                                                                \
    204     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    205     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    206     __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());       \
    207                                                                                \
    208     AssembledTest test = assemble();                                           \
    209     test.setDqwordTo(T0, V0);                                                  \
    210     test.setDqwordTo(T1, V1);                                                  \
    211     test.run();                                                                \
    212                                                                                \
    213     ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
    214         << TestString;                                                         \
    215     reset();                                                                   \
    216   } while (0)
    217 
    218 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
    219   do {                                                                         \
    220     static constexpr char TestString[] =                                       \
    221         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
    222         ")";                                                                   \
    223     const uint32_t T0 = allocateDqword();                                      \
    224     const Dqword V0 Value0;                                                    \
    225                                                                                \
    226     const uint32_t T1 = allocateDqword();                                      \
    227     const Dqword V1 Value1;                                                    \
    228                                                                                \
    229     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    230     __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));          \
    231                                                                                \
    232     AssembledTest test = assemble();                                           \
    233     test.setDqwordTo(T0, V0);                                                  \
    234     test.setDqwordTo(T1, V1);                                                  \
    235     test.run();                                                                \
    236                                                                                \
    237     ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
    238         << TestString;                                                         \
    239     reset();                                                                   \
    240   } while (0)
    241 
    242 #define TestPArithSize(Dst, Src, Size)                                         \
    243   do {                                                                         \
    244     static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
    245     if (Size != 8) {                                                           \
    246       TestPArithXmmXmm(                                                        \
    247           Dst,                                                                 \
    248           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    249           Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
    250       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    251                               uint64_t(0x8080404002020101ull)),                \
    252                         (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);    \
    253       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    254                              uint64_t(0x8080404002020101ull)),                 \
    255                        3u, psra, >>, int, Size);                               \
    256       TestPArithXmmXmm(                                                        \
    257           Dst,                                                                 \
    258           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    259           Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
    260       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    261                               uint64_t(0x8080404002020101ull)),                \
    262                         (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);   \
    263       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    264                              uint64_t(0x8080404002020101ull)),                 \
    265                        3u, psrl, >>, uint, Size);                              \
    266       TestPArithXmmXmm(                                                        \
    267           Dst,                                                                 \
    268           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    269           Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
    270       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    271                               uint64_t(0x8080404002020101ull)),                \
    272                         (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);   \
    273       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    274                              uint64_t(0x8080404002020101ull)),                 \
    275                        3u, psll, <<, uint, Size);                              \
    276                                                                                \
    277       TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                  \
    278                              uint64_t(0x8080404002020101ull)),                 \
    279                        Src, (uint64_t(0xFFFFFFFF00000000ull),                  \
    280                              uint64_t(0x0123456789ABCDEull)),                  \
    281                        pmull, *, int, Size);                                   \
    282       TestPArithXmmAddr(                                                       \
    283           Dst,                                                                 \
    284           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    285           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
    286           pmull, *, int, Size);                                                \
    287       if (Size != 16) {                                                        \
    288         TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                \
    289                                uint64_t(0x8080404002020101ull)),               \
    290                          Src, (uint64_t(0xFFFFFFFF00000000ull),                \
    291                                uint64_t(0x0123456789ABCDEull)),                \
    292                          pmuludq, *, uint, Size);                              \
    293         TestPArithXmmAddr(                                                     \
    294             Dst, (uint64_t(0x8040201008040201ull),                             \
    295                   uint64_t(0x8080404002020101ull)),                            \
    296             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
    297             pmuludq, *, uint, Size);                                           \
    298       }                                                                        \
    299     }                                                                          \
    300     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    301                            uint64_t(0x8080404002020101ull)),                   \
    302                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    303                            uint64_t(0x0123456789ABCDEull)),                    \
    304                      padd, +, int, Size);                                      \
    305     TestPArithXmmAddr(                                                         \
    306         Dst,                                                                   \
    307         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    308         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    309         padd, +, int, Size);                                                   \
    310     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    311                            uint64_t(0x8080404002020101ull)),                   \
    312                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    313                            uint64_t(0x0123456789ABCDEull)),                    \
    314                      psub, -, int, Size);                                      \
    315     TestPArithXmmAddr(                                                         \
    316         Dst,                                                                   \
    317         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    318         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    319         psub, -, int, Size);                                                   \
    320     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    321                            uint64_t(0x8080404002020101ull)),                   \
    322                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    323                            uint64_t(0x0123456789ABCDEull)),                    \
    324                      pand, &, int, Size);                                      \
    325     TestPArithXmmAddr(                                                         \
    326         Dst,                                                                   \
    327         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    328         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    329         pand, &, int, Size);                                                   \
    330                                                                                \
    331     TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                     \
    332                           uint64_t(0x8080404002020101ull)),                    \
    333                     Src, (uint64_t(0xFFFFFFFF00000000ull),                     \
    334                           uint64_t(0x0123456789ABCDEull)),                     \
    335                     int, Size);                                                \
    336     TestPAndnXmmAddr(                                                          \
    337         Dst,                                                                   \
    338         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    339         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    340         int, Size);                                                            \
    341                                                                                \
    342     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    343                            uint64_t(0x8080404002020101ull)),                   \
    344                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    345                            uint64_t(0x0123456789ABCDEull)),                    \
    346                      por, |, int, Size);                                       \
    347     TestPArithXmmAddr(                                                         \
    348         Dst,                                                                   \
    349         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    350         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    351         por, |, int, Size);                                                    \
    352     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    353                            uint64_t(0x8080404002020101ull)),                   \
    354                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    355                            uint64_t(0x0123456789ABCDEull)),                    \
    356                      pxor, ^, int, Size);                                      \
    357     TestPArithXmmAddr(                                                         \
    358         Dst,                                                                   \
    359         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    360         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    361         pxor, ^, int, Size);                                                   \
    362   } while (0)
    363 
    364 #define TestPArith(Src, Dst)                                                   \
    365   do {                                                                         \
    366     TestPArithSize(Src, Dst, 8);                                               \
    367     TestPArithSize(Src, Dst, 16);                                              \
    368     TestPArithSize(Src, Dst, 32);                                              \
    369   } while (0)
    370 
    371   TestPArith(xmm0, xmm1);
    372   TestPArith(xmm1, xmm2);
    373   TestPArith(xmm2, xmm3);
    374   TestPArith(xmm3, xmm4);
    375   TestPArith(xmm4, xmm5);
    376   TestPArith(xmm5, xmm6);
    377   TestPArith(xmm6, xmm7);
    378   TestPArith(xmm7, xmm8);
    379   TestPArith(xmm8, xmm9);
    380   TestPArith(xmm9, xmm10);
    381   TestPArith(xmm10, xmm11);
    382   TestPArith(xmm11, xmm12);
    383   TestPArith(xmm12, xmm13);
    384   TestPArith(xmm13, xmm14);
    385   TestPArith(xmm14, xmm15);
    386   TestPArith(xmm15, xmm0);
    387 
    388 #undef TestPArith
    389 #undef TestPArithSize
    390 #undef TestPAndnXmmAddr
    391 #undef TestPAndnXmmXmm
    392 #undef TestPArithXmmImm
    393 #undef TestPArithXmmAddr
    394 #undef TestPArithXmmXmm
    395 }
    396 
    397 TEST_F(AssemblerX8664Test, ArithPS) {
    398 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
    399   do {                                                                         \
    400     static constexpr char TestString[] =                                       \
    401         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    402         ", " #Type ")";                                                        \
    403     const uint32_t T0 = allocateDqword();                                      \
    404     const Dqword V0 Value0;                                                    \
    405     const uint32_t T1 = allocateDqword();                                      \
    406     const Dqword V1 Value1;                                                    \
    407                                                                                \
    408     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    409     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    410     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());            \
    411                                                                                \
    412     AssembledTest test = assemble();                                           \
    413     test.setDqwordTo(T0, V0);                                                  \
    414     test.setDqwordTo(T1, V1);                                                  \
    415     test.run();                                                                \
    416                                                                                \
    417     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    418                                                                                \
    419     reset();                                                                   \
    420   } while (0)
    421 
    422 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
    423   do {                                                                         \
    424     static constexpr char TestString[] =                                       \
    425         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    426         ", " #Type ")";                                                        \
    427     const uint32_t T0 = allocateDqword();                                      \
    428     const Dqword V0 Value0;                                                    \
    429     const uint32_t T1 = allocateDqword();                                      \
    430     const Dqword V1 Value1;                                                    \
    431                                                                                \
    432     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    433     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    434     __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
    435                                                                                \
    436     AssembledTest test = assemble();                                           \
    437     test.setDqwordTo(T0, V0);                                                  \
    438     test.setDqwordTo(T1, V1);                                                  \
    439     test.run();                                                                \
    440                                                                                \
    441     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    442                                                                                \
    443     reset();                                                                   \
    444   } while (0)
    445 
    446 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
    447   do {                                                                         \
    448     static constexpr char TestString[] =                                       \
    449         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    450         ", " #Type ")";                                                        \
    451     const uint32_t T0 = allocateDqword();                                      \
    452     const Dqword V0 Value0;                                                    \
    453     const uint32_t T1 = allocateDqword();                                      \
    454     const Dqword V1 Value1;                                                    \
    455                                                                                \
    456     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    457     __ Inst(Encoded_Xmm_##Dst(), dwordAddress(T1));                            \
    458                                                                                \
    459     AssembledTest test = assemble();                                           \
    460     test.setDqwordTo(T0, V0);                                                  \
    461     test.setDqwordTo(T1, V1);                                                  \
    462     test.run();                                                                \
    463                                                                                \
    464     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    465                                                                                \
    466     reset();                                                                   \
    467   } while (0)
    468 
    469 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
    470   do {                                                                         \
    471     static constexpr char TestString[] =                                       \
    472         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type     \
    473         ")";                                                                   \
    474     const uint32_t T0 = allocateDqword();                                      \
    475     const Dqword V0 Value0;                                                    \
    476     const uint32_t T1 = allocateDqword();                                      \
    477     const Dqword V1 Value1;                                                    \
    478                                                                                \
    479     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    480     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    481     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
    482                                                                                \
    483     AssembledTest test = assemble();                                           \
    484     test.setDqwordTo(T0, V0);                                                  \
    485     test.setDqwordTo(T1, V1);                                                  \
    486     test.run();                                                                \
    487                                                                                \
    488     ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
    489                                                                                \
    490     reset();                                                                   \
    491   } while (0)
    492 
    493 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
    494   do {                                                                         \
    495     static constexpr char TestString[] =                                       \
    496         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    497         ", " #Type ")";                                                        \
    498     const uint32_t T0 = allocateDqword();                                      \
    499     const Dqword V0 Value0;                                                    \
    500     const uint32_t T1 = allocateDqword();                                      \
    501     const Dqword V1 Value1;                                                    \
    502                                                                                \
    503     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    504     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
    505                                                                                \
    506     AssembledTest test = assemble();                                           \
    507     test.setDqwordTo(T0, V0);                                                  \
    508     test.setDqwordTo(T1, V1);                                                  \
    509     test.run();                                                                \
    510                                                                                \
    511     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    512                                                                                \
    513     reset();                                                                   \
    514   } while (0)
    515 
    516 #define TestArithPS(Dst, Src)                                                  \
    517   do {                                                                         \
    518     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    519                       (0.55, 0.43, 0.23, 1.21), addps, +, float);              \
    520     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    521                        (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
    522     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    523                       (0.55, 0.43, 0.23, 1.21), subps, -, float);              \
    524     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    525                        (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
    526     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    527                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);              \
    528     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    529                        (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
    530     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    531                       (0.55, 0.43, 0.23, 1.21), divps, /, float);              \
    532     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    533                        (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
    534     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    535                       (0.55, 0.43, 0.23, 1.21), andps, &, float);              \
    536     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    537                        (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
    538     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &,    \
    539                       double);                                                 \
    540     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &,        \
    541                        double);                                                \
    542     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    543                       (0.55, 0.43, 0.23, 1.21), orps, |, float);               \
    544     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |,     \
    545                       double);                                                 \
    546     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
    547                  (0.55, 0.43, 0.23, 1.21), minps, float);                      \
    548     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
    549                  (0.55, 0.43, 0.23, 1.21), maxps, float);                      \
    550     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
    551     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
    552     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    553                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);              \
    554     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    555                        (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
    556     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^,    \
    557                       double);                                                 \
    558     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^,        \
    559                        double);                                                \
    560   } while (0)
    561 
    562   TestArithPS(xmm0, xmm1);
    563   TestArithPS(xmm1, xmm2);
    564   TestArithPS(xmm2, xmm3);
    565   TestArithPS(xmm3, xmm4);
    566   TestArithPS(xmm4, xmm5);
    567   TestArithPS(xmm5, xmm6);
    568   TestArithPS(xmm6, xmm7);
    569   TestArithPS(xmm7, xmm8);
    570   TestArithPS(xmm8, xmm9);
    571   TestArithPS(xmm9, xmm10);
    572   TestArithPS(xmm10, xmm11);
    573   TestArithPS(xmm11, xmm12);
    574   TestArithPS(xmm12, xmm13);
    575   TestArithPS(xmm13, xmm14);
    576   TestArithPS(xmm14, xmm15);
    577   TestArithPS(xmm15, xmm0);
    578 
    579 #undef TestArithPs
    580 #undef TestMinMaxPS
    581 #undef TestArithPSXmmXmmUntyped
    582 #undef TestArithPSXmmAddr
    583 #undef TestArithPSXmmXmm
    584 }
    585 
    586 TEST_F(AssemblerX8664Test, Blending) {
    587   using f32 = float;
    588   using i8 = uint8_t;
    589 
    590 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
    591   do {                                                                         \
    592     static constexpr char TestString[] =                                       \
    593         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
    594         ", " #Type ")";                                                        \
    595     const uint32_t T0 = allocateDqword();                                      \
    596     const Dqword V0 Value0;                                                    \
    597     const uint32_t T1 = allocateDqword();                                      \
    598     const Dqword V1 Value1;                                                    \
    599     const uint32_t Mask = allocateDqword();                                    \
    600     const Dqword MaskValue M;                                                  \
    601                                                                                \
    602     __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
    603     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    604     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    605     __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());         \
    606                                                                                \
    607     AssembledTest test = assemble();                                           \
    608     test.setDqwordTo(T0, V0);                                                  \
    609     test.setDqwordTo(T1, V1);                                                  \
    610     test.setDqwordTo(Mask, MaskValue);                                         \
    611     test.run();                                                                \
    612                                                                                \
    613     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
    614         << TestString;                                                         \
    615     reset();                                                                   \
    616   } while (0)
    617 
    618 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
    619   do {                                                                         \
    620     static constexpr char TestString[] =                                       \
    621         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
    622         ")";                                                                   \
    623     const uint32_t T0 = allocateDqword();                                      \
    624     const Dqword V0 Value0;                                                    \
    625     const uint32_t T1 = allocateDqword();                                      \
    626     const Dqword V1 Value1;                                                    \
    627     const uint32_t Mask = allocateDqword();                                    \
    628     const Dqword MaskValue M;                                                  \
    629                                                                                \
    630     __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
    631     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    632     __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1));            \
    633                                                                                \
    634     AssembledTest test = assemble();                                           \
    635     test.setDqwordTo(T0, V0);                                                  \
    636     test.setDqwordTo(T1, V1);                                                  \
    637     test.setDqwordTo(Mask, MaskValue);                                         \
    638     test.run();                                                                \
    639                                                                                \
    640     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
    641         << TestString;                                                         \
    642     reset();                                                                   \
    643   } while (0)
    644 
    645 #define TestBlending(Src, Dst)                                                 \
    646   do {                                                                         \
    647     TestBlendingXmmXmm(                                                        \
    648         Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
    649         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
    650         blendvps, f32);                                                        \
    651     TestBlendingXmmAddr(                                                       \
    652         Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
    653         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
    654         blendvps, f32);                                                        \
    655     TestBlendingXmmXmm(                                                        \
    656         Dst,                                                                   \
    657         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
    658         Src,                                                                   \
    659         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
    660         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
    661         pblendvb, i8);                                                         \
    662     TestBlendingXmmAddr(                                                       \
    663         Dst,                                                                   \
    664         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
    665         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
    666         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
    667         pblendvb, i8);                                                         \
    668   } while (0)
    669 
    670   /* xmm0 is taken. It is the implicit mask . */
    671   TestBlending(xmm1, xmm2);
    672   TestBlending(xmm2, xmm3);
    673   TestBlending(xmm3, xmm4);
    674   TestBlending(xmm4, xmm5);
    675   TestBlending(xmm5, xmm6);
    676   TestBlending(xmm6, xmm7);
    677   TestBlending(xmm7, xmm8);
    678   TestBlending(xmm8, xmm9);
    679   TestBlending(xmm9, xmm10);
    680   TestBlending(xmm10, xmm11);
    681   TestBlending(xmm11, xmm12);
    682   TestBlending(xmm12, xmm13);
    683   TestBlending(xmm13, xmm14);
    684   TestBlending(xmm14, xmm15);
    685   TestBlending(xmm15, xmm1);
    686 
    687 #undef TestBlending
    688 #undef TestBlendingXmmAddr
    689 #undef TestBlendingXmmXmm
    690 }
    691 
    692 TEST_F(AssemblerX8664Test, Cmpps) {
    693 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
    694   do {                                                                         \
    695     static constexpr char TestString[] =                                       \
    696         "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
    697     const uint32_t T0 = allocateDqword();                                      \
    698     const Dqword V0 Value0;                                                    \
    699     const uint32_t T1 = allocateDqword();                                      \
    700     const Dqword V1 Value1;                                                    \
    701                                                                                \
    702     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    703     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    704     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
    705              Cond::Cmpps_##C);                                                 \
    706                                                                                \
    707     AssembledTest test = assemble();                                           \
    708     test.setDqwordTo(T0, V0);                                                  \
    709     test.setDqwordTo(T1, V1);                                                  \
    710     test.run();                                                                \
    711                                                                                \
    712     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    713     ;                                                                          \
    714     reset();                                                                   \
    715   } while (0)
    716 
    717 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
    718   do {                                                                         \
    719     static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
    720     const uint32_t T0 = allocateDqword();                                      \
    721     const Dqword V0 Value0;                                                    \
    722     const uint32_t T1 = allocateDqword();                                      \
    723     const Dqword V1 Value1;                                                    \
    724                                                                                \
    725     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    726     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
    727              Cond::Cmpps_##C);                                                 \
    728                                                                                \
    729     AssembledTest test = assemble();                                           \
    730     test.setDqwordTo(T0, V0);                                                  \
    731     test.setDqwordTo(T1, V1);                                                  \
    732     test.run();                                                                \
    733                                                                                \
    734     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    735     ;                                                                          \
    736     reset();                                                                   \
    737   } while (0)
    738 
    739 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
    740   do {                                                                         \
    741     static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
    742     const uint32_t T0 = allocateDqword();                                      \
    743     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    744                     std::numeric_limits<float>::quiet_NaN());                  \
    745     const uint32_t T1 = allocateDqword();                                      \
    746     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
    747                     std::numeric_limits<float>::quiet_NaN());                  \
    748                                                                                \
    749     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    750     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    751     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
    752              Cond::Cmpps_##C);                                                 \
    753                                                                                \
    754     AssembledTest test = assemble();                                           \
    755     test.setDqwordTo(T0, V0);                                                  \
    756     test.setDqwordTo(T1, V1);                                                  \
    757     test.run();                                                                \
    758                                                                                \
    759     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
    760     ;                                                                          \
    761     reset();                                                                   \
    762   } while (0)
    763 
    764 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
    765   do {                                                                         \
    766     static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
    767     const uint32_t T0 = allocateDqword();                                      \
    768     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    769                     std::numeric_limits<float>::quiet_NaN());                  \
    770     const uint32_t T1 = allocateDqword();                                      \
    771     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
    772                     std::numeric_limits<float>::quiet_NaN());                  \
    773                                                                                \
    774     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    775     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
    776              Cond::Cmpps_##C);                                                 \
    777                                                                                \
    778     AssembledTest test = assemble();                                           \
    779     test.setDqwordTo(T0, V0);                                                  \
    780     test.setDqwordTo(T1, V1);                                                  \
    781     test.run();                                                                \
    782                                                                                \
    783     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
    784     ;                                                                          \
    785     reset();                                                                   \
    786   } while (0)
    787 
    788 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
    789   do {                                                                         \
    790     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    791     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    792     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    793     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    794     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    795     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    796     TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
    797     TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
    798     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    799     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    800     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    801     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    802     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    803     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    804     if (FloatSize == 32) {                                                     \
    805       TestCmppsOrdUnordXmmXmm(                                                 \
    806           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    807                     std::numeric_limits<float>::quiet_NaN()),                  \
    808           Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,             \
    809                 std::numeric_limits<float>::quiet_NaN()),                      \
    810           unord, Type);                                                        \
    811       TestCmppsOrdUnordXmmAddr(                                                \
    812           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    813                     std::numeric_limits<float>::quiet_NaN()),                  \
    814           (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,                  \
    815            std::numeric_limits<float>::quiet_NaN()),                           \
    816           unord, Type);                                                        \
    817     } else {                                                                   \
    818       TestCmppsOrdUnordXmmXmm(64, Dst,                                         \
    819                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
    820                               Src, (std::numeric_limits<double>::quiet_NaN(),  \
    821                                     std::numeric_limits<double>::quiet_NaN()), \
    822                               unord, Type);                                    \
    823       TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
    824                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
    825                               unord, Type);                                    \
    826       TestCmppsOrdUnordXmmAddr(                                                \
    827           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
    828           (std::numeric_limits<double>::quiet_NaN(),                           \
    829            std::numeric_limits<double>::quiet_NaN()),                          \
    830           unord, Type);                                                        \
    831       TestCmppsOrdUnordXmmAddr(                                                \
    832           64, Dst, (1.0, 1.0),                                                 \
    833           (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
    834     }                                                                          \
    835   } while (0)
    836 
    837 #define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
    838   do {                                                                         \
    839     TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
    840     TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
    841     TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
    842     TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
    843     TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
    844     TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
    845     TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
    846     TestCmpps(FloatSize, xmm7, Value0, xmm8, Value1, Type);                    \
    847     TestCmpps(FloatSize, xmm8, Value0, xmm9, Value1, Type);                    \
    848     TestCmpps(FloatSize, xmm9, Value0, xmm10, Value1, Type);                   \
    849     TestCmpps(FloatSize, xmm10, Value0, xmm11, Value1, Type);                  \
    850     TestCmpps(FloatSize, xmm11, Value0, xmm12, Value1, Type);                  \
    851     TestCmpps(FloatSize, xmm12, Value0, xmm13, Value1, Type);                  \
    852     TestCmpps(FloatSize, xmm13, Value0, xmm14, Value1, Type);                  \
    853     TestCmpps(FloatSize, xmm14, Value0, xmm15, Value1, Type);                  \
    854     TestCmpps(FloatSize, xmm15, Value0, xmm0, Value1, Type);                   \
    855   } while (0)
    856 
    857   TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
    858                 float);
    859   TestCmppsSize(64, (1.0, -1000.0), (1.0, -1000.0), double);
    860 
    861 #undef TestCmpps
    862 #undef TestCmppsOrdUnordXmmAddr
    863 #undef TestCmppsOrdUnordXmmXmm
    864 #undef TestCmppsXmmAddr
    865 #undef TestCmppsXmmXmm
    866 }
    867 
    868 TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
    869 #define TestImplSingle(Dst, Inst, Expect)                                      \
    870   do {                                                                         \
    871     static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
    872     const uint32_t T0 = allocateDqword();                                      \
    873     const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
    874                                                                                \
    875     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    876     __ Inst(Encoded_Xmm_##Dst());                                              \
    877                                                                                \
    878     AssembledTest test = assemble();                                           \
    879     test.setDqwordTo(T0, V0);                                                  \
    880     test.run();                                                                \
    881     ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
    882     reset();                                                                   \
    883   } while (0)
    884 
    885 #define TestImpl(Dst)                                                          \
    886   do {                                                                         \
    887     TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull),              \
    888                                  uint64_t(0x3FE2D10B408F1BBDull)));            \
    889     TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull),             \
    890                                   uint64_t(0x3F1078003E64F000ull)));           \
    891     TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull),        \
    892                                        uint64_t(0x3EA310003D4CC000ull)));      \
    893                                                                                \
    894     TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull),              \
    895                                  uint64_t(0x401C42FAE40282A8ull)));            \
    896   } while (0)
    897 
    898   TestImpl(xmm0);
    899   TestImpl(xmm1);
    900   TestImpl(xmm2);
    901   TestImpl(xmm3);
    902   TestImpl(xmm4);
    903   TestImpl(xmm5);
    904   TestImpl(xmm6);
    905   TestImpl(xmm7);
    906   TestImpl(xmm8);
    907   TestImpl(xmm9);
    908   TestImpl(xmm10);
    909   TestImpl(xmm11);
    910   TestImpl(xmm12);
    911   TestImpl(xmm13);
    912   TestImpl(xmm14);
    913   TestImpl(xmm15);
    914 
    915 #undef TestImpl
    916 #undef TestImplSingle
    917 }
    918 
    919 TEST_F(AssemblerX8664Test, Unpck) {
    920   const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
    921                   uint64_t(0xCCCCCCCCDDDDDDDDull));
    922   const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
    923                   uint64_t(0x9999999988888888ull));
    924 
    925   const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
    926                                 uint64_t(0xEEEEEEEEAAAAAAAAull));
    927   const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
    928                                 uint64_t(0xEEEEEEEEFFFFFFFFull));
    929   const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
    930                                 uint64_t(0x99999999CCCCCCCCull));
    931   const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
    932                                 uint64_t(0x9999999988888888ull));
    933 
    934 #define TestImplSingle(Dst, Src, Inst)                                         \
    935   do {                                                                         \
    936     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
    937     const uint32_t T0 = allocateDqword();                                      \
    938     const uint32_t T1 = allocateDqword();                                      \
    939                                                                                \
    940     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
    941     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
    942     __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
    943                                                                                \
    944     AssembledTest test = assemble();                                           \
    945     test.setDqwordTo(T0, V0);                                                  \
    946     test.setDqwordTo(T1, V1);                                                  \
    947     test.run();                                                                \
    948                                                                                \
    949     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
    950     reset();                                                                   \
    951   } while (0)
    952 
    953 #define TestImpl(Dst, Src)                                                     \
    954   do {                                                                         \
    955     TestImplSingle(Dst, Src, unpcklps);                                        \
    956     TestImplSingle(Dst, Src, unpcklpd);                                        \
    957     TestImplSingle(Dst, Src, unpckhps);                                        \
    958     TestImplSingle(Dst, Src, unpckhpd);                                        \
    959   } while (0)
    960 
    961   TestImpl(xmm0, xmm1);
    962   TestImpl(xmm1, xmm2);
    963   TestImpl(xmm2, xmm3);
    964   TestImpl(xmm3, xmm4);
    965   TestImpl(xmm4, xmm5);
    966   TestImpl(xmm5, xmm6);
    967   TestImpl(xmm6, xmm7);
    968   TestImpl(xmm7, xmm8);
    969   TestImpl(xmm8, xmm9);
    970   TestImpl(xmm9, xmm10);
    971   TestImpl(xmm10, xmm11);
    972   TestImpl(xmm11, xmm12);
    973   TestImpl(xmm12, xmm13);
    974   TestImpl(xmm13, xmm14);
    975   TestImpl(xmm14, xmm15);
    976   TestImpl(xmm15, xmm0);
    977 
    978 #undef TestImpl
    979 #undef TestImplSingle
    980 }
    981 
    982 TEST_F(AssemblerX8664Test, Shufp) {
    983   const Dqword V0(uint64_t(0x1111111122222222ull),
    984                   uint64_t(0x5555555577777777ull));
    985   const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
    986                   uint64_t(0xCCCCCCCCDDDDDDDDull));
    987 
    988   const uint8_t pshufdImm = 0x63;
    989   const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
    990                               uint64_t(0xAAAAAAAADDDDDDDDull));
    991 
    992   const uint8_t shufpsImm = 0xf9;
    993   const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
    994                               uint64_t(0xCCCCCCCCCCCCCCCCull));
    995 
    996 #define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
    997   do {                                                                         \
    998     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
    999     const uint32_t T0 = allocateDqword();                                      \
   1000     const uint32_t T1 = allocateDqword();                                      \
   1001                                                                                \
   1002     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1003     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   1004     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),             \
   1005             Immediate(Inst##Imm));                                             \
   1006                                                                                \
   1007     AssembledTest test = assemble();                                           \
   1008     test.setDqwordTo(T0, V0);                                                  \
   1009     test.setDqwordTo(T1, V1);                                                  \
   1010     test.run();                                                                \
   1011                                                                                \
   1012     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
   1013     reset();                                                                   \
   1014   } while (0)
   1015 
   1016 #define TestImplSingleXmmAddr(Dst, Inst)                                       \
   1017   do {                                                                         \
   1018     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
   1019     const uint32_t T0 = allocateDqword();                                      \
   1020     const uint32_t T1 = allocateDqword();                                      \
   1021                                                                                \
   1022     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1023     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1),                \
   1024             Immediate(Inst##Imm));                                             \
   1025                                                                                \
   1026     AssembledTest test = assemble();                                           \
   1027     test.setDqwordTo(T0, V0);                                                  \
   1028     test.setDqwordTo(T1, V1);                                                  \
   1029     test.run();                                                                \
   1030                                                                                \
   1031     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
   1032     reset();                                                                   \
   1033   } while (0)
   1034 
   1035 #define TestImplSingleXmmXmmUntyped(Dst, Src, Inst)                            \
   1036   do {                                                                         \
   1037     static constexpr char TestString[] =                                       \
   1038         "(" #Dst ", " #Src ", " #Inst ", Untyped)";                            \
   1039     const uint32_t T0 = allocateDqword();                                      \
   1040     const uint32_t T1 = allocateDqword();                                      \
   1041                                                                                \
   1042     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1043     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   1044     __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm));   \
   1045                                                                                \
   1046     AssembledTest test = assemble();                                           \
   1047     test.setDqwordTo(T0, V0);                                                  \
   1048     test.setDqwordTo(T1, V1);                                                  \
   1049     test.run();                                                                \
   1050                                                                                \
   1051     ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString;        \
   1052     reset();                                                                   \
   1053   } while (0)
   1054 
   1055 #define TestImpl(Dst, Src)                                                     \
   1056   do {                                                                         \
   1057     TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
   1058     TestImplSingleXmmAddr(Dst, pshufd);                                        \
   1059     TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
   1060     TestImplSingleXmmAddr(Dst, shufps);                                        \
   1061   } while (0)
   1062 
   1063   TestImpl(xmm0, xmm1);
   1064   TestImpl(xmm1, xmm2);
   1065   TestImpl(xmm2, xmm3);
   1066   TestImpl(xmm3, xmm4);
   1067   TestImpl(xmm4, xmm5);
   1068   TestImpl(xmm5, xmm6);
   1069   TestImpl(xmm6, xmm7);
   1070   TestImpl(xmm7, xmm8);
   1071   TestImpl(xmm8, xmm9);
   1072   TestImpl(xmm9, xmm10);
   1073   TestImpl(xmm10, xmm11);
   1074   TestImpl(xmm11, xmm12);
   1075   TestImpl(xmm12, xmm13);
   1076   TestImpl(xmm13, xmm14);
   1077   TestImpl(xmm14, xmm15);
   1078   TestImpl(xmm15, xmm0);
   1079 
   1080 #undef TestImpl
   1081 #undef TestImplSingleXmmXmmUntyped
   1082 #undef TestImplSingleXmmAddr
   1083 #undef TestImplSingleXmmXmm
   1084 }
   1085 
   1086 TEST_F(AssemblerX8664Test, Punpckl) {
   1087   const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
   1088                         uint64_t(0x5555555577777777ull));
   1089   const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
   1090                         uint64_t(0xCCCCCCCCDDDDDDDDull));
   1091   const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
   1092                               uint64_t(0xAAAAAAAA11111111ull));
   1093 
   1094   const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
   1095                         uint64_t(0x5555666677778888ull));
   1096   const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
   1097                         uint64_t(0xEEEEFFFF00009999ull));
   1098   const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
   1099                               uint64_t(0xAAAA1111BBBB2222ull));
   1100 
   1101   const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
   1102                         uint64_t(0x99AABBCCDDEEFF00ull));
   1103   const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
   1104                         uint64_t(0xBAADF00DFEEDFACEull));
   1105   const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
   1106                               uint64_t(0xFF11EE22DD33CC44ull));
   1107 
   1108 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1109   do {                                                                         \
   1110     static constexpr char TestString[] =                                       \
   1111         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1112     const uint32_t T0 = allocateDqword();                                      \
   1113     const uint32_t T1 = allocateDqword();                                      \
   1114                                                                                \
   1115     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1116     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1117     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1118             XmmRegister::Encoded_Reg_##Src);                                   \
   1119                                                                                \
   1120     AssembledTest test = assemble();                                           \
   1121     test.setDqwordTo(T0, V0_##Ty);                                             \
   1122     test.setDqwordTo(T1, V1_##Ty);                                             \
   1123     test.run();                                                                \
   1124                                                                                \
   1125     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1126     reset();                                                                   \
   1127   } while (0)
   1128 
   1129 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1130   do {                                                                         \
   1131     static constexpr char TestString[] =                                       \
   1132         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1133     const uint32_t T0 = allocateDqword();                                      \
   1134     const uint32_t T1 = allocateDqword();                                      \
   1135                                                                                \
   1136     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1137     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1138                                                                                \
   1139     AssembledTest test = assemble();                                           \
   1140     test.setDqwordTo(T0, V0_##Ty);                                             \
   1141     test.setDqwordTo(T1, V1_##Ty);                                             \
   1142     test.run();                                                                \
   1143                                                                                \
   1144     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1145     reset();                                                                   \
   1146   } while (0)
   1147 
   1148 #define TestImpl(Dst, Src)                                                     \
   1149   do {                                                                         \
   1150     TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
   1151     TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
   1152     TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
   1153     TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
   1154     TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
   1155     TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
   1156   } while (0)
   1157 
   1158   TestImpl(xmm0, xmm1);
   1159   TestImpl(xmm1, xmm2);
   1160   TestImpl(xmm2, xmm3);
   1161   TestImpl(xmm3, xmm4);
   1162   TestImpl(xmm4, xmm5);
   1163   TestImpl(xmm5, xmm6);
   1164   TestImpl(xmm6, xmm7);
   1165   TestImpl(xmm7, xmm0);
   1166 
   1167 #undef TestImpl
   1168 #undef TestImplXmmAddr
   1169 #undef TestImplXmmXmm
   1170 }
   1171 
   1172 TEST_F(AssemblerX8664Test, Packss) {
   1173   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
   1174                         uint64_t(0x7FFFFFFF80000000ull));
   1175   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
   1176                         uint64_t(0x0000800100007FFEull));
   1177   const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
   1178                               uint64_t(0x7FFF7FFEFFFEFFFFull));
   1179 
   1180   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
   1181                         uint64_t(0xFFFEFFFF7FFF8000ull));
   1182   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
   1183                         uint64_t(0x0088007700660055ull));
   1184   const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
   1185                               uint64_t(0x7F776655057F7F7Eull));
   1186 
   1187 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1188   do {                                                                         \
   1189     static constexpr char TestString[] =                                       \
   1190         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1191     const uint32_t T0 = allocateDqword();                                      \
   1192     const uint32_t T1 = allocateDqword();                                      \
   1193                                                                                \
   1194     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1195     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1196     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1197             XmmRegister::Encoded_Reg_##Src);                                   \
   1198                                                                                \
   1199     AssembledTest test = assemble();                                           \
   1200     test.setDqwordTo(T0, V0_##Ty);                                             \
   1201     test.setDqwordTo(T1, V1_##Ty);                                             \
   1202     test.run();                                                                \
   1203                                                                                \
   1204     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1205     reset();                                                                   \
   1206   } while (0)
   1207 
   1208 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1209   do {                                                                         \
   1210     static constexpr char TestString[] =                                       \
   1211         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1212     const uint32_t T0 = allocateDqword();                                      \
   1213     const uint32_t T1 = allocateDqword();                                      \
   1214                                                                                \
   1215     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1216     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1217                                                                                \
   1218     AssembledTest test = assemble();                                           \
   1219     test.setDqwordTo(T0, V0_##Ty);                                             \
   1220     test.setDqwordTo(T1, V1_##Ty);                                             \
   1221     test.run();                                                                \
   1222                                                                                \
   1223     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1224     reset();                                                                   \
   1225   } while (0)
   1226 
   1227 #define TestImpl(Dst, Src)                                                     \
   1228   do {                                                                         \
   1229     TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
   1230     TestImplXmmAddr(Dst, packss, v4i32);                                       \
   1231     TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
   1232     TestImplXmmAddr(Dst, packss, v8i16);                                       \
   1233   } while (0)
   1234 
   1235   TestImpl(xmm0, xmm1);
   1236   TestImpl(xmm1, xmm2);
   1237   TestImpl(xmm2, xmm3);
   1238   TestImpl(xmm3, xmm4);
   1239   TestImpl(xmm4, xmm5);
   1240   TestImpl(xmm5, xmm6);
   1241   TestImpl(xmm6, xmm7);
   1242   TestImpl(xmm7, xmm0);
   1243 
   1244 #undef TestImpl
   1245 #undef TestImplXmmAddr
   1246 #undef TestImplXmmXmm
   1247 }
   1248 
   1249 TEST_F(AssemblerX8664Test, Packus) {
   1250   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
   1251                         uint64_t(0x7FFFFFFF80000000ull));
   1252   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
   1253                         uint64_t(0x0000800100007FFEull));
   1254   const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
   1255                               uint64_t(0x80017FFE00000000ull));
   1256 
   1257   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
   1258                         uint64_t(0xFFFEFFFF7FFF8000ull));
   1259   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
   1260                         uint64_t(0x0088007700660055ull));
   1261   const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
   1262                               uint64_t(0x8877665505FF817Eull));
   1263 
   1264 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1265   do {                                                                         \
   1266     static constexpr char TestString[] =                                       \
   1267         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1268     const uint32_t T0 = allocateDqword();                                      \
   1269     const uint32_t T1 = allocateDqword();                                      \
   1270                                                                                \
   1271     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1272     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1273     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1274             XmmRegister::Encoded_Reg_##Src);                                   \
   1275                                                                                \
   1276     AssembledTest test = assemble();                                           \
   1277     test.setDqwordTo(T0, V0_##Ty);                                             \
   1278     test.setDqwordTo(T1, V1_##Ty);                                             \
   1279     test.run();                                                                \
   1280                                                                                \
   1281     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1282     reset();                                                                   \
   1283   } while (0)
   1284 
   1285 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1286   do {                                                                         \
   1287     static constexpr char TestString[] =                                       \
   1288         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1289     const uint32_t T0 = allocateDqword();                                      \
   1290     const uint32_t T1 = allocateDqword();                                      \
   1291                                                                                \
   1292     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1293     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1294                                                                                \
   1295     AssembledTest test = assemble();                                           \
   1296     test.setDqwordTo(T0, V0_##Ty);                                             \
   1297     test.setDqwordTo(T1, V1_##Ty);                                             \
   1298     test.run();                                                                \
   1299                                                                                \
   1300     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1301     reset();                                                                   \
   1302   } while (0)
   1303 
   1304 #define TestImpl(Dst, Src)                                                     \
   1305   do {                                                                         \
   1306     TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
   1307     TestImplXmmAddr(Dst, packus, v4i32);                                       \
   1308     TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
   1309     TestImplXmmAddr(Dst, packus, v8i16);                                       \
   1310   } while (0)
   1311 
   1312   TestImpl(xmm0, xmm1);
   1313   TestImpl(xmm1, xmm2);
   1314   TestImpl(xmm2, xmm3);
   1315   TestImpl(xmm3, xmm4);
   1316   TestImpl(xmm4, xmm5);
   1317   TestImpl(xmm5, xmm6);
   1318   TestImpl(xmm6, xmm7);
   1319   TestImpl(xmm7, xmm0);
   1320 
   1321 #undef TestImpl
   1322 #undef TestImplXmmAddr
   1323 #undef TestImplXmmXmm
   1324 }
   1325 
   1326 TEST_F(AssemblerX8664Test, Pshufb) {
          // Exercises pshufb with a register source and with a memory source,
          // using every register xmm0..xmm15 as the destination once.
          //
          // V0 is loaded into the destination operand and V1 is supplied as the
          // second operand. In this data, the V1 bytes that have their top bit
          // set (0x80, 0x8b) line up with the 0x00 bytes of Expected.
   1327   const Dqword V0(uint64_t(0x1122334455667788ull),
   1328                   uint64_t(0x99aabbccddeeff32ull));
   1329   const Dqword V1(uint64_t(0x0204050380060708ull),
   1330                   uint64_t(0x010306080a8b0c0dull));
   1331 
   1332   const Dqword Expected(uint64_t(0x6644335500221132ull),
   1333                         uint64_t(0x77552232ee00ccbbull));
   1334 
        // Register-register form: movups loads Dst from slot T0 and Src from
        // slot T1, then `Inst Dst, Src` is emitted. After the run the value of
        // the Dst register must equal Expected. `test.Dst<Dqword>()` expands to
        // an accessor named after the register passed as Dst. reset() is called
        // before the next case.
   1335 #define TestImplXmmXmm(Dst, Src, Inst)                                         \
   1336   do {                                                                         \
   1337     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
   1338     const uint32_t T0 = allocateDqword();                                      \
   1339     const uint32_t T1 = allocateDqword();                                      \
   1340                                                                                \
   1341     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1342     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1343     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
   1344             XmmRegister::Encoded_Reg_##Src);                                   \
   1345                                                                                \
   1346     AssembledTest test = assemble();                                           \
   1347     test.setDqwordTo(T0, V0);                                                  \
   1348     test.setDqwordTo(T1, V1);                                                  \
   1349     test.run();                                                                \
   1350                                                                                \
   1351     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1352     reset();                                                                   \
   1353   } while (0)
   1354 
        // Register-memory form: only Dst is loaded into a register; the second
        // operand of Inst is the memory slot T1 (holding V1) addressed directly.
   1355 #define TestImplXmmAddr(Dst, Inst)                                             \
   1356   do {                                                                         \
   1357     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
   1358     const uint32_t T0 = allocateDqword();                                      \
   1359     const uint32_t T1 = allocateDqword();                                      \
   1360                                                                                \
   1361     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1362     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1363                                                                                \
   1364     AssembledTest test = assemble();                                           \
   1365     test.setDqwordTo(T0, V0);                                                  \
   1366     test.setDqwordTo(T1, V1);                                                  \
   1367     test.run();                                                                \
   1368                                                                                \
   1369     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1370     reset();                                                                   \
   1371   } while (0)
   1372 
        // Runs both operand forms of pshufb for one (Dst, Src) register pair.
   1373 #define TestImpl(Dst, Src)                                                     \
   1374   do {                                                                         \
   1375     TestImplXmmXmm(Dst, Src, pshufb);                                          \
   1376     TestImplXmmAddr(Dst, pshufb);                                              \
   1377   } while (0)
   1378 
          // Each register is the destination once; the source is the next
          // register, wrapping from xmm15 back to xmm0.
   1379   TestImpl(xmm0, xmm1);
   1380   TestImpl(xmm1, xmm2);
   1381   TestImpl(xmm2, xmm3);
   1382   TestImpl(xmm3, xmm4);
   1383   TestImpl(xmm4, xmm5);
   1384   TestImpl(xmm5, xmm6);
   1385   TestImpl(xmm6, xmm7);
   1386   TestImpl(xmm7, xmm8);
   1387   TestImpl(xmm8, xmm9);
   1388   TestImpl(xmm9, xmm10);
   1389   TestImpl(xmm10, xmm11);
   1390   TestImpl(xmm11, xmm12);
   1391   TestImpl(xmm12, xmm13);
   1392   TestImpl(xmm13, xmm14);
   1393   TestImpl(xmm14, xmm15);
   1394   TestImpl(xmm15, xmm0);
   1395 
   1396 #undef TestImpl
   1397 #undef TestImplXmmAddr
   1398 #undef TestImplXmmXmm
   1399 }
   1400 
   1401 TEST_F(AssemblerX8664Test, Cvt) {
          // Exercises the cvt* emitters (cvtdq2ps, cvttps2dq, cvtsi2ss,
          // cvttss2si, cvtfloat2float) for f32 and f64, with register and memory
          // sources.
          //
          // The fixtures below are named <Inst><Size>DstValue / SrcValue /
          // Expected. The macros pick them by token pasting
          // (Inst##Size##DstValue, ...), so renaming a constant silently
          // detaches it from its test.
   1402   const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1403   const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
   1404   const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
   1405 
   1406   const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
   1407   const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
   1408   const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
   1409 
   1410   const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1411   const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
   1412   const Dqword tps2dq32Expected(-5, 3, 100, 200);
   1413 
   1414   const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1415   const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
   1416   const Dqword tps2dq64Expected(-5, 3, 100, 200);
   1417 
          // For si2ss only element 0 of Expected differs from DstValue.
   1418   const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1419   const int32_t si2ss32SrcValue = 5;
   1420   const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
   1421 
   1422   const Dqword si2ss64DstValue(-1.0, -1.0);
   1423   const int32_t si2ss64SrcValue = 5;
   1424   const Dqword si2ss64Expected(5.0, -1.0);
   1425 
          // For tss2si the destination is a GPR, pre-loaded with a marker value.
   1426   const int32_t tss2si32DstValue = 0xF00F0FF0;
   1427   const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
   1428   const int32_t tss2si32Expected = -5;
   1429 
   1430   const int32_t tss2si64DstValue = 0xF00F0FF0;
   1431   const Dqword tss2si64SrcValue(-5.0, -1.0);
   1432   const int32_t tss2si64Expected = -5;
   1433 
   1434   const Dqword float2float32DstValue(-1.0, -1.0);
   1435   const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
   1436   const Dqword float2float32Expected(-5.0, -1.0);
   1437 
   1438   const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
   1439   const Dqword float2float64SrcValue(-5.0, 3.0);
   1440   const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
   1441 
        // xmm <- xmm conversion: loads Dst and Src from memory, emits
        // cvt<Inst>(f<Size>, Dst, Src) and compares the whole Dst register with
        // <Inst><Size>Expected.
        // NOTE(review): the P/S prefixes in these macro names presumably mean
        // packed/scalar -- confirm.
   1442 #define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
   1443   do {                                                                         \
   1444     static constexpr char TestString[] =                                       \
   1445         "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
   1446     const uint32_t T0 = allocateDqword();                                      \
   1447     const uint32_t T1 = allocateDqword();                                      \
   1448                                                                                \
   1449     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1450     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   1451     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
   1452                                                                                \
   1453     AssembledTest test = assemble();                                           \
   1454     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1455     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
   1456     test.run();                                                                \
   1457                                                                                \
   1458     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1459     reset();                                                                   \
   1460   } while (0)
   1461 
        // xmm <- GPR conversion: the integer source is materialised with a
        // 32-bit mov of <Inst><Size>SrcValue (also when IntType is IceType_i64),
        // then cvt<Inst>(f<Size>, Dst, IntType, GPR) is emitted.
   1462 #define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType)                         \
   1463   do {                                                                         \
   1464     static constexpr char TestString[] =                                       \
   1465         "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")";        \
   1466     const uint32_t T0 = allocateDqword();                                      \
   1467                                                                                \
   1468     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1469     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
   1470     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
   1471                  Encoded_GPR_##GPR());                                         \
   1472                                                                                \
   1473     AssembledTest test = assemble();                                           \
   1474     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1475     test.run();                                                                \
   1476                                                                                \
   1477     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1478     reset();                                                                   \
   1479   } while (0)
   1480 
        // GPR <- xmm conversion: the GPR is pre-loaded with
        // <Inst><Size>DstValue, the xmm source is loaded from memory, and the
        // GPR is compared with Expected after a cast to uint<IntSize>_t.
   1481 #define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size)                         \
   1482   do {                                                                         \
   1483     static constexpr char TestString[] =                                       \
   1484         "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")";        \
   1485     const uint32_t T0 = allocateDqword();                                      \
   1486                                                                                \
   1487     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
   1488     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
   1489     __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
   1490                  Encoded_Xmm_##Src());                                         \
   1491                                                                                \
   1492     AssembledTest test = assemble();                                           \
   1493     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
   1494     test.run();                                                                \
   1495                                                                                \
   1496     ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
   1497               test.GPR())                                                      \
   1498         << TestString;                                                         \
   1499     reset();                                                                   \
   1500   } while (0)
   1501 
        // xmm <- memory variant of TestImplPXmmXmm: the source operand is the
        // memory slot T1 instead of a second register.
   1502 #define TestImplPXmmAddr(Dst, Inst, Size)                                      \
   1503   do {                                                                         \
   1504     static constexpr char TestString[] =                                       \
   1505         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
   1506     const uint32_t T0 = allocateDqword();                                      \
   1507     const uint32_t T1 = allocateDqword();                                      \
   1508                                                                                \
   1509     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1510     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
   1511                                                                                \
   1512     AssembledTest test = assemble();                                           \
   1513     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1514     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
   1515     test.run();                                                                \
   1516                                                                                \
   1517     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1518     reset();                                                                   \
   1519   } while (0)
   1520 
        // xmm <- memory variant of TestImplSXmmReg: the integer source lives in
        // a slot obtained from allocateDword() and written with setDwordTo().
   1521 #define TestImplSXmmAddr(Dst, Inst, Size, IntType)                             \
   1522   do {                                                                         \
   1523     static constexpr char TestString[] =                                       \
   1524         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")";            \
   1525     const uint32_t T0 = allocateDqword();                                      \
   1526     const uint32_t T1 = allocateDword();                                       \
   1527                                                                                \
   1528     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1529     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
   1530                  dwordAddress(T1));                                            \
   1531                                                                                \
   1532     AssembledTest test = assemble();                                           \
   1533     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1534     test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
   1535     test.run();                                                                \
   1536                                                                                \
   1537     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1538     reset();                                                                   \
   1539   } while (0)
   1540 
        // GPR <- memory variant of TestImplSRegXmm: the floating-point source
        // is read straight from the memory slot T0.
   1541 #define TestImplSRegAddr(GPR, Inst, IntSize, Size)                             \
   1542   do {                                                                         \
   1543     static constexpr char TestString[] =                                       \
   1544         "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")";            \
   1545     const uint32_t T0 = allocateDqword();                                      \
   1546                                                                                \
   1547     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
   1548     __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
   1549                  dwordAddress(T0));                                            \
   1550                                                                                \
   1551     AssembledTest test = assemble();                                           \
   1552     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
   1553     test.run();                                                                \
   1554                                                                                \
   1555     ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
   1556               test.GPR())                                                      \
   1557         << TestString;                                                         \
   1558     reset();                                                                   \
   1559   } while (0)
   1560 
        // Runs every conversion for one floating-point size. The
        // TestImplPXmmAddr invocations pass Src as their destination register,
        // so the memory-source cases write to Src rather than Dst.
   1561 #define TestImplSize(Dst, Src, GPR, Size)                                      \
   1562   do {                                                                         \
   1563     TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
   1564     TestImplPXmmAddr(Src, dq2ps, Size);                                        \
   1565     TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
   1566     TestImplPXmmAddr(Src, tps2dq, Size);                                       \
   1567     TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32);                       \
   1568     TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64);                       \
   1569     TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32);                           \
   1570     TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64);                           \
   1571     TestImplSRegXmm(GPR, Src, tss2si, 32, Size);                               \
   1572     TestImplSRegXmm(GPR, Src, tss2si, 64, Size);                               \
   1573     TestImplSRegAddr(GPR, tss2si, 32, Size);                                   \
   1574     TestImplSRegAddr(GPR, tss2si, 64, Size);                                   \
   1575     TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
   1576     TestImplPXmmAddr(Src, float2float, Size);                                  \
   1577   } while (0)
   1578 
        // Runs TestImplSize for both f32 and f64.
   1579 #define TestImpl(Dst, Src, GPR)                                                \
   1580   do {                                                                         \
   1581     TestImplSize(Dst, Src, GPR, 32);                                           \
   1582     TestImplSize(Dst, Src, GPR, 64);                                           \
   1583   } while (0)
   1584 
          // Every xmm register is used as Dst once. The GPR column skips r9 and
          // reuses r1/r2 for the last two rows.
          // NOTE(review): r9 is presumably reserved by the test fixture --
          // confirm against TestUtil.h.
   1585   TestImpl(xmm0, xmm1, r1);
   1586   TestImpl(xmm1, xmm2, r2);
   1587   TestImpl(xmm2, xmm3, r3);
   1588   TestImpl(xmm3, xmm4, r4);
   1589   TestImpl(xmm4, xmm5, r5);
   1590   TestImpl(xmm5, xmm6, r6);
   1591   TestImpl(xmm6, xmm7, r7);
   1592   TestImpl(xmm7, xmm8, r8);
   1593   TestImpl(xmm8, xmm9, r10);
   1594   TestImpl(xmm9, xmm10, r11);
   1595   TestImpl(xmm10, xmm11, r12);
   1596   TestImpl(xmm11, xmm12, r13);
   1597   TestImpl(xmm12, xmm13, r14);
   1598   TestImpl(xmm13, xmm14, r15);
   1599   TestImpl(xmm14, xmm15, r1);
   1600   TestImpl(xmm15, xmm0, r2);
   1601 
   1602 #undef TestImpl
   1603 #undef TestImplSize
   1604 #undef TestImplSRegAddr
   1605 #undef TestImplSXmmAddr
   1606 #undef TestImplPXmmAddr
   1607 #undef TestImplSRegXmm
   1608 #undef TestImplSXmmReg
   1609 #undef TestImplPXmmXmm
   1610 }
   1611 
   1612 TEST_F(AssemblerX8664Test, Ucomiss) {
          // Exercises ucomiss for f32 and f64 with register and memory second
          // operands, checking the resulting flags through conditional jumps.
          //
          // Each quiet NaN is taken from the numeric_limits specialisation that
          // matches its own type, so the 64-bit fixture does not depend on a
          // float-to-double conversion.
   1613   static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
   1614   static constexpr double qnan64 = std::numeric_limits<double>::quiet_NaN();
   1615 
          // Mutable on purpose: the macros overwrite element 0 before every
          // case and leave the remaining (NaN) elements untouched.
   1616   Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
   1617   Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
   1618 
   1619   Dqword test64DstValue(0.0, qnan64);
   1620   Dqword test64SrcValue(0.0, qnan64);
   1621 
        // Register-register comparison. Emitted sequence:
        //   eax = ImmIfFalse; ucomiss Dst, Src;
        //   j<BParity> Done; j<BOther> Done; eax = ImmIfTrue; Done:
        // The case passes only when neither jump is taken, so BParity and
        // BOther name the conditions that must NOT hold after the comparison.
        // CompType appears only in TestString, as a label.
   1622 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
   1623                        BOther)                                                 \
   1624   do {                                                                         \
   1625     static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
   1626     static constexpr char TestString[] =                                       \
   1627         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
   1628         ", " #BParity ", " #BOther ")";                                        \
   1629     const uint32_t T0 = allocateDqword();                                      \
   1630     test##Size##DstValue.F##Size[0] = Value0;                                  \
   1631     const uint32_t T1 = allocateDqword();                                      \
   1632     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1633     const uint32_t ImmIfTrue = 0xBEEF;                                         \
   1634     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
   1635                                                                                \
   1636     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1637     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   1638     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
   1639     __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());     \
   1640     Label Done;                                                                \
   1641     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
   1642     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
   1643     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
   1644     __ bind(&Done);                                                            \
   1645                                                                                \
   1646     AssembledTest test = assemble();                                           \
   1647     test.setDqwordTo(T0, test##Size##DstValue);                                \
   1648     test.setDqwordTo(T1, test##Size##SrcValue);                                \
   1649     test.run();                                                                \
   1650                                                                                \
   1651     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
   1652     reset();                                                                   \
   1653   } while (0)
   1654 
        // Register-memory comparison: same sequence as TestImplXmmXmm, but the
        // second ucomiss operand is the memory slot T1 rather than a register.
   1655 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
   1656   do {                                                                         \
   1657     static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
   1658     static constexpr char TestString[] =                                       \
   1659         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
   1660         ", " #BParity ", " #BOther ")";                                        \
   1661     const uint32_t T0 = allocateDqword();                                      \
   1662     test##Size##DstValue.F##Size[0] = Value0;                                  \
   1663     const uint32_t T1 = allocateDqword();                                      \
   1664     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1665     const uint32_t ImmIfTrue = 0xBEEF;                                         \
   1666     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
   1667                                                                                \
   1668     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1669     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
   1670     __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));        \
   1671     Label Done;                                                                \
   1672     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
   1673     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
   1674     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
   1675     __ bind(&Done);                                                            \
   1676                                                                                \
   1677     AssembledTest test = assemble();                                           \
   1678     test.setDqwordTo(T0, test##Size##DstValue);                                \
   1679     test.setDqwordTo(T1, test##Size##SrcValue);                                \
   1680     test.run();                                                                \
   1681                                                                                \
   1682     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
   1683     reset();                                                                   \
   1684   } while (0)
   1685 
        // Runs one comparison case in both operand forms.
   1686 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
   1687                      BOther)                                                   \
   1688   do {                                                                         \
   1689     TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
   1690     TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
   1691   } while (0)
   1692 
        // Comparison matrix for one floating-point size. The ordered cases
        // require that the parity jump (p) is not taken, and name the opposite
        // of the relation under test as the second must-not-branch condition.
        // The unordered cases (a NaN on either side) instead require that np is
        // not taken.
        // NOTE(review): the unordered rows use `o` once and `s` twice as the
        // second condition. The Intel SDM documents UCOMISS/UCOMISD as clearing
        // OF and SF, so neither jump should fire -- confirm the mix is intended.
   1693 #define TestImplSize(Dst, Src, Size)                                           \
   1694   do {                                                                         \
   1695     TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
   1696     TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
   1697     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
   1698     TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
   1699     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
   1700     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
   1701     TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
   1702     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
   1703     TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
   1704     TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
   1705     TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
   1706   } while (0)
   1707 
        // Runs the comparison matrix for both f32 and f64.
   1708 #define TestImpl(Dst, Src)                                                     \
   1709   do {                                                                         \
   1710     TestImplSize(Dst, Src, 32);                                                \
   1711     TestImplSize(Dst, Src, 64);                                                \
   1712   } while (0)
   1713 
          // Each register is the first operand once; the second operand is the
          // next register, wrapping from xmm15 back to xmm0.
   1714   TestImpl(xmm0, xmm1);
   1715   TestImpl(xmm1, xmm2);
   1716   TestImpl(xmm2, xmm3);
   1717   TestImpl(xmm3, xmm4);
   1718   TestImpl(xmm4, xmm5);
   1719   TestImpl(xmm5, xmm6);
   1720   TestImpl(xmm6, xmm7);
   1721   TestImpl(xmm7, xmm8);
   1722   TestImpl(xmm8, xmm9);
   1723   TestImpl(xmm9, xmm10);
   1724   TestImpl(xmm10, xmm11);
   1725   TestImpl(xmm11, xmm12);
   1726   TestImpl(xmm12, xmm13);
   1727   TestImpl(xmm13, xmm14);
   1728   TestImpl(xmm14, xmm15);
   1729   TestImpl(xmm15, xmm0);
   1730 
   1731 #undef TestImpl
   1732 #undef TestImplSize
   1733 #undef TestImplCond
   1734 #undef TestImplXmmAddr
   1735 #undef TestImplXmmXmm
   1736 }
   1737 
   1738 TEST_F(AssemblerX8664Test, Sqrtss) {
          // Exercises the sqrt emitter for f32 and f64 with register and memory
          // sources.
          //
          // The fixtures are mutable because the macros overwrite element 0 of
          // the source value before every case.
   1739   Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
   1740   Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
   1741 
   1742   Dqword test64SrcValue(-100.0, -100.0);
   1743   Dqword test64DstValue(-1.0, -1.0);
   1744 
        // Register-register form: Src is loaded from slot T0 and Dst from slot
        // T1, then sqrt(f<Size>, Dst, Src) is emitted. Expected is a copy of the
        // destination fixture with only element 0 replaced by Result, so the
        // assertion also checks that the remaining elements of Dst are
        // unchanged.
   1745 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
   1746   do {                                                                         \
   1747     static constexpr char TestString[] =                                       \
   1748         "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
   1749     const uint32_t T0 = allocateDqword();                                      \
   1750     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1751     const uint32_t T1 = allocateDqword();                                      \
   1752                                                                                \
   1753     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
   1754     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
   1755     __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
   1756                                                                                \
   1757     AssembledTest test = assemble();                                           \
   1758     test.setDqwordTo(T0, test##Size##SrcValue);                                \
   1759     test.setDqwordTo(T1, test##Size##DstValue);                                \
   1760     test.run();                                                                \
   1761                                                                                \
   1762     Dqword Expected = test##Size##DstValue;                                    \
   1763     Expected.F##Size[0] = Result;                                              \
   1764     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1765     reset();                                                                   \
   1766   } while (0)
   1767 
        // Register-memory form: only Dst is loaded into a register; the sqrt
        // source operand is the memory slot T0 holding the source fixture.
   1768 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
   1769   do {                                                                         \
   1770     static constexpr char TestString[] =                                       \
   1771         "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
   1772     const uint32_t T0 = allocateDqword();                                      \
   1773     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1774     const uint32_t T1 = allocateDqword();                                      \
   1775                                                                                \
   1776     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
   1777     __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0));           \
   1778                                                                                \
   1779     AssembledTest test = assemble();                                           \
   1780     test.setDqwordTo(T0, test##Size##SrcValue);                                \
   1781     test.setDqwordTo(T1, test##Size##DstValue);                                \
   1782     test.run();                                                                \
   1783                                                                                \
   1784     Dqword Expected = test##Size##DstValue;                                    \
   1785     Expected.F##Size[0] = Result;                                              \
   1786     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1787     reset();                                                                   \
   1788   } while (0)
   1789 
        // Three perfect-square inputs (4, 9, 100), each in both operand forms.
   1790 #define TestSqrtssSize(Dst, Src, Size)                                         \
   1791   do {                                                                         \
   1792     TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
   1793     TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
   1794     TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
   1795     TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
   1796     TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
   1797     TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
   1798   } while (0)
   1799 
        // Runs the square-root cases for both f32 and f64.
   1800 #define TestSqrtss(Dst, Src)                                                   \
   1801   do {                                                                         \
   1802     TestSqrtssSize(Dst, Src, 32);                                              \
   1803     TestSqrtssSize(Dst, Src, 64);                                              \
   1804   } while (0)
   1805 
          // Each register is the destination once; the source is the next
          // register, wrapping from xmm15 back to xmm0.
   1806   TestSqrtss(xmm0, xmm1);
   1807   TestSqrtss(xmm1, xmm2);
   1808   TestSqrtss(xmm2, xmm3);
   1809   TestSqrtss(xmm3, xmm4);
   1810   TestSqrtss(xmm4, xmm5);
   1811   TestSqrtss(xmm5, xmm6);
   1812   TestSqrtss(xmm6, xmm7);
   1813   TestSqrtss(xmm7, xmm8);
   1814   TestSqrtss(xmm8, xmm9);
   1815   TestSqrtss(xmm9, xmm10);
   1816   TestSqrtss(xmm10, xmm11);
   1817   TestSqrtss(xmm11, xmm12);
   1818   TestSqrtss(xmm12, xmm13);
   1819   TestSqrtss(xmm13, xmm14);
   1820   TestSqrtss(xmm14, xmm15);
   1821   TestSqrtss(xmm15, xmm0);
   1822 
   1823 #undef TestSqrtss
   1824 #undef TestSqrtssSize
   1825 #undef TestSqrtssXmmAddr
   1826 #undef TestSqrtssXmmXmm
   1827 }
   1828 
   1829 TEST_F(AssemblerX8664Test, Insertps) {
   1830 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
   1831   do {                                                                         \
   1832     static constexpr char TestString[] =                                       \
   1833         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
   1834         ")";                                                                   \
   1835     const uint32_t T0 = allocateDqword();                                      \
   1836     const Dqword V0 Value0;                                                    \
   1837     const uint32_t T1 = allocateDqword();                                      \
   1838     const Dqword V1 Value1;                                                    \
   1839                                                                                \
   1840     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1841     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   1842     __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),       \
   1843                 Immediate(Imm));                                               \
   1844                                                                                \
   1845     AssembledTest test = assemble();                                           \
   1846     test.setDqwordTo(T0, V0);                                                  \
   1847     test.setDqwordTo(T1, V1);                                                  \
   1848     test.run();                                                                \
   1849                                                                                \
   1850     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
   1851     reset();                                                                   \
   1852   } while (0)
   1853 
   1854 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
   1855   do {                                                                         \
   1856     static constexpr char TestString[] =                                       \
   1857         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
   1858     const uint32_t T0 = allocateDqword();                                      \
   1859     const Dqword V0 Value0;                                                    \
   1860     const uint32_t T1 = allocateDqword();                                      \
   1861     const Dqword V1 Value1;                                                    \
   1862                                                                                \
   1863     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1864     __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1),          \
   1865                 Immediate(Imm));                                               \
   1866                                                                                \
   1867     AssembledTest test = assemble();                                           \
   1868     test.setDqwordTo(T0, V0);                                                  \
   1869     test.setDqwordTo(T1, V1);                                                  \
   1870     test.run();                                                                \
   1871                                                                                \
   1872     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
   1873     reset();                                                                   \
   1874   } while (0)
   1875 
   1876 #define TestInsertps(Dst, Src)                                                 \
   1877   do {                                                                         \
   1878     TestInsertpsXmmXmmImm(                                                     \
   1879         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
   1880         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1881         0x99,                                                                  \
   1882         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
   1883     TestInsertpsXmmAddrImm(                                                    \
   1884         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
   1885         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1886         0x99,                                                                  \
   1887         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
   1888     TestInsertpsXmmXmmImm(                                                     \
   1889         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
   1890         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1891         0x9D,                                                                  \
   1892         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
   1893     TestInsertpsXmmAddrImm(                                                    \
   1894         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
   1895         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1896         0x9D,                                                                  \
   1897         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
   1898   } while (0)
   1899 
   1900   TestInsertps(xmm0, xmm1);
   1901   TestInsertps(xmm1, xmm2);
   1902   TestInsertps(xmm2, xmm3);
   1903   TestInsertps(xmm3, xmm4);
   1904   TestInsertps(xmm4, xmm5);
   1905   TestInsertps(xmm5, xmm6);
   1906   TestInsertps(xmm6, xmm7);
   1907   TestInsertps(xmm7, xmm8);
   1908   TestInsertps(xmm8, xmm9);
   1909   TestInsertps(xmm9, xmm10);
   1910   TestInsertps(xmm10, xmm11);
   1911   TestInsertps(xmm11, xmm12);
   1912   TestInsertps(xmm12, xmm13);
   1913   TestInsertps(xmm13, xmm14);
   1914   TestInsertps(xmm14, xmm15);
   1915   TestInsertps(xmm15, xmm0);
   1916 
   1917 #undef TestInsertps
   1918 #undef TestInsertpsXmmXmmAddr
   1919 #undef TestInsertpsXmmXmmImm
   1920 }
   1921 
   1922 TEST_F(AssemblerX8664Test, Pinsr) {
   1923   static constexpr uint8_t Mask32 = 0x03;
   1924   static constexpr uint8_t Mask16 = 0x07;
   1925   static constexpr uint8_t Mask8 = 0x0F;
   1926 
   1927 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
   1928   do {                                                                         \
   1929     static constexpr char TestString[] =                                       \
   1930         "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
   1931     const uint32_t T0 = allocateDqword();                                      \
   1932     const Dqword V0 Value0;                                                    \
   1933                                                                                \
   1934     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1935     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1));               \
   1936     __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(),        \
   1937              Immediate(Imm));                                                  \
   1938                                                                                \
   1939     AssembledTest test = assemble();                                           \
   1940     test.setDqwordTo(T0, V0);                                                  \
   1941     test.run();                                                                \
   1942                                                                                \
   1943     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   1944     Dqword Expected = V0;                                                      \
   1945     Expected.U##Size[sel] = Value1;                                            \
   1946     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1947     reset();                                                                   \
   1948   } while (0)
   1949 
   1950 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
   1951   do {                                                                         \
   1952     static constexpr char TestString[] =                                       \
   1953         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
   1954     const uint32_t T0 = allocateDqword();                                      \
   1955     const Dqword V0 Value0;                                                    \
   1956     const uint32_t T1 = allocateDword();                                       \
   1957     const uint32_t V1 = Value1;                                                \
   1958                                                                                \
   1959     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   1960     __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1),           \
   1961              Immediate(Imm));                                                  \
   1962                                                                                \
   1963     AssembledTest test = assemble();                                           \
   1964     test.setDqwordTo(T0, V0);                                                  \
   1965     test.setDwordTo(T1, V1);                                                   \
   1966     test.run();                                                                \
   1967                                                                                \
   1968     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   1969     Dqword Expected = V0;                                                      \
   1970     Expected.U##Size[sel] = Value1;                                            \
   1971     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1972     reset();                                                                   \
   1973   } while (0)
   1974 
   1975 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
   1976   do {                                                                         \
   1977     TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                  \
   1978                              uint64_t(0xFFFFFFFFDDDDDDDDull)),                 \
   1979                        GPR, Value1, Imm, Size);                                \
   1980     TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                 \
   1981                               uint64_t(0xFFFFFFFFDDDDDDDDull)),                \
   1982                         Value1, Imm, Size);                                    \
   1983   } while (0)
   1984 
   1985 #define TestPinsr(Src, Dst)                                                    \
   1986   do {                                                                         \
   1987     TestPinsrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
   1988     TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
   1989     TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
   1990   } while (0)
   1991 
   1992   TestPinsr(xmm0, r1);
   1993   TestPinsr(xmm1, r2);
   1994   TestPinsr(xmm2, r3);
   1995   TestPinsr(xmm3, r4);
   1996   TestPinsr(xmm4, r5);
   1997   TestPinsr(xmm5, r6);
   1998   TestPinsr(xmm6, r7);
   1999   TestPinsr(xmm7, r8);
   2000   TestPinsr(xmm8, r10);
   2001   TestPinsr(xmm9, r11);
   2002   TestPinsr(xmm10, r12);
   2003   TestPinsr(xmm11, r13);
   2004   TestPinsr(xmm12, r14);
   2005   TestPinsr(xmm13, r15);
   2006   TestPinsr(xmm14, r1);
   2007   TestPinsr(xmm15, r2);
   2008 
   2009 #undef TestPinsr
   2010 #undef TestPinsrSize
   2011 #undef TestPinsrXmmAddrImm
   2012 #undef TestPinsrXmmGPRImm
   2013 }
   2014 
   2015 TEST_F(AssemblerX8664Test, Pextr) {
   2016   static constexpr uint8_t Mask32 = 0x03;
   2017   static constexpr uint8_t Mask16 = 0x07;
   2018   static constexpr uint8_t Mask8 = 0x0F;
   2019 
   2020 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
   2021   do {                                                                         \
   2022     static constexpr char TestString[] =                                       \
   2023         "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
   2024     const uint32_t T0 = allocateDqword();                                      \
   2025     const Dqword V0 Value1;                                                    \
   2026                                                                                \
   2027     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
   2028     __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(),        \
   2029              Immediate(Imm));                                                  \
   2030                                                                                \
   2031     AssembledTest test = assemble();                                           \
   2032     test.setDqwordTo(T0, V0);                                                  \
   2033     test.run();                                                                \
   2034                                                                                \
   2035     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   2036     ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
   2037     reset();                                                                   \
   2038   } while (0)
   2039 
   2040 #define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
   2041   do {                                                                         \
   2042     TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull),             \
   2043                                   uint64_t(0xFFFFFFFFDDDDDDDDull)),            \
   2044                        Imm, Size);                                             \
   2045   } while (0)
   2046 
   2047 #define TestPextr(Src, Dst)                                                    \
   2048   do {                                                                         \
   2049     TestPextrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
   2050     TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
   2051     TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
   2052   } while (0)
   2053 
   2054   TestPextr(r1, xmm0);
   2055   TestPextr(r2, xmm1);
   2056   TestPextr(r3, xmm2);
   2057   TestPextr(r4, xmm3);
   2058   TestPextr(r5, xmm4);
   2059   TestPextr(r6, xmm5);
   2060   TestPextr(r7, xmm6);
   2061   TestPextr(r8, xmm7);
   2062   TestPextr(r10, xmm8);
   2063   TestPextr(r11, xmm9);
   2064   TestPextr(r12, xmm10);
   2065   TestPextr(r13, xmm11);
   2066   TestPextr(r14, xmm12);
   2067   TestPextr(r15, xmm13);
   2068   TestPextr(r1, xmm14);
   2069   TestPextr(r2, xmm15);
   2070 
   2071 #undef TestPextr
   2072 #undef TestPextrSize
   2073 #undef TestPextrXmmGPRImm
   2074 }
   2075 
   2076 TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) {
   2077 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
   2078   do {                                                                         \
   2079     static constexpr char TestString[] =                                       \
   2080         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
   2081     const uint32_t T0 = allocateDqword();                                      \
   2082     const Dqword V0 Value0;                                                    \
   2083     const uint32_t T1 = allocateDqword();                                      \
   2084     const Dqword V1 Value1;                                                    \
   2085                                                                                \
   2086     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   2087     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   2088     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
   2089                                                                                \
   2090     AssembledTest test = assemble();                                           \
   2091     test.setDqwordTo(T0, V0);                                                  \
   2092     test.setDqwordTo(T1, V1);                                                  \
   2093     test.run();                                                                \
   2094                                                                                \
   2095     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
   2096     static constexpr uint8_t ArraySize =                                       \
   2097         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
   2098     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
   2099       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
   2100     }                                                                          \
   2101     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   2102     reset();                                                                   \
   2103   } while (0)
   2104 
   2105 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
   2106   do {                                                                         \
   2107     static constexpr char TestString[] =                                       \
   2108         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
   2109     const uint32_t T0 = allocateDqword();                                      \
   2110     const Dqword V0 Value0;                                                    \
   2111     const uint32_t T1 = allocateDqword();                                      \
   2112     const Dqword V1 Value1;                                                    \
   2113                                                                                \
   2114     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   2115     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
   2116                                                                                \
   2117     AssembledTest test = assemble();                                           \
   2118     test.setDqwordTo(T0, V0);                                                  \
   2119     test.setDqwordTo(T1, V1);                                                  \
   2120     test.run();                                                                \
   2121                                                                                \
   2122     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
   2123     static constexpr uint8_t ArraySize =                                       \
   2124         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
   2125     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
   2126       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
   2127     }                                                                          \
   2128     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   2129     reset();                                                                   \
   2130   } while (0)
   2131 
   2132 #define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
   2133   do {                                                                         \
   2134     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == );               \
   2135     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == );                   \
   2136     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < );                \
   2137     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < );                    \
   2138   } while (0)
   2139 
   2140 #define TestPcmpSize(Dst, Src, Size)                                           \
   2141   do {                                                                         \
   2142     TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull),                      \
   2143                          uint64_t(0x0000000000000000ull)),                     \
   2144                    Src, (uint64_t(0x0000008800008800ull),                      \
   2145                          uint64_t(0xFFFFFFFFFFFFFFFFull)),                     \
   2146                    Size);                                                      \
   2147     TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull),                      \
   2148                          uint64_t(0x12345abcde12345Aull)),                     \
   2149                    Src, (uint64_t(0x0000008800008800ull),                      \
   2150                          uint64_t(0xAABBCCDD1234321Aull)),                     \
   2151                    Size);                                                      \
   2152   } while (0)
   2153 
   2154 #define TestPcmp(Dst, Src)                                                     \
   2155   do {                                                                         \
   2156     TestPcmpSize(xmm0, xmm1, 8);                                               \
   2157     TestPcmpSize(xmm0, xmm1, 16);                                              \
   2158     TestPcmpSize(xmm0, xmm1, 32);                                              \
   2159   } while (0)
   2160 
   2161   TestPcmp(xmm0, xmm1);
   2162   TestPcmp(xmm1, xmm2);
   2163   TestPcmp(xmm2, xmm3);
   2164   TestPcmp(xmm3, xmm4);
   2165   TestPcmp(xmm4, xmm5);
   2166   TestPcmp(xmm5, xmm6);
   2167   TestPcmp(xmm6, xmm7);
   2168   TestPcmp(xmm7, xmm8);
   2169   TestPcmp(xmm8, xmm9);
   2170   TestPcmp(xmm9, xmm10);
   2171   TestPcmp(xmm10, xmm11);
   2172   TestPcmp(xmm11, xmm12);
   2173   TestPcmp(xmm12, xmm13);
   2174   TestPcmp(xmm13, xmm14);
   2175   TestPcmp(xmm14, xmm15);
   2176   TestPcmp(xmm15, xmm0);
   2177 
   2178 #undef TestPcmp
   2179 #undef TestPcmpSize
   2180 #undef TestPcmpValues
   2181 #undef TestPcmpXmmAddr
   2182 #undef TestPcmpXmmXmm
   2183 }
   2184 
   2185 TEST_F(AssemblerX8664Test, Roundsd) {
   2186 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
   2187   do {                                                                         \
   2188     static constexpr char TestString[] =                                       \
   2189         "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
   2190     const uint32_t T0 = allocateDqword();                                      \
   2191     const Dqword V0(-3.0, -3.0);                                               \
   2192     const uint32_t T1 = allocateDqword();                                      \
   2193     const Dqword V1(double(Input), -123.4);                                    \
   2194                                                                                \
   2195     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
   2196     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
   2197     __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),            \
   2198              Immediate(AssemblerX8664::k##Mode));                              \
   2199                                                                                \
   2200     AssembledTest test = assemble();                                           \
   2201     test.setDqwordTo(T0, V0);                                                  \
   2202     test.setDqwordTo(T1, V1);                                                  \
   2203     test.run();                                                                \
   2204                                                                                \
   2205     const Dqword Expected(double(RN), -3.0);                                   \
   2206     EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   2207     reset();                                                                   \
   2208   } while (0)
   2209 
   2210 #define TestRoundsd(Dst, Src)                                                  \
   2211   do {                                                                         \
   2212     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
   2213     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
   2214     TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
   2215     TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
   2216     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
   2217     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
   2218   } while (0)
   2219 
   2220   TestRoundsd(xmm0, xmm1);
   2221   TestRoundsd(xmm1, xmm2);
   2222   TestRoundsd(xmm2, xmm3);
   2223   TestRoundsd(xmm3, xmm4);
   2224   TestRoundsd(xmm4, xmm5);
   2225   TestRoundsd(xmm5, xmm6);
   2226   TestRoundsd(xmm6, xmm7);
   2227   TestRoundsd(xmm7, xmm8);
   2228   TestRoundsd(xmm8, xmm9);
   2229   TestRoundsd(xmm9, xmm10);
   2230   TestRoundsd(xmm10, xmm11);
   2231   TestRoundsd(xmm11, xmm12);
   2232   TestRoundsd(xmm12, xmm13);
   2233   TestRoundsd(xmm13, xmm14);
   2234   TestRoundsd(xmm14, xmm15);
   2235   TestRoundsd(xmm15, xmm0);
   2236 
   2237 #undef TestRoundsd
   2238 #undef TestRoundsdXmmXmm
   2239 }
   2240 
   2241 TEST_F(AssemblerX8664Test, Set1ps) {
   2242 #define TestImpl(Xmm, Src, Imm)                                                \
   2243   do {                                                                         \
   2244     __ set1ps(Encoded_Xmm_##Xmm(), Encoded_GPR_##Src(), Immediate(Imm));       \
   2245                                                                                \
   2246     AssembledTest test = assemble();                                           \
   2247     test.run();                                                                \
   2248                                                                                \
   2249     const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm),               \
   2250                           (uint64_t(Imm) << 32) | uint32_t(Imm));              \
   2251     ASSERT_EQ(Expected, test.Xmm<Dqword>())                                    \
   2252         << "(" #Xmm ", " #Src ", " #Imm ")";                                   \
   2253     reset();                                                                   \
   2254   } while (0)
   2255 
   2256   TestImpl(xmm0, r1, 1);
   2257   TestImpl(xmm1, r2, 12);
   2258   TestImpl(xmm2, r3, 22);
   2259   TestImpl(xmm3, r4, 54);
   2260   TestImpl(xmm4, r5, 80);
   2261   TestImpl(xmm5, r6, 32);
   2262   TestImpl(xmm6, r7, 55);
   2263   TestImpl(xmm7, r8, 44);
   2264   TestImpl(xmm8, r10, 10);
   2265   TestImpl(xmm9, r11, 155);
   2266   TestImpl(xmm10, r12, 165);
   2267   TestImpl(xmm11, r13, 170);
   2268   TestImpl(xmm12, r14, 200);
   2269   TestImpl(xmm13, r15, 124);
   2270   TestImpl(xmm14, r1, 101);
   2271   TestImpl(xmm15, r2, 166);
   2272 
   2273 #undef TestImpl
   2274 }
   2275 
   2276 } // end of anonymous namespace
   2277 } // end of namespace Test
   2278 } // end of namespace X8664
   2279 } // end of namespace Ice
   2280