Home | History | Annotate | Download | only in AssemblerX8632
      1 //===- subzero/unittest/AssemblerX8632/XmmArith.cpp -----------------------===//
      2 //
      3 //                        The Subzero Code Generator
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #include "AssemblerX8632/TestUtil.h"
     10 
     11 namespace Ice {
     12 namespace X8632 {
     13 namespace Test {
     14 namespace {
     15 
     16 TEST_F(AssemblerX8632Test, ArithSS) {
          // Checks the scalar floating-point emitters addss, subss, mulss and divss.
          // Each instruction is assembled in a register/register form and in a
          // register/memory form, for IceType_f32 and IceType_f64, executed, and
          // the destination register is compared with the same operation computed
          // by the host compiler. All helper macros are local to this test and are
          // #undef'd before the test body closes.
          //
          // NOTE(review): `__` is not defined in this file; presumably it is the
          // assembler-access shorthand supplied by TestUtil.h -- confirm.

          // TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)
          //   FloatSize  32 or 64 (enforced by static_assert); selects float or
          //              double as the local `Type` and IceType_f32/IceType_f64.
          //   Src, Dst   XMM register names, pasted onto XmmRegister::Encoded_Reg_.
          //   Value0     loaded into Dst (through slot T0) before the operation.
          //   Value1     loaded into Src (through slot T1) before the operation.
          //   Inst       assembler method to emit; Op is the host C++ operator
          //              used to compute the expected result `V0 Op V1`.
          // The code is emitted first and the operand slots are filled afterwards
          // on the assembled test (qword stores for 64, dword stores for 32).
          // TestString echoes the macro arguments and is appended to the assertion
          // so a failure identifies the exact case. reset() is called at the end
          // of every case.
     17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
     18   do {                                                                         \
     19     static_assert(FloatSize == 32 || FloatSize == 64,                          \
     20                   "Invalid fp size " #FloatSize);                              \
     21     static constexpr char TestString[] =                                       \
     22         "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
     23         ", " #Inst ", " #Op ")";                                               \
     24     static constexpr bool IsDouble = FloatSize == 64;                          \
     25     using Type = std::conditional<IsDouble, double, float>::type;              \
     26     const uint32_t T0 = allocateQword();                                       \
     27     const Type V0 = Value0;                                                    \
     28     const uint32_t T1 = allocateQword();                                       \
     29     const Type V1 = Value1;                                                    \
     30                                                                                \
     31     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
     32              dwordAddress(T0));                                                \
     33     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Src,             \
     34              dwordAddress(T1));                                                \
     35     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
     36             XmmRegister::Encoded_Reg_##Src);                                   \
     37                                                                                \
     38     AssembledTest test = assemble();                                           \
     39     if (IsDouble) {                                                            \
     40       test.setQwordTo(T0, static_cast<double>(V0));                            \
     41       test.setQwordTo(T1, static_cast<double>(V1));                            \
     42     } else {                                                                   \
     43       test.setDwordTo(T0, static_cast<float>(V0));                             \
     44       test.setDwordTo(T1, static_cast<float>(V1));                             \
     45     }                                                                          \
     46                                                                                \
     47     test.run();                                                                \
     48                                                                                \
     49     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
     50     reset();                                                                   \
     51   } while (0)
     52 
          // TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)
          // Register/memory variant of the macro above: Value0 is loaded into Dst
          // from slot T0, and the second operand of Inst is the memory operand
          // dwordAddress(T1), which holds Value1. No source register is involved,
          // so TestString reports "Addr" in its place.
     53 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
     54   do {                                                                         \
     55     static_assert(FloatSize == 32 || FloatSize == 64,                          \
     56                   "Invalid fp size " #FloatSize);                              \
     57     static constexpr char TestString[] =                                       \
     58         "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
     59         ", " #Op ")";                                                          \
     60     static constexpr bool IsDouble = FloatSize == 64;                          \
     61     using Type = std::conditional<IsDouble, double, float>::type;              \
     62     const uint32_t T0 = allocateQword();                                       \
     63     const Type V0 = Value0;                                                    \
     64     const uint32_t T1 = allocateQword();                                       \
     65     const Type V1 = Value1;                                                    \
     66                                                                                \
     67     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
     68              dwordAddress(T0));                                                \
     69     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
     70             dwordAddress(T1));                                                 \
     71                                                                                \
     72     AssembledTest test = assemble();                                           \
     73     if (IsDouble) {                                                            \
     74       test.setQwordTo(T0, static_cast<double>(V0));                            \
     75       test.setQwordTo(T1, static_cast<double>(V1));                            \
     76     } else {                                                                   \
     77       test.setDwordTo(T0, static_cast<float>(V0));                             \
     78       test.setDwordTo(T1, static_cast<float>(V1));                             \
     79     }                                                                          \
     80                                                                                \
     81     test.run();                                                                \
     82                                                                                \
     83     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
     84     reset();                                                                   \
     85   } while (0)
     86 
          // TestArithSS(FloatSize, Src, Dst0, Dst1)
          // Runs all four operations in both forms for one register assignment.
          // The register/register cases use Src and Dst0; the register/memory
          // cases use Dst1. Every case uses a distinct pair of operand values.
     87 #define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
     88   do {                                                                         \
     89     TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
     90     TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
     91     TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
     92     TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
     93     TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
     94     TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
     95     TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / );             \
     96     TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / );                 \
     97   } while (0)
     98 
          // The register triple is rotated through xmm0..xmm7 so that each of the
          // eight registers is used once as Src, once as Dst0 and once as Dst1,
          // first for 32-bit and then for 64-bit operands.
     99   TestArithSS(32, xmm0, xmm1, xmm2);
    100   TestArithSS(32, xmm1, xmm2, xmm3);
    101   TestArithSS(32, xmm2, xmm3, xmm4);
    102   TestArithSS(32, xmm3, xmm4, xmm5);
    103   TestArithSS(32, xmm4, xmm5, xmm6);
    104   TestArithSS(32, xmm5, xmm6, xmm7);
    105   TestArithSS(32, xmm6, xmm7, xmm0);
    106   TestArithSS(32, xmm7, xmm0, xmm1);
    107 
    108   TestArithSS(64, xmm0, xmm1, xmm2);
    109   TestArithSS(64, xmm1, xmm2, xmm3);
    110   TestArithSS(64, xmm2, xmm3, xmm4);
    111   TestArithSS(64, xmm3, xmm4, xmm5);
    112   TestArithSS(64, xmm4, xmm5, xmm6);
    113   TestArithSS(64, xmm5, xmm6, xmm7);
    114   TestArithSS(64, xmm6, xmm7, xmm0);
    115   TestArithSS(64, xmm7, xmm0, xmm1);
    116 
    117 #undef TestArithSS
    118 #undef TestArithSSXmmAddr
    119 #undef TestArithSSXmmXmm
    120 }
    121 
    122 TEST_F(AssemblerX8632Test, PArith) {
          // Checks the packed-integer emitters psra, psrl, psll, pmull, pmuludq,
          // padd, psub, pand, pandn, por and pxor. Each case loads 128-bit
          // operands, executes one instruction and compares the destination
          // register, read back as a Dqword, with the same operation applied on
          // the host through packedAs<lane type>(V0). All helper macros are local
          // to this test and are #undef'd before the test body closes.
          //
          // Convention shared by the macros below: Value0 and Value1 are passed as
          // parenthesised argument lists, e.g. (uint64_t(a), uint64_t(b)), and are
          // pasted directly after the variable name, so `const Dqword V0 Value0;`
          // expands to a direct-initialisation `const Dqword V0 (a, b);`.
          // Type and Size are pasted together as Type##Size##_t to name the lane
          // type (for example int16_t or uint32_t), and IceType_i##Size selects
          // the element type handed to the assembler.

          // TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)
          // Register/register form: Dst is loaded from slot T0 (Value0) and Src
          // from slot T1 (Value1) with movups, then Inst(Dst, Src) is emitted.
          // Expected result: packedAs<Type##Size##_t>(V0) Op V1.
    123 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
    124   do {                                                                         \
    125     static constexpr char TestString[] =                                       \
    126         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    127         ", " #Type ", " #Size ")";                                             \
    128     const uint32_t T0 = allocateDqword();                                      \
    129     const Dqword V0 Value0;                                                    \
    130                                                                                \
    131     const uint32_t T1 = allocateDqword();                                      \
    132     const Dqword V1 Value1;                                                    \
    133                                                                                \
    134     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    135     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    136     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
    137             XmmRegister::Encoded_Reg_##Src);                                   \
    138                                                                                \
    139     AssembledTest test = assemble();                                           \
    140     test.setDqwordTo(T0, V0);                                                  \
    141     test.setDqwordTo(T1, V1);                                                  \
    142     test.run();                                                                \
    143                                                                                \
    144     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
    145         << TestString;                                                         \
    146     reset();                                                                   \
    147   } while (0)
    148 
          // TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)
          // Register/memory form: only Dst is loaded into a register; the second
          // operand of Inst is the memory operand dwordAddress(T1) holding Value1.
          // The expected result is computed exactly as in the register form.
    149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
    150   do {                                                                         \
    151     static constexpr char TestString[] =                                       \
    152         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    153         ", " #Type ", " #Size ")";                                             \
    154     const uint32_t T0 = allocateDqword();                                      \
    155     const Dqword V0 Value0;                                                    \
    156                                                                                \
    157     const uint32_t T1 = allocateDqword();                                      \
    158     const Dqword V1 Value1;                                                    \
    159                                                                                \
    160     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    161     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
    162             dwordAddress(T1));                                                 \
    163                                                                                \
    164     AssembledTest test = assemble();                                           \
    165     test.setDqwordTo(T0, V0);                                                  \
    166     test.setDqwordTo(T1, V1);                                                  \
    167     test.run();                                                                \
    168                                                                                \
    169     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
    170         << TestString;                                                         \
    171     reset();                                                                   \
    172   } while (0)
    173 
          // TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)
          // Register/immediate form: the second operand of Inst is Immediate(Imm),
          // so only one data slot is allocated. Imm is a plain expression (not a
          // parenthesised list) and is also used directly on the host side:
          // expected result is packedAs<Type##Size##_t>(V0) Op Imm.
    174 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
    175   do {                                                                         \
    176     static constexpr char TestString[] =                                       \
    177         "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
    178         ", " #Size ")";                                                        \
    179     const uint32_t T0 = allocateDqword();                                      \
    180     const Dqword V0 Value0;                                                    \
    181                                                                                \
    182     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    183     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, Immediate(Imm));  \
    184                                                                                \
    185     AssembledTest test = assemble();                                           \
    186     test.setDqwordTo(T0, V0);                                                  \
    187     test.run();                                                                \
    188                                                                                \
    189     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
    190         << TestString;                                                         \
    191     reset();                                                                   \
    192   } while (0)
    193 
          // TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)
          // pandn has no single host operator, so it gets its own macro instead of
          // going through TestPArithXmmXmm. The expected value is the complement
          // of the destination operand ANDed with the source: (~V0) & V1.
    194 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
    195   do {                                                                         \
    196     static constexpr char TestString[] =                                       \
    197         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
    198         ", " #Size ")";                                                        \
    199     const uint32_t T0 = allocateDqword();                                      \
    200     const Dqword V0 Value0;                                                    \
    201                                                                                \
    202     const uint32_t T1 = allocateDqword();                                      \
    203     const Dqword V1 Value1;                                                    \
    204                                                                                \
    205     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    206     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    207     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
    208              XmmRegister::Encoded_Reg_##Src);                                  \
    209                                                                                \
    210     AssembledTest test = assemble();                                           \
    211     test.setDqwordTo(T0, V0);                                                  \
    212     test.setDqwordTo(T1, V1);                                                  \
    213     test.run();                                                                \
    214                                                                                \
    215     ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
    216         << TestString;                                                         \
    217     reset();                                                                   \
    218   } while (0)
    219 
          // TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)
          // Register/memory form of the pandn check: the second operand is the
          // memory operand dwordAddress(T1). The expected expression is the same
          // as in the register form; only the parenthesisation is written
          // differently.
    220 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
    221   do {                                                                         \
    222     static constexpr char TestString[] =                                       \
    223         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
    224         ")";                                                                   \
    225     const uint32_t T0 = allocateDqword();                                      \
    226     const Dqword V0 Value0;                                                    \
    227                                                                                \
    228     const uint32_t T1 = allocateDqword();                                      \
    229     const Dqword V1 Value1;                                                    \
    230                                                                                \
    231     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    232     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
    233              dwordAddress(T1));                                                \
    234                                                                                \
    235     AssembledTest test = assemble();                                           \
    236     test.setDqwordTo(T0, V0);                                                  \
    237     test.setDqwordTo(T1, V1);                                                  \
    238     test.run();                                                                \
    239                                                                                \
    240     ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
    241         << TestString;                                                         \
    242     reset();                                                                   \
    243   } while (0)
    244 
          // TestPArithSize(Dst, Src, Size)
          // Runs every instruction that applies to one lane width (8, 16 or 32,
          // enforced by static_assert) on one register pair:
          //   * psra (signed lanes), psrl and psll (unsigned lanes) with a shift
          //     count of 3, in register, memory and immediate forms, and pmull in
          //     register and memory forms -- only when Size != 8;
          //   * pmuludq in register and memory forms -- only when Size is neither
          //     8 nor 16;
          //   * padd, psub, pand, pandn, por and pxor in register and memory forms
          //     -- for every Size.
          // The size guards are ordinary run-time `if` statements on a constant,
          // so the guarded expansions are still compiled for every Size; they are
          // merely not executed.
          // NOTE(review): the literal 0x0123456789ABCDEull has 15 hex digits. It
          // is valid as written, but possibly a 16-digit pattern was intended --
          // confirm before changing, since the expected values are derived from
          // the same literal.
    245 #define TestPArithSize(Dst, Src, Size)                                         \
    246   do {                                                                         \
    247     static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
    248     if (Size != 8) {                                                           \
    249       TestPArithXmmXmm(                                                        \
    250           Dst,                                                                 \
    251           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    252           Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
    253       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    254                               uint64_t(0x8080404002020101ull)),                \
    255                         (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);    \
    256       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    257                              uint64_t(0x8080404002020101ull)),                 \
    258                        3u, psra, >>, int, Size);                               \
    259       TestPArithXmmXmm(                                                        \
    260           Dst,                                                                 \
    261           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    262           Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
    263       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    264                               uint64_t(0x8080404002020101ull)),                \
    265                         (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);   \
    266       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    267                              uint64_t(0x8080404002020101ull)),                 \
    268                        3u, psrl, >>, uint, Size);                              \
    269       TestPArithXmmXmm(                                                        \
    270           Dst,                                                                 \
    271           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    272           Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
    273       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
    274                               uint64_t(0x8080404002020101ull)),                \
    275                         (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);   \
    276       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
    277                              uint64_t(0x8080404002020101ull)),                 \
    278                        3u, psll, <<, uint, Size);                              \
    279                                                                                \
    280       TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                  \
    281                              uint64_t(0x8080404002020101ull)),                 \
    282                        Src, (uint64_t(0xFFFFFFFF00000000ull),                  \
    283                              uint64_t(0x0123456789ABCDEull)),                  \
    284                        pmull, *, int, Size);                                   \
    285       TestPArithXmmAddr(                                                       \
    286           Dst,                                                                 \
    287           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
    288           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
    289           pmull, *, int, Size);                                                \
    290       if (Size != 16) {                                                        \
    291         TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                \
    292                                uint64_t(0x8080404002020101ull)),               \
    293                          Src, (uint64_t(0xFFFFFFFF00000000ull),                \
    294                                uint64_t(0x0123456789ABCDEull)),                \
    295                          pmuludq, *, uint, Size);                              \
    296         TestPArithXmmAddr(                                                     \
    297             Dst, (uint64_t(0x8040201008040201ull),                             \
    298                   uint64_t(0x8080404002020101ull)),                            \
    299             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
    300             pmuludq, *, uint, Size);                                           \
    301       }                                                                        \
    302     }                                                                          \
    303     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    304                            uint64_t(0x8080404002020101ull)),                   \
    305                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    306                            uint64_t(0x0123456789ABCDEull)),                    \
    307                      padd, +, int, Size);                                      \
    308     TestPArithXmmAddr(                                                         \
    309         Dst,                                                                   \
    310         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    311         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    312         padd, +, int, Size);                                                   \
    313     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    314                            uint64_t(0x8080404002020101ull)),                   \
    315                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    316                            uint64_t(0x0123456789ABCDEull)),                    \
    317                      psub, -, int, Size);                                      \
    318     TestPArithXmmAddr(                                                         \
    319         Dst,                                                                   \
    320         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    321         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    322         psub, -, int, Size);                                                   \
    323     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    324                            uint64_t(0x8080404002020101ull)),                   \
    325                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    326                            uint64_t(0x0123456789ABCDEull)),                    \
    327                      pand, &, int, Size);                                      \
    328     TestPArithXmmAddr(                                                         \
    329         Dst,                                                                   \
    330         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    331         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    332         pand, &, int, Size);                                                   \
    333                                                                                \
    334     TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                     \
    335                           uint64_t(0x8080404002020101ull)),                    \
    336                     Src, (uint64_t(0xFFFFFFFF00000000ull),                     \
    337                           uint64_t(0x0123456789ABCDEull)),                     \
    338                     int, Size);                                                \
    339     TestPAndnXmmAddr(                                                          \
    340         Dst,                                                                   \
    341         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    342         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    343         int, Size);                                                            \
    344                                                                                \
    345     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    346                            uint64_t(0x8080404002020101ull)),                   \
    347                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    348                            uint64_t(0x0123456789ABCDEull)),                    \
    349                      por, |, int, Size);                                       \
    350     TestPArithXmmAddr(                                                         \
    351         Dst,                                                                   \
    352         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    353         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    354         por, |, int, Size);                                                    \
    355     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
    356                            uint64_t(0x8080404002020101ull)),                   \
    357                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
    358                            uint64_t(0x0123456789ABCDEull)),                    \
    359                      pxor, ^, int, Size);                                      \
    360     TestPArithXmmAddr(                                                         \
    361         Dst,                                                                   \
    362         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
    363         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
    364         pxor, ^, int, Size);                                                   \
    365   } while (0)
    366 
          // TestPArith(Src, Dst)
          // Runs TestPArithSize for lane widths 8, 16 and 32. Note that the
          // arguments are forwarded positionally into TestPArithSize(Dst, Src,
          // Size), so the register named `Src` here is the one used as the
          // destination, and `Dst` here is the one used as the source.
    367 #define TestPArith(Src, Dst)                                                   \
    368   do {                                                                         \
    369     TestPArithSize(Src, Dst, 8);                                               \
    370     TestPArithSize(Src, Dst, 16);                                              \
    371     TestPArithSize(Src, Dst, 32);                                              \
    372   } while (0)
    373 
          // Each of the eight registers is used once in each of the two roles.
    374   TestPArith(xmm0, xmm1);
    375   TestPArith(xmm1, xmm2);
    376   TestPArith(xmm2, xmm3);
    377   TestPArith(xmm3, xmm4);
    378   TestPArith(xmm4, xmm5);
    379   TestPArith(xmm5, xmm6);
    380   TestPArith(xmm6, xmm7);
    381   TestPArith(xmm7, xmm0);
    382 
    383 #undef TestPArith
    384 #undef TestPArithSize
    385 #undef TestPAndnXmmAddr
    386 #undef TestPAndnXmmXmm
    387 #undef TestPArithXmmImm
    388 #undef TestPArithXmmAddr
    389 #undef TestPArithXmmXmm
    390 }
    391 
    392 TEST_F(AssemblerX8632Test, ArithPS) {
    393 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
    394   do {                                                                         \
    395     static constexpr char TestString[] =                                       \
    396         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    397         ", " #Type ")";                                                        \
    398     const uint32_t T0 = allocateDqword();                                      \
    399     const Dqword V0 Value0;                                                    \
    400     const uint32_t T1 = allocateDqword();                                      \
    401     const Dqword V1 Value1;                                                    \
    402                                                                                \
    403     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    404     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    405     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
    406             XmmRegister::Encoded_Reg_##Src);                                   \
    407                                                                                \
    408     AssembledTest test = assemble();                                           \
    409     test.setDqwordTo(T0, V0);                                                  \
    410     test.setDqwordTo(T1, V1);                                                  \
    411     test.run();                                                                \
    412                                                                                \
    413     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    414                                                                                \
    415     reset();                                                                   \
    416   } while (0)
    417 
    418 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
    419   do {                                                                         \
    420     static constexpr char TestString[] =                                       \
    421         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
    422         ", " #Type ")";                                                        \
    423     const uint32_t T0 = allocateDqword();                                      \
    424     const Dqword V0 Value0;                                                    \
    425     const uint32_t T1 = allocateDqword();                                      \
    426     const Dqword V1 Value1;                                                    \
    427                                                                                \
    428     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    429     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    430     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
    431                                                                                \
    432     AssembledTest test = assemble();                                           \
    433     test.setDqwordTo(T0, V0);                                                  \
    434     test.setDqwordTo(T1, V1);                                                  \
    435     test.run();                                                                \
    436                                                                                \
    437     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    438                                                                                \
    439     reset();                                                                   \
    440   } while (0)
    441 
    442 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
    443   do {                                                                         \
    444     static constexpr char TestString[] =                                       \
    445         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    446         ", " #Type ")";                                                        \
    447     const uint32_t T0 = allocateDqword();                                      \
    448     const Dqword V0 Value0;                                                    \
    449     const uint32_t T1 = allocateDqword();                                      \
    450     const Dqword V1 Value1;                                                    \
    451                                                                                \
    452     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    453     __ Inst(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));                 \
    454                                                                                \
    455     AssembledTest test = assemble();                                           \
    456     test.setDqwordTo(T0, V0);                                                  \
    457     test.setDqwordTo(T1, V1);                                                  \
    458     test.run();                                                                \
    459                                                                                \
    460     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    461                                                                                \
    462     reset();                                                                   \
    463   } while (0)
    464 
    465 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
    466   do {                                                                         \
    467     static constexpr char TestString[] =                                       \
    468         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type     \
    469         ")";                                                                   \
    470     const uint32_t T0 = allocateDqword();                                      \
    471     const Dqword V0 Value0;                                                    \
    472     const uint32_t T1 = allocateDqword();                                      \
    473     const Dqword V1 Value1;                                                    \
    474                                                                                \
    475     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    476     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    477     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
    478             XmmRegister::Encoded_Reg_##Src);                                   \
    479                                                                                \
    480     AssembledTest test = assemble();                                           \
    481     test.setDqwordTo(T0, V0);                                                  \
    482     test.setDqwordTo(T1, V1);                                                  \
    483     test.run();                                                                \
    484                                                                                \
    485     ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
    486                                                                                \
    487     reset();                                                                   \
    488   } while (0)
    489 
    490 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
    491   do {                                                                         \
    492     static constexpr char TestString[] =                                       \
    493         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
    494         ", " #Type ")";                                                        \
    495     const uint32_t T0 = allocateDqword();                                      \
    496     const Dqword V0 Value0;                                                    \
    497     const uint32_t T1 = allocateDqword();                                      \
    498     const Dqword V1 Value1;                                                    \
    499                                                                                \
    500     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    501     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
    502             dwordAddress(T1));                                                 \
    503                                                                                \
    504     AssembledTest test = assemble();                                           \
    505     test.setDqwordTo(T0, V0);                                                  \
    506     test.setDqwordTo(T1, V1);                                                  \
    507     test.run();                                                                \
    508                                                                                \
    509     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    510                                                                                \
    511     reset();                                                                   \
    512   } while (0)
    513 
    514 #define TestArithPS(Dst, Src)                                                  \
    515   do {                                                                         \
    516     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    517                       (0.55, 0.43, 0.23, 1.21), addps, +, float);              \
    518     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    519                        (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
    520     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    521                       (0.55, 0.43, 0.23, 1.21), subps, -, float);              \
    522     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    523                        (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
    524     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    525                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);              \
    526     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    527                        (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
    528     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    529                       (0.55, 0.43, 0.23, 1.21), divps, /, float);              \
    530     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    531                        (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
    532     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    533                       (0.55, 0.43, 0.23, 1.21), andps, &, float);              \
    534     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    535                        (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
    536     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &,    \
    537                       double);                                                 \
    538     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &,        \
    539                        double);                                                \
    540     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    541                       (0.55, 0.43, 0.23, 1.21), orps, |, float);               \
    542     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |,     \
    543                       double);                                                 \
    544     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
    545                  (0.55, 0.43, 0.23, 1.21), minps, float);                      \
    546     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
    547                  (0.55, 0.43, 0.23, 1.21), maxps, float);                      \
    548     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
    549     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
    550     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
    551                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);              \
    552     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
    553                        (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
    554     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^,    \
    555                       double);                                                 \
    556     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^,        \
    557                        double);                                                \
    558   } while (0)
    559 
    560 #if 0
    561 
    562 #endif
    563 
    564   TestArithPS(xmm0, xmm1);
    565   TestArithPS(xmm1, xmm2);
    566   TestArithPS(xmm2, xmm3);
    567   TestArithPS(xmm3, xmm4);
    568   TestArithPS(xmm4, xmm5);
    569   TestArithPS(xmm5, xmm6);
    570   TestArithPS(xmm6, xmm7);
    571   TestArithPS(xmm7, xmm0);
    572 
    573 #undef TestArithPs
    574 #undef TestMinMaxPS
    575 #undef TestArithPSXmmXmmUntyped
    576 #undef TestArithPSXmmAddr
    577 #undef TestArithPSXmmXmm
    578 }
    579 
    580 TEST_F(AssemblerX8632Test, Blending) {
    581   using f32 = float;
    582   using i8 = uint8_t;
    583 
    584 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
    585   do {                                                                         \
    586     static constexpr char TestString[] =                                       \
    587         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
    588         ", " #Type ")";                                                        \
    589     const uint32_t T0 = allocateDqword();                                      \
    590     const Dqword V0 Value0;                                                    \
    591     const uint32_t T1 = allocateDqword();                                      \
    592     const Dqword V1 Value1;                                                    \
    593     const uint32_t Mask = allocateDqword();                                    \
    594     const Dqword MaskValue M;                                                  \
    595                                                                                \
    596     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
    597     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    598     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    599     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst,                    \
    600             XmmRegister::Encoded_Reg_##Src);                                   \
    601                                                                                \
    602     AssembledTest test = assemble();                                           \
    603     test.setDqwordTo(T0, V0);                                                  \
    604     test.setDqwordTo(T1, V1);                                                  \
    605     test.setDqwordTo(Mask, MaskValue);                                         \
    606     test.run();                                                                \
    607                                                                                \
    608     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
    609         << TestString;                                                         \
    610     reset();                                                                   \
    611   } while (0)
    612 
    613 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
    614   do {                                                                         \
    615     static constexpr char TestString[] =                                       \
    616         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
    617         ")";                                                                   \
    618     const uint32_t T0 = allocateDqword();                                      \
    619     const Dqword V0 Value0;                                                    \
    620     const uint32_t T1 = allocateDqword();                                      \
    621     const Dqword V1 Value1;                                                    \
    622     const uint32_t Mask = allocateDqword();                                    \
    623     const Dqword MaskValue M;                                                  \
    624                                                                                \
    625     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
    626     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    627     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
    628                                                                                \
    629     AssembledTest test = assemble();                                           \
    630     test.setDqwordTo(T0, V0);                                                  \
    631     test.setDqwordTo(T1, V1);                                                  \
    632     test.setDqwordTo(Mask, MaskValue);                                         \
    633     test.run();                                                                \
    634                                                                                \
    635     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
    636         << TestString;                                                         \
    637     reset();                                                                   \
    638   } while (0)
    639 
    640 #define TestBlending(Src, Dst)                                                 \
    641   do {                                                                         \
    642     TestBlendingXmmXmm(                                                        \
    643         Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
    644         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
    645         blendvps, f32);                                                        \
    646     TestBlendingXmmAddr(                                                       \
    647         Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
    648         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
    649         blendvps, f32);                                                        \
    650     TestBlendingXmmXmm(                                                        \
    651         Dst,                                                                   \
    652         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
    653         Src,                                                                   \
    654         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
    655         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
    656         pblendvb, i8);                                                         \
    657     TestBlendingXmmAddr(                                                       \
    658         Dst,                                                                   \
    659         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
    660         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
    661         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
    662         pblendvb, i8);                                                         \
    663   } while (0)
    664 
    665   /* xmm0 is taken. It is the implicit mask . */
    666   TestBlending(xmm1, xmm2);
    667   TestBlending(xmm2, xmm3);
    668   TestBlending(xmm3, xmm4);
    669   TestBlending(xmm4, xmm5);
    670   TestBlending(xmm5, xmm6);
    671   TestBlending(xmm6, xmm7);
    672   TestBlending(xmm7, xmm1);
    673 
    674 #undef TestBlending
    675 #undef TestBlendingXmmAddr
    676 #undef TestBlendingXmmXmm
    677 }
    678 
    679 TEST_F(AssemblerX8632Test, Cmpps) {
    680 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
    681   do {                                                                         \
    682     static constexpr char TestString[] =                                       \
    683         "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
    684     const uint32_t T0 = allocateDqword();                                      \
    685     const Dqword V0 Value0;                                                    \
    686     const uint32_t T1 = allocateDqword();                                      \
    687     const Dqword V1 Value1;                                                    \
    688                                                                                \
    689     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    690     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    691     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
    692              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
    693                                                                                \
    694     AssembledTest test = assemble();                                           \
    695     test.setDqwordTo(T0, V0);                                                  \
    696     test.setDqwordTo(T1, V1);                                                  \
    697     test.run();                                                                \
    698                                                                                \
    699     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    700     ;                                                                          \
    701     reset();                                                                   \
    702   } while (0)
    703 
    704 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
    705   do {                                                                         \
    706     static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
    707     const uint32_t T0 = allocateDqword();                                      \
    708     const Dqword V0 Value0;                                                    \
    709     const uint32_t T1 = allocateDqword();                                      \
    710     const Dqword V1 Value1;                                                    \
    711                                                                                \
    712     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    713     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
    714              dwordAddress(T1), Cond::Cmpps_##C);                               \
    715                                                                                \
    716     AssembledTest test = assemble();                                           \
    717     test.setDqwordTo(T0, V0);                                                  \
    718     test.setDqwordTo(T1, V1);                                                  \
    719     test.run();                                                                \
    720                                                                                \
    721     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
    722     ;                                                                          \
    723     reset();                                                                   \
    724   } while (0)
    725 
    726 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
    727   do {                                                                         \
    728     static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
    729     const uint32_t T0 = allocateDqword();                                      \
    730     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    731                     std::numeric_limits<float>::quiet_NaN());                  \
    732     const uint32_t T1 = allocateDqword();                                      \
    733     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
    734                     std::numeric_limits<float>::quiet_NaN());                  \
    735                                                                                \
    736     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    737     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    738     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
    739              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
    740                                                                                \
    741     AssembledTest test = assemble();                                           \
    742     test.setDqwordTo(T0, V0);                                                  \
    743     test.setDqwordTo(T1, V1);                                                  \
    744     test.run();                                                                \
    745                                                                                \
    746     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
    747     ;                                                                          \
    748     reset();                                                                   \
    749   } while (0)
    750 
    751 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
    752   do {                                                                         \
    753     static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
    754     const uint32_t T0 = allocateDqword();                                      \
    755     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    756                     std::numeric_limits<float>::quiet_NaN());                  \
    757     const uint32_t T1 = allocateDqword();                                      \
    758     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
    759                     std::numeric_limits<float>::quiet_NaN());                  \
    760                                                                                \
    761     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    762     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
    763              dwordAddress(T1), Cond::Cmpps_##C);                               \
    764                                                                                \
    765     AssembledTest test = assemble();                                           \
    766     test.setDqwordTo(T0, V0);                                                  \
    767     test.setDqwordTo(T1, V1);                                                  \
    768     test.run();                                                                \
    769                                                                                \
    770     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
    771     ;                                                                          \
    772     reset();                                                                   \
    773   } while (0)
    774 
    775 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
    776   do {                                                                         \
    777     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    778     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    779     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    780     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    781     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    782     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    783     TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
    784     TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
    785     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    786     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    787     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    788     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    789     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
    790     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
    791     if (FloatSize == 32) {                                                     \
    792       TestCmppsOrdUnordXmmXmm(                                                 \
    793           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    794                     std::numeric_limits<float>::quiet_NaN()),                  \
    795           Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,             \
    796                 std::numeric_limits<float>::quiet_NaN()),                      \
    797           unord, Type);                                                        \
    798       TestCmppsOrdUnordXmmAddr(                                                \
    799           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
    800                     std::numeric_limits<float>::quiet_NaN()),                  \
    801           (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,                  \
    802            std::numeric_limits<float>::quiet_NaN()),                           \
    803           unord, Type);                                                        \
    804     } else {                                                                   \
    805       TestCmppsOrdUnordXmmXmm(64, Dst,                                         \
    806                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
    807                               Src, (std::numeric_limits<double>::quiet_NaN(),  \
    808                                     std::numeric_limits<double>::quiet_NaN()), \
    809                               unord, Type);                                    \
    810       TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
    811                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
    812                               unord, Type);                                    \
    813       TestCmppsOrdUnordXmmAddr(                                                \
    814           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
    815           (std::numeric_limits<double>::quiet_NaN(),                           \
    816            std::numeric_limits<double>::quiet_NaN()),                          \
    817           unord, Type);                                                        \
    818       TestCmppsOrdUnordXmmAddr(                                                \
    819           64, Dst, (1.0, 1.0),                                                 \
    820           (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
    821     }                                                                          \
    822   } while (0)
    823 
    824 #define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
    825   do {                                                                         \
    826     TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
    827     TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
    828     TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
    829     TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
    830     TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
    831     TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
    832     TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
    833     TestCmpps(FloatSize, xmm7, Value0, xmm0, Value1, Type);                    \
    834   } while (0)
    835 
    836   TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
    837                 float);
    838   TestCmppsSize(64, (1.0, -1000.0), (0.55, 1.21), double);
    839 
    840 #undef TestCmpps
    841 #undef TestCmppsOrdUnordXmmAddr
    842 #undef TestCmppsOrdUnordXmmXmm
    843 #undef TestCmppsXmmAddr
    844 #undef TestCmppsXmmXmm
    845 }
    846 
    847 TEST_F(AssemblerX8632Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
          // Applies the one-operand packed instructions sqrtps, rsqrtps,
          // reciprocalps and sqrtpd to each of xmm0..xmm7 and compares the
          // resulting register contents with hard-coded bit patterns.
          //
          // TestImplSingle(Dst, Inst, Expect):
          //   Dst    - register name; pasted onto XmmRegister::Encoded_Reg_ and
          //            also used as the accessor test.Dst<Dqword>().
          //   Inst   - assembler method to emit, called with Dst as its only
          //            operand.
          //   Expect - parenthesized Dqword constructor arguments describing the
          //            expected register contents.
          // The emitted code loads Dst with movups from the scratch slot T0, which
          // is filled with V0 = (1.0, 4.0, 20.0, 3.14) before the code is run.
          // reset() is called after each check so the next case starts clean.
    848 #define TestImplSingle(Dst, Inst, Expect)                                      \
    849   do {                                                                         \
    850     static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
    851     const uint32_t T0 = allocateDqword();                                      \
    852     const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
    853                                                                                \
    854     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    855     __ Inst(XmmRegister::Encoded_Reg_##Dst);                                   \
    856                                                                                \
    857     AssembledTest test = assemble();                                           \
    858     test.setDqwordTo(T0, V0);                                                  \
    859     test.run();                                                                \
    860     ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
    861     reset();                                                                   \
    862   } while (0)
    863 
          // TestImpl(Dst): checks all four instructions on one register. Each
          // expected value is written as a pair of uint64_t constants; for sqrtps
          // the first constant 0x400000003F800000 holds the float encodings of
          // 2.0 and 1.0, i.e. the square roots of the inputs 4.0 and 1.0.
          // NOTE(review): rsqrtps and reciprocalps are approximation
          // instructions; the exact bit patterns asserted for them may depend on
          // the CPU that executes the test -- confirm before treating a mismatch
          // on other hardware as an assembler bug.
    864 #define TestImpl(Dst)                                                          \
    865   do {                                                                         \
    866     TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull),              \
    867                                  uint64_t(0x3FE2D10B408F1BBDull)));            \
    868     TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull),             \
    869                                   uint64_t(0x3F1078003E64F000ull)));           \
    870     TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull),        \
    871                                        uint64_t(0x3EA310003D4CC000ull)));      \
    872                                                                                \
    873     TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull),              \
    874                                  uint64_t(0x401C42FAE40282A8ull)));            \
    875   } while (0)
    876 
          // Repeat the checks for xmm0 through xmm7.
    877   TestImpl(xmm0);
    878   TestImpl(xmm1);
    879   TestImpl(xmm2);
    880   TestImpl(xmm3);
    881   TestImpl(xmm4);
    882   TestImpl(xmm5);
    883   TestImpl(xmm6);
    884   TestImpl(xmm7);
    885 
          // The helper macros are local to this test; later tests reuse the names.
    886 #undef TestImpl
    887 #undef TestImplSingle
    888 }
    889 
    890 TEST_F(AssemblerX8632Test, Unpck) {
          // Checks the register-register forms of unpcklps, unpcklpd, unpckhps and
          // unpckhpd against precomputed results. V0 is the initial destination
          // value and V1 the source value for every case.
    891   const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
    892                   uint64_t(0xCCCCCCCCDDDDDDDDull));
    893   const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
    894                   uint64_t(0x9999999988888888ull));
    895 
          // One expected value per instruction. The names must stay of the form
          // <Inst>Expected because TestImplSingle looks them up via Inst##Expected.
    896   const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
    897                                 uint64_t(0xEEEEEEEEAAAAAAAAull));
    898   const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
    899                                 uint64_t(0xEEEEEEEEFFFFFFFFull));
    900   const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
    901                                 uint64_t(0x99999999CCCCCCCCull));
    902   const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
    903                                 uint64_t(0x9999999988888888ull));
    904 
          // TestImplSingle(Dst, Src, Inst): emits code that loads Dst from scratch
          // slot T0 and Src from scratch slot T1 with movups, then executes
          // Inst Dst, Src. Before running, T0 is set to V0 and T1 to V1; afterwards
          // Dst must equal <Inst>Expected. TestString identifies the failing case.
    905 #define TestImplSingle(Dst, Src, Inst)                                         \
    906   do {                                                                         \
    907     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
    908     const uint32_t T0 = allocateDqword();                                      \
    909     const uint32_t T1 = allocateDqword();                                      \
    910                                                                                \
    911     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    912     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    913     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
    914                                                                                \
    915     AssembledTest test = assemble();                                           \
    916     test.setDqwordTo(T0, V0);                                                  \
    917     test.setDqwordTo(T1, V1);                                                  \
    918     test.run();                                                                \
    919                                                                                \
    920     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
    921     reset();                                                                   \
    922   } while (0)
    923 
          // TestImpl(Dst, Src): runs all four unpack instructions on one register
          // pair.
    924 #define TestImpl(Dst, Src)                                                     \
    925   do {                                                                         \
    926     TestImplSingle(Dst, Src, unpcklps);                                        \
    927     TestImplSingle(Dst, Src, unpcklpd);                                        \
    928     TestImplSingle(Dst, Src, unpckhps);                                        \
    929     TestImplSingle(Dst, Src, unpckhpd);                                        \
    930   } while (0)
    931 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
    932   TestImpl(xmm0, xmm1);
    933   TestImpl(xmm1, xmm2);
    934   TestImpl(xmm2, xmm3);
    935   TestImpl(xmm3, xmm4);
    936   TestImpl(xmm4, xmm5);
    937   TestImpl(xmm5, xmm6);
    938   TestImpl(xmm6, xmm7);
    939   TestImpl(xmm7, xmm0);
    940 
          // The helper macros are local to this test; later tests reuse the names.
    941 #undef TestImpl
    942 #undef TestImplSingle
    943 }
    944 
    945 TEST_F(AssemblerX8632Test, Shufp) {
          // Checks pshufd and shufps, each taking an 8-bit immediate, with the
          // source operand given either as a register or as a memory address.
          // V0 is the initial destination value and V1 the source value.
    946   const Dqword V0(uint64_t(0x1111111122222222ull),
    947                   uint64_t(0x5555555577777777ull));
    948   const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
    949                   uint64_t(0xCCCCCCCCDDDDDDDDull));
    950 
          // Per-instruction immediate and expected result. The macros below find
          // them through Inst##Imm and Inst##Expected, so the names must keep the
          // <Inst>Imm / <Inst>Expected form.
    951   const uint8_t pshufdImm = 0x63;
    952   const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
    953                               uint64_t(0xAAAAAAAADDDDDDDDull));
    954 
    955   const uint8_t shufpsImm = 0xf9;
    956   const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
    957                               uint64_t(0xCCCCCCCCCCCCCCCCull));
    958 
          // TestImplSingleXmmXmm(Dst, Src, Inst): register-source form. Loads Dst
          // from T0 and Src from T1 via movups, then emits
          // Inst(IceType_f32, Dst, Src, Immediate(<Inst>Imm)). T0/T1 are filled with
          // V0/V1 before the run and Dst is compared with <Inst>Expected.
    959 #define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
    960   do {                                                                         \
    961     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
    962     const uint32_t T0 = allocateDqword();                                      \
    963     const uint32_t T1 = allocateDqword();                                      \
    964                                                                                \
    965     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    966     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    967     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst,                       \
    968             XmmRegister::Encoded_Reg_##Src, Immediate(Inst##Imm));             \
    969                                                                                \
    970     AssembledTest test = assemble();                                           \
    971     test.setDqwordTo(T0, V0);                                                  \
    972     test.setDqwordTo(T1, V1);                                                  \
    973     test.run();                                                                \
    974                                                                                \
    975     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
    976     reset();                                                                   \
    977   } while (0)
    978 
          // TestImplSingleXmmAddr(Dst, Inst): memory-source form. Same as above
          // except that no source register is loaded; the instruction is given
          // dwordAddress(T1) as its source operand. The same expected value is
          // used for both forms.
    979 #define TestImplSingleXmmAddr(Dst, Inst)                                       \
    980   do {                                                                         \
    981     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
    982     const uint32_t T0 = allocateDqword();                                      \
    983     const uint32_t T1 = allocateDqword();                                      \
    984                                                                                \
    985     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    986     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1),     \
    987             Immediate(Inst##Imm));                                             \
    988                                                                                \
    989     AssembledTest test = assemble();                                           \
    990     test.setDqwordTo(T0, V0);                                                  \
    991     test.setDqwordTo(T1, V1);                                                  \
    992     test.run();                                                                \
    993                                                                                \
    994     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
    995     reset();                                                                   \
    996   } while (0)
    997 
          // TestImpl(Dst, Src): both operand forms of both instructions for one
          // register pair. Src is only used by the register-source form.
    998 #define TestImpl(Dst, Src)                                                     \
    999   do {                                                                         \
   1000     TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
   1001     TestImplSingleXmmAddr(Dst, pshufd);                                        \
   1002     TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
   1003     TestImplSingleXmmAddr(Dst, shufps);                                        \
   1004   } while (0)
   1005 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
   1006   TestImpl(xmm0, xmm1);
   1007   TestImpl(xmm1, xmm2);
   1008   TestImpl(xmm2, xmm3);
   1009   TestImpl(xmm3, xmm4);
   1010   TestImpl(xmm4, xmm5);
   1011   TestImpl(xmm5, xmm6);
   1012   TestImpl(xmm6, xmm7);
   1013   TestImpl(xmm7, xmm0);
   1014 
          // The helper macros are local to this test; later tests reuse the names.
   1015 #undef TestImpl
   1016 #undef TestImplSingleXmmAddr
   1017 #undef TestImplSingleXmmXmm
   1018 }
   1019 
   1020 TEST_F(AssemblerX8632Test, Punpckl) {
          // Checks punpckl for the element types v4i32, v8i16 and v16i8, with the
          // source operand given either as a register or as a memory address.
          //
          // For each type Ty there is a triple V0_<Ty> (initial destination),
          // V1_<Ty> (source) and Expected_<Ty>. The macros below select the triple
          // by pasting Ty onto these prefixes, so the naming scheme must be kept.
          // In every expected value the elements of the first uint64_t of V0 and
          // V1 alternate, starting with the V0 element.
   1021   const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
   1022                         uint64_t(0x5555555577777777ull));
   1023   const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
   1024                         uint64_t(0xCCCCCCCCDDDDDDDDull));
   1025   const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
   1026                               uint64_t(0xAAAAAAAA11111111ull));
   1027 
   1028   const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
   1029                         uint64_t(0x5555666677778888ull));
   1030   const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
   1031                         uint64_t(0xEEEEFFFF00009999ull));
   1032   const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
   1033                               uint64_t(0xAAAA1111BBBB2222ull));
   1034 
   1035   const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
   1036                         uint64_t(0x99AABBCCDDEEFF00ull));
   1037   const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
   1038                         uint64_t(0xBAADF00DFEEDFACEull));
   1039   const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
   1040                               uint64_t(0xFF11EE22DD33CC44ull));
   1041 
          // TestImplXmmXmm(Dst, Src, Inst, Ty): register-source form. Loads Dst
          // from T0 and Src from T1 via movups, then emits
          // Inst(IceType_<Ty>, Dst, Src). T0/T1 are filled with V0_<Ty>/V1_<Ty>
          // before the run and Dst is compared with Expected_<Ty>.
   1042 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1043   do {                                                                         \
   1044     static constexpr char TestString[] =                                       \
   1045         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1046     const uint32_t T0 = allocateDqword();                                      \
   1047     const uint32_t T1 = allocateDqword();                                      \
   1048                                                                                \
   1049     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1050     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1051     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1052             XmmRegister::Encoded_Reg_##Src);                                   \
   1053                                                                                \
   1054     AssembledTest test = assemble();                                           \
   1055     test.setDqwordTo(T0, V0_##Ty);                                             \
   1056     test.setDqwordTo(T1, V1_##Ty);                                             \
   1057     test.run();                                                                \
   1058                                                                                \
   1059     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1060     reset();                                                                   \
   1061   } while (0)
   1062 
          // TestImplXmmAddr(Dst, Inst, Ty): memory-source form. No source register
          // is loaded; the instruction is given dwordAddress(T1) as its source
          // operand. The expected value is the same as for the register form.
   1063 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1064   do {                                                                         \
   1065     static constexpr char TestString[] =                                       \
   1066         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1067     const uint32_t T0 = allocateDqword();                                      \
   1068     const uint32_t T1 = allocateDqword();                                      \
   1069                                                                                \
   1070     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1071     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1072                                                                                \
   1073     AssembledTest test = assemble();                                           \
   1074     test.setDqwordTo(T0, V0_##Ty);                                             \
   1075     test.setDqwordTo(T1, V1_##Ty);                                             \
   1076     test.run();                                                                \
   1077                                                                                \
   1078     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1079     reset();                                                                   \
   1080   } while (0)
   1081 
          // TestImpl(Dst, Src): both operand forms for all three element types on
          // one register pair. Src is only used by the register-source form.
   1082 #define TestImpl(Dst, Src)                                                     \
   1083   do {                                                                         \
   1084     TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
   1085     TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
   1086     TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
   1087     TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
   1088     TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
   1089     TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
   1090   } while (0)
   1091 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
   1092   TestImpl(xmm0, xmm1);
   1093   TestImpl(xmm1, xmm2);
   1094   TestImpl(xmm2, xmm3);
   1095   TestImpl(xmm3, xmm4);
   1096   TestImpl(xmm4, xmm5);
   1097   TestImpl(xmm5, xmm6);
   1098   TestImpl(xmm6, xmm7);
   1099   TestImpl(xmm7, xmm0);
   1100 
          // The helper macros are local to this test; later tests reuse the names.
   1101 #undef TestImpl
   1102 #undef TestImplXmmAddr
   1103 #undef TestImplXmmXmm
   1104 }
   1105 
   1106 TEST_F(AssemblerX8632Test, Packss) {
          // Checks packss for the element types v4i32 and v8i16, with the source
          // operand given either as a register or as a memory address.
          //
          // For each type Ty there is a triple V0_<Ty> (initial destination),
          // V1_<Ty> (source) and Expected_<Ty>, selected in the macros by pasting
          // Ty onto these prefixes. The inputs deliberately contain values that do
          // not fit the narrower signed element (e.g. 0x7FFFFFFF and 0x80000000 in
          // V0_v4i32), and the expected results contain the corresponding clamped
          // values (0x7FFF and 0x8000).
   1107   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
   1108                         uint64_t(0x7FFFFFFF80000000ull));
   1109   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
   1110                         uint64_t(0x0000800100007FFEull));
   1111   const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
   1112                               uint64_t(0x7FFF7FFEFFFEFFFFull));
   1113 
   1114   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
   1115                         uint64_t(0xFFFEFFFF7FFF8000ull));
   1116   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
   1117                         uint64_t(0x0088007700660055ull));
   1118   const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
   1119                               uint64_t(0x7F776655057F7F7Eull));
   1120 
          // TestImplXmmXmm(Dst, Src, Inst, Ty): register-source form. Loads Dst
          // from T0 and Src from T1 via movups, then emits
          // Inst(IceType_<Ty>, Dst, Src). T0/T1 are filled with V0_<Ty>/V1_<Ty>
          // before the run and Dst is compared with Expected_<Ty>.
   1121 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1122   do {                                                                         \
   1123     static constexpr char TestString[] =                                       \
   1124         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1125     const uint32_t T0 = allocateDqword();                                      \
   1126     const uint32_t T1 = allocateDqword();                                      \
   1127                                                                                \
   1128     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1129     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1130     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1131             XmmRegister::Encoded_Reg_##Src);                                   \
   1132                                                                                \
   1133     AssembledTest test = assemble();                                           \
   1134     test.setDqwordTo(T0, V0_##Ty);                                             \
   1135     test.setDqwordTo(T1, V1_##Ty);                                             \
   1136     test.run();                                                                \
   1137                                                                                \
   1138     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1139     reset();                                                                   \
   1140   } while (0)
   1141 
          // TestImplXmmAddr(Dst, Inst, Ty): memory-source form. No source register
          // is loaded; the instruction is given dwordAddress(T1) as its source
          // operand. The expected value is the same as for the register form.
   1142 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1143   do {                                                                         \
   1144     static constexpr char TestString[] =                                       \
   1145         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1146     const uint32_t T0 = allocateDqword();                                      \
   1147     const uint32_t T1 = allocateDqword();                                      \
   1148                                                                                \
   1149     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1150     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1151                                                                                \
   1152     AssembledTest test = assemble();                                           \
   1153     test.setDqwordTo(T0, V0_##Ty);                                             \
   1154     test.setDqwordTo(T1, V1_##Ty);                                             \
   1155     test.run();                                                                \
   1156                                                                                \
   1157     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1158     reset();                                                                   \
   1159   } while (0)
   1160 
          // TestImpl(Dst, Src): both operand forms for both element types on one
          // register pair. Src is only used by the register-source form.
   1161 #define TestImpl(Dst, Src)                                                     \
   1162   do {                                                                         \
   1163     TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
   1164     TestImplXmmAddr(Dst, packss, v4i32);                                       \
   1165     TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
   1166     TestImplXmmAddr(Dst, packss, v8i16);                                       \
   1167   } while (0)
   1168 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
   1169   TestImpl(xmm0, xmm1);
   1170   TestImpl(xmm1, xmm2);
   1171   TestImpl(xmm2, xmm3);
   1172   TestImpl(xmm3, xmm4);
   1173   TestImpl(xmm4, xmm5);
   1174   TestImpl(xmm5, xmm6);
   1175   TestImpl(xmm6, xmm7);
   1176   TestImpl(xmm7, xmm0);
   1177 
          // The helper macros are local to this test; later tests reuse the names.
   1178 #undef TestImpl
   1179 #undef TestImplXmmAddr
   1180 #undef TestImplXmmXmm
   1181 }
   1182 
   1183 TEST_F(AssemblerX8632Test, Packus) {
          // Checks packus for the element types v4i32 and v8i16, with the source
          // operand given either as a register or as a memory address.
          //
          // The inputs are the same as in the Packss test; only the expected
          // results differ. For each type Ty the triple V0_<Ty> (initial
          // destination), V1_<Ty> (source) and Expected_<Ty> is selected in the
          // macros by pasting Ty onto these prefixes. Out-of-range inputs show up
          // clamped in the expected values, e.g. for v4i32 the element 0x00010000
          // becomes 0xFFFF and 0x80000000 becomes 0x0000.
   1184   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
   1185                         uint64_t(0x7FFFFFFF80000000ull));
   1186   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
   1187                         uint64_t(0x0000800100007FFEull));
   1188   const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
   1189                               uint64_t(0x80017FFE00000000ull));
   1190 
   1191   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
   1192                         uint64_t(0xFFFEFFFF7FFF8000ull));
   1193   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
   1194                         uint64_t(0x0088007700660055ull));
   1195   const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
   1196                               uint64_t(0x8877665505FF817Eull));
   1197 
          // TestImplXmmXmm(Dst, Src, Inst, Ty): register-source form. Loads Dst
          // from T0 and Src from T1 via movups, then emits
          // Inst(IceType_<Ty>, Dst, Src). T0/T1 are filled with V0_<Ty>/V1_<Ty>
          // before the run and Dst is compared with Expected_<Ty>.
   1198 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
   1199   do {                                                                         \
   1200     static constexpr char TestString[] =                                       \
   1201         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
   1202     const uint32_t T0 = allocateDqword();                                      \
   1203     const uint32_t T1 = allocateDqword();                                      \
   1204                                                                                \
   1205     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1206     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1207     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
   1208             XmmRegister::Encoded_Reg_##Src);                                   \
   1209                                                                                \
   1210     AssembledTest test = assemble();                                           \
   1211     test.setDqwordTo(T0, V0_##Ty);                                             \
   1212     test.setDqwordTo(T1, V1_##Ty);                                             \
   1213     test.run();                                                                \
   1214                                                                                \
   1215     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1216     reset();                                                                   \
   1217   } while (0)
   1218 
          // TestImplXmmAddr(Dst, Inst, Ty): memory-source form. No source register
          // is loaded; the instruction is given dwordAddress(T1) as its source
          // operand. The expected value is the same as for the register form.
   1219 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
   1220   do {                                                                         \
   1221     static constexpr char TestString[] =                                       \
   1222         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
   1223     const uint32_t T0 = allocateDqword();                                      \
   1224     const uint32_t T1 = allocateDqword();                                      \
   1225                                                                                \
   1226     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1227     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1228                                                                                \
   1229     AssembledTest test = assemble();                                           \
   1230     test.setDqwordTo(T0, V0_##Ty);                                             \
   1231     test.setDqwordTo(T1, V1_##Ty);                                             \
   1232     test.run();                                                                \
   1233                                                                                \
   1234     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
   1235     reset();                                                                   \
   1236   } while (0)
   1237 
          // TestImpl(Dst, Src): both operand forms for both element types on one
          // register pair. Src is only used by the register-source form.
   1238 #define TestImpl(Dst, Src)                                                     \
   1239   do {                                                                         \
   1240     TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
   1241     TestImplXmmAddr(Dst, packus, v4i32);                                       \
   1242     TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
   1243     TestImplXmmAddr(Dst, packus, v8i16);                                       \
   1244   } while (0)
   1245 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
   1246   TestImpl(xmm0, xmm1);
   1247   TestImpl(xmm1, xmm2);
   1248   TestImpl(xmm2, xmm3);
   1249   TestImpl(xmm3, xmm4);
   1250   TestImpl(xmm4, xmm5);
   1251   TestImpl(xmm5, xmm6);
   1252   TestImpl(xmm6, xmm7);
   1253   TestImpl(xmm7, xmm0);
   1254 
          // The helper macros are local to this test; later tests reuse the names.
   1255 #undef TestImpl
   1256 #undef TestImplXmmAddr
   1257 #undef TestImplXmmXmm
   1258 }
   1259 
   1260 TEST_F(AssemblerX8632Test, Pshufb) {
          // Checks pshufb with the source operand given either as a register or as
          // a memory address. V0 is the initial destination value and V1 the
          // source operand, which acts as the per-byte selector. The two selector
          // bytes with the top bit set (0x80 and 0x8b) correspond to the two 0x00
          // bytes in Expected; the remaining selector bytes pick bytes out of V0.
   1261   const Dqword V0(uint64_t(0x1122334455667788ull),
   1262                   uint64_t(0x99aabbccddeeff32ull));
   1263   const Dqword V1(uint64_t(0x0204050380060708ull),
   1264                   uint64_t(0x010306080a8b0c0dull));
   1265 
   1266   const Dqword Expected(uint64_t(0x6644335500221132ull),
   1267                         uint64_t(0x77552232ee00ccbbull));
   1268 
          // TestImplXmmXmm(Dst, Src, Inst): register-source form. Loads Dst from T0
          // and Src from T1 via movups, then emits Inst(IceType_void, Dst, Src).
          // T0/T1 are filled with V0/V1 before the run and Dst is compared with
          // Expected.
   1269 #define TestImplXmmXmm(Dst, Src, Inst)                                         \
   1270   do {                                                                         \
   1271     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
   1272     const uint32_t T0 = allocateDqword();                                      \
   1273     const uint32_t T1 = allocateDqword();                                      \
   1274                                                                                \
   1275     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1276     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1277     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
   1278             XmmRegister::Encoded_Reg_##Src);                                   \
   1279                                                                                \
   1280     AssembledTest test = assemble();                                           \
   1281     test.setDqwordTo(T0, V0);                                                  \
   1282     test.setDqwordTo(T1, V1);                                                  \
   1283     test.run();                                                                \
   1284                                                                                \
   1285     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1286     reset();                                                                   \
   1287   } while (0)
   1288 
          // TestImplXmmAddr(Dst, Inst): memory-source form. No source register is
          // loaded; the instruction is given dwordAddress(T1) as its source
          // operand. The expected value is the same as for the register form.
   1289 #define TestImplXmmAddr(Dst, Inst)                                             \
   1290   do {                                                                         \
   1291     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
   1292     const uint32_t T0 = allocateDqword();                                      \
   1293     const uint32_t T1 = allocateDqword();                                      \
   1294                                                                                \
   1295     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1296     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
   1297                                                                                \
   1298     AssembledTest test = assemble();                                           \
   1299     test.setDqwordTo(T0, V0);                                                  \
   1300     test.setDqwordTo(T1, V1);                                                  \
   1301     test.run();                                                                \
   1302                                                                                \
   1303     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1304     reset();                                                                   \
   1305   } while (0)
   1306 
          // TestImpl(Dst, Src): both operand forms on one register pair. Src is
          // only used by the register-source form.
   1307 #define TestImpl(Dst, Src)                                                     \
   1308   do {                                                                         \
   1309     TestImplXmmXmm(Dst, Src, pshufb);                                          \
   1310     TestImplXmmAddr(Dst, pshufb);                                              \
   1311   } while (0)
   1312 
          // Each of xmm0..xmm7 is used once as destination and once as source; the
          // last pair wraps around from xmm7 to xmm0.
   1313   TestImpl(xmm0, xmm1);
   1314   TestImpl(xmm1, xmm2);
   1315   TestImpl(xmm2, xmm3);
   1316   TestImpl(xmm3, xmm4);
   1317   TestImpl(xmm4, xmm5);
   1318   TestImpl(xmm5, xmm6);
   1319   TestImpl(xmm6, xmm7);
   1320   TestImpl(xmm7, xmm0);
   1321 
          // The helper macros are local to this test; later tests reuse the names.
   1322 #undef TestImpl
   1323 #undef TestImplXmmAddr
   1324 #undef TestImplXmmXmm
   1325 }
   1326 
   1327 TEST_F(AssemblerX8632Test, Cvt) {
          // Exercises the cvt<Inst> assembler emitters (dq2ps, tps2dq, si2ss, tss2si
          // and float2float) with IceType_f32 and IceType_f64, using Xmm, GPR and
          // address operands.
          //
          // The constants below are named <Inst><Size>{DstValue,SrcValue,Expected}.
          // The macros rebuild these names by token pasting (Inst##Size##DstValue,
          // Inst##Size##SrcValue, Inst##Size##Expected), so a constant must not be
          // renamed without also changing the macro arguments that select it.
   1328   const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1329   const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
   1330   const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
   1331 
   1332   const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
   1333   const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
   1334   const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
   1335 
   1336   const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1337   const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
   1338   const Dqword tps2dq32Expected(-5, 3, 100, 200);
   1339 
   1340   const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1341   const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
   1342   const Dqword tps2dq64Expected(-5, 3, 100, 200);
   1343 
   1344   const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
   1345   const int32_t si2ss32SrcValue = 5;
   1346   const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
   1347 
   1348   const Dqword si2ss64DstValue(-1.0, -1.0);
   1349   const int32_t si2ss64SrcValue = 5;
   1350   const Dqword si2ss64Expected(5.0, -1.0);
   1351 
   1352   const int32_t tss2si32DstValue = 0xF00F0FF0;
   1353   const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
   1354   const int32_t tss2si32Expected = -5;
   1355 
   1356   const int32_t tss2si64DstValue = 0xF00F0FF0;
   1357   const Dqword tss2si64SrcValue(-5.0, -1.0);
   1358   const int32_t tss2si64Expected = -5;
   1359 
   1360   const Dqword float2float32DstValue(-1.0, -1.0);
   1361   const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
   1362   const Dqword float2float32Expected(-5.0, -1.0);
   1363 
   1364   const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
   1365   const Dqword float2float64SrcValue(-5.0, 3.0);
   1366   const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
   1367 
        // Xmm, Xmm form. Loads Dst and Src (via movups) from two allocated dqwords,
        // emits cvt<Inst>(IceType_f<Size>, Dst, Src), runs the code and asserts that
        // Dst holds <Inst><Size>Expected. reset() is called after each case.
   1368 #define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
   1369   do {                                                                         \
   1370     static constexpr char TestString[] =                                       \
   1371         "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
   1372     const uint32_t T0 = allocateDqword();                                      \
   1373     const uint32_t T1 = allocateDqword();                                      \
   1374                                                                                \
   1375     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1376     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1377     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
   1378                  XmmRegister::Encoded_Reg_##Src);                              \
   1379                                                                                \
   1380     AssembledTest test = assemble();                                           \
   1381     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1382     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
   1383     test.run();                                                                \
   1384                                                                                \
   1385     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1386     reset();                                                                   \
   1387   } while (0)
   1388 
        // Xmm, GPR form. The integer source <Inst><Size>SrcValue is materialized in
        // GPR with a mov of an Immediate before cvt<Inst> is emitted with an
        // IceType_i32 source; only the Xmm destination is loaded from memory.
   1389 #define TestImplSXmmReg(Dst, GPR, Inst, Size)                                  \
   1390   do {                                                                         \
   1391     static constexpr char TestString[] =                                       \
   1392         "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")";                      \
   1393     const uint32_t T0 = allocateDqword();                                      \
   1394                                                                                \
   1395     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1396     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
   1397            Immediate(Inst##Size##SrcValue));                                   \
   1398     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
   1399                  GPRRegister::Encoded_Reg_##GPR);                              \
   1400                                                                                \
   1401     AssembledTest test = assemble();                                           \
   1402     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1403     test.run();                                                                \
   1404                                                                                \
   1405     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1406     reset();                                                                   \
   1407   } while (0)
   1408 
        // GPR, Xmm form. GPR is first set to <Inst><Size>DstValue with a mov of an
        // Immediate, Src is loaded from memory, and cvt<Inst> is emitted with an
        // IceType_i32 destination. The result is read back through test.GPR() and
        // compared against the expected value cast to uint32_t.
   1409 #define TestImplSRegXmm(GPR, Src, Inst, Size)                                  \
   1410   do {                                                                         \
   1411     static constexpr char TestString[] =                                       \
   1412         "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
   1413     const uint32_t T0 = allocateDqword();                                      \
   1414                                                                                \
   1415     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
   1416            Immediate(Inst##Size##DstValue));                                   \
   1417     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
   1418     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
   1419                  XmmRegister::Encoded_Reg_##Src);                              \
   1420                                                                                \
   1421     AssembledTest test = assemble();                                           \
   1422     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
   1423     test.run();                                                                \
   1424                                                                                \
   1425     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
   1426         << TestString;                                                         \
   1427     reset();                                                                   \
   1428   } while (0)
   1429 
        // Xmm, Addr form. Same check as TestImplPXmmXmm, but the source operand of
        // cvt<Inst> is dwordAddress(T1) instead of a second Xmm register.
   1430 #define TestImplPXmmAddr(Dst, Inst, Size)                                      \
   1431   do {                                                                         \
   1432     static constexpr char TestString[] =                                       \
   1433         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
   1434     const uint32_t T0 = allocateDqword();                                      \
   1435     const uint32_t T1 = allocateDqword();                                      \
   1436                                                                                \
   1437     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1438     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
   1439                  dwordAddress(T1));                                            \
   1440                                                                                \
   1441     AssembledTest test = assemble();                                           \
   1442     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1443     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
   1444     test.run();                                                                \
   1445                                                                                \
   1446     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1447     reset();                                                                   \
   1448   } while (0)
   1449 
        // Xmm, Addr form with an integer source. The source slot is obtained with
        // allocateDword() and filled with setDwordTo(), and cvt<Inst> is emitted
        // with an IceType_i32 source operand at dwordAddress(T1).
   1450 #define TestImplSXmmAddr(Dst, Inst, Size)                                      \
   1451   do {                                                                         \
   1452     static constexpr char TestString[] =                                       \
   1453         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
   1454     const uint32_t T0 = allocateDqword();                                      \
   1455     const uint32_t T1 = allocateDword();                                       \
   1456                                                                                \
   1457     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1458     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
   1459                  dwordAddress(T1));                                            \
   1460                                                                                \
   1461     AssembledTest test = assemble();                                           \
   1462     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
   1463     test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
   1464     test.run();                                                                \
   1465                                                                                \
   1466     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
   1467     reset();                                                                   \
   1468   } while (0)
   1469 
        // GPR, Addr form. Same check as TestImplSRegXmm, but the floating point
        // source of cvt<Inst> is dwordAddress(T0) instead of an Xmm register.
   1470 #define TestImplSRegAddr(GPR, Inst, Size)                                      \
   1471   do {                                                                         \
   1472     static constexpr char TestString[] =                                       \
   1473         "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")";                          \
   1474     const uint32_t T0 = allocateDqword();                                      \
   1475                                                                                \
   1476     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
   1477            Immediate(Inst##Size##DstValue));                                   \
   1478     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
   1479                  dwordAddress(T0));                                            \
   1480                                                                                \
   1481     AssembledTest test = assemble();                                           \
   1482     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
   1483     test.run();                                                                \
   1484                                                                                \
   1485     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
   1486         << TestString;                                                         \
   1487     reset();                                                                   \
   1488   } while (0)
   1489 
        // Runs every operand form of every conversion for one Size. Note that the
        // TestImplPXmmAddr cases are invoked with Src (not Dst) as the register
        // under test.
   1490 #define TestImplSize(Dst, Src, GPR, Size)                                      \
   1491   do {                                                                         \
   1492     TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
   1493     TestImplPXmmAddr(Src, dq2ps, Size);                                        \
   1494     TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
   1495     TestImplPXmmAddr(Src, tps2dq, Size);                                       \
   1496     TestImplSXmmReg(Dst, GPR, si2ss, Size);                                    \
   1497     TestImplSXmmAddr(Dst, si2ss, Size);                                        \
   1498     TestImplSRegXmm(GPR, Src, tss2si, Size);                                   \
   1499     TestImplSRegAddr(GPR, tss2si, Size);                                       \
   1500     TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
   1501     TestImplPXmmAddr(Src, float2float, Size);                                  \
   1502   } while (0)
   1503 
        // Runs TestImplSize for Size 32 and then Size 64.
   1504 #define TestImpl(Dst, Src, GPR)                                                \
   1505   do {                                                                         \
   1506     TestImplSize(Dst, Src, GPR, 32);                                           \
   1507     TestImplSize(Dst, Src, GPR, 64);                                           \
   1508   } while (0)
   1509 
          // Each xmm register is used once as Dst and once as Src; the GPR argument
          // cycles through eax, ebx, ecx, edx, esi, edi and then reuses eax and ebx.
   1510   TestImpl(xmm0, xmm1, eax);
   1511   TestImpl(xmm1, xmm2, ebx);
   1512   TestImpl(xmm2, xmm3, ecx);
   1513   TestImpl(xmm3, xmm4, edx);
   1514   TestImpl(xmm4, xmm5, esi);
   1515   TestImpl(xmm5, xmm6, edi);
   1516   TestImpl(xmm6, xmm7, eax);
   1517   TestImpl(xmm7, xmm0, ebx);
   1518 
   1519 #undef TestImpl
   1520 #undef TestImplSize
   1521 #undef TestImplSRegAddr
   1522 #undef TestImplSXmmAddr
   1523 #undef TestImplPXmmAddr
   1524 #undef TestImplSRegXmm
   1525 #undef TestImplSXmmReg
   1526 #undef TestImplPXmmXmm
   1527 }
   1528 
   1529 TEST_F(AssemblerX8632Test, Ucomiss) {
          // Exercises the ucomiss emitter for IceType_f32 and IceType_f64 with an
          // Xmm or an address as the second operand, and checks the outcome through
          // conditional branches that follow the compare.
          //
          // Each quiet NaN constant is taken from numeric_limits of its own type so
          // that qnan##Size always matches the F##Size lane it is stored into.
   1530   static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
   1531   static constexpr double qnan64 = std::numeric_limits<double>::quiet_NaN();
   1532 
          // These vectors are intentionally non-const: the macros overwrite lane 0
          // (F##Size[0]) with the values under test before every case. All other
          // lanes stay at the quiet NaN they are initialized with here.
   1533   Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
   1534   Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
   1535 
   1536   Dqword test64DstValue(0.0, qnan64);
   1537   Dqword test64SrcValue(0.0, qnan64);
   1538 
        // Xmm, Xmm form. The emitted code sets eax to ImmIfFalse, executes ucomiss
        // and then two conditional jumps (Br_<BParity>, Br_<BOther>) to Done; only
        // when neither jump is taken does it reach the mov that sets eax to
        // ImmIfTrue. The assertion therefore requires both branch conditions to be
        // false for the (Value0, Value1) pair. CompType is only used in TestString.
   1539 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
   1540                        BOther)                                                 \
   1541   do {                                                                         \
   1542     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
   1543     static constexpr char TestString[] =                                       \
   1544         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
   1545         ", " #BParity ", " #BOther ")";                                        \
   1546     const uint32_t T0 = allocateDqword();                                      \
   1547     test##Size##DstValue.F##Size[0] = Value0;                                  \
   1548     const uint32_t T1 = allocateDqword();                                      \
   1549     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1550     const uint32_t ImmIfTrue = 0xBEEF;                                         \
   1551     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
   1552                                                                                \
   1553     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1554     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1555     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
   1556     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
   1557                XmmRegister::Encoded_Reg_##Src);                                \
   1558     Label Done;                                                                \
   1559     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
   1560     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
   1561     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
   1562     __ bind(&Done);                                                            \
   1563                                                                                \
   1564     AssembledTest test = assemble();                                           \
   1565     test.setDqwordTo(T0, test##Size##DstValue);                                \
   1566     test.setDqwordTo(T1, test##Size##SrcValue);                                \
   1567     test.run();                                                                \
   1568                                                                                \
   1569     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
   1570     reset();                                                                   \
   1571   } while (0)
   1572 
        // Xmm, Addr form. Identical check to TestImplXmmXmm, except that the second
        // ucomiss operand is dwordAddress(T1) and no Src register is loaded.
   1573 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
   1574   do {                                                                         \
   1575     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
   1576     static constexpr char TestString[] =                                       \
   1577         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
   1578         ", " #BParity ", " #BOther ")";                                        \
   1579     const uint32_t T0 = allocateDqword();                                      \
   1580     test##Size##DstValue.F##Size[0] = Value0;                                  \
   1581     const uint32_t T1 = allocateDqword();                                      \
   1582     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1583     const uint32_t ImmIfTrue = 0xBEEF;                                         \
   1584     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
   1585                                                                                \
   1586     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1587     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
   1588     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
   1589                dwordAddress(T1));                                              \
   1590     Label Done;                                                                \
   1591     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
   1592     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
   1593     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
   1594     __ bind(&Done);                                                            \
   1595                                                                                \
   1596     AssembledTest test = assemble();                                           \
   1597     test.setDqwordTo(T0, test##Size##DstValue);                                \
   1598     test.setDqwordTo(T1, test##Size##SrcValue);                                \
   1599     test.run();                                                                \
   1600                                                                                \
   1601     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
   1602     reset();                                                                   \
   1603   } while (0)
   1604 
        // Runs one comparison case in both operand forms.
   1605 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
   1606                      BOther)                                                   \
   1607   do {                                                                         \
   1608     TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
   1609     TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
   1610   } while (0)
   1611 
        // Comparison cases for one Size. The last two arguments are the branch
        // conditions that must NOT be taken: the ordered cases pass `p` plus the
        // negation of the relation being checked (e.g. isEq passes `ne`), while the
        // qnan cases pass `np` plus `o` or `s`.
   1612 #define TestImplSize(Dst, Src, Size)                                           \
   1613   do {                                                                         \
   1614     TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
   1615     TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
   1616     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
   1617     TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
   1618     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
   1619     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
   1620     TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
   1621     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
   1622     TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
   1623     TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
   1624     TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
   1625   } while (0)
   1626 
        // Runs TestImplSize for Size 32 and then Size 64.
   1627 #define TestImpl(Dst, Src)                                                     \
   1628   do {                                                                         \
   1629     TestImplSize(Dst, Src, 32);                                                \
   1630     TestImplSize(Dst, Src, 64);                                                \
   1631   } while (0)
   1632 
          // Each xmm register is used once as Dst and once as Src.
   1633   TestImpl(xmm0, xmm1);
   1634   TestImpl(xmm1, xmm2);
   1635   TestImpl(xmm2, xmm3);
   1636   TestImpl(xmm3, xmm4);
   1637   TestImpl(xmm4, xmm5);
   1638   TestImpl(xmm5, xmm6);
   1639   TestImpl(xmm6, xmm7);
   1640   TestImpl(xmm7, xmm0);
   1641 
   1642 #undef TestImpl
   1643 #undef TestImplSize
   1644 #undef TestImplCond
   1645 #undef TestImplXmmAddr
   1646 #undef TestImplXmmXmm
   1647 }
   1648 
   1649 TEST_F(AssemblerX8632Test, Sqrtss) {
          // Exercises the sqrt emitter for IceType_f32 and IceType_f64 with an Xmm
          // or an address as the source operand.
          //
          // The source vectors are non-const because the macros overwrite lane 0
          // (F##Size[0]) with the value under test; the remaining source lanes keep
          // -100.0 and every destination lane starts at -1.0.
   1650   Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
   1651   Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
   1652 
   1653   Dqword test64SrcValue(-100.0, -100.0);
   1654   Dqword test64DstValue(-1.0, -1.0);
   1655 
        // Xmm, Xmm form. Loads Src from T0 and Dst from T1, emits
        // sqrt(IceType_f<Size>, Dst, Src) and runs it. The expected value is the
        // initial destination vector with only lane 0 replaced by Result, so the
        // assertion also requires the other destination lanes to be unchanged.
   1656 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
   1657   do {                                                                         \
   1658     static constexpr char TestString[] =                                       \
   1659         "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
   1660     const uint32_t T0 = allocateDqword();                                      \
   1661     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1662     const uint32_t T1 = allocateDqword();                                      \
   1663                                                                                \
   1664     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
   1665     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
   1666     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
   1667             XmmRegister::Encoded_Reg_##Src);                                   \
   1668                                                                                \
   1669     AssembledTest test = assemble();                                           \
   1670     test.setDqwordTo(T0, test##Size##SrcValue);                                \
   1671     test.setDqwordTo(T1, test##Size##DstValue);                                \
   1672     test.run();                                                                \
   1673                                                                                \
   1674     Dqword Expected = test##Size##DstValue;                                    \
   1675     Expected.F##Size[0] = Result;                                              \
   1676     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1677     reset();                                                                   \
   1678   } while (0)
   1679 
        // Xmm, Addr form. Same check as TestSqrtssXmmXmm, but the sqrt source
        // operand is dwordAddress(T0) and no Src register is loaded.
   1680 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
   1681   do {                                                                         \
   1682     static constexpr char TestString[] =                                       \
   1683         "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
   1684     const uint32_t T0 = allocateDqword();                                      \
   1685     test##Size##SrcValue.F##Size[0] = Value1;                                  \
   1686     const uint32_t T1 = allocateDqword();                                      \
   1687                                                                                \
   1688     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
   1689     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
   1690             dwordAddress(T0));                                                 \
   1691                                                                                \
   1692     AssembledTest test = assemble();                                           \
   1693     test.setDqwordTo(T0, test##Size##SrcValue);                                \
   1694     test.setDqwordTo(T1, test##Size##DstValue);                                \
   1695     test.run();                                                                \
   1696                                                                                \
   1697     Dqword Expected = test##Size##DstValue;                                    \
   1698     Expected.F##Size[0] = Result;                                              \
   1699     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1700     reset();                                                                   \
   1701   } while (0)
   1702 
        // Input/result pairs (4 -> 2, 9 -> 3, 100 -> 10) checked in both operand
        // forms for one Size.
   1703 #define TestSqrtssSize(Dst, Src, Size)                                         \
   1704   do {                                                                         \
   1705     TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
   1706     TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
   1707     TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
   1708     TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
   1709     TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
   1710     TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
   1711   } while (0)
   1712 
        // Runs TestSqrtssSize for Size 32 and then Size 64.
   1713 #define TestSqrtss(Dst, Src)                                                   \
   1714   do {                                                                         \
   1715     TestSqrtssSize(Dst, Src, 32);                                              \
   1716     TestSqrtssSize(Dst, Src, 64);                                              \
   1717   } while (0)
   1718 
          // Each xmm register is used once as Dst and once as Src.
   1719   TestSqrtss(xmm0, xmm1);
   1720   TestSqrtss(xmm1, xmm2);
   1721   TestSqrtss(xmm2, xmm3);
   1722   TestSqrtss(xmm3, xmm4);
   1723   TestSqrtss(xmm4, xmm5);
   1724   TestSqrtss(xmm5, xmm6);
   1725   TestSqrtss(xmm6, xmm7);
   1726   TestSqrtss(xmm7, xmm0);
   1727 
   1728 #undef TestSqrtss
   1729 #undef TestSqrtssSize
   1730 #undef TestSqrtssXmmAddr
   1731 #undef TestSqrtssXmmXmm
   1732 }
   1733 
   1734 TEST_F(AssemblerX8632Test, Insertps) {
   1735 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
   1736   do {                                                                         \
   1737     static constexpr char TestString[] =                                       \
   1738         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
   1739         ")";                                                                   \
   1740     const uint32_t T0 = allocateDqword();                                      \
   1741     const Dqword V0 Value0;                                                    \
   1742     const uint32_t T1 = allocateDqword();                                      \
   1743     const Dqword V1 Value1;                                                    \
   1744                                                                                \
   1745     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1746     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1747     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
   1748                 XmmRegister::Encoded_Reg_##Src, Immediate(Imm));               \
   1749                                                                                \
   1750     AssembledTest test = assemble();                                           \
   1751     test.setDqwordTo(T0, V0);                                                  \
   1752     test.setDqwordTo(T1, V1);                                                  \
   1753     test.run();                                                                \
   1754                                                                                \
   1755     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
   1756     reset();                                                                   \
   1757   } while (0)
   1758 
        // TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)
        //
        // Checks insertps when the source operand lives in memory.
        //   Dst      - xmm register name; it is loaded from slot T0, used as the
        //              insertps destination, and read back for the comparison.
        //   Value0   - parenthesized Dqword initializer stored in slot T0.
        //   Value1   - parenthesized Dqword initializer stored in slot T1, which is
        //              the memory operand handed to insertps.
        //   Imm      - immediate passed to insertps.
        //   Expected - parenthesized Dqword initializer Dst is compared against.
        // The comparison uses ASSERT_EQ (tagged with TestString, the stringified
        // arguments) and is followed by reset().
   1759 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
   1760   do {                                                                         \
   1761     static constexpr char TestString[] =                                       \
   1762         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
   1763     const uint32_t T0 = allocateDqword();                                      \
   1764     const Dqword V0 Value0;                                                    \
   1765     const uint32_t T1 = allocateDqword();                                      \
   1766     const Dqword V1 Value1;                                                    \
   1767                                                                                \
   1768     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1769     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
   1770                 dwordAddress(T1), Immediate(Imm));                             \
   1771                                                                                \
   1772     AssembledTest test = assemble();                                           \
   1773     test.setDqwordTo(T0, V0);                                                  \
   1774     test.setDqwordTo(T1, V1);                                                  \
   1775     test.run();                                                                \
   1776                                                                                \
   1777     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
   1778     reset();                                                                   \
   1779   } while (0)
   1780 
        // TestInsertps(Dst, Src)
        //
        // Runs four insertps cases for one register pairing: the xmm,xmm form and
        // the xmm,mem form, each with immediates 0x99 and 0x9D. Dst always starts
        // as all ones and the source is 0xAAAAAAAABBBBBBBB / 0xCCCCCCCCDDDDDDDD.
        //
        // Per the INSERTPS encoding, imm[7:6] selects the source lane (register
        // form only), imm[5:4] selects the destination lane and imm[3:0] is the
        // zero mask. Both immediates insert into lane 1; 0x99 zeroes lanes 0 and 3,
        // 0x9D zeroes lanes 0, 2 and 3. The register form is expected to insert
        // 0xDDDDDDDD, while the memory form is expected to insert 0xBBBBBBBB (the
        // first dword at the address).
   1781 #define TestInsertps(Dst, Src)                                                 \
   1782   do {                                                                         \
   1783     TestInsertpsXmmXmmImm(                                                     \
   1784         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
   1785         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1786         0x99,                                                                  \
   1787         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
   1788     TestInsertpsXmmAddrImm(                                                    \
   1789         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
   1790         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1791         0x99,                                                                  \
   1792         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
   1793     TestInsertpsXmmXmmImm(                                                     \
   1794         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
   1795         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1796         0x9D,                                                                  \
   1797         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
   1798     TestInsertpsXmmAddrImm(                                                    \
   1799         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
   1800         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
   1801         0x9D,                                                                  \
   1802         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
   1803   } while (0)
   1804 
   1805   TestInsertps(xmm0, xmm1);
   1806   TestInsertps(xmm1, xmm2);
   1807   TestInsertps(xmm2, xmm3);
   1808   TestInsertps(xmm3, xmm4);
   1809   TestInsertps(xmm4, xmm5);
   1810   TestInsertps(xmm5, xmm6);
   1811   TestInsertps(xmm6, xmm7);
   1812   TestInsertps(xmm7, xmm0);
   1813 
          // The calls above use every xmm register once as destination and once as
          // source. Undefine all three helper macros so none of them leaks into
          // the tests that follow; the names must match the #define lines exactly.
   1814 #undef TestInsertps
   1815 #undef TestInsertpsXmmAddrImm
   1816 #undef TestInsertpsXmmXmmImm
   1817 }
   1818 
   1819 TEST_F(AssemblerX8632Test, Pinsr) {
          // Exercises pinsr for 8-, 16- and 32-bit elements, with the inserted value
          // coming either from a general purpose register or from memory.
          //
          // Mask<Size> reduces the immediate to the element index that the test
          // uses when building the expected vector (indices 0-3 for 32-bit, 0-7 for
          // 16-bit, 0-15 for 8-bit elements).
   1820   static constexpr uint8_t Mask32 = 0x03;
   1821   static constexpr uint8_t Mask16 = 0x07;
   1822   static constexpr uint8_t Mask8 = 0x0F;
   1823 
        // TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)
        //   Loads Dst from memory with Value0 (a parenthesized Dqword initializer),
        //   moves Value1 into GPR, and emits pinsr of the given element Size from
        //   GPR into Dst at index Imm. The expected result is Value0 with element
        //   (Imm & Mask<Size>) of the U<Size> view replaced by Value1.
   1824 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
   1825   do {                                                                         \
   1826     static constexpr char TestString[] =                                       \
   1827         "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
   1828     const uint32_t T0 = allocateDqword();                                      \
   1829     const Dqword V0 Value0;                                                    \
   1830                                                                                \
   1831     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1832     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, Immediate(Value1));    \
   1833     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
   1834              GPRRegister::Encoded_Reg_##GPR, Immediate(Imm));                  \
   1835                                                                                \
   1836     AssembledTest test = assemble();                                           \
   1837     test.setDqwordTo(T0, V0);                                                  \
   1838     test.run();                                                                \
   1839                                                                                \
   1840     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   1841     Dqword Expected = V0;                                                      \
   1842     Expected.U##Size[sel] = Value1;                                            \
   1843     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1844     reset();                                                                   \
   1845   } while (0)
   1846 
        // TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)
        //   Same check as TestPinsrXmmGPRImm, except that Value1 is stored in a
        //   dword memory slot and pinsr reads it through a memory operand.
   1847 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
   1848   do {                                                                         \
   1849     static constexpr char TestString[] =                                       \
   1850         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
   1851     const uint32_t T0 = allocateDqword();                                      \
   1852     const Dqword V0 Value0;                                                    \
   1853     const uint32_t T1 = allocateDword();                                       \
   1854     const uint32_t V1 = Value1;                                                \
   1855                                                                                \
   1856     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1857     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
   1858              dwordAddress(T1), Immediate(Imm));                                \
   1859                                                                                \
   1860     AssembledTest test = assemble();                                           \
   1861     test.setDqwordTo(T0, V0);                                                  \
   1862     test.setDwordTo(T1, V1);                                                   \
   1863     test.run();                                                                \
   1864                                                                                \
   1865     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   1866     Dqword Expected = V0;                                                      \
   1867     Expected.U##Size[sel] = Value1;                                            \
   1868     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1869     reset();                                                                   \
   1870   } while (0)
   1871 
        // TestPinsrSize(Dst, GPR, Value1, Imm, Size)
        //   Runs the GPR and the memory variant for one element size, both starting
        //   from the same fixed Dqword in Dst.
   1872 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
   1873   do {                                                                         \
   1874     TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                  \
   1875                              uint64_t(0xFFFFFFFFDDDDDDDDull)),                 \
   1876                        GPR, Value1, Imm, Size);                                \
   1877     TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                 \
   1878                               uint64_t(0xFFFFFFFFDDDDDDDDull)),                \
   1879                         Value1, Imm, Size);                                    \
   1880   } while (0)
   1881 
        // TestPinsr(Dst, GPR)
        //   Dst is the xmm register receiving the element, GPR the general purpose
        //   register supplying it. Covers the 8-, 16- and 32-bit element sizes, each
        //   with immediate 0x03 and a value that fits the element.
   1882 #define TestPinsr(Dst, GPR)                                                    \
   1883   do {                                                                         \
   1884     TestPinsrSize(Dst, GPR, 0xEE, 0x03, 8);                                    \
   1885     TestPinsrSize(Dst, GPR, 0xFFEE, 0x03, 16);                                 \
   1886     TestPinsrSize(Dst, GPR, 0xC0FFEE, 0x03, 32);                               \
   1887   } while (0)
   1888 
   1889   TestPinsr(xmm0, eax);
   1890   TestPinsr(xmm1, ebx);
   1891   TestPinsr(xmm2, ecx);
   1892   TestPinsr(xmm3, edx);
   1893   TestPinsr(xmm4, esi);
   1894   TestPinsr(xmm5, edi);
   1895   TestPinsr(xmm6, eax);
   1896   TestPinsr(xmm7, ebx);
   1897 
   1898 #undef TestPinsr
   1899 #undef TestPinsrSize
   1900 #undef TestPinsrXmmAddrImm
   1901 #undef TestPinsrXmmGPRImm
   1902 }
   1903 
   1904 TEST_F(AssemblerX8632Test, Pextr) {
          // Exercises pextr for 8-, 16- and 32-bit elements, extracting from an xmm
          // register into a general purpose register.
          //
          // Mask<Size> reduces the immediate to the element index that the test
          // uses when picking the expected element (indices 0-3 for 32-bit, 0-7 for
          // 16-bit, 0-15 for 8-bit elements).
   1905   static constexpr uint8_t Mask32 = 0x03;
   1906   static constexpr uint8_t Mask16 = 0x07;
   1907   static constexpr uint8_t Mask8 = 0x0F;
   1908 
        // TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)
        //   Loads Src from memory with Value1 (a parenthesized Dqword initializer),
        //   emits pextr of the given element Size from Src into GPR at index Imm,
        //   and compares GPR with element (Imm & Mask<Size>) of the U<Size> view of
        //   the loaded value.
   1909 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
   1910   do {                                                                         \
   1911     static constexpr char TestString[] =                                       \
   1912         "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
   1913     const uint32_t T0 = allocateDqword();                                      \
   1914     const Dqword V0 Value1;                                                    \
   1915                                                                                \
   1916     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
   1917     __ pextr(IceType_i##Size, GPRRegister::Encoded_Reg_##GPR,                  \
   1918              XmmRegister::Encoded_Reg_##Src, Immediate(Imm));                  \
   1919                                                                                \
   1920     AssembledTest test = assemble();                                           \
   1921     test.setDqwordTo(T0, V0);                                                  \
   1922     test.run();                                                                \
   1923                                                                                \
   1924     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
   1925     ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
   1926     reset();                                                                   \
   1927   } while (0)
   1928 
        // TestPextrSize(GPR, Src, Value1, Imm, Size)
        //   Runs one extraction from a fixed Dqword source. Value1 is accepted for
        //   symmetry with the Pinsr helpers but is not used: the source vector is
        //   always the constant below.
   1929 #define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
   1930   do {                                                                         \
   1931     TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull),             \
   1932                                   uint64_t(0xFFFFFFFFDDDDDDDDull)),            \
   1933                        Imm, Size);                                             \
   1934   } while (0)
   1935 
        // TestPextr(GPR, Src)
        //   GPR is the general purpose register receiving the element, Src the xmm
        //   register it is extracted from. Covers the 8-, 16- and 32-bit element
        //   sizes, each with immediate 0x03.
   1936 #define TestPextr(GPR, Src)                                                    \
   1937   do {                                                                         \
   1938     TestPextrSize(GPR, Src, 0xEE, 0x03, 8);                                    \
   1939     TestPextrSize(GPR, Src, 0xFFEE, 0x03, 16);                                 \
   1940     TestPextrSize(GPR, Src, 0xC0FFEE, 0x03, 32);                               \
   1941   } while (0)
   1942 
   1943   TestPextr(eax, xmm0);
   1944   TestPextr(ebx, xmm1);
   1945   TestPextr(ecx, xmm2);
   1946   TestPextr(edx, xmm3);
   1947   TestPextr(esi, xmm4);
   1948   TestPextr(edi, xmm5);
   1949   TestPextr(eax, xmm6);
   1950   TestPextr(ebx, xmm7);
   1951 
          // Undefine every helper so none of them leaks into later tests; the names
          // must match the #define lines exactly.
   1952 #undef TestPextr
   1953 #undef TestPextrSize
   1954 #undef TestPextrGPRXmmImm
   1955 }
   1956 
   1957 TEST_F(AssemblerX8632Test, Pcmpeq_Pcmpgt) {
          // Exercises pcmpeq and pcmpgt on 8-, 16- and 32-bit elements, with the
          // second operand in an xmm register or in memory, for every xmm pairing
          // listed at the bottom of the test.

        // TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)
        //   Loads Dst with Value0 and Src with Value1 (parenthesized Dqword
        //   initializers), emits Inst on (Dst, Src) and checks Dst. The expected
        //   vector is built per element of the signed I<Size> view: all ones (-1)
        //   where (Value1[i] Op Value0[i]) holds, zero elsewhere.
   1958 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
   1959   do {                                                                         \
   1960     static constexpr char TestString[] =                                       \
   1961         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
   1962     const uint32_t T0 = allocateDqword();                                      \
   1963     const Dqword V0 Value0;                                                    \
   1964     const uint32_t T1 = allocateDqword();                                      \
   1965     const Dqword V1 Value1;                                                    \
   1966                                                                                \
   1967     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1968     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   1969     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
   1970             XmmRegister::Encoded_Reg_##Src);                                   \
   1971                                                                                \
   1972     AssembledTest test = assemble();                                           \
   1973     test.setDqwordTo(T0, V0);                                                  \
   1974     test.setDqwordTo(T1, V1);                                                  \
   1975     test.run();                                                                \
   1976                                                                                \
   1977     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
   1978     static constexpr uint8_t ArraySize =                                       \
   1979         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
   1980     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
   1981       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
   1982     }                                                                          \
   1983     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   1984     reset();                                                                   \
   1985   } while (0)
   1986 
        // TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)
        //   Same check as TestPcmpXmmXmm, except that Value1 stays in memory and is
        //   passed to Inst as a memory operand.
   1987 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
   1988   do {                                                                         \
   1989     static constexpr char TestString[] =                                       \
   1990         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
   1991     const uint32_t T0 = allocateDqword();                                      \
   1992     const Dqword V0 Value0;                                                    \
   1993     const uint32_t T1 = allocateDqword();                                      \
   1994     const Dqword V1 Value1;                                                    \
   1995                                                                                \
   1996     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   1997     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
   1998             dwordAddress(T1));                                                 \
   1999                                                                                \
   2000     AssembledTest test = assemble();                                           \
   2001     test.setDqwordTo(T0, V0);                                                  \
   2002     test.setDqwordTo(T1, V1);                                                  \
   2003     test.run();                                                                \
   2004                                                                                \
   2005     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
   2006     static constexpr uint8_t ArraySize =                                       \
   2007         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
   2008     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
   2009       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
   2010     }                                                                          \
   2011     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   2012     reset();                                                                   \
   2013   } while (0)
   2014 
        // TestPcmpValues(Dst, Value0, Src, Value1, Size)
        //   Runs pcmpeq and pcmpgt, each in register and memory form, on one pair of
        //   input vectors. pcmpgt is modelled with "<" because the reference
        //   expression is written as (Value1 Op Value0), i.e. source Op destination.
   2015 #define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
   2016   do {                                                                         \
   2017     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == );               \
   2018     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == );                   \
   2019     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < );                \
   2020     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < );                    \
   2021   } while (0)
   2022 
        // TestPcmpSize(Dst, Src, Size)
        //   Runs TestPcmpValues on two fixed pairs of input vectors for one element
        //   size.
   2023 #define TestPcmpSize(Dst, Src, Size)                                           \
   2024   do {                                                                         \
   2025     TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull),                      \
   2026                          uint64_t(0x0000000000000000ull)),                     \
   2027                    Src, (uint64_t(0x0000008800008800ull),                      \
   2028                          uint64_t(0xFFFFFFFFFFFFFFFFull)),                     \
   2029                    Size);                                                      \
   2030     TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull),                      \
   2031                          uint64_t(0x12345abcde12345Aull)),                     \
   2032                    Src, (uint64_t(0x0000008800008800ull),                      \
   2033                          uint64_t(0xAABBCCDD1234321Aull)),                     \
   2034                    Size);                                                      \
   2035   } while (0)
   2036 
        // TestPcmp(Dst, Src)
        //   Covers the 8-, 16- and 32-bit element sizes for the given register
        //   pair. Dst and Src must be forwarded so that each invocation below
        //   exercises its own register encoding.
   2037 #define TestPcmp(Dst, Src)                                                     \
   2038   do {                                                                         \
   2039     TestPcmpSize(Dst, Src, 8);                                                 \
   2040     TestPcmpSize(Dst, Src, 16);                                                \
   2041     TestPcmpSize(Dst, Src, 32);                                                \
   2042   } while (0)
   2043 
   2044   TestPcmp(xmm0, xmm1);
   2045   TestPcmp(xmm1, xmm2);
   2046   TestPcmp(xmm2, xmm3);
   2047   TestPcmp(xmm3, xmm4);
   2048   TestPcmp(xmm4, xmm5);
   2049   TestPcmp(xmm5, xmm6);
   2050   TestPcmp(xmm6, xmm7);
   2051   TestPcmp(xmm7, xmm0);
   2052 
   2053 #undef TestPcmp
   2054 #undef TestPcmpSize
   2055 #undef TestPcmpValues
   2056 #undef TestPcmpXmmAddr
   2057 #undef TestPcmpXmmXmm
   2058 }
   2059 
   2060 TEST_F(AssemblerX8632Test, Roundsd) {
          // Exercises round() on IceType_f64 with xmm source and destination, for
          // each of the four rounding-mode constants of AssemblerX8632.

        // TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)
        //   Dst   - xmm destination, preloaded with (-3.0, -3.0).
        //   Src   - xmm source, preloaded with (Input, -123.4).
        //   Mode  - suffix of the AssemblerX8632::k<Mode> rounding constant.
        //   Input - value to round.
        //   RN    - expected rounded value.
        //   After the instruction Dst must equal (RN, -3.0): the first double is the
        //   rounded Input and the second keeps the value Dst was loaded with.
        //   Uses EXPECT_EQ, so a mismatch is reported without stopping the test.
   2061 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
   2062   do {                                                                         \
   2063     static constexpr char TestString[] =                                       \
   2064         "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
   2065     const uint32_t T0 = allocateDqword();                                      \
   2066     const Dqword V0(-3.0, -3.0);                                               \
   2067     const uint32_t T1 = allocateDqword();                                      \
   2068     const Dqword V1(double(Input), -123.4);                                    \
   2069                                                                                \
   2070     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
   2071     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
   2072     __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst,                      \
   2073              XmmRegister::Encoded_Reg_##Src,                                   \
   2074              Immediate(AssemblerX8632::k##Mode));                              \
   2075                                                                                \
   2076     AssembledTest test = assemble();                                           \
   2077     test.setDqwordTo(T0, V0);                                                  \
   2078     test.setDqwordTo(T1, V1);                                                  \
   2079     test.run();                                                                \
   2080                                                                                \
   2081     const Dqword Expected(double(RN), -3.0);                                   \
   2082     EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
   2083     reset();                                                                   \
   2084   } while (0)
   2085 
        // TestRoundsd(Dst, Src)
        //   Checks values just above and just below 5.5 under RoundToNearest and
        //   RoundToZero, plus one value each for RoundDown and RoundUp.
   2086 #define TestRoundsd(Dst, Src)                                                  \
   2087   do {                                                                         \
   2088     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
   2089     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
   2090     TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
   2091     TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
   2092     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
   2093     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
   2094   } while (0)
   2095 
   2096   TestRoundsd(xmm0, xmm1);
   2097   TestRoundsd(xmm1, xmm2);
   2098   TestRoundsd(xmm2, xmm3);
   2099   TestRoundsd(xmm3, xmm4);
   2100   TestRoundsd(xmm4, xmm5);
   2101   TestRoundsd(xmm5, xmm6);
   2102   TestRoundsd(xmm6, xmm7);
   2103   TestRoundsd(xmm7, xmm0);
   2104 
   2105 #undef TestRoundsd
   2106 #undef TestRoundsdXmmXmm
   2107 }
   2108 
   2109 TEST_F(AssemblerX8632Test, Set1ps) {
          // Exercises set1ps for every xmm register with a small integer immediate.

        // TestImpl(Xmm, Src, Imm)
        //   Xmm - xmm register that set1ps writes and the test reads back.
        //   Src - general purpose register handed to set1ps; its role is not
        //         visible here (presumably a scratch register - confirm against the
        //         assembler).
        //   Imm - immediate passed to set1ps.
        //   The expected Dqword has Imm in both 32-bit halves of each 64-bit word,
        //   i.e. every 32-bit element equals Imm.
   2110 #define TestImpl(Xmm, Src, Imm)                                                \
   2111   do {                                                                         \
   2112     __ set1ps(XmmRegister::Encoded_Reg_##Xmm, GPRRegister::Encoded_Reg_##Src,  \
   2113               Immediate(Imm));                                                 \
   2114                                                                                \
   2115     AssembledTest test = assemble();                                           \
   2116     test.run();                                                                \
   2117                                                                                \
   2118     const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm),               \
   2119                           (uint64_t(Imm) << 32) | uint32_t(Imm));              \
   2120     ASSERT_EQ(Expected, test.Xmm<Dqword>())                                    \
   2121         << "(" #Xmm ", " #Src ", " #Imm ")";                                   \
   2122     reset();                                                                   \
   2123   } while (0)
   2124 
   2125   TestImpl(xmm0, ebx, 1);
   2126   TestImpl(xmm1, ecx, 2);
   2127   TestImpl(xmm2, edx, 3);
   2128   TestImpl(xmm3, esi, 4);
   2129   TestImpl(xmm4, edi, 5);
   2130   TestImpl(xmm5, eax, 6);
   2131   TestImpl(xmm6, ebx, 7);
   2132   TestImpl(xmm7, ecx, 8);
   2133 
   2134 #undef TestImpl
   2135 }
   2136 
   2137 } // end of anonymous namespace
   2138 } // end of namespace Test
   2139 } // end of namespace X8632
   2140 } // end of namespace Ice
   2141