1 //===- subzero/unittest/AssemblerX8632/XmmArith.cpp -----------------------===// 2 // 3 // The Subzero Code Generator 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include "AssemblerX8632/TestUtil.h" 10 11 namespace Ice { 12 namespace X8632 { 13 namespace Test { 14 namespace { 15 16 TEST_F(AssemblerX8632Test, ArithSS) { 17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \ 18 do { \ 19 static_assert(FloatSize == 32 || FloatSize == 64, \ 20 "Invalid fp size " #FloatSize); \ 21 static constexpr char TestString[] = \ 22 "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \ 23 ", " #Inst ", " #Op ")"; \ 24 static constexpr bool IsDouble = FloatSize == 64; \ 25 using Type = std::conditional<IsDouble, double, float>::type; \ 26 const uint32_t T0 = allocateQword(); \ 27 const Type V0 = Value0; \ 28 const uint32_t T1 = allocateQword(); \ 29 const Type V1 = Value1; \ 30 \ 31 __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 32 dwordAddress(T0)); \ 33 __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Src, \ 34 dwordAddress(T1)); \ 35 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 36 XmmRegister::Encoded_Reg_##Src); \ 37 \ 38 AssembledTest test = assemble(); \ 39 if (IsDouble) { \ 40 test.setQwordTo(T0, static_cast<double>(V0)); \ 41 test.setQwordTo(T1, static_cast<double>(V1)); \ 42 } else { \ 43 test.setDwordTo(T0, static_cast<float>(V0)); \ 44 test.setDwordTo(T1, static_cast<float>(V1)); \ 45 } \ 46 \ 47 test.run(); \ 48 \ 49 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ 50 reset(); \ 51 } while (0) 52 53 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \ 54 do { \ 55 static_assert(FloatSize == 32 || FloatSize == 64, \ 56 "Invalid fp size " #FloatSize); \ 57 static constexpr char TestString[] = \ 58 "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \ 59 ", " #Op ")"; \ 60 static constexpr bool IsDouble = FloatSize == 64; \ 61 using Type = std::conditional<IsDouble, double, float>::type; \ 62 const uint32_t T0 = allocateQword(); \ 63 const Type V0 = Value0; \ 64 const uint32_t T1 = allocateQword(); \ 65 const Type V1 = Value1; \ 66 \ 67 __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 68 dwordAddress(T0)); \ 69 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 70 dwordAddress(T1)); \ 71 \ 72 AssembledTest test = assemble(); \ 73 if (IsDouble) { \ 74 test.setQwordTo(T0, static_cast<double>(V0)); \ 75 test.setQwordTo(T1, static_cast<double>(V1)); \ 76 } else { \ 77 test.setDwordTo(T0, static_cast<float>(V0)); \ 78 test.setDwordTo(T1, static_cast<float>(V1)); \ 79 } \ 80 \ 81 test.run(); \ 82 \ 83 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ 84 reset(); \ 85 } while (0) 86 87 #define TestArithSS(FloatSize, Src, Dst0, Dst1) \ 88 do { \ 89 TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \ 90 TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \ 91 TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \ 92 TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \ 93 TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \ 94 TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \ 95 TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / ); \ 96 TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / ); \ 97 } while (0) 98 99 TestArithSS(32, xmm0, xmm1, xmm2); 100 TestArithSS(32, xmm1, xmm2, xmm3); 101 TestArithSS(32, xmm2, xmm3, xmm4); 102 TestArithSS(32, xmm3, xmm4, xmm5); 103 TestArithSS(32, xmm4, xmm5, xmm6); 104 TestArithSS(32, xmm5, xmm6, xmm7); 105 TestArithSS(32, xmm6, xmm7, xmm0); 106 TestArithSS(32, xmm7, xmm0, xmm1); 107 108 TestArithSS(64, xmm0, xmm1, xmm2); 109 TestArithSS(64, xmm1, xmm2, xmm3); 110 TestArithSS(64, xmm2, xmm3, xmm4); 111 TestArithSS(64, xmm3, xmm4, xmm5); 112 TestArithSS(64, xmm4, xmm5, xmm6); 113 TestArithSS(64, xmm5, xmm6, xmm7); 114 TestArithSS(64, xmm6, xmm7, xmm0); 115 TestArithSS(64, xmm7, xmm0, xmm1); 116 117 #undef TestArithSS 118 #undef TestArithSSXmmAddr 119 #undef TestArithSSXmmXmm 120 } 121 122 TEST_F(AssemblerX8632Test, PArith) { 123 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \ 124 do { \ 125 static constexpr char TestString[] = \ 126 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 127 ", " #Type ", " #Size ")"; \ 128 const uint32_t T0 = allocateDqword(); \ 129 const Dqword V0 Value0; \ 130 \ 131 const uint32_t T1 = allocateDqword(); \ 132 const Dqword V1 Value1; \ 133 \ 134 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 135 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 136 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 137 XmmRegister::Encoded_Reg_##Src); \ 138 \ 139 AssembledTest test = assemble(); \ 140 test.setDqwordTo(T0, V0); \ 141 test.setDqwordTo(T1, V1); \ 142 test.run(); \ 143 \ 144 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ 145 << TestString; \ 146 reset(); \ 147 } while (0) 148 149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \ 150 do { \ 151 static constexpr char TestString[] = \ 152 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 153 ", " #Type ", " #Size ")"; \ 154 const uint32_t T0 = allocateDqword(); \ 155 const Dqword V0 Value0; \ 156 \ 157 const uint32_t T1 = allocateDqword(); \ 158 const Dqword V1 Value1; \ 159 \ 160 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 161 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 162 dwordAddress(T1)); \ 163 \ 164 AssembledTest test = assemble(); \ 165 test.setDqwordTo(T0, V0); \ 166 test.setDqwordTo(T1, V1); \ 167 test.run(); \ 168 \ 169 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ 170 << TestString; \ 171 reset(); \ 172 } while (0) 173 174 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \ 175 do { \ 176 static constexpr char TestString[] = \ 177 "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \ 178 ", " #Size ")"; \ 179 const uint32_t T0 = allocateDqword(); \ 180 const Dqword V0 Value0; \ 181 \ 182 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 183 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, Immediate(Imm)); \ 184 \ 185 AssembledTest test = assemble(); \ 186 test.setDqwordTo(T0, V0); \ 187 test.run(); \ 188 \ 189 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \ 190 << TestString; \ 191 reset(); \ 192 } while (0) 193 194 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \ 195 do { \ 196 static constexpr char TestString[] = \ 197 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \ 198 ", " #Size ")"; \ 199 const uint32_t T0 = allocateDqword(); \ 200 const Dqword V0 Value0; \ 201 \ 202 const uint32_t T1 = allocateDqword(); \ 203 const Dqword V1 Value1; \ 204 \ 205 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 206 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 207 __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 208 XmmRegister::Encoded_Reg_##Src); \ 209 \ 210 AssembledTest test = assemble(); \ 211 test.setDqwordTo(T0, V0); \ 212 test.setDqwordTo(T1, V1); \ 213 test.run(); \ 214 \ 215 ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ 216 << TestString; \ 217 reset(); \ 218 } while (0) 219 220 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \ 221 do { \ 222 static constexpr char TestString[] = \ 223 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \ 224 ")"; \ 225 const uint32_t T0 = allocateDqword(); \ 226 const Dqword V0 Value0; \ 227 \ 228 const uint32_t T1 = allocateDqword(); \ 229 const Dqword V1 Value1; \ 230 \ 231 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 232 __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 233 dwordAddress(T1)); \ 234 \ 235 AssembledTest test = assemble(); \ 236 test.setDqwordTo(T0, V0); \ 237 test.setDqwordTo(T1, V1); \ 238 test.run(); \ 239 \ 240 ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ 241 << TestString; \ 242 reset(); \ 243 } while (0) 244 245 #define TestPArithSize(Dst, Src, Size) \ 246 do { \ 247 static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \ 248 if (Size != 8) { \ 249 TestPArithXmmXmm( \ 250 Dst, \ 251 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 252 Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ 253 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 254 uint64_t(0x8080404002020101ull)), \ 255 (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ 256 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 257 uint64_t(0x8080404002020101ull)), \ 258 3u, psra, >>, int, Size); \ 259 TestPArithXmmXmm( \ 260 Dst, \ 261 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 262 Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ 263 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 264 uint64_t(0x8080404002020101ull)), \ 265 (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ 266 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 267 uint64_t(0x8080404002020101ull)), \ 268 3u, psrl, >>, uint, Size); \ 269 TestPArithXmmXmm( \ 270 Dst, \ 271 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 272 Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ 273 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ 274 uint64_t(0x8080404002020101ull)), \ 275 (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ 276 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ 277 uint64_t(0x8080404002020101ull)), \ 278 3u, psll, <<, uint, Size); \ 279 \ 280 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 281 uint64_t(0x8080404002020101ull)), \ 282 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 283 uint64_t(0x0123456789ABCDEull)), \ 284 pmull, *, int, Size); \ 285 TestPArithXmmAddr( \ 286 Dst, \ 287 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 288 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 289 pmull, *, int, Size); \ 290 if (Size != 16) { \ 291 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 292 uint64_t(0x8080404002020101ull)), \ 293 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 294 uint64_t(0x0123456789ABCDEull)), \ 295 pmuludq, *, uint, Size); \ 296 TestPArithXmmAddr( \ 297 Dst, (uint64_t(0x8040201008040201ull), \ 298 uint64_t(0x8080404002020101ull)), \ 299 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 300 pmuludq, *, uint, Size); \ 301 } \ 302 } \ 303 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 304 uint64_t(0x8080404002020101ull)), \ 305 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 306 uint64_t(0x0123456789ABCDEull)), \ 307 padd, +, int, Size); \ 308 TestPArithXmmAddr( \ 309 Dst, \ 310 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 311 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 312 padd, +, int, Size); \ 313 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 314 uint64_t(0x8080404002020101ull)), \ 315 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 316 uint64_t(0x0123456789ABCDEull)), \ 317 psub, -, int, Size); \ 318 TestPArithXmmAddr( \ 319 Dst, \ 320 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 321 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 322 psub, -, int, Size); \ 323 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 324 uint64_t(0x8080404002020101ull)), \ 325 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 326 uint64_t(0x0123456789ABCDEull)), \ 327 pand, &, int, Size); \ 328 TestPArithXmmAddr( \ 329 Dst, \ 330 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 331 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 332 pand, &, int, Size); \ 333 \ 334 TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 335 uint64_t(0x8080404002020101ull)), \ 336 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 337 uint64_t(0x0123456789ABCDEull)), \ 338 int, Size); \ 339 TestPAndnXmmAddr( \ 340 Dst, \ 341 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 342 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 343 int, Size); \ 344 \ 345 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 346 uint64_t(0x8080404002020101ull)), \ 347 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 348 uint64_t(0x0123456789ABCDEull)), \ 349 por, |, int, Size); \ 350 TestPArithXmmAddr( \ 351 Dst, \ 352 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 353 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 354 por, |, int, Size); \ 355 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ 356 uint64_t(0x8080404002020101ull)), \ 357 Src, (uint64_t(0xFFFFFFFF00000000ull), \ 358 uint64_t(0x0123456789ABCDEull)), \ 359 pxor, ^, int, Size); \ 360 TestPArithXmmAddr( \ 361 Dst, \ 362 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ 363 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ 364 pxor, ^, int, Size); \ 365 } while (0) 366 367 #define TestPArith(Src, Dst) \ 368 do { \ 369 TestPArithSize(Src, Dst, 8); \ 370 TestPArithSize(Src, Dst, 16); \ 371 TestPArithSize(Src, Dst, 32); \ 372 } while (0) 373 374 TestPArith(xmm0, xmm1); 375 TestPArith(xmm1, xmm2); 376 TestPArith(xmm2, xmm3); 377 TestPArith(xmm3, xmm4); 378 TestPArith(xmm4, xmm5); 379 TestPArith(xmm5, xmm6); 380 TestPArith(xmm6, xmm7); 381 TestPArith(xmm7, xmm0); 382 383 #undef TestPArith 384 #undef TestPArithSize 385 #undef TestPAndnXmmAddr 386 #undef TestPAndnXmmXmm 387 #undef TestPArithXmmImm 388 #undef TestPArithXmmAddr 389 #undef TestPArithXmmXmm 390 } 391 392 TEST_F(AssemblerX8632Test, ArithPS) { 393 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \ 394 do { \ 395 static constexpr char TestString[] = \ 396 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 397 ", " #Type ")"; \ 398 const uint32_t T0 = allocateDqword(); \ 399 const Dqword V0 Value0; \ 400 const uint32_t T1 = allocateDqword(); \ 401 const Dqword V1 Value1; \ 402 \ 403 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 404 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 405 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 406 XmmRegister::Encoded_Reg_##Src); \ 407 \ 408 AssembledTest test = assemble(); \ 409 test.setDqwordTo(T0, V0); \ 410 test.setDqwordTo(T1, V1); \ 411 test.run(); \ 412 \ 413 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 414 \ 415 reset(); \ 416 } while (0) 417 418 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \ 419 do { \ 420 static constexpr char TestString[] = \ 421 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ 422 ", " #Type ")"; \ 423 const uint32_t T0 = allocateDqword(); \ 424 const Dqword V0 Value0; \ 425 const uint32_t T1 = allocateDqword(); \ 426 const Dqword V1 Value1; \ 427 \ 428 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 429 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 430 __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \ 431 \ 432 AssembledTest test = assemble(); \ 433 test.setDqwordTo(T0, V0); \ 434 test.setDqwordTo(T1, V1); \ 435 test.run(); \ 436 \ 437 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 438 \ 439 reset(); \ 440 } while (0) 441 442 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \ 443 do { \ 444 static constexpr char TestString[] = \ 445 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 446 ", " #Type ")"; \ 447 const uint32_t T0 = allocateDqword(); \ 448 const Dqword V0 Value0; \ 449 const uint32_t T1 = allocateDqword(); \ 450 const Dqword V1 Value1; \ 451 \ 452 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 453 __ Inst(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 454 \ 455 AssembledTest test = assemble(); \ 456 test.setDqwordTo(T0, V0); \ 457 test.setDqwordTo(T1, V1); \ 458 test.run(); \ 459 \ 460 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 461 \ 462 reset(); \ 463 } while (0) 464 465 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type) \ 466 do { \ 467 static constexpr char TestString[] = \ 468 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \ 469 ")"; \ 470 const uint32_t T0 = allocateDqword(); \ 471 const Dqword V0 Value0; \ 472 const uint32_t T1 = allocateDqword(); \ 473 const Dqword V1 Value1; \ 474 \ 475 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 476 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 477 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 478 XmmRegister::Encoded_Reg_##Src); \ 479 \ 480 AssembledTest test = assemble(); \ 481 test.setDqwordTo(T0, V0); \ 482 test.setDqwordTo(T1, V1); \ 483 test.run(); \ 484 \ 485 ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \ 486 \ 487 reset(); \ 488 } while (0) 489 490 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type) \ 491 do { \ 492 static constexpr char TestString[] = \ 493 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ 494 ", " #Type ")"; \ 495 const uint32_t T0 = allocateDqword(); \ 496 const Dqword V0 Value0; \ 497 const uint32_t T1 = allocateDqword(); \ 498 const Dqword V1 Value1; \ 499 \ 500 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 501 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 502 dwordAddress(T1)); \ 503 \ 504 AssembledTest test = assemble(); \ 505 test.setDqwordTo(T0, V0); \ 506 test.setDqwordTo(T1, V1); \ 507 test.run(); \ 508 \ 509 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 510 \ 511 reset(); \ 512 } while (0) 513 514 #define TestArithPS(Dst, Src) \ 515 do { \ 516 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 517 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ 518 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 519 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ 520 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 521 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ 522 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 523 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ 524 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 525 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ 526 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 527 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ 528 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 529 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ 530 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 531 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ 532 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 533 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ 534 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 535 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ 536 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &, \ 537 double); \ 538 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &, \ 539 double); \ 540 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 541 (0.55, 0.43, 0.23, 1.21), orps, |, float); \ 542 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |, \ 543 double); \ 544 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 545 (0.55, 0.43, 0.23, 1.21), minps, float); \ 546 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 547 (0.55, 0.43, 0.23, 1.21), maxps, float); \ 548 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double); \ 549 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double); \ 550 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ 551 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ 552 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \ 553 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ 554 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^, \ 555 double); \ 556 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^, \ 557 double); \ 558 } while (0) 559 560 #if 0 561 562 #endif 563 564 TestArithPS(xmm0, xmm1); 565 TestArithPS(xmm1, xmm2); 566 TestArithPS(xmm2, xmm3); 567 TestArithPS(xmm3, xmm4); 568 TestArithPS(xmm4, xmm5); 569 TestArithPS(xmm5, xmm6); 570 TestArithPS(xmm6, xmm7); 571 TestArithPS(xmm7, xmm0); 572 573 #undef TestArithPs 574 #undef TestMinMaxPS 575 #undef TestArithPSXmmXmmUntyped 576 #undef TestArithPSXmmAddr 577 #undef TestArithPSXmmXmm 578 } 579 580 TEST_F(AssemblerX8632Test, Blending) { 581 using f32 = float; 582 using i8 = uint8_t; 583 584 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \ 585 do { \ 586 static constexpr char TestString[] = \ 587 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \ 588 ", " #Type ")"; \ 589 const uint32_t T0 = allocateDqword(); \ 590 const Dqword V0 Value0; \ 591 const uint32_t T1 = allocateDqword(); \ 592 const Dqword V1 Value1; \ 593 const uint32_t Mask = allocateDqword(); \ 594 const Dqword MaskValue M; \ 595 \ 596 __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask)); \ 597 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 598 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 599 __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, \ 600 XmmRegister::Encoded_Reg_##Src); \ 601 \ 602 AssembledTest test = assemble(); \ 603 test.setDqwordTo(T0, V0); \ 604 test.setDqwordTo(T1, V1); \ 605 test.setDqwordTo(Mask, MaskValue); \ 606 test.run(); \ 607 \ 608 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ 609 << TestString; \ 610 reset(); \ 611 } while (0) 612 613 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \ 614 do { \ 615 static constexpr char TestString[] = \ 616 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \ 617 ")"; \ 618 const uint32_t T0 = allocateDqword(); \ 619 const Dqword V0 Value0; \ 620 const uint32_t T1 = allocateDqword(); \ 621 const Dqword V1 Value1; \ 622 const uint32_t Mask = allocateDqword(); \ 623 const Dqword MaskValue M; \ 624 \ 625 __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask)); \ 626 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 627 __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 628 \ 629 AssembledTest test = assemble(); \ 630 test.setDqwordTo(T0, V0); \ 631 test.setDqwordTo(T1, V1); \ 632 test.setDqwordTo(Mask, MaskValue); \ 633 test.run(); \ 634 \ 635 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ 636 << TestString; \ 637 reset(); \ 638 } while (0) 639 640 #define TestBlending(Src, Dst) \ 641 do { \ 642 TestBlendingXmmXmm( \ 643 Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \ 644 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ 645 blendvps, f32); \ 646 TestBlendingXmmAddr( \ 647 Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \ 648 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ 649 blendvps, f32); \ 650 TestBlendingXmmXmm( \ 651 Dst, \ 652 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ 653 Src, \ 654 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ 655 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ 656 pblendvb, i8); \ 657 TestBlendingXmmAddr( \ 658 Dst, \ 659 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ 660 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ 661 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ 662 pblendvb, i8); \ 663 } while (0) 664 665 /* xmm0 is taken. It is the implicit mask . */ 666 TestBlending(xmm1, xmm2); 667 TestBlending(xmm2, xmm3); 668 TestBlending(xmm3, xmm4); 669 TestBlending(xmm4, xmm5); 670 TestBlending(xmm5, xmm6); 671 TestBlending(xmm6, xmm7); 672 TestBlending(xmm7, xmm1); 673 674 #undef TestBlending 675 #undef TestBlendingXmmAddr 676 #undef TestBlendingXmmXmm 677 } 678 679 TEST_F(AssemblerX8632Test, Cmpps) { 680 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type) \ 681 do { \ 682 static constexpr char TestString[] = \ 683 "(" #Src ", " #Dst ", " #C ", " #Op ")"; \ 684 const uint32_t T0 = allocateDqword(); \ 685 const Dqword V0 Value0; \ 686 const uint32_t T1 = allocateDqword(); \ 687 const Dqword V1 Value1; \ 688 \ 689 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 690 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 691 __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 692 XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C); \ 693 \ 694 AssembledTest test = assemble(); \ 695 test.setDqwordTo(T0, V0); \ 696 test.setDqwordTo(T1, V1); \ 697 test.run(); \ 698 \ 699 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 700 ; \ 701 reset(); \ 702 } while (0) 703 704 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type) \ 705 do { \ 706 static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \ 707 const uint32_t T0 = allocateDqword(); \ 708 const Dqword V0 Value0; \ 709 const uint32_t T1 = allocateDqword(); \ 710 const Dqword V1 Value1; \ 711 \ 712 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 713 __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 714 dwordAddress(T1), Cond::Cmpps_##C); \ 715 \ 716 AssembledTest test = assemble(); \ 717 test.setDqwordTo(T0, V0); \ 718 test.setDqwordTo(T1, V1); \ 719 test.run(); \ 720 \ 721 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ 722 ; \ 723 reset(); \ 724 } while (0) 725 726 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type) \ 727 do { \ 728 static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")"; \ 729 const uint32_t T0 = allocateDqword(); \ 730 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 731 std::numeric_limits<float>::quiet_NaN()); \ 732 const uint32_t T1 = allocateDqword(); \ 733 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 734 std::numeric_limits<float>::quiet_NaN()); \ 735 \ 736 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 737 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 738 __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 739 XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C); \ 740 \ 741 AssembledTest test = assemble(); \ 742 test.setDqwordTo(T0, V0); \ 743 test.setDqwordTo(T1, V1); \ 744 test.run(); \ 745 \ 746 ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ 747 ; \ 748 reset(); \ 749 } while (0) 750 751 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type) \ 752 do { \ 753 static constexpr char TestString[] = "(" #Dst ", " #C ")"; \ 754 const uint32_t T0 = allocateDqword(); \ 755 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 756 std::numeric_limits<float>::quiet_NaN()); \ 757 const uint32_t T1 = allocateDqword(); \ 758 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 759 std::numeric_limits<float>::quiet_NaN()); \ 760 \ 761 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 762 __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \ 763 dwordAddress(T1), Cond::Cmpps_##C); \ 764 \ 765 AssembledTest test = assemble(); \ 766 test.setDqwordTo(T0, V0); \ 767 test.setDqwordTo(T1, V1); \ 768 test.run(); \ 769 \ 770 ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ 771 ; \ 772 reset(); \ 773 } while (0) 774 775 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type) \ 776 do { \ 777 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 778 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 779 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 780 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 781 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 782 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 783 TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \ 784 TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type); \ 785 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 786 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 787 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 788 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 789 TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \ 790 TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \ 791 if (FloatSize == 32) { \ 792 TestCmppsOrdUnordXmmXmm( \ 793 32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 794 std::numeric_limits<float>::quiet_NaN()), \ 795 Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 796 std::numeric_limits<float>::quiet_NaN()), \ 797 unord, Type); \ 798 TestCmppsOrdUnordXmmAddr( \ 799 32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ 800 std::numeric_limits<float>::quiet_NaN()), \ 801 (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ 802 std::numeric_limits<float>::quiet_NaN()), \ 803 unord, Type); \ 804 } else { \ 805 TestCmppsOrdUnordXmmXmm(64, Dst, \ 806 (1.0, std::numeric_limits<double>::quiet_NaN()), \ 807 Src, (std::numeric_limits<double>::quiet_NaN(), \ 808 std::numeric_limits<double>::quiet_NaN()), \ 809 unord, Type); \ 810 TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src, \ 811 (1.0, std::numeric_limits<double>::quiet_NaN()), \ 812 unord, Type); \ 813 TestCmppsOrdUnordXmmAddr( \ 814 64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), \ 815 (std::numeric_limits<double>::quiet_NaN(), \ 816 std::numeric_limits<double>::quiet_NaN()), \ 817 unord, Type); \ 818 TestCmppsOrdUnordXmmAddr( \ 819 64, Dst, (1.0, 1.0), \ 820 (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type); \ 821 } \ 822 } while (0) 823 824 #define TestCmppsSize(FloatSize, Value0, Value1, Type) \ 825 do { \ 826 TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type); \ 827 TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type); \ 828 TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type); \ 829 TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type); \ 830 TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type); \ 831 TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type); \ 832 TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type); \ 833 TestCmpps(FloatSize, xmm7, Value0, xmm0, Value1, Type); \ 834 } while (0) 835 836 TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5), 837 float); 838 TestCmppsSize(64, (1.0, -1000.0), (0.55, 1.21), double); 839 840 #undef TestCmpps 841 #undef TestCmppsOrdUnordXmmAddr 842 #undef TestCmppsOrdUnordXmmXmm 843 #undef TestCmppsXmmAddr 844 #undef TestCmppsXmmXmm 845 } 846 847 TEST_F(AssemblerX8632Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) { 848 #define TestImplSingle(Dst, Inst, Expect) \ 849 do { \ 850 static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \ 851 const uint32_t T0 = allocateDqword(); \ 852 const Dqword V0(1.0, 4.0, 20.0, 3.14); \ 853 \ 854 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 855 __ Inst(XmmRegister::Encoded_Reg_##Dst); \ 856 \ 857 AssembledTest test = assemble(); \ 858 test.setDqwordTo(T0, V0); \ 859 test.run(); \ 860 ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \ 861 reset(); \ 862 } while (0) 863 864 #define TestImpl(Dst) \ 865 do { \ 866 TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull), \ 867 uint64_t(0x3FE2D10B408F1BBDull))); \ 868 TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull), \ 869 uint64_t(0x3F1078003E64F000ull))); \ 870 TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull), \ 871 uint64_t(0x3EA310003D4CC000ull))); \ 872 \ 873 TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull), \ 874 uint64_t(0x401C42FAE40282A8ull))); \ 875 } while (0) 876 877 TestImpl(xmm0); 878 TestImpl(xmm1); 879 TestImpl(xmm2); 880 TestImpl(xmm3); 881 TestImpl(xmm4); 882 TestImpl(xmm5); 883 TestImpl(xmm6); 884 TestImpl(xmm7); 885 886 #undef TestImpl 887 #undef TestImplSingle 888 } 889 890 TEST_F(AssemblerX8632Test, Unpck) { 891 const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull), 892 uint64_t(0xCCCCCCCCDDDDDDDDull)); 893 const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull), 894 uint64_t(0x9999999988888888ull)); 895 896 const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull), 897 uint64_t(0xEEEEEEEEAAAAAAAAull)); 898 const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull), 899 uint64_t(0xEEEEEEEEFFFFFFFFull)); 900 const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull), 901 uint64_t(0x99999999CCCCCCCCull)); 902 const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull), 903 uint64_t(0x9999999988888888ull)); 904 905 #define TestImplSingle(Dst, Src, Inst) \ 906 do { \ 907 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 908 const uint32_t T0 = allocateDqword(); \ 909 const uint32_t T1 = allocateDqword(); \ 910 \ 911 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 912 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 913 __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \ 914 \ 915 AssembledTest test = assemble(); \ 916 test.setDqwordTo(T0, V0); \ 917 test.setDqwordTo(T1, V1); \ 918 test.run(); \ 919 \ 920 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 921 reset(); \ 922 } while (0) 923 924 #define TestImpl(Dst, Src) \ 925 do { \ 926 TestImplSingle(Dst, Src, unpcklps); \ 927 TestImplSingle(Dst, Src, unpcklpd); \ 928 TestImplSingle(Dst, Src, unpckhps); \ 929 TestImplSingle(Dst, Src, unpckhpd); \ 930 } while (0) 931 932 TestImpl(xmm0, xmm1); 933 TestImpl(xmm1, xmm2); 934 TestImpl(xmm2, xmm3); 935 TestImpl(xmm3, xmm4); 936 TestImpl(xmm4, xmm5); 937 TestImpl(xmm5, xmm6); 938 TestImpl(xmm6, xmm7); 939 TestImpl(xmm7, xmm0); 940 941 #undef TestImpl 942 #undef TestImplSingle 943 } 944 945 TEST_F(AssemblerX8632Test, Shufp) { 946 const Dqword V0(uint64_t(0x1111111122222222ull), 947 uint64_t(0x5555555577777777ull)); 948 const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull), 949 uint64_t(0xCCCCCCCCDDDDDDDDull)); 950 951 const uint8_t pshufdImm = 0x63; 952 const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull), 953 uint64_t(0xAAAAAAAADDDDDDDDull)); 954 955 const uint8_t shufpsImm = 0xf9; 956 const Dqword shufpsExpected(uint64_t(0x7777777711111111ull), 957 uint64_t(0xCCCCCCCCCCCCCCCCull)); 958 959 #define TestImplSingleXmmXmm(Dst, Src, Inst) \ 960 do { \ 961 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 962 const uint32_t T0 = allocateDqword(); \ 963 const uint32_t T1 = allocateDqword(); \ 964 \ 965 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 966 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 967 __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst, \ 968 XmmRegister::Encoded_Reg_##Src, Immediate(Inst##Imm)); \ 969 \ 970 AssembledTest test = assemble(); \ 971 test.setDqwordTo(T0, V0); \ 972 test.setDqwordTo(T1, V1); \ 973 test.run(); \ 974 \ 975 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 976 reset(); \ 977 } while (0) 978 979 #define TestImplSingleXmmAddr(Dst, Inst) \ 980 do { \ 981 static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \ 982 const uint32_t T0 = allocateDqword(); \ 983 const uint32_t T1 = allocateDqword(); \ 984 \ 985 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 986 __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1), \ 987 Immediate(Inst##Imm)); \ 988 \ 989 AssembledTest test = assemble(); \ 990 test.setDqwordTo(T0, V0); \ 991 test.setDqwordTo(T1, V1); \ 992 test.run(); \ 993 \ 994 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ 995 reset(); \ 996 } while (0) 997 998 #define TestImpl(Dst, Src) \ 999 do { \ 1000 TestImplSingleXmmXmm(Dst, Src, pshufd); \ 1001 TestImplSingleXmmAddr(Dst, pshufd); \ 1002 TestImplSingleXmmXmm(Dst, Src, shufps); \ 1003 TestImplSingleXmmAddr(Dst, shufps); \ 1004 } while (0) 1005 1006 TestImpl(xmm0, xmm1); 1007 TestImpl(xmm1, xmm2); 1008 TestImpl(xmm2, xmm3); 1009 TestImpl(xmm3, xmm4); 1010 TestImpl(xmm4, xmm5); 1011 TestImpl(xmm5, xmm6); 1012 TestImpl(xmm6, xmm7); 1013 TestImpl(xmm7, xmm0); 1014 1015 #undef TestImpl 1016 #undef TestImplSingleXmmAddr 1017 #undef TestImplSingleXmmXmm 1018 } 1019 1020 TEST_F(AssemblerX8632Test, Punpckl) { 1021 const Dqword V0_v4i32(uint64_t(0x1111111122222222ull), 1022 uint64_t(0x5555555577777777ull)); 1023 const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull), 1024 uint64_t(0xCCCCCCCCDDDDDDDDull)); 1025 const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull), 1026 uint64_t(0xAAAAAAAA11111111ull)); 1027 1028 const Dqword V0_v8i16(uint64_t(0x1111222233334444ull), 1029 uint64_t(0x5555666677778888ull)); 1030 const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull), 1031 uint64_t(0xEEEEFFFF00009999ull)); 1032 const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull), 1033 uint64_t(0xAAAA1111BBBB2222ull)); 1034 1035 const Dqword V0_v16i8(uint64_t(0x1122334455667788ull), 1036 uint64_t(0x99AABBCCDDEEFF00ull)); 1037 const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull), 1038 uint64_t(0xBAADF00DFEEDFACEull)); 1039 const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull), 1040 uint64_t(0xFF11EE22DD33CC44ull)); 1041 1042 #define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1043 do { \ 1044 static constexpr char TestString[] = \ 1045 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1046 const uint32_t T0 = allocateDqword(); \ 1047 const uint32_t T1 = allocateDqword(); \ 1048 \ 1049 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1050 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1051 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1052 XmmRegister::Encoded_Reg_##Src); \ 1053 \ 1054 AssembledTest test = assemble(); \ 1055 test.setDqwordTo(T0, V0_##Ty); \ 1056 test.setDqwordTo(T1, V1_##Ty); \ 1057 test.run(); \ 1058 \ 1059 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1060 reset(); \ 1061 } while (0) 1062 1063 #define TestImplXmmAddr(Dst, Inst, Ty) \ 1064 do { \ 1065 static constexpr char TestString[] = \ 1066 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1067 const uint32_t T0 = allocateDqword(); \ 1068 const uint32_t T1 = allocateDqword(); \ 1069 \ 1070 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1071 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1072 \ 1073 AssembledTest test = assemble(); \ 1074 test.setDqwordTo(T0, V0_##Ty); \ 1075 test.setDqwordTo(T1, V1_##Ty); \ 1076 test.run(); \ 1077 \ 1078 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1079 reset(); \ 1080 } while (0) 1081 1082 #define TestImpl(Dst, Src) \ 1083 do { \ 1084 TestImplXmmXmm(Dst, Src, punpckl, v4i32); \ 1085 TestImplXmmAddr(Dst, punpckl, v4i32); \ 1086 TestImplXmmXmm(Dst, Src, punpckl, v8i16); \ 1087 TestImplXmmAddr(Dst, punpckl, v8i16); \ 1088 TestImplXmmXmm(Dst, Src, punpckl, v16i8); \ 1089 TestImplXmmAddr(Dst, punpckl, v16i8); \ 1090 } while (0) 1091 1092 TestImpl(xmm0, xmm1); 1093 TestImpl(xmm1, xmm2); 1094 TestImpl(xmm2, xmm3); 1095 TestImpl(xmm3, xmm4); 1096 TestImpl(xmm4, xmm5); 1097 TestImpl(xmm5, xmm6); 1098 TestImpl(xmm6, xmm7); 1099 TestImpl(xmm7, xmm0); 1100 1101 #undef TestImpl 1102 #undef TestImplXmmAddr 1103 #undef TestImplXmmXmm 1104 } 1105 1106 TEST_F(AssemblerX8632Test, Packss) { 1107 const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), 1108 uint64_t(0x7FFFFFFF80000000ull)); 1109 const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), 1110 uint64_t(0x0000800100007FFEull)); 1111 const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull), 1112 uint64_t(0x7FFF7FFEFFFEFFFFull)); 1113 1114 const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), 1115 uint64_t(0xFFFEFFFF7FFF8000ull)); 1116 const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), 1117 uint64_t(0x0088007700660055ull)); 1118 const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull), 1119 uint64_t(0x7F776655057F7F7Eull)); 1120 1121 #define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1122 do { \ 1123 static constexpr char TestString[] = \ 1124 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1125 const uint32_t T0 = allocateDqword(); \ 1126 const uint32_t T1 = allocateDqword(); \ 1127 \ 1128 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1129 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1130 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1131 XmmRegister::Encoded_Reg_##Src); \ 1132 \ 1133 AssembledTest test = assemble(); \ 1134 test.setDqwordTo(T0, V0_##Ty); \ 1135 test.setDqwordTo(T1, V1_##Ty); \ 1136 test.run(); \ 1137 \ 1138 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1139 reset(); \ 1140 } while (0) 1141 1142 #define TestImplXmmAddr(Dst, Inst, Ty) \ 1143 do { \ 1144 static constexpr char TestString[] = \ 1145 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1146 const uint32_t T0 = allocateDqword(); \ 1147 const uint32_t T1 = allocateDqword(); \ 1148 \ 1149 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1150 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1151 \ 1152 AssembledTest test = assemble(); \ 1153 test.setDqwordTo(T0, V0_##Ty); \ 1154 test.setDqwordTo(T1, V1_##Ty); \ 1155 test.run(); \ 1156 \ 1157 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1158 reset(); \ 1159 } while (0) 1160 1161 #define TestImpl(Dst, Src) \ 1162 do { \ 1163 TestImplXmmXmm(Dst, Src, packss, v4i32); \ 1164 TestImplXmmAddr(Dst, packss, v4i32); \ 1165 TestImplXmmXmm(Dst, Src, packss, v8i16); \ 1166 TestImplXmmAddr(Dst, packss, v8i16); \ 1167 } while (0) 1168 1169 TestImpl(xmm0, xmm1); 1170 TestImpl(xmm1, xmm2); 1171 TestImpl(xmm2, xmm3); 1172 TestImpl(xmm3, xmm4); 1173 TestImpl(xmm4, xmm5); 1174 TestImpl(xmm5, xmm6); 1175 TestImpl(xmm6, xmm7); 1176 TestImpl(xmm7, xmm0); 1177 1178 #undef TestImpl 1179 #undef TestImplXmmAddr 1180 #undef TestImplXmmXmm 1181 } 1182 1183 TEST_F(AssemblerX8632Test, Packus) { 1184 const Dqword V0_v4i32(uint64_t(0x0001000000001234ull), 1185 uint64_t(0x7FFFFFFF80000000ull)); 1186 const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull), 1187 uint64_t(0x0000800100007FFEull)); 1188 const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull), 1189 uint64_t(0x80017FFE00000000ull)); 1190 1191 const Dqword V0_v8i16(uint64_t(0x0001000000120034ull), 1192 uint64_t(0xFFFEFFFF7FFF8000ull)); 1193 const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull), 1194 uint64_t(0x0088007700660055ull)); 1195 const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull), 1196 uint64_t(0x8877665505FF817Eull)); 1197 1198 #define TestImplXmmXmm(Dst, Src, Inst, Ty) \ 1199 do { \ 1200 static constexpr char TestString[] = \ 1201 "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \ 1202 const uint32_t T0 = allocateDqword(); \ 1203 const uint32_t T1 = allocateDqword(); \ 1204 \ 1205 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1206 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1207 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \ 1208 XmmRegister::Encoded_Reg_##Src); \ 1209 \ 1210 AssembledTest test = assemble(); \ 1211 test.setDqwordTo(T0, V0_##Ty); \ 1212 test.setDqwordTo(T1, V1_##Ty); \ 1213 test.run(); \ 1214 \ 1215 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1216 reset(); \ 1217 } while (0) 1218 1219 #define TestImplXmmAddr(Dst, Inst, Ty) \ 1220 do { \ 1221 static constexpr char TestString[] = \ 1222 "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \ 1223 const uint32_t T0 = allocateDqword(); \ 1224 const uint32_t T1 = allocateDqword(); \ 1225 \ 1226 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1227 __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1228 \ 1229 AssembledTest test = assemble(); \ 1230 test.setDqwordTo(T0, V0_##Ty); \ 1231 test.setDqwordTo(T1, V1_##Ty); \ 1232 test.run(); \ 1233 \ 1234 ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \ 1235 reset(); \ 1236 } while (0) 1237 1238 #define TestImpl(Dst, Src) \ 1239 do { \ 1240 TestImplXmmXmm(Dst, Src, packus, v4i32); \ 1241 TestImplXmmAddr(Dst, packus, v4i32); \ 1242 TestImplXmmXmm(Dst, Src, packus, v8i16); \ 1243 TestImplXmmAddr(Dst, packus, v8i16); \ 1244 } while (0) 1245 1246 TestImpl(xmm0, xmm1); 1247 TestImpl(xmm1, xmm2); 1248 TestImpl(xmm2, xmm3); 1249 TestImpl(xmm3, xmm4); 1250 TestImpl(xmm4, xmm5); 1251 TestImpl(xmm5, xmm6); 1252 TestImpl(xmm6, xmm7); 1253 TestImpl(xmm7, xmm0); 1254 1255 #undef TestImpl 1256 #undef TestImplXmmAddr 1257 #undef TestImplXmmXmm 1258 } 1259 1260 TEST_F(AssemblerX8632Test, Pshufb) { 1261 const Dqword V0(uint64_t(0x1122334455667788ull), 1262 uint64_t(0x99aabbccddeeff32ull)); 1263 const Dqword V1(uint64_t(0x0204050380060708ull), 1264 uint64_t(0x010306080a8b0c0dull)); 1265 1266 const Dqword Expected(uint64_t(0x6644335500221132ull), 1267 uint64_t(0x77552232ee00ccbbull)); 1268 1269 #define TestImplXmmXmm(Dst, Src, Inst) \ 1270 do { \ 1271 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ 1272 const uint32_t T0 = allocateDqword(); \ 1273 const uint32_t T1 = allocateDqword(); \ 1274 \ 1275 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1276 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1277 __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \ 1278 XmmRegister::Encoded_Reg_##Src); \ 1279 \ 1280 AssembledTest test = assemble(); \ 1281 test.setDqwordTo(T0, V0); \ 1282 test.setDqwordTo(T1, V1); \ 1283 test.run(); \ 1284 \ 1285 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1286 reset(); \ 1287 } while (0) 1288 1289 #define TestImplXmmAddr(Dst, Inst) \ 1290 do { \ 1291 static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \ 1292 const uint32_t T0 = allocateDqword(); \ 1293 const uint32_t T1 = allocateDqword(); \ 1294 \ 1295 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1296 __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1297 \ 1298 AssembledTest test = assemble(); \ 1299 test.setDqwordTo(T0, V0); \ 1300 test.setDqwordTo(T1, V1); \ 1301 test.run(); \ 1302 \ 1303 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1304 reset(); \ 1305 } while (0) 1306 1307 #define TestImpl(Dst, Src) \ 1308 do { \ 1309 TestImplXmmXmm(Dst, Src, pshufb); \ 1310 TestImplXmmAddr(Dst, pshufb); \ 1311 } while (0) 1312 1313 TestImpl(xmm0, xmm1); 1314 TestImpl(xmm1, xmm2); 1315 TestImpl(xmm2, xmm3); 1316 TestImpl(xmm3, xmm4); 1317 TestImpl(xmm4, xmm5); 1318 TestImpl(xmm5, xmm6); 1319 TestImpl(xmm6, xmm7); 1320 TestImpl(xmm7, xmm0); 1321 1322 #undef TestImpl 1323 #undef TestImplXmmAddr 1324 #undef TestImplXmmXmm 1325 } 1326 1327 TEST_F(AssemblerX8632Test, Cvt) { 1328 const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1329 const Dqword dq2ps32SrcValue(-5, 3, 100, 200); 1330 const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0); 1331 1332 const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f); 1333 const Dqword dq2ps64SrcValue(-5, 3, 100, 200); 1334 const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0); 1335 1336 const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1337 const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0); 1338 const Dqword tps2dq32Expected(-5, 3, 100, 200); 1339 1340 const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1341 const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0); 1342 const Dqword tps2dq64Expected(-5, 3, 100, 200); 1343 1344 const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); 1345 const int32_t si2ss32SrcValue = 5; 1346 const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f); 1347 1348 const Dqword si2ss64DstValue(-1.0, -1.0); 1349 const int32_t si2ss64SrcValue = 5; 1350 const Dqword si2ss64Expected(5.0, -1.0); 1351 1352 const int32_t tss2si32DstValue = 0xF00F0FF0; 1353 const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f); 1354 const int32_t tss2si32Expected = -5; 1355 1356 const int32_t tss2si64DstValue = 0xF00F0FF0; 1357 const Dqword tss2si64SrcValue(-5.0, -1.0); 1358 const int32_t tss2si64Expected = -5; 1359 1360 const Dqword float2float32DstValue(-1.0, -1.0); 1361 const Dqword float2float32SrcValue(-5.0, 3, 100, 200); 1362 const Dqword float2float32Expected(-5.0, -1.0); 1363 1364 const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0); 1365 const Dqword float2float64SrcValue(-5.0, 3.0); 1366 const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0); 1367 1368 #define TestImplPXmmXmm(Dst, Src, Inst, Size) \ 1369 do { \ 1370 static constexpr char TestString[] = \ 1371 "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \ 1372 const uint32_t T0 = allocateDqword(); \ 1373 const uint32_t T1 = allocateDqword(); \ 1374 \ 1375 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1376 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1377 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1378 XmmRegister::Encoded_Reg_##Src); \ 1379 \ 1380 AssembledTest test = assemble(); \ 1381 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1382 test.setDqwordTo(T1, Inst##Size##SrcValue); \ 1383 test.run(); \ 1384 \ 1385 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1386 reset(); \ 1387 } while (0) 1388 1389 #define TestImplSXmmReg(Dst, GPR, Inst, Size) \ 1390 do { \ 1391 static constexpr char TestString[] = \ 1392 "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \ 1393 const uint32_t T0 = allocateDqword(); \ 1394 \ 1395 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1396 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ 1397 Immediate(Inst##Size##SrcValue)); \ 1398 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \ 1399 GPRRegister::Encoded_Reg_##GPR); \ 1400 \ 1401 AssembledTest test = assemble(); \ 1402 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1403 test.run(); \ 1404 \ 1405 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1406 reset(); \ 1407 } while (0) 1408 1409 #define TestImplSRegXmm(GPR, Src, Inst, Size) \ 1410 do { \ 1411 static constexpr char TestString[] = \ 1412 "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \ 1413 const uint32_t T0 = allocateDqword(); \ 1414 \ 1415 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ 1416 Immediate(Inst##Size##DstValue)); \ 1417 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ 1418 __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \ 1419 XmmRegister::Encoded_Reg_##Src); \ 1420 \ 1421 AssembledTest test = assemble(); \ 1422 test.setDqwordTo(T0, Inst##Size##SrcValue); \ 1423 test.run(); \ 1424 \ 1425 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ 1426 << TestString; \ 1427 reset(); \ 1428 } while (0) 1429 1430 #define TestImplPXmmAddr(Dst, Inst, Size) \ 1431 do { \ 1432 static constexpr char TestString[] = \ 1433 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ 1434 const uint32_t T0 = allocateDqword(); \ 1435 const uint32_t T1 = allocateDqword(); \ 1436 \ 1437 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1438 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1439 dwordAddress(T1)); \ 1440 \ 1441 AssembledTest test = assemble(); \ 1442 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1443 test.setDqwordTo(T1, Inst##Size##SrcValue); \ 1444 test.run(); \ 1445 \ 1446 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1447 reset(); \ 1448 } while (0) 1449 1450 #define TestImplSXmmAddr(Dst, Inst, Size) \ 1451 do { \ 1452 static constexpr char TestString[] = \ 1453 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ 1454 const uint32_t T0 = allocateDqword(); \ 1455 const uint32_t T1 = allocateDword(); \ 1456 \ 1457 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1458 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \ 1459 dwordAddress(T1)); \ 1460 \ 1461 AssembledTest test = assemble(); \ 1462 test.setDqwordTo(T0, Inst##Size##DstValue); \ 1463 test.setDwordTo(T1, Inst##Size##SrcValue); \ 1464 test.run(); \ 1465 \ 1466 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ 1467 reset(); \ 1468 } while (0) 1469 1470 #define TestImplSRegAddr(GPR, Inst, Size) \ 1471 do { \ 1472 static constexpr char TestString[] = \ 1473 "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \ 1474 const uint32_t T0 = allocateDqword(); \ 1475 \ 1476 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \ 1477 Immediate(Inst##Size##DstValue)); \ 1478 __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \ 1479 dwordAddress(T0)); \ 1480 \ 1481 AssembledTest test = assemble(); \ 1482 test.setDqwordTo(T0, Inst##Size##SrcValue); \ 1483 test.run(); \ 1484 \ 1485 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ 1486 << TestString; \ 1487 reset(); \ 1488 } while (0) 1489 1490 #define TestImplSize(Dst, Src, GPR, Size) \ 1491 do { \ 1492 TestImplPXmmXmm(Dst, Src, dq2ps, Size); \ 1493 TestImplPXmmAddr(Src, dq2ps, Size); \ 1494 TestImplPXmmXmm(Dst, Src, tps2dq, Size); \ 1495 TestImplPXmmAddr(Src, tps2dq, Size); \ 1496 TestImplSXmmReg(Dst, GPR, si2ss, Size); \ 1497 TestImplSXmmAddr(Dst, si2ss, Size); \ 1498 TestImplSRegXmm(GPR, Src, tss2si, Size); \ 1499 TestImplSRegAddr(GPR, tss2si, Size); \ 1500 TestImplPXmmXmm(Dst, Src, float2float, Size); \ 1501 TestImplPXmmAddr(Src, float2float, Size); \ 1502 } while (0) 1503 1504 #define TestImpl(Dst, Src, GPR) \ 1505 do { \ 1506 TestImplSize(Dst, Src, GPR, 32); \ 1507 TestImplSize(Dst, Src, GPR, 64); \ 1508 } while (0) 1509 1510 TestImpl(xmm0, xmm1, eax); 1511 TestImpl(xmm1, xmm2, ebx); 1512 TestImpl(xmm2, xmm3, ecx); 1513 TestImpl(xmm3, xmm4, edx); 1514 TestImpl(xmm4, xmm5, esi); 1515 TestImpl(xmm5, xmm6, edi); 1516 TestImpl(xmm6, xmm7, eax); 1517 TestImpl(xmm7, xmm0, ebx); 1518 1519 #undef TestImpl 1520 #undef TestImplSize 1521 #undef TestImplSRegAddr 1522 #undef TestImplSXmmAddr 1523 #undef TestImplPXmmAddr 1524 #undef TestImplSRegXmm 1525 #undef TestImplSXmmReg 1526 #undef TestImplPXmmXmm 1527 } 1528 1529 TEST_F(AssemblerX8632Test, Ucomiss) { 1530 static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN(); 1531 static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN(); 1532 1533 Dqword test32DstValue(0.0, qnan32, qnan32, qnan32); 1534 Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32); 1535 1536 Dqword test64DstValue(0.0, qnan64); 1537 Dqword test64SrcValue(0.0, qnan64); 1538 1539 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \ 1540 BOther) \ 1541 do { \ 1542 static constexpr char NearBranch = AssemblerX8632::kNearJump; \ 1543 static constexpr char TestString[] = \ 1544 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \ 1545 ", " #BParity ", " #BOther ")"; \ 1546 const uint32_t T0 = allocateDqword(); \ 1547 test##Size##DstValue.F##Size[0] = Value0; \ 1548 const uint32_t T1 = allocateDqword(); \ 1549 test##Size##SrcValue.F##Size[0] = Value1; \ 1550 const uint32_t ImmIfTrue = 0xBEEF; \ 1551 const uint32_t ImmIfFalse = 0xC0FFE; \ 1552 \ 1553 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1554 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1555 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ 1556 __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1557 XmmRegister::Encoded_Reg_##Src); \ 1558 Label Done; \ 1559 __ j(Cond::Br_##BParity, &Done, NearBranch); \ 1560 __ j(Cond::Br_##BOther, &Done, NearBranch); \ 1561 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ 1562 __ bind(&Done); \ 1563 \ 1564 AssembledTest test = assemble(); \ 1565 test.setDqwordTo(T0, test##Size##DstValue); \ 1566 test.setDqwordTo(T1, test##Size##SrcValue); \ 1567 test.run(); \ 1568 \ 1569 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ 1570 reset(); \ 1571 } while (0) 1572 1573 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \ 1574 do { \ 1575 static constexpr char NearBranch = AssemblerX8632::kNearJump; \ 1576 static constexpr char TestString[] = \ 1577 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \ 1578 ", " #BParity ", " #BOther ")"; \ 1579 const uint32_t T0 = allocateDqword(); \ 1580 test##Size##DstValue.F##Size[0] = Value0; \ 1581 const uint32_t T1 = allocateDqword(); \ 1582 test##Size##SrcValue.F##Size[0] = Value1; \ 1583 const uint32_t ImmIfTrue = 0xBEEF; \ 1584 const uint32_t ImmIfFalse = 0xC0FFE; \ 1585 \ 1586 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1587 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ 1588 __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1589 dwordAddress(T1)); \ 1590 Label Done; \ 1591 __ j(Cond::Br_##BParity, &Done, NearBranch); \ 1592 __ j(Cond::Br_##BOther, &Done, NearBranch); \ 1593 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ 1594 __ bind(&Done); \ 1595 \ 1596 AssembledTest test = assemble(); \ 1597 test.setDqwordTo(T0, test##Size##DstValue); \ 1598 test.setDqwordTo(T1, test##Size##SrcValue); \ 1599 test.run(); \ 1600 \ 1601 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ 1602 reset(); \ 1603 } while (0) 1604 1605 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \ 1606 BOther) \ 1607 do { \ 1608 TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \ 1609 TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \ 1610 } while (0) 1611 1612 #define TestImplSize(Dst, Src, Size) \ 1613 do { \ 1614 TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \ 1615 TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \ 1616 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \ 1617 TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \ 1618 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \ 1619 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \ 1620 TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \ 1621 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \ 1622 TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \ 1623 TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \ 1624 TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \ 1625 } while (0) 1626 1627 #define TestImpl(Dst, Src) \ 1628 do { \ 1629 TestImplSize(Dst, Src, 32); \ 1630 TestImplSize(Dst, Src, 64); \ 1631 } while (0) 1632 1633 TestImpl(xmm0, xmm1); 1634 TestImpl(xmm1, xmm2); 1635 TestImpl(xmm2, xmm3); 1636 TestImpl(xmm3, xmm4); 1637 TestImpl(xmm4, xmm5); 1638 TestImpl(xmm5, xmm6); 1639 TestImpl(xmm6, xmm7); 1640 TestImpl(xmm7, xmm0); 1641 1642 #undef TestImpl 1643 #undef TestImplSize 1644 #undef TestImplCond 1645 #undef TestImplXmmAddr 1646 #undef TestImplXmmXmm 1647 } 1648 1649 TEST_F(AssemblerX8632Test, Sqrtss) { 1650 Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0); 1651 Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0); 1652 1653 Dqword test64SrcValue(-100.0, -100.0); 1654 Dqword test64DstValue(-1.0, -1.0); 1655 1656 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size) \ 1657 do { \ 1658 static constexpr char TestString[] = \ 1659 "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")"; \ 1660 const uint32_t T0 = allocateDqword(); \ 1661 test##Size##SrcValue.F##Size[0] = Value1; \ 1662 const uint32_t T1 = allocateDqword(); \ 1663 \ 1664 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ 1665 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1666 __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1667 XmmRegister::Encoded_Reg_##Src); \ 1668 \ 1669 AssembledTest test = assemble(); \ 1670 test.setDqwordTo(T0, test##Size##SrcValue); \ 1671 test.setDqwordTo(T1, test##Size##DstValue); \ 1672 test.run(); \ 1673 \ 1674 Dqword Expected = test##Size##DstValue; \ 1675 Expected.F##Size[0] = Result; \ 1676 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1677 reset(); \ 1678 } while (0) 1679 1680 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size) \ 1681 do { \ 1682 static constexpr char TestString[] = \ 1683 "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")"; \ 1684 const uint32_t T0 = allocateDqword(); \ 1685 test##Size##SrcValue.F##Size[0] = Value1; \ 1686 const uint32_t T1 = allocateDqword(); \ 1687 \ 1688 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \ 1689 __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \ 1690 dwordAddress(T0)); \ 1691 \ 1692 AssembledTest test = assemble(); \ 1693 test.setDqwordTo(T0, test##Size##SrcValue); \ 1694 test.setDqwordTo(T1, test##Size##DstValue); \ 1695 test.run(); \ 1696 \ 1697 Dqword Expected = test##Size##DstValue; \ 1698 Expected.F##Size[0] = Result; \ 1699 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1700 reset(); \ 1701 } while (0) 1702 1703 #define TestSqrtssSize(Dst, Src, Size) \ 1704 do { \ 1705 TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size); \ 1706 TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size); \ 1707 TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size); \ 1708 TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size); \ 1709 TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size); \ 1710 TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size); \ 1711 } while (0) 1712 1713 #define TestSqrtss(Dst, Src) \ 1714 do { \ 1715 TestSqrtssSize(Dst, Src, 32); \ 1716 TestSqrtssSize(Dst, Src, 64); \ 1717 } while (0) 1718 1719 TestSqrtss(xmm0, xmm1); 1720 TestSqrtss(xmm1, xmm2); 1721 TestSqrtss(xmm2, xmm3); 1722 TestSqrtss(xmm3, xmm4); 1723 TestSqrtss(xmm4, xmm5); 1724 TestSqrtss(xmm5, xmm6); 1725 TestSqrtss(xmm6, xmm7); 1726 TestSqrtss(xmm7, xmm0); 1727 1728 #undef TestSqrtss 1729 #undef TestSqrtssSize 1730 #undef TestSqrtssXmmAddr 1731 #undef TestSqrtssXmmXmm 1732 } 1733 1734 TEST_F(AssemblerX8632Test, Insertps) { 1735 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected) \ 1736 do { \ 1737 static constexpr char TestString[] = \ 1738 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected \ 1739 ")"; \ 1740 const uint32_t T0 = allocateDqword(); \ 1741 const Dqword V0 Value0; \ 1742 const uint32_t T1 = allocateDqword(); \ 1743 const Dqword V1 Value1; \ 1744 \ 1745 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1746 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1747 __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst, \ 1748 XmmRegister::Encoded_Reg_##Src, Immediate(Imm)); \ 1749 \ 1750 AssembledTest test = assemble(); \ 1751 test.setDqwordTo(T0, V0); \ 1752 test.setDqwordTo(T1, V1); \ 1753 test.run(); \ 1754 \ 1755 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ 1756 reset(); \ 1757 } while (0) 1758 1759 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected) \ 1760 do { \ 1761 static constexpr char TestString[] = \ 1762 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \ 1763 const uint32_t T0 = allocateDqword(); \ 1764 const Dqword V0 Value0; \ 1765 const uint32_t T1 = allocateDqword(); \ 1766 const Dqword V1 Value1; \ 1767 \ 1768 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1769 __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst, \ 1770 dwordAddress(T1), Immediate(Imm)); \ 1771 \ 1772 AssembledTest test = assemble(); \ 1773 test.setDqwordTo(T0, V0); \ 1774 test.setDqwordTo(T1, V1); \ 1775 test.run(); \ 1776 \ 1777 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ 1778 reset(); \ 1779 } while (0) 1780 1781 #define TestInsertps(Dst, Src) \ 1782 do { \ 1783 TestInsertpsXmmXmmImm( \ 1784 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ 1785 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1786 0x99, \ 1787 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ 1788 TestInsertpsXmmAddrImm( \ 1789 Dst, (uint64_t(-1), uint64_t(-1)), \ 1790 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1791 0x99, \ 1792 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ 1793 TestInsertpsXmmXmmImm( \ 1794 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ 1795 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1796 0x9D, \ 1797 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull))); \ 1798 TestInsertpsXmmAddrImm( \ 1799 Dst, (uint64_t(-1), uint64_t(-1)), \ 1800 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ 1801 0x9D, \ 1802 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull))); \ 1803 } while (0) 1804 1805 TestInsertps(xmm0, xmm1); 1806 TestInsertps(xmm1, xmm2); 1807 TestInsertps(xmm2, xmm3); 1808 TestInsertps(xmm3, xmm4); 1809 TestInsertps(xmm4, xmm5); 1810 TestInsertps(xmm5, xmm6); 1811 TestInsertps(xmm6, xmm7); 1812 TestInsertps(xmm7, xmm0); 1813 1814 #undef TestInsertps 1815 #undef TestInsertpsXmmXmmAddr 1816 #undef TestInsertpsXmmXmmImm 1817 } 1818 1819 TEST_F(AssemblerX8632Test, Pinsr) { 1820 static constexpr uint8_t Mask32 = 0x03; 1821 static constexpr uint8_t Mask16 = 0x07; 1822 static constexpr uint8_t Mask8 = 0x0F; 1823 1824 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \ 1825 do { \ 1826 static constexpr char TestString[] = \ 1827 "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \ 1828 const uint32_t T0 = allocateDqword(); \ 1829 const Dqword V0 Value0; \ 1830 \ 1831 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1832 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, Immediate(Value1)); \ 1833 __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 1834 GPRRegister::Encoded_Reg_##GPR, Immediate(Imm)); \ 1835 \ 1836 AssembledTest test = assemble(); \ 1837 test.setDqwordTo(T0, V0); \ 1838 test.run(); \ 1839 \ 1840 constexpr uint8_t sel = (Imm)&Mask##Size; \ 1841 Dqword Expected = V0; \ 1842 Expected.U##Size[sel] = Value1; \ 1843 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1844 reset(); \ 1845 } while (0) 1846 1847 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \ 1848 do { \ 1849 static constexpr char TestString[] = \ 1850 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \ 1851 const uint32_t T0 = allocateDqword(); \ 1852 const Dqword V0 Value0; \ 1853 const uint32_t T1 = allocateDword(); \ 1854 const uint32_t V1 = Value1; \ 1855 \ 1856 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1857 __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 1858 dwordAddress(T1), Immediate(Imm)); \ 1859 \ 1860 AssembledTest test = assemble(); \ 1861 test.setDqwordTo(T0, V0); \ 1862 test.setDwordTo(T1, V1); \ 1863 test.run(); \ 1864 \ 1865 constexpr uint8_t sel = (Imm)&Mask##Size; \ 1866 Dqword Expected = V0; \ 1867 Expected.U##Size[sel] = Value1; \ 1868 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1869 reset(); \ 1870 } while (0) 1871 1872 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \ 1873 do { \ 1874 TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 1875 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 1876 GPR, Value1, Imm, Size); \ 1877 TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 1878 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 1879 Value1, Imm, Size); \ 1880 } while (0) 1881 1882 #define TestPinsr(Src, Dst) \ 1883 do { \ 1884 TestPinsrSize(Src, Dst, 0xEE, 0x03, 8); \ 1885 TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16); \ 1886 TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ 1887 } while (0) 1888 1889 TestPinsr(xmm0, eax); 1890 TestPinsr(xmm1, ebx); 1891 TestPinsr(xmm2, ecx); 1892 TestPinsr(xmm3, edx); 1893 TestPinsr(xmm4, esi); 1894 TestPinsr(xmm5, edi); 1895 TestPinsr(xmm6, eax); 1896 TestPinsr(xmm7, ebx); 1897 1898 #undef TestPinsr 1899 #undef TestPinsrSize 1900 #undef TestPinsrXmmAddrImm 1901 #undef TestPinsrXmmGPRImm 1902 } 1903 1904 TEST_F(AssemblerX8632Test, Pextr) { 1905 static constexpr uint8_t Mask32 = 0x03; 1906 static constexpr uint8_t Mask16 = 0x07; 1907 static constexpr uint8_t Mask8 = 0x0F; 1908 1909 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \ 1910 do { \ 1911 static constexpr char TestString[] = \ 1912 "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \ 1913 const uint32_t T0 = allocateDqword(); \ 1914 const Dqword V0 Value1; \ 1915 \ 1916 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \ 1917 __ pextr(IceType_i##Size, GPRRegister::Encoded_Reg_##GPR, \ 1918 XmmRegister::Encoded_Reg_##Src, Immediate(Imm)); \ 1919 \ 1920 AssembledTest test = assemble(); \ 1921 test.setDqwordTo(T0, V0); \ 1922 test.run(); \ 1923 \ 1924 constexpr uint8_t sel = (Imm)&Mask##Size; \ 1925 ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \ 1926 reset(); \ 1927 } while (0) 1928 1929 #define TestPextrSize(GPR, Src, Value1, Imm, Size) \ 1930 do { \ 1931 TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull), \ 1932 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ 1933 Imm, Size); \ 1934 } while (0) 1935 1936 #define TestPextr(Src, Dst) \ 1937 do { \ 1938 TestPextrSize(Src, Dst, 0xEE, 0x03, 8); \ 1939 TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16); \ 1940 TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ 1941 } while (0) 1942 1943 TestPextr(eax, xmm0); 1944 TestPextr(ebx, xmm1); 1945 TestPextr(ecx, xmm2); 1946 TestPextr(edx, xmm3); 1947 TestPextr(esi, xmm4); 1948 TestPextr(edi, xmm5); 1949 TestPextr(eax, xmm6); 1950 TestPextr(ebx, xmm7); 1951 1952 #undef TestPextr 1953 #undef TestPextrSize 1954 #undef TestPextrXmmGPRImm 1955 } 1956 1957 TEST_F(AssemblerX8632Test, Pcmpeq_Pcmpgt) { 1958 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \ 1959 do { \ 1960 static constexpr char TestString[] = \ 1961 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \ 1962 const uint32_t T0 = allocateDqword(); \ 1963 const Dqword V0 Value0; \ 1964 const uint32_t T1 = allocateDqword(); \ 1965 const Dqword V1 Value1; \ 1966 \ 1967 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1968 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 1969 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 1970 XmmRegister::Encoded_Reg_##Src); \ 1971 \ 1972 AssembledTest test = assemble(); \ 1973 test.setDqwordTo(T0, V0); \ 1974 test.setDqwordTo(T1, V1); \ 1975 test.run(); \ 1976 \ 1977 Dqword Expected(uint64_t(0), uint64_t(0)); \ 1978 static constexpr uint8_t ArraySize = \ 1979 sizeof(Dqword) / sizeof(uint##Size##_t); \ 1980 for (uint8_t i = 0; i < ArraySize; ++i) { \ 1981 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ 1982 } \ 1983 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 1984 reset(); \ 1985 } while (0) 1986 1987 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \ 1988 do { \ 1989 static constexpr char TestString[] = \ 1990 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \ 1991 const uint32_t T0 = allocateDqword(); \ 1992 const Dqword V0 Value0; \ 1993 const uint32_t T1 = allocateDqword(); \ 1994 const Dqword V1 Value1; \ 1995 \ 1996 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 1997 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \ 1998 dwordAddress(T1)); \ 1999 \ 2000 AssembledTest test = assemble(); \ 2001 test.setDqwordTo(T0, V0); \ 2002 test.setDqwordTo(T1, V1); \ 2003 test.run(); \ 2004 \ 2005 Dqword Expected(uint64_t(0), uint64_t(0)); \ 2006 static constexpr uint8_t ArraySize = \ 2007 sizeof(Dqword) / sizeof(uint##Size##_t); \ 2008 for (uint8_t i = 0; i < ArraySize; ++i) { \ 2009 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ 2010 } \ 2011 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 2012 reset(); \ 2013 } while (0) 2014 2015 #define TestPcmpValues(Dst, Value0, Src, Value1, Size) \ 2016 do { \ 2017 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == ); \ 2018 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == ); \ 2019 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < ); \ 2020 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < ); \ 2021 } while (0) 2022 2023 #define TestPcmpSize(Dst, Src, Size) \ 2024 do { \ 2025 TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull), \ 2026 uint64_t(0x0000000000000000ull)), \ 2027 Src, (uint64_t(0x0000008800008800ull), \ 2028 uint64_t(0xFFFFFFFFFFFFFFFFull)), \ 2029 Size); \ 2030 TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull), \ 2031 uint64_t(0x12345abcde12345Aull)), \ 2032 Src, (uint64_t(0x0000008800008800ull), \ 2033 uint64_t(0xAABBCCDD1234321Aull)), \ 2034 Size); \ 2035 } while (0) 2036 2037 #define TestPcmp(Dst, Src) \ 2038 do { \ 2039 TestPcmpSize(xmm0, xmm1, 8); \ 2040 TestPcmpSize(xmm0, xmm1, 16); \ 2041 TestPcmpSize(xmm0, xmm1, 32); \ 2042 } while (0) 2043 2044 TestPcmp(xmm0, xmm1); 2045 TestPcmp(xmm1, xmm2); 2046 TestPcmp(xmm2, xmm3); 2047 TestPcmp(xmm3, xmm4); 2048 TestPcmp(xmm4, xmm5); 2049 TestPcmp(xmm5, xmm6); 2050 TestPcmp(xmm6, xmm7); 2051 TestPcmp(xmm7, xmm0); 2052 2053 #undef TestPcmp 2054 #undef TestPcmpSize 2055 #undef TestPcmpValues 2056 #undef TestPcmpXmmAddr 2057 #undef TestPcmpXmmXmm 2058 } 2059 2060 TEST_F(AssemblerX8632Test, Roundsd) { 2061 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) \ 2062 do { \ 2063 static constexpr char TestString[] = \ 2064 "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \ 2065 const uint32_t T0 = allocateDqword(); \ 2066 const Dqword V0(-3.0, -3.0); \ 2067 const uint32_t T1 = allocateDqword(); \ 2068 const Dqword V1(double(Input), -123.4); \ 2069 \ 2070 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ 2071 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ 2072 __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst, \ 2073 XmmRegister::Encoded_Reg_##Src, \ 2074 Immediate(AssemblerX8632::k##Mode)); \ 2075 \ 2076 AssembledTest test = assemble(); \ 2077 test.setDqwordTo(T0, V0); \ 2078 test.setDqwordTo(T1, V1); \ 2079 test.run(); \ 2080 \ 2081 const Dqword Expected(double(RN), -3.0); \ 2082 EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ 2083 reset(); \ 2084 } while (0) 2085 2086 #define TestRoundsd(Dst, Src) \ 2087 do { \ 2088 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \ 2089 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \ 2090 TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \ 2091 TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \ 2092 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \ 2093 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \ 2094 } while (0) 2095 2096 TestRoundsd(xmm0, xmm1); 2097 TestRoundsd(xmm1, xmm2); 2098 TestRoundsd(xmm2, xmm3); 2099 TestRoundsd(xmm3, xmm4); 2100 TestRoundsd(xmm4, xmm5); 2101 TestRoundsd(xmm5, xmm6); 2102 TestRoundsd(xmm6, xmm7); 2103 TestRoundsd(xmm7, xmm0); 2104 2105 #undef TestRoundsd 2106 #undef TestRoundsdXmmXmm 2107 } 2108 2109 TEST_F(AssemblerX8632Test, Set1ps) { 2110 #define TestImpl(Xmm, Src, Imm) \ 2111 do { \ 2112 __ set1ps(XmmRegister::Encoded_Reg_##Xmm, GPRRegister::Encoded_Reg_##Src, \ 2113 Immediate(Imm)); \ 2114 \ 2115 AssembledTest test = assemble(); \ 2116 test.run(); \ 2117 \ 2118 const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm), \ 2119 (uint64_t(Imm) << 32) | uint32_t(Imm)); \ 2120 ASSERT_EQ(Expected, test.Xmm<Dqword>()) \ 2121 << "(" #Xmm ", " #Src ", " #Imm ")"; \ 2122 reset(); \ 2123 } while (0) 2124 2125 TestImpl(xmm0, ebx, 1); 2126 TestImpl(xmm1, ecx, 2); 2127 TestImpl(xmm2, edx, 3); 2128 TestImpl(xmm3, esi, 4); 2129 TestImpl(xmm4, edi, 5); 2130 TestImpl(xmm5, eax, 6); 2131 TestImpl(xmm6, ebx, 7); 2132 TestImpl(xmm7, ecx, 8); 2133 2134 #undef TestImpl 2135 } 2136 2137 } // end of anonymous namespace 2138 } // end of namespace Test 2139 } // end of namespace X8632 2140 } // end of namespace Ice 2141