1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_amd64_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2013 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 /* Translates AMD64 code to IR. */ 37 38 /* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 64-bit value is being written. 42 43 x87 FP Limitations: 44 45 * all arithmetic done at 64 bits 46 47 * no FP exceptions, except for handling stack over/underflow 48 49 * FP rounding mode observed only for float->int conversions and 50 int->float conversions which could lose accuracy, and for 51 float-to-float rounding. For all other operations, 52 round-to-nearest is used, regardless. 
53 54 * some of the FCOM cases could do with testing -- not convinced 55 that the args are the right way round. 56 57 * FSAVE does not re-initialise the FPU; it should do 58 59 * FINIT not only initialises the FPU environment, it also zeroes 60 all the FP registers. It should leave the registers unchanged. 61 62 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 63 per Intel docs this bit has no meaning anyway. Since PUSHF is the 64 only way to observe eflags[1], a proper fix would be to make that 65 bit be set by PUSHF. 66 67 This module uses global variables and so is not MT-safe (if that 68 should ever become relevant). 69 */ 70 71 /* Notes re address size overrides (0x67). 72 73 According to the AMD documentation (24594 Rev 3.09, Sept 2003, 74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose 75 and System Instructions"), Section 1.2.3 ("Address-Size Override 76 Prefix"): 77 78 0x67 applies to all explicit memory references, causing the top 79 32 bits of the effective address to become zero. 80 81 0x67 has no effect on stack references (push/pop); these always 82 use a 64-bit address. 83 84 0x67 changes the interpretation of instructions which implicitly 85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used 86 instead. These are: 87 88 cmp{s,sb,sw,sd,sq} 89 in{s,sb,sw,sd} 90 jcxz, jecxz, jrcxz 91 lod{s,sb,sw,sd,sq} 92 loop{,e,bz,be,z} 93 mov{s,sb,sw,sd,sq} 94 out{s,sb,sw,sd} 95 rep{,e,ne,nz} 96 sca{s,sb,sw,sd,sq} 97 sto{s,sb,sw,sd,sq} 98 xlat{,b} */ 99 100 /* "Special" instructions. 101 102 This instruction decoder can decode three special instructions 103 which mean nothing natively (are no-ops as far as regs/mem are 104 concerned) but have meaning for supporting Valgrind. A special 105 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D 106 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq 107 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi). 
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).
   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().
   For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type, registered in the
   type environment of the IRSB under construction. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e".
*/ 238 static void assign ( IRTemp dst, IRExpr* e ) 239 { 240 stmt( IRStmt_WrTmp(dst, e) ); 241 } 242 243 static IRExpr* unop ( IROp op, IRExpr* a ) 244 { 245 return IRExpr_Unop(op, a); 246 } 247 248 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 249 { 250 return IRExpr_Binop(op, a1, a2); 251 } 252 253 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 254 { 255 return IRExpr_Triop(op, a1, a2, a3); 256 } 257 258 static IRExpr* mkexpr ( IRTemp tmp ) 259 { 260 return IRExpr_RdTmp(tmp); 261 } 262 263 static IRExpr* mkU8 ( ULong i ) 264 { 265 vassert(i < 256); 266 return IRExpr_Const(IRConst_U8( (UChar)i )); 267 } 268 269 static IRExpr* mkU16 ( ULong i ) 270 { 271 vassert(i < 0x10000ULL); 272 return IRExpr_Const(IRConst_U16( (UShort)i )); 273 } 274 275 static IRExpr* mkU32 ( ULong i ) 276 { 277 vassert(i < 0x100000000ULL); 278 return IRExpr_Const(IRConst_U32( (UInt)i )); 279 } 280 281 static IRExpr* mkU64 ( ULong i ) 282 { 283 return IRExpr_Const(IRConst_U64(i)); 284 } 285 286 static IRExpr* mkU ( IRType ty, ULong i ) 287 { 288 switch (ty) { 289 case Ity_I8: return mkU8(i); 290 case Ity_I16: return mkU16(i); 291 case Ity_I32: return mkU32(i); 292 case Ity_I64: return mkU64(i); 293 default: vpanic("mkU(amd64)"); 294 } 295 } 296 297 static void storeLE ( IRExpr* addr, IRExpr* data ) 298 { 299 stmt( IRStmt_Store(Iend_LE, addr, data) ); 300 } 301 302 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 303 { 304 return IRExpr_Load(Iend_LE, ty, addr); 305 } 306 307 static IROp mkSizedOp ( IRType ty, IROp op8 ) 308 { 309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 310 || op8 == Iop_Mul8 311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 314 || op8 == Iop_CasCmpNE8 315 || op8 == Iop_Not8 ); 316 switch (ty) { 317 case Ity_I8: return 0 +op8; 318 case Ity_I16: return 1 +op8; 319 case Ity_I32: return 2 +op8; 320 case Ity_I64: return 3 +op8; 
321 default: vpanic("mkSizedOp(amd64)"); 322 } 323 } 324 325 static 326 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) 327 { 328 if (szSmall == 1 && szBig == 4) { 329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); 330 } 331 if (szSmall == 1 && szBig == 2) { 332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); 333 } 334 if (szSmall == 2 && szBig == 4) { 335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); 336 } 337 if (szSmall == 1 && szBig == 8 && !signd) { 338 return unop(Iop_8Uto64, src); 339 } 340 if (szSmall == 1 && szBig == 8 && signd) { 341 return unop(Iop_8Sto64, src); 342 } 343 if (szSmall == 2 && szBig == 8 && !signd) { 344 return unop(Iop_16Uto64, src); 345 } 346 if (szSmall == 2 && szBig == 8 && signd) { 347 return unop(Iop_16Sto64, src); 348 } 349 vpanic("doScalarWidening(amd64)"); 350 } 351 352 353 354 /*------------------------------------------------------------*/ 355 /*--- Debugging output ---*/ 356 /*------------------------------------------------------------*/ 357 358 /* Bomb out if we can't handle something. */ 359 __attribute__ ((noreturn)) 360 static void unimplemented ( const HChar* str ) 361 { 362 vex_printf("amd64toIR: unimplemented feature\n"); 363 vpanic(str); 364 } 365 366 #define DIP(format, args...) \ 367 if (vex_traceflags & VEX_TRACE_FE) \ 368 vex_printf(format, ## args) 369 370 #define DIS(buf, format, args...) \ 371 if (vex_traceflags & VEX_TRACE_FE) \ 372 vex_sprintf(buf, format, ## args) 373 374 375 /*------------------------------------------------------------*/ 376 /*--- Offsets of various parts of the amd64 guest state. 
---*/
/*------------------------------------------------------------*/

/* Integer guest registers. */
#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

/* Segment-base constants for %fs/%gs-relative addressing. */
#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

/* Lazily-computed condition-code thunk. */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* x87 FPU state. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

/* SSE/AVX state.  NOTE(review): a YMM16 slot exists beyond the 16
   architectural registers -- presumably scratch space in the guest
   state; confirm against VexGuestAMD64State. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs.
*/
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

/* Sign-extend a byte to 64 bits: shift it up to the top of a ULong,
   then arithmetic-shift (via Long) back down. */
static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((Long)(((ULong)x) << 56) >> 56);
}

/* Sign-extend a 16-bit value to 64 bits, same technique. */
static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((Long)(((ULong)x) << 48) >> 48);
}

/* Sign-extend a 32-bit value to 64 bits, same technique. */
static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((Long)(((ULong)x) << 32) >> 32);
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get an 8/16/32-bit unsigned value out of the insn stream.
*/

/* Fetch the byte at guest_code[delta]. */
static inline UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

/* Little-endian assembly of a 16-bit unsigned displacement. */
static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream (little-endian). */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen.
*/ 586 static Long getSDisp ( Int size, Long delta ) 587 { 588 switch (size) { 589 case 4: return getSDisp32(delta); 590 case 2: return getSDisp16(delta); 591 case 1: return getSDisp8(delta); 592 default: vpanic("getSDisp(amd64)"); 593 } 594 } 595 596 static ULong mkSizeMask ( Int sz ) 597 { 598 switch (sz) { 599 case 1: return 0x00000000000000FFULL; 600 case 2: return 0x000000000000FFFFULL; 601 case 4: return 0x00000000FFFFFFFFULL; 602 case 8: return 0xFFFFFFFFFFFFFFFFULL; 603 default: vpanic("mkSzMask(amd64)"); 604 } 605 } 606 607 static Int imin ( Int a, Int b ) 608 { 609 return (a < b) ? a : b; 610 } 611 612 static IRType szToITy ( Int n ) 613 { 614 switch (n) { 615 case 1: return Ity_I8; 616 case 2: return Ity_I16; 617 case 4: return Ity_I32; 618 case 8: return Ity_I64; 619 default: vex_printf("\nszToITy(%d)\n", n); 620 vpanic("szToITy(amd64)"); 621 } 622 } 623 624 625 /*------------------------------------------------------------*/ 626 /*--- For dealing with prefixes. ---*/ 627 /*------------------------------------------------------------*/ 628 629 /* The idea is to pass around an int holding a bitmask summarising 630 info from the prefixes seen on the current instruction, including 631 info from the REX byte. This info is used in various places, but 632 most especially when making sense of register fields in 633 instructions. 634 635 The top 8 bits of the prefix are 0x55, just as a hacky way to 636 ensure it really is a valid prefix. 637 638 Things you can safely assume about a well-formed prefix: 639 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. 640 * if REX is not present then REXW,REXR,REXX,REXB will read 641 as zero. 642 * F2 and F3 will not both be 1. 
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
/* NOTE: descriptions corrected -- per the Intel SDM, 0xF2 is
   REPNE/REPNZ and 0xF3 is REP/REPE/REPZ; the previous comments had
   the two swapped. */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions.
*/ 668 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */ 669 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */ 670 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */ 671 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */ 672 673 674 #define PFX_EMPTY 0x55000000 675 676 static Bool IS_VALID_PFX ( Prefix pfx ) { 677 return toBool((pfx & 0xFF000000) == PFX_EMPTY); 678 } 679 680 static Bool haveREX ( Prefix pfx ) { 681 return toBool(pfx & PFX_REX); 682 } 683 684 static Int getRexW ( Prefix pfx ) { 685 return (pfx & PFX_REXW) ? 1 : 0; 686 } 687 static Int getRexR ( Prefix pfx ) { 688 return (pfx & PFX_REXR) ? 1 : 0; 689 } 690 static Int getRexX ( Prefix pfx ) { 691 return (pfx & PFX_REXX) ? 1 : 0; 692 } 693 static Int getRexB ( Prefix pfx ) { 694 return (pfx & PFX_REXB) ? 1 : 0; 695 } 696 697 /* Check a prefix doesn't have F2 or F3 set in it, since usually that 698 completely changes what instruction it really is. */ 699 static Bool haveF2orF3 ( Prefix pfx ) { 700 return toBool((pfx & (PFX_F2|PFX_F3)) > 0); 701 } 702 static Bool haveF2andF3 ( Prefix pfx ) { 703 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3)); 704 } 705 static Bool haveF2 ( Prefix pfx ) { 706 return toBool((pfx & PFX_F2) > 0); 707 } 708 static Bool haveF3 ( Prefix pfx ) { 709 return toBool((pfx & PFX_F3) > 0); 710 } 711 712 static Bool have66 ( Prefix pfx ) { 713 return toBool((pfx & PFX_66) > 0); 714 } 715 static Bool haveASO ( Prefix pfx ) { 716 return toBool((pfx & PFX_ASO) > 0); 717 } 718 static Bool haveLOCK ( Prefix pfx ) { 719 return toBool((pfx & PFX_LOCK) > 0); 720 } 721 722 /* Return True iff pfx has 66 set and F2 and F3 clear */ 723 static Bool have66noF2noF3 ( Prefix pfx ) 724 { 725 return 726 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); 727 } 728 729 /* Return True iff pfx has F2 set and 66 and F3 clear */ 730 static Bool haveF2no66noF3 ( Prefix pfx ) 731 { 732 return 733 toBool((pfx & 
(PFX_66|PFX_F2|PFX_F3)) == PFX_F2); 734 } 735 736 /* Return True iff pfx has F3 set and 66 and F2 clear */ 737 static Bool haveF3no66noF2 ( Prefix pfx ) 738 { 739 return 740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); 741 } 742 743 /* Return True iff pfx has F3 set and F2 clear */ 744 static Bool haveF3noF2 ( Prefix pfx ) 745 { 746 return 747 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); 748 } 749 750 /* Return True iff pfx has F2 set and F3 clear */ 751 static Bool haveF2noF3 ( Prefix pfx ) 752 { 753 return 754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); 755 } 756 757 /* Return True iff pfx has 66, F2 and F3 clear */ 758 static Bool haveNo66noF2noF3 ( Prefix pfx ) 759 { 760 return 761 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); 762 } 763 764 /* Return True iff pfx has any of 66, F2 and F3 set */ 765 static Bool have66orF2orF3 ( Prefix pfx ) 766 { 767 return toBool( ! haveNo66noF2noF3(pfx) ); 768 } 769 770 /* Return True iff pfx has 66 or F3 set */ 771 static Bool have66orF3 ( Prefix pfx ) 772 { 773 return toBool((pfx & (PFX_66|PFX_F3)) > 0); 774 } 775 776 /* Clear all the segment-override bits in a prefix. */ 777 static Prefix clearSegBits ( Prefix p ) 778 { 779 return 780 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); 781 } 782 783 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */ 784 static UInt getVexNvvvv ( Prefix pfx ) { 785 UInt r = (UInt)pfx; 786 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */ 787 return r & 0xF; 788 } 789 790 static Bool haveVEX ( Prefix pfx ) { 791 return toBool(pfx & PFX_VEX); 792 } 793 794 static Int getVexL ( Prefix pfx ) { 795 return (pfx & PFX_VEXL) ? 1 : 0; 796 } 797 798 799 /*------------------------------------------------------------*/ 800 /*--- For dealing with escapes ---*/ 801 /*------------------------------------------------------------*/ 802 803 804 /* Escapes come after the prefixes, but before the primary opcode 805 byte. They escape the primary opcode byte into a bigger space. 
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking.
*/

typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;


/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 3 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/


/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}


/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 ..
   15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present (old x86-style
   %ah/%ch/%dh/%bh naming for values 4..7). */

static
const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static const HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static const HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static const HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   /* irregular is only meaningful for sz==1, and then reg must be
      in 0..7 (there is no REX to extend it). */
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}

/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register.
*/ 946 947 static 948 Int offsetIReg ( Int sz, UInt reg, Bool irregular ) 949 { 950 vassert(reg < 16); 951 if (sz == 1) { 952 if (irregular) 953 vassert(reg < 8); 954 } else { 955 vassert(irregular == False); 956 } 957 958 /* Deal with irregular case -- sz==1 and no REX present */ 959 if (sz == 1 && irregular) { 960 switch (reg) { 961 case R_RSP: return 1+ OFFB_RAX; 962 case R_RBP: return 1+ OFFB_RCX; 963 case R_RSI: return 1+ OFFB_RDX; 964 case R_RDI: return 1+ OFFB_RBX; 965 default: break; /* use the normal case */ 966 } 967 } 968 969 /* Normal case */ 970 return integerGuestReg64Offset(reg); 971 } 972 973 974 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */ 975 976 static IRExpr* getIRegCL ( void ) 977 { 978 vassert(host_endness == VexEndnessLE); 979 return IRExpr_Get( OFFB_RCX, Ity_I8 ); 980 } 981 982 983 /* Write to the %AH register. */ 984 985 static void putIRegAH ( IRExpr* e ) 986 { 987 vassert(host_endness == VexEndnessLE); 988 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); 989 stmt( IRStmt_Put( OFFB_RAX+1, e ) ); 990 } 991 992 993 /* Read/write various widths of %RAX, as it has various 994 special-purpose uses. 
*/ 995 996 static const HChar* nameIRegRAX ( Int sz ) 997 { 998 switch (sz) { 999 case 1: return "%al"; 1000 case 2: return "%ax"; 1001 case 4: return "%eax"; 1002 case 8: return "%rax"; 1003 default: vpanic("nameIRegRAX(amd64)"); 1004 } 1005 } 1006 1007 static IRExpr* getIRegRAX ( Int sz ) 1008 { 1009 vassert(host_endness == VexEndnessLE); 1010 switch (sz) { 1011 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 ); 1012 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 ); 1013 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 )); 1014 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 ); 1015 default: vpanic("getIRegRAX(amd64)"); 1016 } 1017 } 1018 1019 static void putIRegRAX ( Int sz, IRExpr* e ) 1020 { 1021 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1022 vassert(host_endness == VexEndnessLE); 1023 switch (sz) { 1024 case 8: vassert(ty == Ity_I64); 1025 stmt( IRStmt_Put( OFFB_RAX, e )); 1026 break; 1027 case 4: vassert(ty == Ity_I32); 1028 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) )); 1029 break; 1030 case 2: vassert(ty == Ity_I16); 1031 stmt( IRStmt_Put( OFFB_RAX, e )); 1032 break; 1033 case 1: vassert(ty == Ity_I8); 1034 stmt( IRStmt_Put( OFFB_RAX, e )); 1035 break; 1036 default: vpanic("putIRegRAX(amd64)"); 1037 } 1038 } 1039 1040 1041 /* Read/write various widths of %RDX, as it has various 1042 special-purpose uses. 
*/ 1043 1044 static const HChar* nameIRegRDX ( Int sz ) 1045 { 1046 switch (sz) { 1047 case 1: return "%dl"; 1048 case 2: return "%dx"; 1049 case 4: return "%edx"; 1050 case 8: return "%rdx"; 1051 default: vpanic("nameIRegRDX(amd64)"); 1052 } 1053 } 1054 1055 static IRExpr* getIRegRDX ( Int sz ) 1056 { 1057 vassert(host_endness == VexEndnessLE); 1058 switch (sz) { 1059 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 ); 1060 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 ); 1061 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 )); 1062 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 ); 1063 default: vpanic("getIRegRDX(amd64)"); 1064 } 1065 } 1066 1067 static void putIRegRDX ( Int sz, IRExpr* e ) 1068 { 1069 vassert(host_endness == VexEndnessLE); 1070 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1071 switch (sz) { 1072 case 8: stmt( IRStmt_Put( OFFB_RDX, e )); 1073 break; 1074 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) )); 1075 break; 1076 case 2: stmt( IRStmt_Put( OFFB_RDX, e )); 1077 break; 1078 case 1: stmt( IRStmt_Put( OFFB_RDX, e )); 1079 break; 1080 default: vpanic("putIRegRDX(amd64)"); 1081 } 1082 } 1083 1084 1085 /* Simplistic functions to deal with the integer registers as a 1086 straightforward bank of 16 64-bit regs. */ 1087 1088 static IRExpr* getIReg64 ( UInt regno ) 1089 { 1090 return IRExpr_Get( integerGuestReg64Offset(regno), 1091 Ity_I64 ); 1092 } 1093 1094 static void putIReg64 ( UInt regno, IRExpr* e ) 1095 { 1096 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1097 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) ); 1098 } 1099 1100 static const HChar* nameIReg64 ( UInt regno ) 1101 { 1102 return nameIReg( 8, regno, False ); 1103 } 1104 1105 1106 /* Simplistic functions to deal with the lower halves of integer 1107 registers as a straightforward bank of 16 32-bit regs. 
*/ 1108 1109 static IRExpr* getIReg32 ( UInt regno ) 1110 { 1111 vassert(host_endness == VexEndnessLE); 1112 return unop(Iop_64to32, 1113 IRExpr_Get( integerGuestReg64Offset(regno), 1114 Ity_I64 )); 1115 } 1116 1117 static void putIReg32 ( UInt regno, IRExpr* e ) 1118 { 1119 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1120 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1121 unop(Iop_32Uto64,e) ) ); 1122 } 1123 1124 static const HChar* nameIReg32 ( UInt regno ) 1125 { 1126 return nameIReg( 4, regno, False ); 1127 } 1128 1129 1130 /* Simplistic functions to deal with the lower quarters of integer 1131 registers as a straightforward bank of 16 16-bit regs. */ 1132 1133 static IRExpr* getIReg16 ( UInt regno ) 1134 { 1135 vassert(host_endness == VexEndnessLE); 1136 return IRExpr_Get( integerGuestReg64Offset(regno), 1137 Ity_I16 ); 1138 } 1139 1140 static void putIReg16 ( UInt regno, IRExpr* e ) 1141 { 1142 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1143 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1144 unop(Iop_16Uto64,e) ) ); 1145 } 1146 1147 static const HChar* nameIReg16 ( UInt regno ) 1148 { 1149 return nameIReg( 2, regno, False ); 1150 } 1151 1152 1153 /* Sometimes what we know is a 3-bit register number, a REX byte, and 1154 which field of the REX byte is to be used to extend to a 4-bit 1155 number. These functions cater for that situation. 
1156 */ 1157 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits ) 1158 { 1159 vassert(lo3bits < 8); 1160 vassert(IS_VALID_PFX(pfx)); 1161 return getIReg64( lo3bits | (getRexX(pfx) << 3) ); 1162 } 1163 1164 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits ) 1165 { 1166 vassert(lo3bits < 8); 1167 vassert(IS_VALID_PFX(pfx)); 1168 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False ); 1169 } 1170 1171 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1172 { 1173 vassert(lo3bits < 8); 1174 vassert(IS_VALID_PFX(pfx)); 1175 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1176 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3), 1177 toBool(sz==1 && !haveREX(pfx)) ); 1178 } 1179 1180 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1181 { 1182 vassert(lo3bits < 8); 1183 vassert(IS_VALID_PFX(pfx)); 1184 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1185 if (sz == 4) { 1186 sz = 8; 1187 return unop(Iop_64to32, 1188 IRExpr_Get( 1189 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1190 False/*!irregular*/ ), 1191 szToITy(sz) 1192 ) 1193 ); 1194 } else { 1195 return IRExpr_Get( 1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1197 toBool(sz==1 && !haveREX(pfx)) ), 1198 szToITy(sz) 1199 ); 1200 } 1201 } 1202 1203 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e ) 1204 { 1205 vassert(lo3bits < 8); 1206 vassert(IS_VALID_PFX(pfx)); 1207 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1208 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1209 stmt( IRStmt_Put( 1210 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1211 toBool(sz==1 && !haveREX(pfx)) ), 1212 sz==4 ? unop(Iop_32Uto64,e) : e 1213 )); 1214 } 1215 1216 1217 /* Functions for getting register numbers from modrm bytes and REX 1218 when we don't have to consider the complexities of integer subreg 1219 accesses. 
1220 */ 1221 /* Extract the g reg field from a modRM byte, and augment it using the 1222 REX.R bit from the supplied REX byte. The R bit usually is 1223 associated with the g register field. 1224 */ 1225 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1226 { 1227 Int reg = (Int)( (mod_reg_rm >> 3) & 7 ); 1228 reg += (pfx & PFX_REXR) ? 8 : 0; 1229 return reg; 1230 } 1231 1232 /* Extract the e reg field from a modRM byte, and augment it using the 1233 REX.B bit from the supplied REX byte. The B bit usually is 1234 associated with the e register field (when modrm indicates e is a 1235 register, that is). 1236 */ 1237 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1238 { 1239 Int rm; 1240 vassert(epartIsReg(mod_reg_rm)); 1241 rm = (Int)(mod_reg_rm & 0x7); 1242 rm += (pfx & PFX_REXB) ? 8 : 0; 1243 return rm; 1244 } 1245 1246 1247 /* General functions for dealing with integer register access. */ 1248 1249 /* Produce the guest state offset for a reference to the 'g' register 1250 field in a modrm byte, taking into account REX (or its absence), 1251 and the size of the access. 
1252 */ 1253 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1254 { 1255 UInt reg; 1256 vassert(host_endness == VexEndnessLE); 1257 vassert(IS_VALID_PFX(pfx)); 1258 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1259 reg = gregOfRexRM( pfx, mod_reg_rm ); 1260 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1261 } 1262 1263 static 1264 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1265 { 1266 if (sz == 4) { 1267 sz = 8; 1268 return unop(Iop_64to32, 1269 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1270 szToITy(sz) )); 1271 } else { 1272 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1273 szToITy(sz) ); 1274 } 1275 } 1276 1277 static 1278 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1279 { 1280 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1281 if (sz == 4) { 1282 e = unop(Iop_32Uto64,e); 1283 } 1284 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) ); 1285 } 1286 1287 static 1288 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1289 { 1290 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), 1291 toBool(sz==1 && !haveREX(pfx)) ); 1292 } 1293 1294 1295 static 1296 IRExpr* getIRegV ( Int sz, Prefix pfx ) 1297 { 1298 if (sz == 4) { 1299 sz = 8; 1300 return unop(Iop_64to32, 1301 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1302 szToITy(sz) )); 1303 } else { 1304 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1305 szToITy(sz) ); 1306 } 1307 } 1308 1309 static 1310 void putIRegV ( Int sz, Prefix pfx, IRExpr* e ) 1311 { 1312 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1313 if (sz == 4) { 1314 e = unop(Iop_32Uto64,e); 1315 } 1316 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) ); 1317 } 1318 1319 static 1320 const HChar* nameIRegV ( Int sz, Prefix pfx ) 1321 { 1322 return nameIReg( sz, getVexNvvvv(pfx), False ); 1323 } 1324 1325 1326 1327 /* Produce the guest state offset for a reference to the 'e' register 1328 field 
in a modrm byte, taking into account REX (or its absence), 1329 and the size of the access. eregOfRexRM will assert if mod_reg_rm 1330 denotes a memory access rather than a register access. 1331 */ 1332 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1333 { 1334 UInt reg; 1335 vassert(host_endness == VexEndnessLE); 1336 vassert(IS_VALID_PFX(pfx)); 1337 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1338 reg = eregOfRexRM( pfx, mod_reg_rm ); 1339 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1340 } 1341 1342 static 1343 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1344 { 1345 if (sz == 4) { 1346 sz = 8; 1347 return unop(Iop_64to32, 1348 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1349 szToITy(sz) )); 1350 } else { 1351 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1352 szToITy(sz) ); 1353 } 1354 } 1355 1356 static 1357 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1358 { 1359 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1360 if (sz == 4) { 1361 e = unop(Iop_32Uto64,e); 1362 } 1363 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) ); 1364 } 1365 1366 static 1367 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1368 { 1369 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), 1370 toBool(sz==1 && !haveREX(pfx)) ); 1371 } 1372 1373 1374 /*------------------------------------------------------------*/ 1375 /*--- For dealing with XMM registers ---*/ 1376 /*------------------------------------------------------------*/ 1377 1378 static Int ymmGuestRegOffset ( UInt ymmreg ) 1379 { 1380 switch (ymmreg) { 1381 case 0: return OFFB_YMM0; 1382 case 1: return OFFB_YMM1; 1383 case 2: return OFFB_YMM2; 1384 case 3: return OFFB_YMM3; 1385 case 4: return OFFB_YMM4; 1386 case 5: return OFFB_YMM5; 1387 case 6: return OFFB_YMM6; 1388 case 7: return OFFB_YMM7; 1389 case 8: return OFFB_YMM8; 1390 case 9: return OFFB_YMM9; 1391 case 10: return OFFB_YMM10; 1392 case 11: return 
OFFB_YMM11; 1393 case 12: return OFFB_YMM12; 1394 case 13: return OFFB_YMM13; 1395 case 14: return OFFB_YMM14; 1396 case 15: return OFFB_YMM15; 1397 default: vpanic("ymmGuestRegOffset(amd64)"); 1398 } 1399 } 1400 1401 static Int xmmGuestRegOffset ( UInt xmmreg ) 1402 { 1403 /* Correct for little-endian host only. */ 1404 vassert(host_endness == VexEndnessLE); 1405 return ymmGuestRegOffset( xmmreg ); 1406 } 1407 1408 /* Lanes of vector registers are always numbered from zero being the 1409 least significant lane (rightmost in the register). */ 1410 1411 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) 1412 { 1413 /* Correct for little-endian host only. */ 1414 vassert(host_endness == VexEndnessLE); 1415 vassert(laneno >= 0 && laneno < 8); 1416 return xmmGuestRegOffset( xmmreg ) + 2 * laneno; 1417 } 1418 1419 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) 1420 { 1421 /* Correct for little-endian host only. */ 1422 vassert(host_endness == VexEndnessLE); 1423 vassert(laneno >= 0 && laneno < 4); 1424 return xmmGuestRegOffset( xmmreg ) + 4 * laneno; 1425 } 1426 1427 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) 1428 { 1429 /* Correct for little-endian host only. */ 1430 vassert(host_endness == VexEndnessLE); 1431 vassert(laneno >= 0 && laneno < 2); 1432 return xmmGuestRegOffset( xmmreg ) + 8 * laneno; 1433 } 1434 1435 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno ) 1436 { 1437 /* Correct for little-endian host only. */ 1438 vassert(host_endness == VexEndnessLE); 1439 vassert(laneno >= 0 && laneno < 2); 1440 return ymmGuestRegOffset( ymmreg ) + 16 * laneno; 1441 } 1442 1443 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno ) 1444 { 1445 /* Correct for little-endian host only. 
*/ 1446 vassert(host_endness == VexEndnessLE); 1447 vassert(laneno >= 0 && laneno < 4); 1448 return ymmGuestRegOffset( ymmreg ) + 8 * laneno; 1449 } 1450 1451 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno ) 1452 { 1453 /* Correct for little-endian host only. */ 1454 vassert(host_endness == VexEndnessLE); 1455 vassert(laneno >= 0 && laneno < 8); 1456 return ymmGuestRegOffset( ymmreg ) + 4 * laneno; 1457 } 1458 1459 static IRExpr* getXMMReg ( UInt xmmreg ) 1460 { 1461 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); 1462 } 1463 1464 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) 1465 { 1466 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); 1467 } 1468 1469 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) 1470 { 1471 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); 1472 } 1473 1474 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) 1475 { 1476 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); 1477 } 1478 1479 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) 1480 { 1481 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); 1482 } 1483 1484 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno ) 1485 { 1486 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 ); 1487 } 1488 1489 static void putXMMReg ( UInt xmmreg, IRExpr* e ) 1490 { 1491 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1492 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); 1493 } 1494 1495 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) 1496 { 1497 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1498 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1499 } 1500 1501 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) 1502 { 1503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1504 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1505 } 1506 1507 static void putXMMRegLane32F ( 
UInt xmmreg, Int laneno, IRExpr* e ) 1508 { 1509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1510 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1511 } 1512 1513 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) 1514 { 1515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1516 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1517 } 1518 1519 static IRExpr* getYMMReg ( UInt xmmreg ) 1520 { 1521 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 ); 1522 } 1523 1524 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno ) 1525 { 1526 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 ); 1527 } 1528 1529 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno ) 1530 { 1531 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 ); 1532 } 1533 1534 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno ) 1535 { 1536 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 ); 1537 } 1538 1539 static void putYMMReg ( UInt ymmreg, IRExpr* e ) 1540 { 1541 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256); 1542 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) ); 1543 } 1544 1545 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e ) 1546 { 1547 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1548 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) ); 1549 } 1550 1551 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e ) 1552 { 1553 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1554 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1555 } 1556 1557 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e ) 1558 { 1559 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1560 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1561 } 1562 1563 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e ) 1564 { 1565 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1566 stmt( IRStmt_Put( 
ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1567 } 1568 1569 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e ) 1570 { 1571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1572 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1573 } 1574 1575 static IRExpr* mkV128 ( UShort mask ) 1576 { 1577 return IRExpr_Const(IRConst_V128(mask)); 1578 } 1579 1580 /* Write the low half of a YMM reg and zero out the upper half. */ 1581 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e ) 1582 { 1583 putYMMRegLane128( ymmreg, 0, e ); 1584 putYMMRegLane128( ymmreg, 1, mkV128(0) ); 1585 } 1586 1587 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) 1588 { 1589 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); 1590 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); 1591 return unop(Iop_64to1, 1592 binop(Iop_And64, 1593 unop(Iop_1Uto64,x), 1594 unop(Iop_1Uto64,y))); 1595 } 1596 1597 /* Generate a compare-and-swap operation, operating on memory at 1598 'addr'. The expected value is 'expVal' and the new value is 1599 'newVal'. If the operation fails, then transfer control (with a 1600 no-redir jump (XXX no -- see comment at top of this file)) to 1601 'restart_point', which is presumably the address of the guest 1602 instruction again -- retrying, essentially. 
*/ 1603 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, 1604 Addr64 restart_point ) 1605 { 1606 IRCAS* cas; 1607 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); 1608 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); 1609 IRTemp oldTmp = newTemp(tyE); 1610 IRTemp expTmp = newTemp(tyE); 1611 vassert(tyE == tyN); 1612 vassert(tyE == Ity_I64 || tyE == Ity_I32 1613 || tyE == Ity_I16 || tyE == Ity_I8); 1614 assign(expTmp, expVal); 1615 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 1616 NULL, mkexpr(expTmp), NULL, newVal ); 1617 stmt( IRStmt_CAS(cas) ); 1618 stmt( IRStmt_Exit( 1619 binop( mkSizedOp(tyE,Iop_CasCmpNE8), 1620 mkexpr(oldTmp), mkexpr(expTmp) ), 1621 Ijk_Boring, /*Ijk_NoRedir*/ 1622 IRConst_U64( restart_point ), 1623 OFFB_RIP 1624 )); 1625 } 1626 1627 1628 /*------------------------------------------------------------*/ 1629 /*--- Helpers for %rflags. ---*/ 1630 /*------------------------------------------------------------*/ 1631 1632 /* -------------- Evaluating the flags-thunk. -------------- */ 1633 1634 /* Build IR to calculate all the eflags from stored 1635 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1636 Ity_I64. */ 1637 static IRExpr* mk_amd64g_calculate_rflags_all ( void ) 1638 { 1639 IRExpr** args 1640 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1641 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1642 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1643 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1644 IRExpr* call 1645 = mkIRExprCCall( 1646 Ity_I64, 1647 0/*regparm*/, 1648 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all, 1649 args 1650 ); 1651 /* Exclude OP and NDEP from definedness checking. We're only 1652 interested in DEP1 and DEP2. */ 1653 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1654 return call; 1655 } 1656 1657 /* Build IR to calculate some particular condition from stored 1658 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1659 Ity_Bit. 
*/ 1660 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond ) 1661 { 1662 IRExpr** args 1663 = mkIRExprVec_5( mkU64(cond), 1664 IRExpr_Get(OFFB_CC_OP, Ity_I64), 1665 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1666 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1667 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1668 IRExpr* call 1669 = mkIRExprCCall( 1670 Ity_I64, 1671 0/*regparm*/, 1672 "amd64g_calculate_condition", &amd64g_calculate_condition, 1673 args 1674 ); 1675 /* Exclude the requested condition, OP and NDEP from definedness 1676 checking. We're only interested in DEP1 and DEP2. */ 1677 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4); 1678 return unop(Iop_64to1, call); 1679 } 1680 1681 /* Build IR to calculate just the carry flag from stored 1682 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */ 1683 static IRExpr* mk_amd64g_calculate_rflags_c ( void ) 1684 { 1685 IRExpr** args 1686 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1687 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1688 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1689 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1690 IRExpr* call 1691 = mkIRExprCCall( 1692 Ity_I64, 1693 0/*regparm*/, 1694 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c, 1695 args 1696 ); 1697 /* Exclude OP and NDEP from definedness checking. We're only 1698 interested in DEP1 and DEP2. */ 1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); 1700 return call; 1701 } 1702 1703 1704 /* -------------- Building the flags-thunk. -------------- */ 1705 1706 /* The machinery in this section builds the flag-thunk following a 1707 flag-setting operation. Hence the various setFlags_* functions. 1708 */ 1709 1710 static Bool isAddSub ( IROp op8 ) 1711 { 1712 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8); 1713 } 1714 1715 static Bool isLogic ( IROp op8 ) 1716 { 1717 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8); 1718 } 1719 1720 /* U-widen 1/8/16/32/64 bit int expr to 64. 
*/ 1721 static IRExpr* widenUto64 ( IRExpr* e ) 1722 { 1723 switch (typeOfIRExpr(irsb->tyenv,e)) { 1724 case Ity_I64: return e; 1725 case Ity_I32: return unop(Iop_32Uto64, e); 1726 case Ity_I16: return unop(Iop_16Uto64, e); 1727 case Ity_I8: return unop(Iop_8Uto64, e); 1728 case Ity_I1: return unop(Iop_1Uto64, e); 1729 default: vpanic("widenUto64"); 1730 } 1731 } 1732 1733 /* S-widen 8/16/32/64 bit int expr to 32. */ 1734 static IRExpr* widenSto64 ( IRExpr* e ) 1735 { 1736 switch (typeOfIRExpr(irsb->tyenv,e)) { 1737 case Ity_I64: return e; 1738 case Ity_I32: return unop(Iop_32Sto64, e); 1739 case Ity_I16: return unop(Iop_16Sto64, e); 1740 case Ity_I8: return unop(Iop_8Sto64, e); 1741 default: vpanic("widenSto64"); 1742 } 1743 } 1744 1745 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some 1746 of these combinations make sense. */ 1747 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e ) 1748 { 1749 IRType src_ty = typeOfIRExpr(irsb->tyenv,e); 1750 if (src_ty == dst_ty) 1751 return e; 1752 if (src_ty == Ity_I32 && dst_ty == Ity_I16) 1753 return unop(Iop_32to16, e); 1754 if (src_ty == Ity_I32 && dst_ty == Ity_I8) 1755 return unop(Iop_32to8, e); 1756 if (src_ty == Ity_I64 && dst_ty == Ity_I32) 1757 return unop(Iop_64to32, e); 1758 if (src_ty == Ity_I64 && dst_ty == Ity_I16) 1759 return unop(Iop_64to16, e); 1760 if (src_ty == Ity_I64 && dst_ty == Ity_I8) 1761 return unop(Iop_64to8, e); 1762 1763 vex_printf("\nsrc, dst tys are: "); 1764 ppIRType(src_ty); 1765 vex_printf(", "); 1766 ppIRType(dst_ty); 1767 vex_printf("\n"); 1768 vpanic("narrowTo(amd64)"); 1769 } 1770 1771 1772 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is 1773 auto-sized up to the real op. 
*/ 1774 1775 static 1776 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty ) 1777 { 1778 Int ccOp = 0; 1779 switch (ty) { 1780 case Ity_I8: ccOp = 0; break; 1781 case Ity_I16: ccOp = 1; break; 1782 case Ity_I32: ccOp = 2; break; 1783 case Ity_I64: ccOp = 3; break; 1784 default: vassert(0); 1785 } 1786 switch (op8) { 1787 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break; 1788 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break; 1789 default: ppIROp(op8); 1790 vpanic("setFlags_DEP1_DEP2(amd64)"); 1791 } 1792 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1793 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1794 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) ); 1795 } 1796 1797 1798 /* Set the OP and DEP1 fields only, and write zero to DEP2. */ 1799 1800 static 1801 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) 1802 { 1803 Int ccOp = 0; 1804 switch (ty) { 1805 case Ity_I8: ccOp = 0; break; 1806 case Ity_I16: ccOp = 1; break; 1807 case Ity_I32: ccOp = 2; break; 1808 case Ity_I64: ccOp = 3; break; 1809 default: vassert(0); 1810 } 1811 switch (op8) { 1812 case Iop_Or8: 1813 case Iop_And8: 1814 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break; 1815 default: ppIROp(op8); 1816 vpanic("setFlags_DEP1(amd64)"); 1817 } 1818 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1819 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1820 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1821 } 1822 1823 1824 /* For shift operations, we put in the result and the undershifted 1825 result. Except if the shift amount is zero, the thunk is left 1826 unchanged. 
*/ 1827 1828 static void setFlags_DEP1_DEP2_shift ( IROp op64, 1829 IRTemp res, 1830 IRTemp resUS, 1831 IRType ty, 1832 IRTemp guard ) 1833 { 1834 Int ccOp = 0; 1835 switch (ty) { 1836 case Ity_I8: ccOp = 0; break; 1837 case Ity_I16: ccOp = 1; break; 1838 case Ity_I32: ccOp = 2; break; 1839 case Ity_I64: ccOp = 3; break; 1840 default: vassert(0); 1841 } 1842 1843 vassert(guard); 1844 1845 /* Both kinds of right shifts are handled by the same thunk 1846 operation. */ 1847 switch (op64) { 1848 case Iop_Shr64: 1849 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break; 1850 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break; 1851 default: ppIROp(op64); 1852 vpanic("setFlags_DEP1_DEP2_shift(amd64)"); 1853 } 1854 1855 /* guard :: Ity_I8. We need to convert it to I1. */ 1856 IRTemp guardB = newTemp(Ity_I1); 1857 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) ); 1858 1859 /* DEP1 contains the result, DEP2 contains the undershifted value. */ 1860 stmt( IRStmt_Put( OFFB_CC_OP, 1861 IRExpr_ITE( mkexpr(guardB), 1862 mkU64(ccOp), 1863 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 1864 stmt( IRStmt_Put( OFFB_CC_DEP1, 1865 IRExpr_ITE( mkexpr(guardB), 1866 widenUto64(mkexpr(res)), 1867 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 1868 stmt( IRStmt_Put( OFFB_CC_DEP2, 1869 IRExpr_ITE( mkexpr(guardB), 1870 widenUto64(mkexpr(resUS)), 1871 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 1872 } 1873 1874 1875 /* For the inc/dec case, we store in DEP1 the result value and in NDEP 1876 the former value of the carry flag, which unfortunately we have to 1877 compute. */ 1878 1879 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) 1880 { 1881 Int ccOp = inc ? 
AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB; 1882 1883 switch (ty) { 1884 case Ity_I8: ccOp += 0; break; 1885 case Ity_I16: ccOp += 1; break; 1886 case Ity_I32: ccOp += 2; break; 1887 case Ity_I64: ccOp += 3; break; 1888 default: vassert(0); 1889 } 1890 1891 /* This has to come first, because calculating the C flag 1892 may require reading all four thunk fields. */ 1893 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) ); 1894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1895 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) ); 1896 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1897 } 1898 1899 1900 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the 1901 two arguments. */ 1902 1903 static 1904 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1905 { 1906 switch (ty) { 1907 case Ity_I8: 1908 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1909 break; 1910 case Ity_I16: 1911 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1912 break; 1913 case Ity_I32: 1914 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1915 break; 1916 case Ity_I64: 1917 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1918 break; 1919 default: 1920 vpanic("setFlags_MUL(amd64)"); 1921 } 1922 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1923 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1924 } 1925 1926 1927 /* -------------- Condition codes. -------------- */ 1928 1929 /* Condition codes, using the AMD encoding. 
*/

/* Return the customary assembler mnemonic suffix for condition code
   'cond' (disassembly printing only; the commented alternatives are
   the equivalent "n"-form names). */
static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}

/* Convert 'cond' to its positive form, setting *needInvert to show
   whether the caller must invert the computed result.  Relies on the
   AMD encoding pairing each negated condition at (positive cond | 1). */
static
AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
                                          /*OUT*/Bool*   needInvert )
{
   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

      if taddr is IRTemp_INVALID, then no store is generated.

      if taddr is not IRTemp_INVALID, then a store (using taddr as
      the address) is generated:

         if texpVal is IRTemp_INVALID then a normal store is
         generated, and restart_point must be zero (it is irrelevant).

         if texpVal is not IRTemp_INVALID then a cas-style store is
         generated.  texpVal is the expected value, restart_point
         is the restart point if the store fails, and texpVal must
         have the same type as tres.

*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1.  Must be computed before the
      thunk is overwritten below. */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Write the new thunk: DEP1 = ta1, DEP2 = ta2 ^ oldC,
      NDEP = oldC. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1))  ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old carry (borrow) */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'.
*/
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Write the new thunk: DEP1 = ta1, DEP2 = ta2 ^ oldC,
      NDEP = oldC. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

/* Mnemonic for a Grp1 opcode extension (/0../7). */
static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

/* Mnemonic for a Grp2 opcode extension (/0../7).  Note /6 prints as
   "shl", matching the table entry for /4. */
static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

/* Mnemonic for a Grp4 opcode extension; only /0 and /1 are valid. */
static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

/* Mnemonic for a Grp5 opcode extension; /7 is invalid. */
static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

/* Mnemonic for a Grp8 opcode extension; only /4../7 are valid. */
static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

//.. static const HChar* nameSReg ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return "%es";
//..       case R_CS: return "%cs";
//..       case R_SS: return "%ss";
//..       case R_DS: return "%ds";
//..       case R_FS: return "%fs";
//..       case R_GS: return "%gs";
//..       default: vpanic("nameSReg(x86)");
//..    }
//.. }

/* Name of MMX register 0..7. */
static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

/* Name of XMM register 0..15. */
static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[16]
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

/* Suffix letter for an MMX lane granularity (0=b,1=w,2=d,3=q). */
static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

/* Suffix letter for an integer operand size in bytes. */
static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}

/* Name of YMM register 0..15. */
static const HChar* nameYMMReg ( Int ymmreg )
{
   static const HChar* ymm_names[16]
     = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3",
         "%ymm4",  "%ymm5",  "%ymm6",  "%ymm7",
         "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
         "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   return ymm_names[ymmreg];
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( /*MOD*/DisResult* dres,
IRJumpKind kind, Addr64 d64 )
{
   /* End this block with an unconditional jump of kind 'kind' to the
      literal guest address 'd64'. */
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}

/* As jmp_lit, but the destination is the 64-bit value in temp 't'. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}

/* End this block with a two-way conditional branch on 'cond': to
   d64_true if it holds, else to d64_false.  The condition is first
   canonicalised (positiveIse_AMD64Condcode) so the generated IR
   always tests a positive-form condition. */
static
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      /* Inverted sense: side-exit to the false target when the
         positive condition holds; fall through to the true target. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
   }
}

/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static
void make_redzone_AbiHint ( const VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, const HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;
   vassert(szB >= 0);

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);
   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia)     == Ity_I64);
   if (szB > 0)
      stmt( IRStmt_AbiHint(
               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
               szB,
               mkexpr(nia)
            ));
}


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

/* Return the printable prefix for any segment override carried in
   'pfx', or "" if there is none (disassembly printing only). */
static
const HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   if (pfx & PFX_SS) return "%ss:";
   return ""; /* no override */
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override
   present. */
static
IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* Note that the below are hacks that rely on the assumption
      that %fs or %gs are constant.
      Typically, %fs is always 0x63 on linux (in the main thread, it
      stays at value 0), %gs always 0x60 on Darwin, ... */
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_const) {
         /* return virtual + guest_FS_CONST.
*/
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_FS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_const) {
         /* return virtual + guest_GS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_GS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   /* --- address size override --- */
   /* 0x67 prefix: zero the top 32 bits of the effective address. */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   return virtual;
}

//.. {
//..    Int    sreg;
//..    IRType hWordTy;
//..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
//..
//..    if (sorb == 0)
//..       /* the common case - no override */
//..       return virtual;
//..
//..    switch (sorb) {
//..       case 0x3E: sreg = R_DS; break;
//..       case 0x26: sreg = R_ES; break;
//..       case 0x64: sreg = R_FS; break;
//..       case 0x65: sreg = R_GS; break;
//..       default: vpanic("handleAddrOverrides(x86,guest)");
//..    }
//..
//..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
//..
//..    seg_selector = newTemp(Ity_I32);
//..    ldt_ptr      = newTemp(hWordTy);
//..    gdt_ptr      = newTemp(hWordTy);
//..    r64          = newTemp(Ity_I64);
//..
//..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
//..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
//..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
//..
//..    /*
//..    Call this to do the translation and limit checks:
//..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
//..                                  UInt seg_selector, UInt virtual_addr )
//..    */
//..    assign(
//..       r64,
//..       mkIRExprCCall(
//..          Ity_I64,
//..          0/*regparms*/,
//..          "x86g_use_seg_selector",
//..          &x86g_use_seg_selector,
//..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
//..                         mkexpr(seg_selector), virtual)
//..       )
//..    );
//..
//..    /* If the high 32 of the result are non-zero, there was a
//..       failure in address translation.  In which case, make a
//..       quick exit.
//..    */
//..    stmt(
//..       IRStmt_Exit(
//..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
//..          Ijk_MapFail,
//..          IRConst_U32( guest_eip_curr_instr )
//..       )
//..    );
//..
//..    /* otherwise, here's the translated result. */
//..    return unop(Iop_64to32, mkexpr(r64));
//.. }


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
*/

/* Copy 'addr64' into a fresh Ity_I64 temp and return the temp. */
static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

/* See the block comment above for the full contract. */
static
IRTemp disAMode ( /*OUT*/Int* len,
                  const VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long d   = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r16.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed = guest_RIP_bbstart
                                    + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
                 = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
                 = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)),
                        mkU64(d))));
         }

         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d8 + %base
            | %index != %ESP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp8(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d32 + %base
            | %index != %RSP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp32(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}


/* Similarly for VSIB addressing.  This returns just the addend,
   and fills in *rI and *vscale with the register number of the vector
   index and its multiplicand.
*/
static
IRTemp disAVSIBMode ( /*OUT*/Int* len,
                      const VexAbiInfo* vbi, Prefix pfx, Long delta,
                      /*OUT*/HChar* buf, /*OUT*/UInt* rI,
                      IRType ty, /*OUT*/Int* vscale )
{
   UChar mod_reg_rm = getUChar(delta);
   const HChar *vindex;

   *len = 0;
   *rI = 0;
   *vscale = 0;
   buf[0] = (UChar)0;
   /* VSIB needs a memory operand with a SIB byte (r/m == 4). */
   if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
      return IRTemp_INVALID;

   UChar sib     = getUChar(delta+1);
   UChar scale   = toUChar((sib >> 6) & 3);
   UChar index_r = toUChar((sib >> 3) & 7);
   UChar base_r  = toUChar(sib & 7);
   Long  d       = 0;
   /* correct since #(R13) == 8 + #(RBP) */
   Bool  base_is_BPor13 = toBool(base_r == R_RBP);
   delta += 2;
   *len = 2;

   /* Vector index register number, with REX.X giving bit 3.  The
      index is an XMM or YMM register depending on 'ty'. */
   *rI = index_r | (getRexX(pfx) << 3);
   if (ty == Ity_V128)
      vindex = nameXMMReg(*rI);
   else
      vindex = nameYMMReg(*rI);
   *vscale = 1<<scale;

   switch (mod_reg_rm >> 6) {
   case 0:
      /* mod == 0: no displacement, except when base is rBP/r13, in
         which case a d32 follows and there is no base register. */
      if (base_is_BPor13) {
         d = getSDisp32(delta);
         *len += 4;
         if (scale == 0) {
            DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
         } else {
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
         }
         return disAMode_copy2tmp( mkU64(d) );
      } else {
         if (scale == 0) {
            DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                     nameIRegRexB(8,pfx,base_r), vindex);
         } else {
            DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                     nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
         }
      }
      break;
   case 1:
      /* mod == 1: d8 displacement; share the printing below. */
      d = getSDisp8(delta);
      *len += 1;
      goto have_disp;
   case 2:
      /* mod == 2: d32 displacement. */
      d = getSDisp32(delta);
      *len += 4;
   have_disp:
      if (scale == 0) {
         DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                  nameIRegRexB(8,pfx,base_r), vindex);
      } else {
         DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                  nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
      }
      break;
   }

   /* Addend is base, or base + displacement if one was present. */
   if (!d)
      return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
   return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
                                   mkU64(d)) );
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled. */

static UInt lengthAMode ( Prefix pfx, Long delta )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r16.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         return 5;

      case 0x04: {
         /* SIB, with no displacement.
*/
         UChar sib     = getUChar(delta);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         /* With mod == 0, base rBP/r13 means a d32 follows the SIB. */
         if (base_is_BPor13) {
            return 6;
         } else {
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
ULong dis_op2_E_G ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    const HChar* t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
         putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* 'keep' == False means this is really a CMP/TEST-style op:
            flags are set but the result is discarded. */
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}



/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpv
                       OP %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_op2_G_E ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    const HChar* t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if
(keep) { 3117 if (haveLOCK(pfx)) { 3118 if (0) vex_printf("locked case\n" ); 3119 casLE( mkexpr(addr), 3120 mkexpr(dst0)/*expval*/, 3121 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 3122 } else { 3123 if (0) vex_printf("nonlocked case\n"); 3124 storeLE(mkexpr(addr), mkexpr(dst1)); 3125 } 3126 } 3127 if (isAddSub(op8)) 3128 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3129 else 3130 setFlags_DEP1(op8, dst1, ty); 3131 } 3132 3133 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3134 nameIRegG(size,pfx,rm), dis_buf); 3135 return len+delta0; 3136 } 3137 } 3138 3139 3140 /* Handle move instructions of the form 3141 mov E, G meaning 3142 mov reg-or-mem, reg 3143 Is passed the a ptr to the modRM byte, and the data size. Returns 3144 the address advanced completely over this instruction. 3145 3146 E(src) is reg-or-mem 3147 G(dst) is reg. 3148 3149 If E is reg, --> GET %E, tmpv 3150 PUT tmpv, %G 3151 3152 If E is mem --> (getAddr E) -> tmpa 3153 LD (tmpa), tmpb 3154 PUT tmpb, %G 3155 */ 3156 static 3157 ULong dis_mov_E_G ( const VexAbiInfo* vbi, 3158 Prefix pfx, 3159 Int size, 3160 Long delta0 ) 3161 { 3162 Int len; 3163 UChar rm = getUChar(delta0); 3164 HChar dis_buf[50]; 3165 3166 if (epartIsReg(rm)) { 3167 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 3168 DIP("mov%c %s,%s\n", nameISize(size), 3169 nameIRegE(size,pfx,rm), 3170 nameIRegG(size,pfx,rm)); 3171 return 1+delta0; 3172 } 3173 3174 /* E refers to memory */ 3175 { 3176 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3177 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 3178 DIP("mov%c %s,%s\n", nameISize(size), 3179 dis_buf, 3180 nameIRegG(size,pfx,rm)); 3181 return delta0+len; 3182 } 3183 } 3184 3185 3186 /* Handle move instructions of the form 3187 mov G, E meaning 3188 mov reg, reg-or-mem 3189 Is passed the a ptr to the modRM byte, and the data size. Returns 3190 the address advanced completely over this instruction. 3191 We have to decide here whether F2 or F3 are acceptable. 
F2 never is. 3192 3193 G(src) is reg. 3194 E(dst) is reg-or-mem 3195 3196 If E is reg, --> GET %G, tmp 3197 PUT tmp, %E 3198 3199 If E is mem, --> (getAddr E) -> tmpa 3200 GET %G, tmpv 3201 ST tmpv, (tmpa) 3202 */ 3203 static 3204 ULong dis_mov_G_E ( const VexAbiInfo* vbi, 3205 Prefix pfx, 3206 Int size, 3207 Long delta0, 3208 /*OUT*/Bool* ok ) 3209 { 3210 Int len; 3211 UChar rm = getUChar(delta0); 3212 HChar dis_buf[50]; 3213 3214 *ok = True; 3215 3216 if (epartIsReg(rm)) { 3217 if (haveF2orF3(pfx)) { *ok = False; return delta0; } 3218 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm)); 3219 DIP("mov%c %s,%s\n", nameISize(size), 3220 nameIRegG(size,pfx,rm), 3221 nameIRegE(size,pfx,rm)); 3222 return 1+delta0; 3223 } 3224 3225 /* E refers to memory */ 3226 { 3227 if (haveF2(pfx)) { *ok = False; return delta0; } 3228 /* F3(XRELEASE) is acceptable, though. */ 3229 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3230 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) ); 3231 DIP("mov%c %s,%s\n", nameISize(size), 3232 nameIRegG(size,pfx,rm), 3233 dis_buf); 3234 return len+delta0; 3235 } 3236 } 3237 3238 3239 /* op $immediate, AL/AX/EAX/RAX. 
*/ 3240 static 3241 ULong dis_op_imm_A ( Int size, 3242 Bool carrying, 3243 IROp op8, 3244 Bool keep, 3245 Long delta, 3246 const HChar* t_amd64opc ) 3247 { 3248 Int size4 = imin(size,4); 3249 IRType ty = szToITy(size); 3250 IRTemp dst0 = newTemp(ty); 3251 IRTemp src = newTemp(ty); 3252 IRTemp dst1 = newTemp(ty); 3253 Long lit = getSDisp(size4,delta); 3254 assign(dst0, getIRegRAX(size)); 3255 assign(src, mkU(ty,lit & mkSizeMask(size))); 3256 3257 if (isAddSub(op8) && !carrying) { 3258 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3259 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3260 } 3261 else 3262 if (isLogic(op8)) { 3263 vassert(!carrying); 3264 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3265 setFlags_DEP1(op8, dst1, ty); 3266 } 3267 else 3268 if (op8 == Iop_Add8 && carrying) { 3269 helper_ADC( size, dst1, dst0, src, 3270 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3271 } 3272 else 3273 if (op8 == Iop_Sub8 && carrying) { 3274 helper_SBB( size, dst1, dst0, src, 3275 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3276 } 3277 else 3278 vpanic("dis_op_imm_A(amd64,guest)"); 3279 3280 if (keep) 3281 putIRegRAX(size, mkexpr(dst1)); 3282 3283 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size), 3284 lit, nameIRegRAX(size)); 3285 return delta+size4; 3286 } 3287 3288 3289 /* Sign- and Zero-extending moves. */ 3290 static 3291 ULong dis_movx_E_G ( const VexAbiInfo* vbi, 3292 Prefix pfx, 3293 Long delta, Int szs, Int szd, Bool sign_extend ) 3294 { 3295 UChar rm = getUChar(delta); 3296 if (epartIsReg(rm)) { 3297 putIRegG(szd, pfx, rm, 3298 doScalarWidening( 3299 szs,szd,sign_extend, 3300 getIRegE(szs,pfx,rm))); 3301 DIP("mov%c%c%c %s,%s\n", sign_extend ? 
's' : 'z', 3302 nameISize(szs), 3303 nameISize(szd), 3304 nameIRegE(szs,pfx,rm), 3305 nameIRegG(szd,pfx,rm)); 3306 return 1+delta; 3307 } 3308 3309 /* E refers to memory */ 3310 { 3311 Int len; 3312 HChar dis_buf[50]; 3313 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 3314 putIRegG(szd, pfx, rm, 3315 doScalarWidening( 3316 szs,szd,sign_extend, 3317 loadLE(szToITy(szs),mkexpr(addr)))); 3318 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3319 nameISize(szs), 3320 nameISize(szd), 3321 dis_buf, 3322 nameIRegG(szd,pfx,rm)); 3323 return len+delta; 3324 } 3325 } 3326 3327 3328 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by 3329 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */ 3330 static 3331 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 3332 { 3333 /* special-case the 64-bit case */ 3334 if (sz == 8) { 3335 IROp op = signed_divide ? Iop_DivModS128to64 3336 : Iop_DivModU128to64; 3337 IRTemp src128 = newTemp(Ity_I128); 3338 IRTemp dst128 = newTemp(Ity_I128); 3339 assign( src128, binop(Iop_64HLto128, 3340 getIReg64(R_RDX), 3341 getIReg64(R_RAX)) ); 3342 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) ); 3343 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) ); 3344 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); 3345 } else { 3346 IROp op = signed_divide ? Iop_DivModS64to32 3347 : Iop_DivModU64to32; 3348 IRTemp src64 = newTemp(Ity_I64); 3349 IRTemp dst64 = newTemp(Ity_I64); 3350 switch (sz) { 3351 case 4: 3352 assign( src64, 3353 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3354 assign( dst64, 3355 binop(op, mkexpr(src64), mkexpr(t)) ); 3356 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3357 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3358 break; 3359 case 2: { 3360 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3361 IROp widen1632 = signed_divide ? 
Iop_16Sto32 : Iop_16Uto32; 3362 assign( src64, unop(widen3264, 3363 binop(Iop_16HLto32, 3364 getIRegRDX(2), 3365 getIRegRAX(2))) ); 3366 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3367 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3368 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3369 break; 3370 } 3371 case 1: { 3372 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3373 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3374 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 3375 assign( src64, unop(widen3264, 3376 unop(widen1632, getIRegRAX(2))) ); 3377 assign( dst64, 3378 binop(op, mkexpr(src64), 3379 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3380 putIRegRAX( 1, unop(Iop_16to8, 3381 unop(Iop_32to16, 3382 unop(Iop_64to32,mkexpr(dst64)))) ); 3383 putIRegAH( unop(Iop_16to8, 3384 unop(Iop_32to16, 3385 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3386 break; 3387 } 3388 default: 3389 vpanic("codegen_div(amd64)"); 3390 } 3391 } 3392 } 3393 3394 static 3395 ULong dis_Grp1 ( const VexAbiInfo* vbi, 3396 Prefix pfx, 3397 Long delta, UChar modrm, 3398 Int am_sz, Int d_sz, Int sz, Long d64 ) 3399 { 3400 Int len; 3401 HChar dis_buf[50]; 3402 IRType ty = szToITy(sz); 3403 IRTemp dst1 = newTemp(ty); 3404 IRTemp src = newTemp(ty); 3405 IRTemp dst0 = newTemp(ty); 3406 IRTemp addr = IRTemp_INVALID; 3407 IROp op8 = Iop_INVALID; 3408 ULong mask = mkSizeMask(sz); 3409 3410 switch (gregLO3ofRM(modrm)) { 3411 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3412 case 2: break; // ADC 3413 case 3: break; // SBB 3414 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3415 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3416 /*NOTREACHED*/ 3417 default: vpanic("dis_Grp1(amd64): unhandled case"); 3418 } 3419 3420 if (epartIsReg(modrm)) { 3421 vassert(am_sz == 1); 3422 3423 assign(dst0, getIRegE(sz,pfx,modrm)); 3424 assign(src, mkU(ty,d64 & mask)); 3425 3426 if 
(gregLO3ofRM(modrm) == 2 /* ADC */) { 3427 helper_ADC( sz, dst1, dst0, src, 3428 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3429 } else 3430 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3431 helper_SBB( sz, dst1, dst0, src, 3432 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3433 } else { 3434 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3435 if (isAddSub(op8)) 3436 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3437 else 3438 setFlags_DEP1(op8, dst1, ty); 3439 } 3440 3441 if (gregLO3ofRM(modrm) < 7) 3442 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3443 3444 delta += (am_sz + d_sz); 3445 DIP("%s%c $%lld, %s\n", 3446 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3447 nameIRegE(sz,pfx,modrm)); 3448 } else { 3449 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3450 3451 assign(dst0, loadLE(ty,mkexpr(addr))); 3452 assign(src, mkU(ty,d64 & mask)); 3453 3454 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3455 if (haveLOCK(pfx)) { 3456 /* cas-style store */ 3457 helper_ADC( sz, dst1, dst0, src, 3458 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3459 } else { 3460 /* normal store */ 3461 helper_ADC( sz, dst1, dst0, src, 3462 /*store*/addr, IRTemp_INVALID, 0 ); 3463 } 3464 } else 3465 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3466 if (haveLOCK(pfx)) { 3467 /* cas-style store */ 3468 helper_SBB( sz, dst1, dst0, src, 3469 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3470 } else { 3471 /* normal store */ 3472 helper_SBB( sz, dst1, dst0, src, 3473 /*store*/addr, IRTemp_INVALID, 0 ); 3474 } 3475 } else { 3476 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3477 if (gregLO3ofRM(modrm) < 7) { 3478 if (haveLOCK(pfx)) { 3479 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3480 mkexpr(dst1)/*newVal*/, 3481 guest_RIP_curr_instr ); 3482 } else { 3483 storeLE(mkexpr(addr), mkexpr(dst1)); 3484 } 3485 } 3486 if (isAddSub(op8)) 3487 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3488 else 3489 setFlags_DEP1(op8, dst1, ty); 3490 } 
3491 3492 delta += (len+d_sz); 3493 DIP("%s%c $%lld, %s\n", 3494 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 3495 d64, dis_buf); 3496 } 3497 return delta; 3498 } 3499 3500 3501 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3502 expression. */ 3503 3504 static 3505 ULong dis_Grp2 ( const VexAbiInfo* vbi, 3506 Prefix pfx, 3507 Long delta, UChar modrm, 3508 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3509 const HChar* shift_expr_txt, Bool* decode_OK ) 3510 { 3511 /* delta on entry points at the modrm byte. */ 3512 HChar dis_buf[50]; 3513 Int len; 3514 Bool isShift, isRotate, isRotateC; 3515 IRType ty = szToITy(sz); 3516 IRTemp dst0 = newTemp(ty); 3517 IRTemp dst1 = newTemp(ty); 3518 IRTemp addr = IRTemp_INVALID; 3519 3520 *decode_OK = True; 3521 3522 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3523 3524 /* Put value to shift/rotate in dst0. */ 3525 if (epartIsReg(modrm)) { 3526 assign(dst0, getIRegE(sz, pfx, modrm)); 3527 delta += (am_sz + d_sz); 3528 } else { 3529 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3530 assign(dst0, loadLE(ty,mkexpr(addr))); 3531 delta += len + d_sz; 3532 } 3533 3534 isShift = False; 3535 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3536 3537 isRotate = False; 3538 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3539 3540 isRotateC = False; 3541 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3542 3543 if (!isShift && !isRotate && !isRotateC) { 3544 /*NOTREACHED*/ 3545 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3546 } 3547 3548 if (isRotateC) { 3549 /* Call a helper; this insn is so ridiculous it does not deserve 3550 better. One problem is, the helper has to calculate both the 3551 new value and the new flags. This is more than 64 bits, and 3552 there is no way to return more than 64 bits from the helper. 
3553 Hence the crude and obvious solution is to call it twice, 3554 using the sign of the sz field to indicate whether it is the 3555 value or rflags result we want. 3556 */ 3557 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3558 IRExpr** argsVALUE; 3559 IRExpr** argsRFLAGS; 3560 3561 IRTemp new_value = newTemp(Ity_I64); 3562 IRTemp new_rflags = newTemp(Ity_I64); 3563 IRTemp old_rflags = newTemp(Ity_I64); 3564 3565 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3566 3567 argsVALUE 3568 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3569 widenUto64(shift_expr), /* rotate amount */ 3570 mkexpr(old_rflags), 3571 mkU64(sz) ); 3572 assign( new_value, 3573 mkIRExprCCall( 3574 Ity_I64, 3575 0/*regparm*/, 3576 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3577 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3578 argsVALUE 3579 ) 3580 ); 3581 3582 argsRFLAGS 3583 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3584 widenUto64(shift_expr), /* rotate amount */ 3585 mkexpr(old_rflags), 3586 mkU64(-sz) ); 3587 assign( new_rflags, 3588 mkIRExprCCall( 3589 Ity_I64, 3590 0/*regparm*/, 3591 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3592 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3593 argsRFLAGS 3594 ) 3595 ); 3596 3597 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3598 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3599 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3600 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3601 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3602 } 3603 3604 else 3605 if (isShift) { 3606 3607 IRTemp pre64 = newTemp(Ity_I64); 3608 IRTemp res64 = newTemp(Ity_I64); 3609 IRTemp res64ss = newTemp(Ity_I64); 3610 IRTemp shift_amt = newTemp(Ity_I8); 3611 UChar mask = toUChar(sz==8 ? 
63 : 31); 3612 IROp op64; 3613 3614 switch (gregLO3ofRM(modrm)) { 3615 case 4: op64 = Iop_Shl64; break; 3616 case 5: op64 = Iop_Shr64; break; 3617 case 6: op64 = Iop_Shl64; break; 3618 case 7: op64 = Iop_Sar64; break; 3619 /*NOTREACHED*/ 3620 default: vpanic("dis_Grp2:shift"); break; 3621 } 3622 3623 /* Widen the value to be shifted to 64 bits, do the shift, and 3624 narrow back down. This seems surprisingly long-winded, but 3625 unfortunately the AMD semantics requires that 8/16/32-bit 3626 shifts give defined results for shift values all the way up 3627 to 32, and this seems the simplest way to do it. It has the 3628 advantage that the only IR level shifts generated are of 64 3629 bit values, and the shift amount is guaranteed to be in the 3630 range 0 .. 63, thereby observing the IR semantics requiring 3631 all shift values to be in the range 0 .. 2^word_size-1. 3632 3633 Therefore the shift amount is masked with 63 for 64-bit shifts 3634 and 31 for all others. 3635 */ 3636 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3637 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3638 3639 /* suitably widen the value to be shifted to 64 bits. */ 3640 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3641 : widenUto64(mkexpr(dst0)) ); 3642 3643 /* res64 = pre64 `shift` shift_amt */ 3644 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3645 3646 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3647 assign( res64ss, 3648 binop(op64, 3649 mkexpr(pre64), 3650 binop(Iop_And8, 3651 binop(Iop_Sub8, 3652 mkexpr(shift_amt), mkU8(1)), 3653 mkU8(mask))) ); 3654 3655 /* Build the flags thunk. */ 3656 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3657 3658 /* Narrow the result back down. */ 3659 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3660 3661 } /* if (isShift) */ 3662 3663 else 3664 if (isRotate) { 3665 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3666 : (ty==Ity_I32 ? 
2 : 3)); 3667 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3668 IRTemp rot_amt = newTemp(Ity_I8); 3669 IRTemp rot_amt64 = newTemp(Ity_I8); 3670 IRTemp oldFlags = newTemp(Ity_I64); 3671 UChar mask = toUChar(sz==8 ? 63 : 31); 3672 3673 /* rot_amt = shift_expr & mask */ 3674 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3675 expressions never shift beyond the word size and thus remain 3676 well defined. */ 3677 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3678 3679 if (ty == Ity_I64) 3680 assign(rot_amt, mkexpr(rot_amt64)); 3681 else 3682 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3683 3684 if (left) { 3685 3686 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3687 assign(dst1, 3688 binop( mkSizedOp(ty,Iop_Or8), 3689 binop( mkSizedOp(ty,Iop_Shl8), 3690 mkexpr(dst0), 3691 mkexpr(rot_amt) 3692 ), 3693 binop( mkSizedOp(ty,Iop_Shr8), 3694 mkexpr(dst0), 3695 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3696 ) 3697 ) 3698 ); 3699 ccOp += AMD64G_CC_OP_ROLB; 3700 3701 } else { /* right */ 3702 3703 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3704 assign(dst1, 3705 binop( mkSizedOp(ty,Iop_Or8), 3706 binop( mkSizedOp(ty,Iop_Shr8), 3707 mkexpr(dst0), 3708 mkexpr(rot_amt) 3709 ), 3710 binop( mkSizedOp(ty,Iop_Shl8), 3711 mkexpr(dst0), 3712 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3713 ) 3714 ) 3715 ); 3716 ccOp += AMD64G_CC_OP_RORB; 3717 3718 } 3719 3720 /* dst1 now holds the rotated value. Build flag thunk. We 3721 need the resulting value for this, and the previous flags. 3722 Except don't set it if the rotate count is zero. */ 3723 3724 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3725 3726 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */ 3727 IRTemp rot_amt64b = newTemp(Ity_I1); 3728 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) ); 3729 3730 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. 
*/ 3731 stmt( IRStmt_Put( OFFB_CC_OP, 3732 IRExpr_ITE( mkexpr(rot_amt64b), 3733 mkU64(ccOp), 3734 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 3735 stmt( IRStmt_Put( OFFB_CC_DEP1, 3736 IRExpr_ITE( mkexpr(rot_amt64b), 3737 widenUto64(mkexpr(dst1)), 3738 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 3739 stmt( IRStmt_Put( OFFB_CC_DEP2, 3740 IRExpr_ITE( mkexpr(rot_amt64b), 3741 mkU64(0), 3742 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 3743 stmt( IRStmt_Put( OFFB_CC_NDEP, 3744 IRExpr_ITE( mkexpr(rot_amt64b), 3745 mkexpr(oldFlags), 3746 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) )); 3747 } /* if (isRotate) */ 3748 3749 /* Save result, and finish up. */ 3750 if (epartIsReg(modrm)) { 3751 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3752 if (vex_traceflags & VEX_TRACE_FE) { 3753 vex_printf("%s%c ", 3754 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3755 if (shift_expr_txt) 3756 vex_printf("%s", shift_expr_txt); 3757 else 3758 ppIRExpr(shift_expr); 3759 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3760 } 3761 } else { 3762 storeLE(mkexpr(addr), mkexpr(dst1)); 3763 if (vex_traceflags & VEX_TRACE_FE) { 3764 vex_printf("%s%c ", 3765 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3766 if (shift_expr_txt) 3767 vex_printf("%s", shift_expr_txt); 3768 else 3769 ppIRExpr(shift_expr); 3770 vex_printf(", %s\n", dis_buf); 3771 } 3772 } 3773 return delta; 3774 } 3775 3776 3777 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3778 static 3779 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi, 3780 Prefix pfx, 3781 Long delta, UChar modrm, 3782 Int am_sz, Int sz, ULong src_val, 3783 Bool* decode_OK ) 3784 { 3785 /* src_val denotes a d8. 3786 And delta on entry points at the modrm byte. */ 3787 3788 IRType ty = szToITy(sz); 3789 IRTemp t2 = newTemp(Ity_I64); 3790 IRTemp t2m = newTemp(Ity_I64); 3791 IRTemp t_addr = IRTemp_INVALID; 3792 HChar dis_buf[50]; 3793 ULong mask; 3794 3795 /* we're optimists :-) */ 3796 *decode_OK = True; 3797 3798 /* Check whether F2 or F3 are acceptable. 
*/ 3799 if (epartIsReg(modrm)) { 3800 /* F2 or F3 are not allowed in the register case. */ 3801 if (haveF2orF3(pfx)) { 3802 *decode_OK = False; 3803 return delta; 3804 } 3805 } else { 3806 /* F2 or F3 (but not both) are allowable provided LOCK is also 3807 present. */ 3808 if (haveF2orF3(pfx)) { 3809 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 3810 *decode_OK = False; 3811 return delta; 3812 } 3813 } 3814 } 3815 3816 /* Limit src_val -- the bit offset -- to something within a word. 3817 The Intel docs say that literal offsets larger than a word are 3818 masked in this way. */ 3819 switch (sz) { 3820 case 2: src_val &= 15; break; 3821 case 4: src_val &= 31; break; 3822 case 8: src_val &= 63; break; 3823 default: *decode_OK = False; return delta; 3824 } 3825 3826 /* Invent a mask suitable for the operation. */ 3827 switch (gregLO3ofRM(modrm)) { 3828 case 4: /* BT */ mask = 0; break; 3829 case 5: /* BTS */ mask = 1ULL << src_val; break; 3830 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3831 case 7: /* BTC */ mask = 1ULL << src_val; break; 3832 /* If this needs to be extended, probably simplest to make a 3833 new function to handle the other cases (0 .. 3). The 3834 Intel docs do however not indicate any use for 0 .. 3, so 3835 we don't expect this to happen. */ 3836 default: *decode_OK = False; return delta; 3837 } 3838 3839 /* Fetch the value to be tested and modified into t2, which is 3840 64-bits wide regardless of sz. 
*/ 3841 if (epartIsReg(modrm)) { 3842 vassert(am_sz == 1); 3843 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3844 delta += (am_sz + 1); 3845 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3846 nameISize(sz), 3847 src_val, nameIRegE(sz,pfx,modrm)); 3848 } else { 3849 Int len; 3850 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3851 delta += (len+1); 3852 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3853 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3854 nameISize(sz), 3855 src_val, dis_buf); 3856 } 3857 3858 /* Compute the new value into t2m, if non-BT. */ 3859 switch (gregLO3ofRM(modrm)) { 3860 case 4: /* BT */ 3861 break; 3862 case 5: /* BTS */ 3863 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3864 break; 3865 case 6: /* BTR */ 3866 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3867 break; 3868 case 7: /* BTC */ 3869 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3870 break; 3871 default: 3872 /*NOTREACHED*/ /*the previous switch guards this*/ 3873 vassert(0); 3874 } 3875 3876 /* Write the result back, if non-BT. */ 3877 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3878 if (epartIsReg(modrm)) { 3879 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3880 } else { 3881 if (haveLOCK(pfx)) { 3882 casLE( mkexpr(t_addr), 3883 narrowTo(ty, mkexpr(t2))/*expd*/, 3884 narrowTo(ty, mkexpr(t2m))/*new*/, 3885 guest_RIP_curr_instr ); 3886 } else { 3887 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3888 } 3889 } 3890 } 3891 3892 /* Copy relevant bit from t2 into the carry flag. */ 3893 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3895 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3896 stmt( IRStmt_Put( 3897 OFFB_CC_DEP1, 3898 binop(Iop_And64, 3899 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3900 mkU64(1)) 3901 )); 3902 /* Set NDEP even though it isn't used. 
This makes redundant-PUT 3903 elimination of previous stores to this field work better. */ 3904 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3905 3906 return delta; 3907 } 3908 3909 3910 /* Signed/unsigned widening multiply. Generate IR to multiply the 3911 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3912 RDX:RAX/EDX:EAX/DX:AX/AX. 3913 */ 3914 static void codegen_mulL_A_D ( Int sz, Bool syned, 3915 IRTemp tmp, const HChar* tmp_txt ) 3916 { 3917 IRType ty = szToITy(sz); 3918 IRTemp t1 = newTemp(ty); 3919 3920 assign( t1, getIRegRAX(sz) ); 3921 3922 switch (ty) { 3923 case Ity_I64: { 3924 IRTemp res128 = newTemp(Ity_I128); 3925 IRTemp resHi = newTemp(Ity_I64); 3926 IRTemp resLo = newTemp(Ity_I64); 3927 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3928 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3929 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3930 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3931 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3932 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3933 putIReg64(R_RDX, mkexpr(resHi)); 3934 putIReg64(R_RAX, mkexpr(resLo)); 3935 break; 3936 } 3937 case Ity_I32: { 3938 IRTemp res64 = newTemp(Ity_I64); 3939 IRTemp resHi = newTemp(Ity_I32); 3940 IRTemp resLo = newTemp(Ity_I32); 3941 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3942 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3943 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3944 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3945 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3946 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3947 putIRegRDX(4, mkexpr(resHi)); 3948 putIRegRAX(4, mkexpr(resLo)); 3949 break; 3950 } 3951 case Ity_I16: { 3952 IRTemp res32 = newTemp(Ity_I32); 3953 IRTemp resHi = newTemp(Ity_I16); 3954 IRTemp resLo = newTemp(Ity_I16); 3955 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3956 UInt tBaseOp = syned ? 
AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3957 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3958 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3959 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3960 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3961 putIRegRDX(2, mkexpr(resHi)); 3962 putIRegRAX(2, mkexpr(resLo)); 3963 break; 3964 } 3965 case Ity_I8: { 3966 IRTemp res16 = newTemp(Ity_I16); 3967 IRTemp resHi = newTemp(Ity_I8); 3968 IRTemp resLo = newTemp(Ity_I8); 3969 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3970 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3971 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3972 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3973 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3974 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3975 putIRegRAX(2, mkexpr(res16)); 3976 break; 3977 } 3978 default: 3979 ppIRType(ty); 3980 vpanic("codegen_mulL_A_D(amd64)"); 3981 } 3982 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3983 } 3984 3985 3986 /* Group 3 extended opcodes. We have to decide here whether F2 and F3 3987 might be valid.*/ 3988 static 3989 ULong dis_Grp3 ( const VexAbiInfo* vbi, 3990 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3991 { 3992 Long d64; 3993 UChar modrm; 3994 HChar dis_buf[50]; 3995 Int len; 3996 IRTemp addr; 3997 IRType ty = szToITy(sz); 3998 IRTemp t1 = newTemp(ty); 3999 IRTemp dst1, src, dst0; 4000 *decode_OK = True; 4001 modrm = getUChar(delta); 4002 if (epartIsReg(modrm)) { 4003 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 
*/
      /* Register (non-memory) forms of Grp3.  F2/F3 prefixes are
         never valid here. */
      if (haveF2orF3(pfx)) goto unhandled;
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            delta++;
            /* Immediate is at most 4 bytes even for 8-byte operands
               (it gets sign-extended). */
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIRegE(sz,pfx,modrm),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n",
                nameISize(sz), d64,
                nameIRegE(sz, pfx, modrm));
            break;
         }
         case 1:
            /* /1 is not allocated in Grp3. */
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            delta++;
            putIRegE(sz, pfx, modrm,
                              unop(mkSizedOp(ty,Iop_Not8),
                                   getIRegE(sz, pfx, modrm)));
            DIP("not%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 3: /* NEG */
            delta++;
            /* NEG is modelled as 0 - E so the standard SUB flag
               thunk can be used. */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  getIRegE(sz, pfx, modrm));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIRegE(sz, pfx, modrm, mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, False, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, True, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz),
                               nameIRegE(sz, pfx, modrm));
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,R)");
      }
   } else {
      /* Memory forms.  Decide if F2/XACQ or F3/XREL might be valid:
         only for the LOCKable ops NOT and NEG, with a LOCK prefix
         present, and not both F2 and F3 at once. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandled;
      /* */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* we have to inform disAMode of any immediate
                           bytes used */
                        gregLO3ofRM(modrm)==0/*TEST*/
                           ? imin(4,sz)
                           : 0
                      );
      t1   = newTemp(ty);
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
            break;
         }
         case 1:
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            if (haveLOCK(pfx)) {
               /* LOCKed update: compare-and-swap against the value
                  loaded above, retrying at this insn on failure. */
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL (unsigned widening) */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,M)");
      }
   }
   return delta;
  unhandled:
   *decode_OK = False;
   return delta;
}


/* Group 4 extended opcodes (INC/DEC Eb).  We have to decide here
   whether F2 and F3 might be valid.  Returns the updated delta;
   clears *decode_OK on failure. */
static
ULong dis_Grp4 ( const VexAbiInfo* vbi,
                 Prefix pfx, Long delta, Bool* decode_OK )
{
   Int   alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
      if (haveF2orF3(pfx)) goto unhandled;
      assign(t1, getIRegE(1, pfx, modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
                      nameIRegE(1, pfx, modrm));
   } else {
      /* Decide if F2/XACQ or F3/XREL might be valid. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandled;
      /* */
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
   }
   return delta;
  unhandled:
   *decode_OK = False;
   return delta;
}


/* Group 5 extended opcodes (INC/DEC/CALL/JMP/PUSH Ev).  We have to
   decide here whether F2 and F3 might be valid.  CALL and JMP end the
   basic block (dres is updated accordingly). */
static
ULong dis_Grp5 ( const VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta,
                 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int     len;
   UChar   modrm;
   HChar   dis_buf[50];
   IRTemp  addr = IRTemp_INVALID;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp  t2 = IRTemp_INVALID;
   IRTemp  t3 = IRTemp_INVALID;
   Bool    showSz = True;

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
         F2/CALL and F2/JMP may have bnd prefix. */
      if (haveF2orF3(pfx)
          && ! (haveF2(pfx)
                && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
         goto unhandledR;
      assign(t1, getIRegE(sz,pfx,modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledR;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            /* Push the return address (next insn) and jump. */
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* jmp Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledR;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 6: /* PUSH Ev */
            /* There is no encoding for 32-bit operand size; hence ... */
            if (sz == 4) sz = 8;
            if (sz == 8 || sz == 2) {
               ty = szToITy(sz); /* redo it, since sz might have changed */
               t3 = newTemp(ty);
               assign(t3, getIRegE(sz,pfx,modrm));
               t2 = newTemp(Ity_I64);
               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
               putIReg64(R_RSP, mkexpr(t2) );
               storeLE( mkexpr(t2), mkexpr(t3) );
               break;
            } else {
               goto unhandledR; /* awaiting test case */
            }
         default:
         unhandledR:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       nameIRegE(sz, pfx, modrm));
   } else {
      /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
                 && (haveF2(pfx) && !haveF3(pfx))) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandledM;
      /* */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      /* CALL/JMP/PUSH load for themselves (at the width they need);
         only INC/DEC use t1. */
      if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
                                  && gregLO3ofRM(modrm) != 6) {
         assign(t1, loadLE(ty,mkexpr(addr)));
      }
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledM;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* JMP Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledM;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 6: /* PUSH Ev */
            /* There is no encoding for 32-bit operand size; hence ... */
            if (sz == 4) sz = 8;
            if (sz == 8 || sz == 2) {
               ty = szToITy(sz); /* redo it, since sz might have changed */
               t3 = newTemp(ty);
               assign(t3, loadLE(ty,mkexpr(addr)));
               t2 = newTemp(Ity_I64);
               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
               putIReg64(R_RSP, mkexpr(t2) );
               storeLE( mkexpr(t2), mkexpr(t3) );
               break;
            } else {
               goto unhandledM; /* awaiting test case */
            }
         default:
         unhandledM:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       dis_buf);
   }
   return delta;
}


/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops: sets t_inc to +/- the element
   size, derived from guest DFLAG (which holds +1 or -1). */
static
void dis_string_op_increment ( Int sz, IRTemp t_inc )
{
   UChar logSz;
   if (sz == 8 || sz == 4 || sz == 2) {
      logSz = 1;
      if (sz == 4) logSz = 2;
      if (sz == 8) logSz = 3;
      /* DFLAG * sz, done as a shift since sz is a power of 2. */
      assign( t_inc,
              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
                               mkU8(logSz) ) );
   } else {
      assign( t_inc,
              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   }
}

/* Emit a single (non-REP) string op of size sz via dis_OP. */
static
void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                    Int sz, const HChar* name, Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc, pfx );
   DIP("%s%c\n", name, nameISize(sz));
}

/* MOVS: *RDI = *RSI; advance RDI/RSI by t_inc.  With an address-size
   override (0x67) only the low 32 bits of RDI/RSI are used/updated. */
static
void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* LODS: rAX = *RSI; advance RSI by t_inc. */
static
void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incs;

   if (haveASO(pfx))
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   else
      assign( ts, getIReg64(R_RSI) );

   putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );

   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx))
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   putIReg64( R_RSI, incs );
}

/* STOS: *RDI = rAX; advance RDI by t_inc. */
static
void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* rAX */
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}

/* CMPS: set flags from *RSI - *RDI; advance both by t_inc. */
static
void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);        /* (RDI) */
   IRTemp tsv = newTemp(ty);        /* (RSI) */
   IRTemp td  = newTemp(Ity_I64);   /*  RDI  */
   IRTemp ts  = newTemp(Ity_I64);   /*  RSI  */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   assign( tdv, loadLE(ty,mkexpr(td)) );

   assign( tsv, loadLE(ty,mkexpr(ts)) );

   /* Flags come from the subtraction (RSI)-operand minus
      (RDI)-operand, per CMPS semantics. */
   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* SCAS: set flags from rAX - *RDI; advance RDI by t_inc. */
static
void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);        /*  rAX  */
   IRTemp td  = newTemp(Ity_I64);   /*  RDI  */
   IRTemp tdv = newTemp(ty);        /* (RDI) */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}


/* Wrap the appropriate string op inside a REP/REPE/REPNE.
We assume 4608 the insn is the last one in the basic block, and so emit a jump to 4609 the next insn, rather than just falling through. */ 4610 static 4611 void dis_REP_op ( /*MOD*/DisResult* dres, 4612 AMD64Condcode cond, 4613 void (*dis_OP)(Int, IRTemp, Prefix), 4614 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name, 4615 Prefix pfx ) 4616 { 4617 IRTemp t_inc = newTemp(Ity_I64); 4618 IRTemp tc; 4619 IRExpr* cmp; 4620 4621 /* Really we ought to inspect the override prefixes, but we don't. 4622 The following assertion catches any resulting sillyness. */ 4623 vassert(pfx == clearSegBits(pfx)); 4624 4625 if (haveASO(pfx)) { 4626 tc = newTemp(Ity_I32); /* ECX */ 4627 assign( tc, getIReg32(R_RCX) ); 4628 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0)); 4629 } else { 4630 tc = newTemp(Ity_I64); /* RCX */ 4631 assign( tc, getIReg64(R_RCX) ); 4632 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); 4633 } 4634 4635 stmt( IRStmt_Exit( cmp, Ijk_Boring, 4636 IRConst_U64(rip_next), OFFB_RIP ) ); 4637 4638 if (haveASO(pfx)) 4639 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 4640 else 4641 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4642 4643 dis_string_op_increment(sz, t_inc); 4644 dis_OP (sz, t_inc, pfx); 4645 4646 if (cond == AMD64CondAlways) { 4647 jmp_lit(dres, Ijk_Boring, rip); 4648 vassert(dres->whatNext == Dis_StopHere); 4649 } else { 4650 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4651 Ijk_Boring, 4652 IRConst_U64(rip), 4653 OFFB_RIP ) ); 4654 jmp_lit(dres, Ijk_Boring, rip_next); 4655 vassert(dres->whatNext == Dis_StopHere); 4656 } 4657 DIP("%s%c\n", name, nameISize(sz)); 4658 } 4659 4660 4661 /*------------------------------------------------------------*/ 4662 /*--- Arithmetic, etc. ---*/ 4663 /*------------------------------------------------------------*/ 4664 4665 /* IMUL E, G. Supplied eip points to the modR/M byte. 
*/ 4666 static 4667 ULong dis_mul_E_G ( const VexAbiInfo* vbi, 4668 Prefix pfx, 4669 Int size, 4670 Long delta0 ) 4671 { 4672 Int alen; 4673 HChar dis_buf[50]; 4674 UChar rm = getUChar(delta0); 4675 IRType ty = szToITy(size); 4676 IRTemp te = newTemp(ty); 4677 IRTemp tg = newTemp(ty); 4678 IRTemp resLo = newTemp(ty); 4679 4680 assign( tg, getIRegG(size, pfx, rm) ); 4681 if (epartIsReg(rm)) { 4682 assign( te, getIRegE(size, pfx, rm) ); 4683 } else { 4684 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4685 assign( te, loadLE(ty,mkexpr(addr)) ); 4686 } 4687 4688 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4689 4690 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4691 4692 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4693 4694 if (epartIsReg(rm)) { 4695 DIP("imul%c %s, %s\n", nameISize(size), 4696 nameIRegE(size,pfx,rm), 4697 nameIRegG(size,pfx,rm)); 4698 return 1+delta0; 4699 } else { 4700 DIP("imul%c %s, %s\n", nameISize(size), 4701 dis_buf, 4702 nameIRegG(size,pfx,rm)); 4703 return alen+delta0; 4704 } 4705 } 4706 4707 4708 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. 
*/ 4709 static 4710 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi, 4711 Prefix pfx, 4712 Int size, 4713 Long delta, 4714 Int litsize ) 4715 { 4716 Long d64; 4717 Int alen; 4718 HChar dis_buf[50]; 4719 UChar rm = getUChar(delta); 4720 IRType ty = szToITy(size); 4721 IRTemp te = newTemp(ty); 4722 IRTemp tl = newTemp(ty); 4723 IRTemp resLo = newTemp(ty); 4724 4725 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4726 4727 if (epartIsReg(rm)) { 4728 assign(te, getIRegE(size, pfx, rm)); 4729 delta++; 4730 } else { 4731 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4732 imin(4,litsize) ); 4733 assign(te, loadLE(ty, mkexpr(addr))); 4734 delta += alen; 4735 } 4736 d64 = getSDisp(imin(4,litsize),delta); 4737 delta += imin(4,litsize); 4738 4739 d64 &= mkSizeMask(size); 4740 assign(tl, mkU(ty,d64)); 4741 4742 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4743 4744 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4745 4746 putIRegG(size, pfx, rm, mkexpr(resLo)); 4747 4748 DIP("imul%c $%lld, %s, %s\n", 4749 nameISize(size), d64, 4750 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4751 nameIRegG(size,pfx,rm) ); 4752 return delta; 4753 } 4754 4755 4756 /* Generate an IR sequence to do a popcount operation on the supplied 4757 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4758 Ity_I16, Ity_I32 or Ity_I64 only. 
*/ 4759 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4760 { 4761 Int i; 4762 if (ty == Ity_I16) { 4763 IRTemp old = IRTemp_INVALID; 4764 IRTemp nyu = IRTemp_INVALID; 4765 IRTemp mask[4], shift[4]; 4766 for (i = 0; i < 4; i++) { 4767 mask[i] = newTemp(ty); 4768 shift[i] = 1 << i; 4769 } 4770 assign(mask[0], mkU16(0x5555)); 4771 assign(mask[1], mkU16(0x3333)); 4772 assign(mask[2], mkU16(0x0F0F)); 4773 assign(mask[3], mkU16(0x00FF)); 4774 old = src; 4775 for (i = 0; i < 4; i++) { 4776 nyu = newTemp(ty); 4777 assign(nyu, 4778 binop(Iop_Add16, 4779 binop(Iop_And16, 4780 mkexpr(old), 4781 mkexpr(mask[i])), 4782 binop(Iop_And16, 4783 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4784 mkexpr(mask[i])))); 4785 old = nyu; 4786 } 4787 return nyu; 4788 } 4789 if (ty == Ity_I32) { 4790 IRTemp old = IRTemp_INVALID; 4791 IRTemp nyu = IRTemp_INVALID; 4792 IRTemp mask[5], shift[5]; 4793 for (i = 0; i < 5; i++) { 4794 mask[i] = newTemp(ty); 4795 shift[i] = 1 << i; 4796 } 4797 assign(mask[0], mkU32(0x55555555)); 4798 assign(mask[1], mkU32(0x33333333)); 4799 assign(mask[2], mkU32(0x0F0F0F0F)); 4800 assign(mask[3], mkU32(0x00FF00FF)); 4801 assign(mask[4], mkU32(0x0000FFFF)); 4802 old = src; 4803 for (i = 0; i < 5; i++) { 4804 nyu = newTemp(ty); 4805 assign(nyu, 4806 binop(Iop_Add32, 4807 binop(Iop_And32, 4808 mkexpr(old), 4809 mkexpr(mask[i])), 4810 binop(Iop_And32, 4811 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4812 mkexpr(mask[i])))); 4813 old = nyu; 4814 } 4815 return nyu; 4816 } 4817 if (ty == Ity_I64) { 4818 IRTemp old = IRTemp_INVALID; 4819 IRTemp nyu = IRTemp_INVALID; 4820 IRTemp mask[6], shift[6]; 4821 for (i = 0; i < 6; i++) { 4822 mask[i] = newTemp(ty); 4823 shift[i] = 1 << i; 4824 } 4825 assign(mask[0], mkU64(0x5555555555555555ULL)); 4826 assign(mask[1], mkU64(0x3333333333333333ULL)); 4827 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4828 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4829 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4830 assign(mask[5], 
mkU64(0x00000000FFFFFFFFULL)); 4831 old = src; 4832 for (i = 0; i < 6; i++) { 4833 nyu = newTemp(ty); 4834 assign(nyu, 4835 binop(Iop_Add64, 4836 binop(Iop_And64, 4837 mkexpr(old), 4838 mkexpr(mask[i])), 4839 binop(Iop_And64, 4840 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4841 mkexpr(mask[i])))); 4842 old = nyu; 4843 } 4844 return nyu; 4845 } 4846 /*NOTREACHED*/ 4847 vassert(0); 4848 } 4849 4850 4851 /* Generate an IR sequence to do a count-leading-zeroes operation on 4852 the supplied IRTemp, and return a new IRTemp holding the result. 4853 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4854 the argument is zero, return the number of bits in the word (the 4855 natural semantics). */ 4856 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4857 { 4858 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4859 4860 IRTemp src64 = newTemp(Ity_I64); 4861 assign(src64, widenUto64( mkexpr(src) )); 4862 4863 IRTemp src64x = newTemp(Ity_I64); 4864 assign(src64x, 4865 binop(Iop_Shl64, mkexpr(src64), 4866 mkU8(64 - 8 * sizeofIRType(ty)))); 4867 4868 // Clz64 has undefined semantics when its input is zero, so 4869 // special-case around that. 4870 IRTemp res64 = newTemp(Ity_I64); 4871 assign(res64, 4872 IRExpr_ITE( 4873 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)), 4874 mkU64(8 * sizeofIRType(ty)), 4875 unop(Iop_Clz64, mkexpr(src64x)) 4876 )); 4877 4878 IRTemp res = newTemp(ty); 4879 assign(res, narrowTo(ty, mkexpr(res64))); 4880 return res; 4881 } 4882 4883 4884 /* Generate an IR sequence to do a count-trailing-zeroes operation on 4885 the supplied IRTemp, and return a new IRTemp holding the result. 4886 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4887 the argument is zero, return the number of bits in the word (the 4888 natural semantics). 
*/ 4889 static IRTemp gen_TZCNT ( IRType ty, IRTemp src ) 4890 { 4891 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4892 4893 IRTemp src64 = newTemp(Ity_I64); 4894 assign(src64, widenUto64( mkexpr(src) )); 4895 4896 // Ctz64 has undefined semantics when its input is zero, so 4897 // special-case around that. 4898 IRTemp res64 = newTemp(Ity_I64); 4899 assign(res64, 4900 IRExpr_ITE( 4901 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)), 4902 mkU64(8 * sizeofIRType(ty)), 4903 unop(Iop_Ctz64, mkexpr(src64)) 4904 )); 4905 4906 IRTemp res = newTemp(ty); 4907 assign(res, narrowTo(ty, mkexpr(res64))); 4908 return res; 4909 } 4910 4911 4912 /*------------------------------------------------------------*/ 4913 /*--- ---*/ 4914 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4915 /*--- ---*/ 4916 /*------------------------------------------------------------*/ 4917 4918 /* --- Helper functions for dealing with the register stack. --- */ 4919 4920 /* --- Set the emulation-warning pseudo-register. --- */ 4921 4922 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4923 { 4924 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4925 stmt( IRStmt_Put( OFFB_EMNOTE, e ) ); 4926 } 4927 4928 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4929 4930 static IRExpr* mkQNaN64 ( void ) 4931 { 4932 /* QNaN is 0 2047 1 0(51times) 4933 == 0b 11111111111b 1 0(51times) 4934 == 0x7FF8 0000 0000 0000 4935 */ 4936 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4937 } 4938 4939 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4940 4941 static IRExpr* get_ftop ( void ) 4942 { 4943 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4944 } 4945 4946 static void put_ftop ( IRExpr* e ) 4947 { 4948 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4949 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4950 } 4951 4952 /* --------- Get/put the C3210 bits. 
--------- */ 4953 4954 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4955 { 4956 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4957 } 4958 4959 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4960 { 4961 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4962 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4963 } 4964 4965 /* --------- Get/put the FPU rounding mode. --------- */ 4966 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4967 { 4968 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4969 } 4970 4971 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4972 { 4973 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4974 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4975 } 4976 4977 4978 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4979 /* Produces a value in 0 .. 3, which is encoded as per the type 4980 IRRoundingMode. Since the guest_FPROUND value is also encoded as 4981 per IRRoundingMode, we merely need to get it and mask it for 4982 safety. 4983 */ 4984 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4985 { 4986 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4987 } 4988 4989 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4990 { 4991 return mkU32(Irrm_NEAREST); 4992 } 4993 4994 4995 /* --------- Get/set FP register tag bytes. --------- */ 4996 4997 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 4998 4999 static void put_ST_TAG ( Int i, IRExpr* value ) 5000 { 5001 IRRegArray* descr; 5002 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 5003 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5004 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5005 } 5006 5007 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 5008 zero to indicate "Empty" and nonzero to indicate "NonEmpty". 
*/ 5009 5010 static IRExpr* get_ST_TAG ( Int i ) 5011 { 5012 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5013 return IRExpr_GetI( descr, get_ftop(), i ); 5014 } 5015 5016 5017 /* --------- Get/set FP registers. --------- */ 5018 5019 /* Given i, and some expression e, emit 'ST(i) = e' and set the 5020 register's tag to indicate the register is full. The previous 5021 state of the register is not checked. */ 5022 5023 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 5024 { 5025 IRRegArray* descr; 5026 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 5027 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5028 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5029 /* Mark the register as in-use. */ 5030 put_ST_TAG(i, mkU8(1)); 5031 } 5032 5033 /* Given i, and some expression e, emit 5034 ST(i) = is_full(i) ? NaN : e 5035 and set the tag accordingly. 5036 */ 5037 5038 static void put_ST ( Int i, IRExpr* value ) 5039 { 5040 put_ST_UNCHECKED( 5041 i, 5042 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5043 /* non-0 means full */ 5044 mkQNaN64(), 5045 /* 0 means empty */ 5046 value 5047 ) 5048 ); 5049 } 5050 5051 5052 /* Given i, generate an expression yielding 'ST(i)'. */ 5053 5054 static IRExpr* get_ST_UNCHECKED ( Int i ) 5055 { 5056 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5057 return IRExpr_GetI( descr, get_ftop(), i ); 5058 } 5059 5060 5061 /* Given i, generate an expression yielding 5062 is_full(i) ? ST(i) : NaN 5063 */ 5064 5065 static IRExpr* get_ST ( Int i ) 5066 { 5067 return 5068 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5069 /* non-0 means full */ 5070 get_ST_UNCHECKED(i), 5071 /* 0 means empty */ 5072 mkQNaN64()); 5073 } 5074 5075 5076 /* Given i, and some expression e, and a condition cond, generate IR 5077 which has the same effect as put_ST(i,e) when cond is true and has 5078 no effect when cond is false. 
Given the lack of proper 5079 if-then-else in the IR, this is pretty tricky. 5080 */ 5081 5082 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) 5083 { 5084 // new_tag = if cond then FULL else old_tag 5085 // new_val = if cond then (if old_tag==FULL then NaN else val) 5086 // else old_val 5087 5088 IRTemp old_tag = newTemp(Ity_I8); 5089 assign(old_tag, get_ST_TAG(i)); 5090 IRTemp new_tag = newTemp(Ity_I8); 5091 assign(new_tag, 5092 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); 5093 5094 IRTemp old_val = newTemp(Ity_F64); 5095 assign(old_val, get_ST_UNCHECKED(i)); 5096 IRTemp new_val = newTemp(Ity_F64); 5097 assign(new_val, 5098 IRExpr_ITE(mkexpr(cond), 5099 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), 5100 /* non-0 means full */ 5101 mkQNaN64(), 5102 /* 0 means empty */ 5103 value), 5104 mkexpr(old_val))); 5105 5106 put_ST_UNCHECKED(i, mkexpr(new_val)); 5107 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So 5108 // now set it to new_tag instead. 5109 put_ST_TAG(i, mkexpr(new_tag)); 5110 } 5111 5112 /* Adjust FTOP downwards by one register. */ 5113 5114 static void fp_push ( void ) 5115 { 5116 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 5117 } 5118 5119 /* Adjust FTOP downwards by one register when COND is 1:I1. Else 5120 don't change it. */ 5121 5122 static void maybe_fp_push ( IRTemp cond ) 5123 { 5124 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); 5125 } 5126 5127 /* Adjust FTOP upwards by one register, and mark the vacated register 5128 as empty. */ 5129 5130 static void fp_pop ( void ) 5131 { 5132 put_ST_TAG(0, mkU8(0)); 5133 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5134 } 5135 5136 /* Set the C2 bit of the FPU status register to e[0]. Assumes that 5137 e[31:1] == 0. 
5138 */ 5139 static void set_C2 ( IRExpr* e ) 5140 { 5141 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); 5142 put_C3210( binop(Iop_Or64, 5143 cleared, 5144 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); 5145 } 5146 5147 /* Generate code to check that abs(d64) < 2^63 and is finite. This is 5148 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 5149 test is simple, but the derivation of it is not so simple. 5150 5151 The exponent field for an IEEE754 double is 11 bits. That means it 5152 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 5153 the number is either a NaN or an Infinity and so is not finite. 5154 Furthermore, a finite value of exactly 2^63 is the smallest value 5155 that has exponent value 0x43E. Hence, what we need to do is 5156 extract the exponent, ignoring the sign bit and mantissa, and check 5157 it is < 0x43E, or <= 0x43D. 5158 5159 To make this easily applicable to 32- and 64-bit targets, a 5160 roundabout approach is used. First the number is converted to I64, 5161 then the top 32 bits are taken. Shifting them right by 20 bits 5162 places the sign bit and exponent in the bottom 12 bits. Anding 5163 with 0x7FF gets rid of the sign bit, leaving just the exponent 5164 available for comparison. 
5165 */ 5166 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 5167 { 5168 IRTemp i64 = newTemp(Ity_I64); 5169 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 5170 IRTemp exponent = newTemp(Ity_I32); 5171 assign(exponent, 5172 binop(Iop_And32, 5173 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 5174 mkU32(0x7FF))); 5175 IRTemp in_range_and_finite = newTemp(Ity_I1); 5176 assign(in_range_and_finite, 5177 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 5178 return in_range_and_finite; 5179 } 5180 5181 /* Invent a plausible-looking FPU status word value: 5182 ((ftop & 7) << 11) | (c3210 & 0x4700) 5183 */ 5184 static IRExpr* get_FPU_sw ( void ) 5185 { 5186 return 5187 unop(Iop_32to16, 5188 binop(Iop_Or32, 5189 binop(Iop_Shl32, 5190 binop(Iop_And32, get_ftop(), mkU32(7)), 5191 mkU8(11)), 5192 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 5193 mkU32(0x4700)) 5194 )); 5195 } 5196 5197 5198 /* ------------------------------------------------------- */ 5199 /* Given all that stack-mangling junk, we can now go ahead 5200 and describe FP instructions. 5201 */ 5202 5203 /* ST(0) = ST(0) `op` mem64/32(addr) 5204 Need to check ST(0)'s tag on read, but not on write. 5205 */ 5206 static 5207 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5208 IROp op, Bool dbl ) 5209 { 5210 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5211 if (dbl) { 5212 put_ST_UNCHECKED(0, 5213 triop( op, 5214 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5215 get_ST(0), 5216 loadLE(Ity_F64,mkexpr(addr)) 5217 )); 5218 } else { 5219 put_ST_UNCHECKED(0, 5220 triop( op, 5221 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5222 get_ST(0), 5223 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 5224 )); 5225 } 5226 } 5227 5228 5229 /* ST(0) = mem64/32(addr) `op` ST(0) 5230 Need to check ST(0)'s tag on read, but not on write. 
*/
static
void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
                            IROp op, Bool dbl )
{
   /* Same as fp_do_op_mem_ST_0 except the operand order is reversed
      (memory operand first) -- used for the FSUBR/FDIVR family. */
   DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   if (dbl) {
      put_ST_UNCHECKED(0,
         triop( op,
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                loadLE(Ity_F64,mkexpr(addr)),
                get_ST(0)
         ));
   } else {
      put_ST_UNCHECKED(0,
         triop( op,
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
                get_ST(0)
         ));
   }
}


/* ST(dst) = ST(dst) `op` ST(src).
   Check dst and src tags when reading but not on write.
   If 'pop_after', the x87 stack is popped after the operation. */
static
void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
                      Bool pop_after )
{
   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   put_ST_UNCHECKED(
      st_dst,
      triop( op,
             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
             get_ST(st_dst),
             get_ST(st_src) )
   );
   if (pop_after)
      fp_pop();
}

/* ST(dst) = ST(src) `op` ST(dst).
   Check dst and src tags when reading but not on write.
   Reversed-operand variant of fp_do_op_ST_ST. */
static
void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
                         Bool pop_after )
{
   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   put_ST_UNCHECKED(
      st_dst,
      triop( op,
             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
             get_ST(st_src),
             get_ST(st_dst) )
   );
   if (pop_after)
      fp_pop();
}

/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
{
   DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
   /* This is a bit of a hack (and isn't really right).  It sets
      Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
      documentation implies A and S are unchanged.
   */
   /* It's also fishy in that it is used both for COMIP and
      UCOMIP, and they aren't the same (although similar). */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   /* Mask the CmpF64 result with 0x45 to keep only the Z,P,C-shaped
      bits for the COPY thunk. */
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, get_ST(0), get_ST(i))),
                   mkU64(0x45)
        )));
   if (pop_after)
      fp_pop();
}


/* returns
   32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
*/
static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
{
   IRTemp t32 = newTemp(Ity_I32);
   assign( t32, e32 );
   /* Adding 32768 maps the signed range [-32768 .. 32767] onto the
      unsigned range [0 .. 65535]; anything outside that window means
      the value doesn't fit in 16 bits, so saturate to 0x8000. */
   return
      IRExpr_ITE(
         binop(Iop_CmpLT64U,
               unop(Iop_32Uto64,
                    binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
               mkU64(65536)),
         unop(Iop_32to16, mkexpr(t32)),
         mkU16( 0x8000 ) );
}


/* Disassemble one x87 FPU instruction.  On success, advances past the
   insn and returns the updated delta; sets *decode_ok to False (and
   leaves delta unspecified) on a decode failure. */
static
ULong dis_FPU ( /*OUT*/Bool* decode_ok,
                const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int    len;
   UInt   r_src, r_dst;
   HChar  dis_buf[50];
   IRTemp t1, t2;

   /* On entry, delta points at the second byte of the insn (the modrm
      byte).*/
   UChar first_opcode = getUChar(delta-1);
   UChar modrm        = getUChar(delta+0);

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xD8) {
      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address.
*/ 5356 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5357 delta += len; 5358 5359 switch (gregLO3ofRM(modrm)) { 5360 5361 case 0: /* FADD single-real */ 5362 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5363 break; 5364 5365 case 1: /* FMUL single-real */ 5366 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5367 break; 5368 5369 case 2: /* FCOM single-real */ 5370 DIP("fcoms %s\n", dis_buf); 5371 /* This forces C1 to zero, which isn't right. */ 5372 /* The AMD documentation suggests that forcing C1 to 5373 zero is correct (Eliot Moss) */ 5374 put_C3210( 5375 unop( Iop_32Uto64, 5376 binop( Iop_And32, 5377 binop(Iop_Shl32, 5378 binop(Iop_CmpF64, 5379 get_ST(0), 5380 unop(Iop_F32toF64, 5381 loadLE(Ity_F32,mkexpr(addr)))), 5382 mkU8(8)), 5383 mkU32(0x4500) 5384 ))); 5385 break; 5386 5387 case 3: /* FCOMP single-real */ 5388 /* The AMD documentation suggests that forcing C1 to 5389 zero is correct (Eliot Moss) */ 5390 DIP("fcomps %s\n", dis_buf); 5391 /* This forces C1 to zero, which isn't right. 
*/ 5392 put_C3210( 5393 unop( Iop_32Uto64, 5394 binop( Iop_And32, 5395 binop(Iop_Shl32, 5396 binop(Iop_CmpF64, 5397 get_ST(0), 5398 unop(Iop_F32toF64, 5399 loadLE(Ity_F32,mkexpr(addr)))), 5400 mkU8(8)), 5401 mkU32(0x4500) 5402 ))); 5403 fp_pop(); 5404 break; 5405 5406 case 4: /* FSUB single-real */ 5407 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5408 break; 5409 5410 case 5: /* FSUBR single-real */ 5411 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5412 break; 5413 5414 case 6: /* FDIV single-real */ 5415 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5416 break; 5417 5418 case 7: /* FDIVR single-real */ 5419 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5420 break; 5421 5422 default: 5423 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5424 vex_printf("first_opcode == 0xD8\n"); 5425 goto decode_fail; 5426 } 5427 } else { 5428 delta++; 5429 switch (modrm) { 5430 5431 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 5432 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5433 break; 5434 5435 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5436 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5437 break; 5438 5439 /* Dunno if this is right */ 5440 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5441 r_dst = (UInt)modrm - 0xD0; 5442 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 5443 /* This forces C1 to zero, which isn't right. */ 5444 put_C3210( 5445 unop(Iop_32Uto64, 5446 binop( Iop_And32, 5447 binop(Iop_Shl32, 5448 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5449 mkU8(8)), 5450 mkU32(0x4500) 5451 ))); 5452 break; 5453 5454 /* Dunno if this is right */ 5455 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5456 r_dst = (UInt)modrm - 0xD8; 5457 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 5458 /* This forces C1 to zero, which isn't right. 
*/ 5459 put_C3210( 5460 unop(Iop_32Uto64, 5461 binop( Iop_And32, 5462 binop(Iop_Shl32, 5463 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5464 mkU8(8)), 5465 mkU32(0x4500) 5466 ))); 5467 fp_pop(); 5468 break; 5469 5470 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5471 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5472 break; 5473 5474 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5475 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5476 break; 5477 5478 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5479 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5480 break; 5481 5482 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5483 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5484 break; 5485 5486 default: 5487 goto decode_fail; 5488 } 5489 } 5490 } 5491 5492 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5493 else 5494 if (first_opcode == 0xD9) { 5495 if (modrm < 0xC0) { 5496 5497 /* bits 5,4,3 are an opcode extension, and the modRM also 5498 specifies an address. 
*/ 5499 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5500 delta += len; 5501 5502 switch (gregLO3ofRM(modrm)) { 5503 5504 case 0: /* FLD single-real */ 5505 DIP("flds %s\n", dis_buf); 5506 fp_push(); 5507 put_ST(0, unop(Iop_F32toF64, 5508 loadLE(Ity_F32, mkexpr(addr)))); 5509 break; 5510 5511 case 2: /* FST single-real */ 5512 DIP("fsts %s\n", dis_buf); 5513 storeLE(mkexpr(addr), 5514 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5515 break; 5516 5517 case 3: /* FSTP single-real */ 5518 DIP("fstps %s\n", dis_buf); 5519 storeLE(mkexpr(addr), 5520 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5521 fp_pop(); 5522 break; 5523 5524 case 4: { /* FLDENV m28 */ 5525 /* Uses dirty helper: 5526 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5527 IRTemp ew = newTemp(Ity_I32); 5528 IRTemp w64 = newTemp(Ity_I64); 5529 IRDirty* d = unsafeIRDirty_0_N ( 5530 0/*regparms*/, 5531 "amd64g_dirtyhelper_FLDENV", 5532 &amd64g_dirtyhelper_FLDENV, 5533 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5534 ); 5535 d->tmp = w64; 5536 /* declare we're reading memory */ 5537 d->mFx = Ifx_Read; 5538 d->mAddr = mkexpr(addr); 5539 d->mSize = 28; 5540 5541 /* declare we're writing guest state */ 5542 d->nFxState = 4; 5543 vex_bzero(&d->fxState, sizeof(d->fxState)); 5544 5545 d->fxState[0].fx = Ifx_Write; 5546 d->fxState[0].offset = OFFB_FTOP; 5547 d->fxState[0].size = sizeof(UInt); 5548 5549 d->fxState[1].fx = Ifx_Write; 5550 d->fxState[1].offset = OFFB_FPTAGS; 5551 d->fxState[1].size = 8 * sizeof(UChar); 5552 5553 d->fxState[2].fx = Ifx_Write; 5554 d->fxState[2].offset = OFFB_FPROUND; 5555 d->fxState[2].size = sizeof(ULong); 5556 5557 d->fxState[3].fx = Ifx_Write; 5558 d->fxState[3].offset = OFFB_FC3210; 5559 d->fxState[3].size = sizeof(ULong); 5560 5561 stmt( IRStmt_Dirty(d) ); 5562 5563 /* ew contains any emulation warning we may need to 5564 issue. 
If needed, side-exit to the next insn, 5565 reporting the warning, so that Valgrind's dispatcher 5566 sees the warning. */ 5567 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5568 put_emwarn( mkexpr(ew) ); 5569 stmt( 5570 IRStmt_Exit( 5571 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5572 Ijk_EmWarn, 5573 IRConst_U64( guest_RIP_bbstart+delta ), 5574 OFFB_RIP 5575 ) 5576 ); 5577 5578 DIP("fldenv %s\n", dis_buf); 5579 break; 5580 } 5581 5582 case 5: {/* FLDCW */ 5583 /* The only thing we observe in the control word is the 5584 rounding mode. Therefore, pass the 16-bit value 5585 (x87 native-format control word) to a clean helper, 5586 getting back a 64-bit value, the lower half of which 5587 is the FPROUND value to store, and the upper half of 5588 which is the emulation-warning token which may be 5589 generated. 5590 */ 5591 /* ULong amd64h_check_fldcw ( ULong ); */ 5592 IRTemp t64 = newTemp(Ity_I64); 5593 IRTemp ew = newTemp(Ity_I32); 5594 DIP("fldcw %s\n", dis_buf); 5595 assign( t64, mkIRExprCCall( 5596 Ity_I64, 0/*regparms*/, 5597 "amd64g_check_fldcw", 5598 &amd64g_check_fldcw, 5599 mkIRExprVec_1( 5600 unop( Iop_16Uto64, 5601 loadLE(Ity_I16, mkexpr(addr))) 5602 ) 5603 ) 5604 ); 5605 5606 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5607 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5608 put_emwarn( mkexpr(ew) ); 5609 /* Finally, if an emulation warning was reported, 5610 side-exit to the next insn, reporting the warning, 5611 so that Valgrind's dispatcher sees the warning. 
*/ 5612 stmt( 5613 IRStmt_Exit( 5614 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5615 Ijk_EmWarn, 5616 IRConst_U64( guest_RIP_bbstart+delta ), 5617 OFFB_RIP 5618 ) 5619 ); 5620 break; 5621 } 5622 5623 case 6: { /* FNSTENV m28 */ 5624 /* Uses dirty helper: 5625 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5626 IRDirty* d = unsafeIRDirty_0_N ( 5627 0/*regparms*/, 5628 "amd64g_dirtyhelper_FSTENV", 5629 &amd64g_dirtyhelper_FSTENV, 5630 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5631 ); 5632 /* declare we're writing memory */ 5633 d->mFx = Ifx_Write; 5634 d->mAddr = mkexpr(addr); 5635 d->mSize = 28; 5636 5637 /* declare we're reading guest state */ 5638 d->nFxState = 4; 5639 vex_bzero(&d->fxState, sizeof(d->fxState)); 5640 5641 d->fxState[0].fx = Ifx_Read; 5642 d->fxState[0].offset = OFFB_FTOP; 5643 d->fxState[0].size = sizeof(UInt); 5644 5645 d->fxState[1].fx = Ifx_Read; 5646 d->fxState[1].offset = OFFB_FPTAGS; 5647 d->fxState[1].size = 8 * sizeof(UChar); 5648 5649 d->fxState[2].fx = Ifx_Read; 5650 d->fxState[2].offset = OFFB_FPROUND; 5651 d->fxState[2].size = sizeof(ULong); 5652 5653 d->fxState[3].fx = Ifx_Read; 5654 d->fxState[3].offset = OFFB_FC3210; 5655 d->fxState[3].size = sizeof(ULong); 5656 5657 stmt( IRStmt_Dirty(d) ); 5658 5659 DIP("fnstenv %s\n", dis_buf); 5660 break; 5661 } 5662 5663 case 7: /* FNSTCW */ 5664 /* Fake up a native x87 FPU control word. The only 5665 thing it depends on is FPROUND[1:0], so call a clean 5666 helper to cook it up. 
*/ 5667 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5668 DIP("fnstcw %s\n", dis_buf); 5669 storeLE( 5670 mkexpr(addr), 5671 unop( Iop_64to16, 5672 mkIRExprCCall( 5673 Ity_I64, 0/*regp*/, 5674 "amd64g_create_fpucw", &amd64g_create_fpucw, 5675 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5676 ) 5677 ) 5678 ); 5679 break; 5680 5681 default: 5682 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5683 vex_printf("first_opcode == 0xD9\n"); 5684 goto decode_fail; 5685 } 5686 5687 } else { 5688 delta++; 5689 switch (modrm) { 5690 5691 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5692 r_src = (UInt)modrm - 0xC0; 5693 DIP("fld %%st(%u)\n", r_src); 5694 t1 = newTemp(Ity_F64); 5695 assign(t1, get_ST(r_src)); 5696 fp_push(); 5697 put_ST(0, mkexpr(t1)); 5698 break; 5699 5700 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5701 r_src = (UInt)modrm - 0xC8; 5702 DIP("fxch %%st(%u)\n", r_src); 5703 t1 = newTemp(Ity_F64); 5704 t2 = newTemp(Ity_F64); 5705 assign(t1, get_ST(0)); 5706 assign(t2, get_ST(r_src)); 5707 put_ST_UNCHECKED(0, mkexpr(t2)); 5708 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5709 break; 5710 5711 case 0xE0: /* FCHS */ 5712 DIP("fchs\n"); 5713 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5714 break; 5715 5716 case 0xE1: /* FABS */ 5717 DIP("fabs\n"); 5718 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5719 break; 5720 5721 case 0xE5: { /* FXAM */ 5722 /* This is an interesting one. It examines %st(0), 5723 regardless of whether the tag says it's empty or not. 5724 Here, just pass both the tag (in our format) and the 5725 value (as a double, actually a ULong) to a helper 5726 function. 
*/ 5727 IRExpr** args 5728 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5729 unop(Iop_ReinterpF64asI64, 5730 get_ST_UNCHECKED(0)) ); 5731 put_C3210(mkIRExprCCall( 5732 Ity_I64, 5733 0/*regparm*/, 5734 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5735 args 5736 )); 5737 DIP("fxam\n"); 5738 break; 5739 } 5740 5741 case 0xE8: /* FLD1 */ 5742 DIP("fld1\n"); 5743 fp_push(); 5744 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5745 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5746 break; 5747 5748 case 0xE9: /* FLDL2T */ 5749 DIP("fldl2t\n"); 5750 fp_push(); 5751 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5752 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5753 break; 5754 5755 case 0xEA: /* FLDL2E */ 5756 DIP("fldl2e\n"); 5757 fp_push(); 5758 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5759 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5760 break; 5761 5762 case 0xEB: /* FLDPI */ 5763 DIP("fldpi\n"); 5764 fp_push(); 5765 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5766 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5767 break; 5768 5769 case 0xEC: /* FLDLG2 */ 5770 DIP("fldlg2\n"); 5771 fp_push(); 5772 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5773 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5774 break; 5775 5776 case 0xED: /* FLDLN2 */ 5777 DIP("fldln2\n"); 5778 fp_push(); 5779 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5780 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5781 break; 5782 5783 case 0xEE: /* FLDZ */ 5784 DIP("fldz\n"); 5785 fp_push(); 5786 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5787 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5788 break; 5789 5790 case 0xF0: /* F2XM1 */ 5791 DIP("f2xm1\n"); 5792 put_ST_UNCHECKED(0, 5793 binop(Iop_2xm1F64, 5794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5795 
get_ST(0))); 5796 break; 5797 5798 case 0xF1: /* FYL2X */ 5799 DIP("fyl2x\n"); 5800 put_ST_UNCHECKED(1, 5801 triop(Iop_Yl2xF64, 5802 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5803 get_ST(1), 5804 get_ST(0))); 5805 fp_pop(); 5806 break; 5807 5808 case 0xF2: { /* FPTAN */ 5809 DIP("fptan\n"); 5810 IRTemp argD = newTemp(Ity_F64); 5811 assign(argD, get_ST(0)); 5812 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5813 IRTemp resD = newTemp(Ity_F64); 5814 assign(resD, 5815 IRExpr_ITE( 5816 mkexpr(argOK), 5817 binop(Iop_TanF64, 5818 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5819 mkexpr(argD)), 5820 mkexpr(argD)) 5821 ); 5822 put_ST_UNCHECKED(0, mkexpr(resD)); 5823 /* Conditionally push 1.0 on the stack, if the arg is 5824 in range */ 5825 maybe_fp_push(argOK); 5826 maybe_put_ST(argOK, 0, 5827 IRExpr_Const(IRConst_F64(1.0))); 5828 set_C2( binop(Iop_Xor64, 5829 unop(Iop_1Uto64, mkexpr(argOK)), 5830 mkU64(1)) ); 5831 break; 5832 } 5833 5834 case 0xF3: /* FPATAN */ 5835 DIP("fpatan\n"); 5836 put_ST_UNCHECKED(1, 5837 triop(Iop_AtanF64, 5838 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5839 get_ST(1), 5840 get_ST(0))); 5841 fp_pop(); 5842 break; 5843 5844 case 0xF4: { /* FXTRACT */ 5845 IRTemp argF = newTemp(Ity_F64); 5846 IRTemp sigF = newTemp(Ity_F64); 5847 IRTemp expF = newTemp(Ity_F64); 5848 IRTemp argI = newTemp(Ity_I64); 5849 IRTemp sigI = newTemp(Ity_I64); 5850 IRTemp expI = newTemp(Ity_I64); 5851 DIP("fxtract\n"); 5852 assign( argF, get_ST(0) ); 5853 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5854 assign( sigI, 5855 mkIRExprCCall( 5856 Ity_I64, 0/*regparms*/, 5857 "x86amd64g_calculate_FXTRACT", 5858 &x86amd64g_calculate_FXTRACT, 5859 mkIRExprVec_2( mkexpr(argI), 5860 mkIRExpr_HWord(0)/*sig*/ )) 5861 ); 5862 assign( expI, 5863 mkIRExprCCall( 5864 Ity_I64, 0/*regparms*/, 5865 "x86amd64g_calculate_FXTRACT", 5866 &x86amd64g_calculate_FXTRACT, 5867 mkIRExprVec_2( mkexpr(argI), 5868 mkIRExpr_HWord(1)/*exp*/ )) 5869 ); 5870 assign( sigF, 
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5871 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5872 /* exponent */ 5873 put_ST_UNCHECKED(0, mkexpr(expF) ); 5874 fp_push(); 5875 /* significand */ 5876 put_ST(0, mkexpr(sigF) ); 5877 break; 5878 } 5879 5880 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5881 IRTemp a1 = newTemp(Ity_F64); 5882 IRTemp a2 = newTemp(Ity_F64); 5883 DIP("fprem1\n"); 5884 /* Do FPREM1 twice, once to get the remainder, and once 5885 to get the C3210 flag values. */ 5886 assign( a1, get_ST(0) ); 5887 assign( a2, get_ST(1) ); 5888 put_ST_UNCHECKED(0, 5889 triop(Iop_PRem1F64, 5890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5891 mkexpr(a1), 5892 mkexpr(a2))); 5893 put_C3210( 5894 unop(Iop_32Uto64, 5895 triop(Iop_PRem1C3210F64, 5896 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5897 mkexpr(a1), 5898 mkexpr(a2)) )); 5899 break; 5900 } 5901 5902 case 0xF7: /* FINCSTP */ 5903 DIP("fincstp\n"); 5904 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5905 break; 5906 5907 case 0xF8: { /* FPREM -- not IEEE compliant */ 5908 IRTemp a1 = newTemp(Ity_F64); 5909 IRTemp a2 = newTemp(Ity_F64); 5910 DIP("fprem\n"); 5911 /* Do FPREM twice, once to get the remainder, and once 5912 to get the C3210 flag values. 
*/ 5913 assign( a1, get_ST(0) ); 5914 assign( a2, get_ST(1) ); 5915 put_ST_UNCHECKED(0, 5916 triop(Iop_PRemF64, 5917 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5918 mkexpr(a1), 5919 mkexpr(a2))); 5920 put_C3210( 5921 unop(Iop_32Uto64, 5922 triop(Iop_PRemC3210F64, 5923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5924 mkexpr(a1), 5925 mkexpr(a2)) )); 5926 break; 5927 } 5928 5929 case 0xF9: /* FYL2XP1 */ 5930 DIP("fyl2xp1\n"); 5931 put_ST_UNCHECKED(1, 5932 triop(Iop_Yl2xp1F64, 5933 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5934 get_ST(1), 5935 get_ST(0))); 5936 fp_pop(); 5937 break; 5938 5939 case 0xFA: /* FSQRT */ 5940 DIP("fsqrt\n"); 5941 put_ST_UNCHECKED(0, 5942 binop(Iop_SqrtF64, 5943 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5944 get_ST(0))); 5945 break; 5946 5947 case 0xFB: { /* FSINCOS */ 5948 DIP("fsincos\n"); 5949 IRTemp argD = newTemp(Ity_F64); 5950 assign(argD, get_ST(0)); 5951 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5952 IRTemp resD = newTemp(Ity_F64); 5953 assign(resD, 5954 IRExpr_ITE( 5955 mkexpr(argOK), 5956 binop(Iop_SinF64, 5957 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5958 mkexpr(argD)), 5959 mkexpr(argD)) 5960 ); 5961 put_ST_UNCHECKED(0, mkexpr(resD)); 5962 /* Conditionally push the cos value on the stack, if 5963 the arg is in range */ 5964 maybe_fp_push(argOK); 5965 maybe_put_ST(argOK, 0, 5966 binop(Iop_CosF64, 5967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5968 mkexpr(argD))); 5969 set_C2( binop(Iop_Xor64, 5970 unop(Iop_1Uto64, mkexpr(argOK)), 5971 mkU64(1)) ); 5972 break; 5973 } 5974 5975 case 0xFC: /* FRNDINT */ 5976 DIP("frndint\n"); 5977 put_ST_UNCHECKED(0, 5978 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5979 break; 5980 5981 case 0xFD: /* FSCALE */ 5982 DIP("fscale\n"); 5983 put_ST_UNCHECKED(0, 5984 triop(Iop_ScaleF64, 5985 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5986 get_ST(0), 5987 get_ST(1))); 5988 break; 5989 5990 case 0xFE: /* FSIN */ 5991 case 0xFF: { /* FCOS 
*/ 5992 Bool isSIN = modrm == 0xFE; 5993 DIP("%s\n", isSIN ? "fsin" : "fcos"); 5994 IRTemp argD = newTemp(Ity_F64); 5995 assign(argD, get_ST(0)); 5996 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5997 IRTemp resD = newTemp(Ity_F64); 5998 assign(resD, 5999 IRExpr_ITE( 6000 mkexpr(argOK), 6001 binop(isSIN ? Iop_SinF64 : Iop_CosF64, 6002 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6003 mkexpr(argD)), 6004 mkexpr(argD)) 6005 ); 6006 put_ST_UNCHECKED(0, mkexpr(resD)); 6007 set_C2( binop(Iop_Xor64, 6008 unop(Iop_1Uto64, mkexpr(argOK)), 6009 mkU64(1)) ); 6010 break; 6011 } 6012 6013 default: 6014 goto decode_fail; 6015 } 6016 } 6017 } 6018 6019 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 6020 else 6021 if (first_opcode == 0xDA) { 6022 6023 if (modrm < 0xC0) { 6024 6025 /* bits 5,4,3 are an opcode extension, and the modRM also 6026 specifies an address. */ 6027 IROp fop; 6028 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6029 delta += len; 6030 switch (gregLO3ofRM(modrm)) { 6031 6032 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 6033 DIP("fiaddl %s\n", dis_buf); 6034 fop = Iop_AddF64; 6035 goto do_fop_m32; 6036 6037 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 6038 DIP("fimull %s\n", dis_buf); 6039 fop = Iop_MulF64; 6040 goto do_fop_m32; 6041 6042 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 6043 DIP("fisubl %s\n", dis_buf); 6044 fop = Iop_SubF64; 6045 goto do_fop_m32; 6046 6047 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 6048 DIP("fisubrl %s\n", dis_buf); 6049 fop = Iop_SubF64; 6050 goto do_foprev_m32; 6051 6052 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 6053 DIP("fisubl %s\n", dis_buf); 6054 fop = Iop_DivF64; 6055 goto do_fop_m32; 6056 6057 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 6058 DIP("fidivrl %s\n", dis_buf); 6059 fop = Iop_DivF64; 6060 goto do_foprev_m32; 6061 6062 do_fop_m32: 6063 put_ST_UNCHECKED(0, 6064 triop(fop, 6065 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 
6066 get_ST(0), 6067 unop(Iop_I32StoF64, 6068 loadLE(Ity_I32, mkexpr(addr))))); 6069 break; 6070 6071 do_foprev_m32: 6072 put_ST_UNCHECKED(0, 6073 triop(fop, 6074 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6075 unop(Iop_I32StoF64, 6076 loadLE(Ity_I32, mkexpr(addr))), 6077 get_ST(0))); 6078 break; 6079 6080 default: 6081 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6082 vex_printf("first_opcode == 0xDA\n"); 6083 goto decode_fail; 6084 } 6085 6086 } else { 6087 6088 delta++; 6089 switch (modrm) { 6090 6091 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 6092 r_src = (UInt)modrm - 0xC0; 6093 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 6094 put_ST_UNCHECKED(0, 6095 IRExpr_ITE( 6096 mk_amd64g_calculate_condition(AMD64CondB), 6097 get_ST(r_src), get_ST(0)) ); 6098 break; 6099 6100 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 6101 r_src = (UInt)modrm - 0xC8; 6102 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 6103 put_ST_UNCHECKED(0, 6104 IRExpr_ITE( 6105 mk_amd64g_calculate_condition(AMD64CondZ), 6106 get_ST(r_src), get_ST(0)) ); 6107 break; 6108 6109 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 6110 r_src = (UInt)modrm - 0xD0; 6111 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 6112 put_ST_UNCHECKED(0, 6113 IRExpr_ITE( 6114 mk_amd64g_calculate_condition(AMD64CondBE), 6115 get_ST(r_src), get_ST(0)) ); 6116 break; 6117 6118 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 6119 r_src = (UInt)modrm - 0xD8; 6120 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 6121 put_ST_UNCHECKED(0, 6122 IRExpr_ITE( 6123 mk_amd64g_calculate_condition(AMD64CondP), 6124 get_ST(r_src), get_ST(0)) ); 6125 break; 6126 6127 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 6128 DIP("fucompp %%st(0),%%st(1)\n"); 6129 /* This forces C1 to zero, which isn't right. 
*/ 6130 put_C3210( 6131 unop(Iop_32Uto64, 6132 binop( Iop_And32, 6133 binop(Iop_Shl32, 6134 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6135 mkU8(8)), 6136 mkU32(0x4500) 6137 ))); 6138 fp_pop(); 6139 fp_pop(); 6140 break; 6141 6142 default: 6143 goto decode_fail; 6144 } 6145 6146 } 6147 } 6148 6149 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 6150 else 6151 if (first_opcode == 0xDB) { 6152 if (modrm < 0xC0) { 6153 6154 /* bits 5,4,3 are an opcode extension, and the modRM also 6155 specifies an address. */ 6156 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6157 delta += len; 6158 6159 switch (gregLO3ofRM(modrm)) { 6160 6161 case 0: /* FILD m32int */ 6162 DIP("fildl %s\n", dis_buf); 6163 fp_push(); 6164 put_ST(0, unop(Iop_I32StoF64, 6165 loadLE(Ity_I32, mkexpr(addr)))); 6166 break; 6167 6168 case 1: /* FISTTPL m32 (SSE3) */ 6169 DIP("fisttpl %s\n", dis_buf); 6170 storeLE( mkexpr(addr), 6171 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 6172 fp_pop(); 6173 break; 6174 6175 case 2: /* FIST m32 */ 6176 DIP("fistl %s\n", dis_buf); 6177 storeLE( mkexpr(addr), 6178 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6179 break; 6180 6181 case 3: /* FISTP m32 */ 6182 DIP("fistpl %s\n", dis_buf); 6183 storeLE( mkexpr(addr), 6184 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6185 fp_pop(); 6186 break; 6187 6188 case 5: { /* FLD extended-real */ 6189 /* Uses dirty helper: 6190 ULong amd64g_loadF80le ( ULong ) 6191 addr holds the address. First, do a dirty call to 6192 get hold of the data. */ 6193 IRTemp val = newTemp(Ity_I64); 6194 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 6195 6196 IRDirty* d = unsafeIRDirty_1_N ( 6197 val, 6198 0/*regparms*/, 6199 "amd64g_dirtyhelper_loadF80le", 6200 &amd64g_dirtyhelper_loadF80le, 6201 args 6202 ); 6203 /* declare that we're reading memory */ 6204 d->mFx = Ifx_Read; 6205 d->mAddr = mkexpr(addr); 6206 d->mSize = 10; 6207 6208 /* execute the dirty call, dumping the result in val. 
*/ 6209 stmt( IRStmt_Dirty(d) ); 6210 fp_push(); 6211 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 6212 6213 DIP("fldt %s\n", dis_buf); 6214 break; 6215 } 6216 6217 case 7: { /* FSTP extended-real */ 6218 /* Uses dirty helper: 6219 void amd64g_storeF80le ( ULong addr, ULong data ) 6220 */ 6221 IRExpr** args 6222 = mkIRExprVec_2( mkexpr(addr), 6223 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 6224 6225 IRDirty* d = unsafeIRDirty_0_N ( 6226 0/*regparms*/, 6227 "amd64g_dirtyhelper_storeF80le", 6228 &amd64g_dirtyhelper_storeF80le, 6229 args 6230 ); 6231 /* declare we're writing memory */ 6232 d->mFx = Ifx_Write; 6233 d->mAddr = mkexpr(addr); 6234 d->mSize = 10; 6235 6236 /* execute the dirty call. */ 6237 stmt( IRStmt_Dirty(d) ); 6238 fp_pop(); 6239 6240 DIP("fstpt\n %s", dis_buf); 6241 break; 6242 } 6243 6244 default: 6245 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6246 vex_printf("first_opcode == 0xDB\n"); 6247 goto decode_fail; 6248 } 6249 6250 } else { 6251 6252 delta++; 6253 switch (modrm) { 6254 6255 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 6256 r_src = (UInt)modrm - 0xC0; 6257 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 6258 put_ST_UNCHECKED(0, 6259 IRExpr_ITE( 6260 mk_amd64g_calculate_condition(AMD64CondNB), 6261 get_ST(r_src), get_ST(0)) ); 6262 break; 6263 6264 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 6265 r_src = (UInt)modrm - 0xC8; 6266 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 6267 put_ST_UNCHECKED( 6268 0, 6269 IRExpr_ITE( 6270 mk_amd64g_calculate_condition(AMD64CondNZ), 6271 get_ST(r_src), 6272 get_ST(0) 6273 ) 6274 ); 6275 break; 6276 6277 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 6278 r_src = (UInt)modrm - 0xD0; 6279 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 6280 put_ST_UNCHECKED( 6281 0, 6282 IRExpr_ITE( 6283 mk_amd64g_calculate_condition(AMD64CondNBE), 6284 get_ST(r_src), 6285 get_ST(0) 6286 ) 6287 ); 6288 break; 6289 6290 case 0xD8 ... 
0xDF: /* FCMOVNU ST(i), ST(0) */ 6291 r_src = (UInt)modrm - 0xD8; 6292 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 6293 put_ST_UNCHECKED( 6294 0, 6295 IRExpr_ITE( 6296 mk_amd64g_calculate_condition(AMD64CondNP), 6297 get_ST(r_src), 6298 get_ST(0) 6299 ) 6300 ); 6301 break; 6302 6303 case 0xE2: 6304 DIP("fnclex\n"); 6305 break; 6306 6307 case 0xE3: { 6308 /* Uses dirty helper: 6309 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 6310 IRDirty* d = unsafeIRDirty_0_N ( 6311 0/*regparms*/, 6312 "amd64g_dirtyhelper_FINIT", 6313 &amd64g_dirtyhelper_FINIT, 6314 mkIRExprVec_1( IRExpr_BBPTR() ) 6315 ); 6316 6317 /* declare we're writing guest state */ 6318 d->nFxState = 5; 6319 vex_bzero(&d->fxState, sizeof(d->fxState)); 6320 6321 d->fxState[0].fx = Ifx_Write; 6322 d->fxState[0].offset = OFFB_FTOP; 6323 d->fxState[0].size = sizeof(UInt); 6324 6325 d->fxState[1].fx = Ifx_Write; 6326 d->fxState[1].offset = OFFB_FPREGS; 6327 d->fxState[1].size = 8 * sizeof(ULong); 6328 6329 d->fxState[2].fx = Ifx_Write; 6330 d->fxState[2].offset = OFFB_FPTAGS; 6331 d->fxState[2].size = 8 * sizeof(UChar); 6332 6333 d->fxState[3].fx = Ifx_Write; 6334 d->fxState[3].offset = OFFB_FPROUND; 6335 d->fxState[3].size = sizeof(ULong); 6336 6337 d->fxState[4].fx = Ifx_Write; 6338 d->fxState[4].offset = OFFB_FC3210; 6339 d->fxState[4].size = sizeof(ULong); 6340 6341 stmt( IRStmt_Dirty(d) ); 6342 6343 DIP("fninit\n"); 6344 break; 6345 } 6346 6347 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 6348 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 6349 break; 6350 6351 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 6352 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 6353 break; 6354 6355 default: 6356 goto decode_fail; 6357 } 6358 } 6359 } 6360 6361 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 6362 else 6363 if (first_opcode == 0xDC) { 6364 if (modrm < 0xC0) { 6365 6366 /* bits 5,4,3 are an opcode extension, and the modRM also 6367 specifies an address. 
*/ 6368 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6369 delta += len; 6370 6371 switch (gregLO3ofRM(modrm)) { 6372 6373 case 0: /* FADD double-real */ 6374 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 6375 break; 6376 6377 case 1: /* FMUL double-real */ 6378 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 6379 break; 6380 6381 //.. case 2: /* FCOM double-real */ 6382 //.. DIP("fcoml %s\n", dis_buf); 6383 //.. /* This forces C1 to zero, which isn't right. */ 6384 //.. put_C3210( 6385 //.. binop( Iop_And32, 6386 //.. binop(Iop_Shl32, 6387 //.. binop(Iop_CmpF64, 6388 //.. get_ST(0), 6389 //.. loadLE(Ity_F64,mkexpr(addr))), 6390 //.. mkU8(8)), 6391 //.. mkU32(0x4500) 6392 //.. )); 6393 //.. break; 6394 6395 case 3: /* FCOMP double-real */ 6396 DIP("fcompl %s\n", dis_buf); 6397 /* This forces C1 to zero, which isn't right. */ 6398 put_C3210( 6399 unop(Iop_32Uto64, 6400 binop( Iop_And32, 6401 binop(Iop_Shl32, 6402 binop(Iop_CmpF64, 6403 get_ST(0), 6404 loadLE(Ity_F64,mkexpr(addr))), 6405 mkU8(8)), 6406 mkU32(0x4500) 6407 ))); 6408 fp_pop(); 6409 break; 6410 6411 case 4: /* FSUB double-real */ 6412 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 6413 break; 6414 6415 case 5: /* FSUBR double-real */ 6416 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 6417 break; 6418 6419 case 6: /* FDIV double-real */ 6420 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 6421 break; 6422 6423 case 7: /* FDIVR double-real */ 6424 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 6425 break; 6426 6427 default: 6428 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6429 vex_printf("first_opcode == 0xDC\n"); 6430 goto decode_fail; 6431 } 6432 6433 } else { 6434 6435 delta++; 6436 switch (modrm) { 6437 6438 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 6439 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 6440 break; 6441 6442 case 0xC8 ... 
0xCF: /* FMUL %st(0),%st(?) */ 6443 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 6444 break; 6445 6446 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 6447 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 6448 break; 6449 6450 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 6451 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 6452 break; 6453 6454 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 6455 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 6456 break; 6457 6458 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 6459 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 6460 break; 6461 6462 default: 6463 goto decode_fail; 6464 } 6465 6466 } 6467 } 6468 6469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 6470 else 6471 if (first_opcode == 0xDD) { 6472 6473 if (modrm < 0xC0) { 6474 6475 /* bits 5,4,3 are an opcode extension, and the modRM also 6476 specifies an address. */ 6477 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6478 delta += len; 6479 6480 switch (gregLO3ofRM(modrm)) { 6481 6482 case 0: /* FLD double-real */ 6483 DIP("fldl %s\n", dis_buf); 6484 fp_push(); 6485 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6486 break; 6487 6488 case 1: /* FISTTPQ m64 (SSE3) */ 6489 DIP("fistppll %s\n", dis_buf); 6490 storeLE( mkexpr(addr), 6491 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6492 fp_pop(); 6493 break; 6494 6495 case 2: /* FST double-real */ 6496 DIP("fstl %s\n", dis_buf); 6497 storeLE(mkexpr(addr), get_ST(0)); 6498 break; 6499 6500 case 3: /* FSTP double-real */ 6501 DIP("fstpl %s\n", dis_buf); 6502 storeLE(mkexpr(addr), get_ST(0)); 6503 fp_pop(); 6504 break; 6505 6506 case 4: { /* FRSTOR m94/m108 */ 6507 IRTemp ew = newTemp(Ity_I32); 6508 IRTemp w64 = newTemp(Ity_I64); 6509 IRDirty* d; 6510 if ( have66(pfx) ) { 6511 /* Uses dirty helper: 6512 VexEmNote amd64g_dirtyhelper_FRSTORS 6513 ( VexGuestAMD64State*, HWord ) */ 6514 d = unsafeIRDirty_0_N ( 6515 
0/*regparms*/, 6516 "amd64g_dirtyhelper_FRSTORS", 6517 &amd64g_dirtyhelper_FRSTORS, 6518 mkIRExprVec_1( mkexpr(addr) ) 6519 ); 6520 d->mSize = 94; 6521 } else { 6522 /* Uses dirty helper: 6523 VexEmNote amd64g_dirtyhelper_FRSTOR 6524 ( VexGuestAMD64State*, HWord ) */ 6525 d = unsafeIRDirty_0_N ( 6526 0/*regparms*/, 6527 "amd64g_dirtyhelper_FRSTOR", 6528 &amd64g_dirtyhelper_FRSTOR, 6529 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6530 ); 6531 d->mSize = 108; 6532 } 6533 6534 d->tmp = w64; 6535 /* declare we're reading memory */ 6536 d->mFx = Ifx_Read; 6537 d->mAddr = mkexpr(addr); 6538 /* d->mSize set above */ 6539 6540 /* declare we're writing guest state */ 6541 d->nFxState = 5; 6542 vex_bzero(&d->fxState, sizeof(d->fxState)); 6543 6544 d->fxState[0].fx = Ifx_Write; 6545 d->fxState[0].offset = OFFB_FTOP; 6546 d->fxState[0].size = sizeof(UInt); 6547 6548 d->fxState[1].fx = Ifx_Write; 6549 d->fxState[1].offset = OFFB_FPREGS; 6550 d->fxState[1].size = 8 * sizeof(ULong); 6551 6552 d->fxState[2].fx = Ifx_Write; 6553 d->fxState[2].offset = OFFB_FPTAGS; 6554 d->fxState[2].size = 8 * sizeof(UChar); 6555 6556 d->fxState[3].fx = Ifx_Write; 6557 d->fxState[3].offset = OFFB_FPROUND; 6558 d->fxState[3].size = sizeof(ULong); 6559 6560 d->fxState[4].fx = Ifx_Write; 6561 d->fxState[4].offset = OFFB_FC3210; 6562 d->fxState[4].size = sizeof(ULong); 6563 6564 stmt( IRStmt_Dirty(d) ); 6565 6566 /* ew contains any emulation warning we may need to 6567 issue. If needed, side-exit to the next insn, 6568 reporting the warning, so that Valgrind's dispatcher 6569 sees the warning. 
*/ 6570 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 6571 put_emwarn( mkexpr(ew) ); 6572 stmt( 6573 IRStmt_Exit( 6574 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6575 Ijk_EmWarn, 6576 IRConst_U64( guest_RIP_bbstart+delta ), 6577 OFFB_RIP 6578 ) 6579 ); 6580 6581 if ( have66(pfx) ) { 6582 DIP("frstors %s\n", dis_buf); 6583 } else { 6584 DIP("frstor %s\n", dis_buf); 6585 } 6586 break; 6587 } 6588 6589 case 6: { /* FNSAVE m94/m108 */ 6590 IRDirty *d; 6591 if ( have66(pfx) ) { 6592 /* Uses dirty helper: 6593 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, 6594 HWord ) */ 6595 d = unsafeIRDirty_0_N ( 6596 0/*regparms*/, 6597 "amd64g_dirtyhelper_FNSAVES", 6598 &amd64g_dirtyhelper_FNSAVES, 6599 mkIRExprVec_1( mkexpr(addr) ) 6600 ); 6601 d->mSize = 94; 6602 } else { 6603 /* Uses dirty helper: 6604 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, 6605 HWord ) */ 6606 d = unsafeIRDirty_0_N ( 6607 0/*regparms*/, 6608 "amd64g_dirtyhelper_FNSAVE", 6609 &amd64g_dirtyhelper_FNSAVE, 6610 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6611 ); 6612 d->mSize = 108; 6613 } 6614 6615 /* declare we're writing memory */ 6616 d->mFx = Ifx_Write; 6617 d->mAddr = mkexpr(addr); 6618 /* d->mSize set above */ 6619 6620 /* declare we're reading guest state */ 6621 d->nFxState = 5; 6622 vex_bzero(&d->fxState, sizeof(d->fxState)); 6623 6624 d->fxState[0].fx = Ifx_Read; 6625 d->fxState[0].offset = OFFB_FTOP; 6626 d->fxState[0].size = sizeof(UInt); 6627 6628 d->fxState[1].fx = Ifx_Read; 6629 d->fxState[1].offset = OFFB_FPREGS; 6630 d->fxState[1].size = 8 * sizeof(ULong); 6631 6632 d->fxState[2].fx = Ifx_Read; 6633 d->fxState[2].offset = OFFB_FPTAGS; 6634 d->fxState[2].size = 8 * sizeof(UChar); 6635 6636 d->fxState[3].fx = Ifx_Read; 6637 d->fxState[3].offset = OFFB_FPROUND; 6638 d->fxState[3].size = sizeof(ULong); 6639 6640 d->fxState[4].fx = Ifx_Read; 6641 d->fxState[4].offset = OFFB_FC3210; 6642 d->fxState[4].size = sizeof(ULong); 6643 6644 stmt( IRStmt_Dirty(d) ); 6645 6646 if ( 
have66(pfx) ) { 6647 DIP("fnsaves %s\n", dis_buf); 6648 } else { 6649 DIP("fnsave %s\n", dis_buf); 6650 } 6651 break; 6652 } 6653 6654 case 7: { /* FNSTSW m16 */ 6655 IRExpr* sw = get_FPU_sw(); 6656 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6657 storeLE( mkexpr(addr), sw ); 6658 DIP("fnstsw %s\n", dis_buf); 6659 break; 6660 } 6661 6662 default: 6663 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6664 vex_printf("first_opcode == 0xDD\n"); 6665 goto decode_fail; 6666 } 6667 } else { 6668 delta++; 6669 switch (modrm) { 6670 6671 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6672 r_dst = (UInt)modrm - 0xC0; 6673 DIP("ffree %%st(%u)\n", r_dst); 6674 put_ST_TAG ( r_dst, mkU8(0) ); 6675 break; 6676 6677 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6678 r_dst = (UInt)modrm - 0xD0; 6679 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6680 /* P4 manual says: "If the destination operand is a 6681 non-empty register, the invalid-operation exception 6682 is not generated. Hence put_ST_UNCHECKED. */ 6683 put_ST_UNCHECKED(r_dst, get_ST(0)); 6684 break; 6685 6686 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6687 r_dst = (UInt)modrm - 0xD8; 6688 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6689 /* P4 manual says: "If the destination operand is a 6690 non-empty register, the invalid-operation exception 6691 is not generated. Hence put_ST_UNCHECKED. */ 6692 put_ST_UNCHECKED(r_dst, get_ST(0)); 6693 fp_pop(); 6694 break; 6695 6696 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 6697 r_dst = (UInt)modrm - 0xE0; 6698 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6699 /* This forces C1 to zero, which isn't right. */ 6700 put_C3210( 6701 unop(Iop_32Uto64, 6702 binop( Iop_And32, 6703 binop(Iop_Shl32, 6704 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6705 mkU8(8)), 6706 mkU32(0x4500) 6707 ))); 6708 break; 6709 6710 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 6711 r_dst = (UInt)modrm - 0xE8; 6712 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6713 /* This forces C1 to zero, which isn't right. 
*/ 6714 put_C3210( 6715 unop(Iop_32Uto64, 6716 binop( Iop_And32, 6717 binop(Iop_Shl32, 6718 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6719 mkU8(8)), 6720 mkU32(0x4500) 6721 ))); 6722 fp_pop(); 6723 break; 6724 6725 default: 6726 goto decode_fail; 6727 } 6728 } 6729 } 6730 6731 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6732 else 6733 if (first_opcode == 0xDE) { 6734 6735 if (modrm < 0xC0) { 6736 6737 /* bits 5,4,3 are an opcode extension, and the modRM also 6738 specifies an address. */ 6739 IROp fop; 6740 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6741 delta += len; 6742 6743 switch (gregLO3ofRM(modrm)) { 6744 6745 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6746 DIP("fiaddw %s\n", dis_buf); 6747 fop = Iop_AddF64; 6748 goto do_fop_m16; 6749 6750 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6751 DIP("fimulw %s\n", dis_buf); 6752 fop = Iop_MulF64; 6753 goto do_fop_m16; 6754 6755 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6756 DIP("fisubw %s\n", dis_buf); 6757 fop = Iop_SubF64; 6758 goto do_fop_m16; 6759 6760 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6761 DIP("fisubrw %s\n", dis_buf); 6762 fop = Iop_SubF64; 6763 goto do_foprev_m16; 6764 6765 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6766 DIP("fisubw %s\n", dis_buf); 6767 fop = Iop_DivF64; 6768 goto do_fop_m16; 6769 6770 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6771 DIP("fidivrw %s\n", dis_buf); 6772 fop = Iop_DivF64; 6773 goto do_foprev_m16; 6774 6775 do_fop_m16: 6776 put_ST_UNCHECKED(0, 6777 triop(fop, 6778 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6779 get_ST(0), 6780 unop(Iop_I32StoF64, 6781 unop(Iop_16Sto32, 6782 loadLE(Ity_I16, mkexpr(addr)))))); 6783 break; 6784 6785 do_foprev_m16: 6786 put_ST_UNCHECKED(0, 6787 triop(fop, 6788 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6789 unop(Iop_I32StoF64, 6790 unop(Iop_16Sto32, 6791 loadLE(Ity_I16, mkexpr(addr)))), 6792 get_ST(0))); 6793 break; 6794 6795 default: 6796 
vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6797 vex_printf("first_opcode == 0xDE\n"); 6798 goto decode_fail; 6799 } 6800 6801 } else { 6802 6803 delta++; 6804 switch (modrm) { 6805 6806 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6807 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6808 break; 6809 6810 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 6811 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6812 break; 6813 6814 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6815 DIP("fcompp %%st(0),%%st(1)\n"); 6816 /* This forces C1 to zero, which isn't right. */ 6817 put_C3210( 6818 unop(Iop_32Uto64, 6819 binop( Iop_And32, 6820 binop(Iop_Shl32, 6821 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6822 mkU8(8)), 6823 mkU32(0x4500) 6824 ))); 6825 fp_pop(); 6826 fp_pop(); 6827 break; 6828 6829 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6830 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6831 break; 6832 6833 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6834 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6835 break; 6836 6837 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6838 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6839 break; 6840 6841 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6842 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6843 break; 6844 6845 default: 6846 goto decode_fail; 6847 } 6848 6849 } 6850 } 6851 6852 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6853 else 6854 if (first_opcode == 0xDF) { 6855 6856 if (modrm < 0xC0) { 6857 6858 /* bits 5,4,3 are an opcode extension, and the modRM also 6859 specifies an address. 
*/ 6860 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6861 delta += len; 6862 6863 switch (gregLO3ofRM(modrm)) { 6864 6865 case 0: /* FILD m16int */ 6866 DIP("fildw %s\n", dis_buf); 6867 fp_push(); 6868 put_ST(0, unop(Iop_I32StoF64, 6869 unop(Iop_16Sto32, 6870 loadLE(Ity_I16, mkexpr(addr))))); 6871 break; 6872 6873 case 1: /* FISTTPS m16 (SSE3) */ 6874 DIP("fisttps %s\n", dis_buf); 6875 storeLE( mkexpr(addr), 6876 x87ishly_qnarrow_32_to_16( 6877 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6878 fp_pop(); 6879 break; 6880 6881 case 2: /* FIST m16 */ 6882 DIP("fists %s\n", dis_buf); 6883 storeLE( mkexpr(addr), 6884 x87ishly_qnarrow_32_to_16( 6885 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6886 break; 6887 6888 case 3: /* FISTP m16 */ 6889 DIP("fistps %s\n", dis_buf); 6890 storeLE( mkexpr(addr), 6891 x87ishly_qnarrow_32_to_16( 6892 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6893 fp_pop(); 6894 break; 6895 6896 case 5: /* FILD m64 */ 6897 DIP("fildll %s\n", dis_buf); 6898 fp_push(); 6899 put_ST(0, binop(Iop_I64StoF64, 6900 get_roundingmode(), 6901 loadLE(Ity_I64, mkexpr(addr)))); 6902 break; 6903 6904 case 7: /* FISTP m64 */ 6905 DIP("fistpll %s\n", dis_buf); 6906 storeLE( mkexpr(addr), 6907 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6908 fp_pop(); 6909 break; 6910 6911 default: 6912 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6913 vex_printf("first_opcode == 0xDF\n"); 6914 goto decode_fail; 6915 } 6916 6917 } else { 6918 6919 delta++; 6920 switch (modrm) { 6921 6922 case 0xC0: /* FFREEP %st(0) */ 6923 DIP("ffreep %%st(%d)\n", 0); 6924 put_ST_TAG ( 0, mkU8(0) ); 6925 fp_pop(); 6926 break; 6927 6928 case 0xE0: /* FNSTSW %ax */ 6929 DIP("fnstsw %%ax\n"); 6930 /* Invent a plausible-looking FPU status word value and 6931 dump it in %AX: 6932 ((ftop & 7) << 11) | (c3210 & 0x4700) 6933 */ 6934 putIRegRAX( 6935 2, 6936 unop(Iop_32to16, 6937 binop(Iop_Or32, 6938 binop(Iop_Shl32, 6939 
binop(Iop_And32, get_ftop(), mkU32(7)),
                                   mkU8(11)),
                             binop(Iop_And32,
                                   unop(Iop_64to32, get_C3210()),
                                   mkU32(0x4700))
                  )));
               break;

            case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
               break;

            case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
               /* not really right since COMIP != UCOMIP */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
               break;

            default:
               goto decode_fail;
         }
      }

   }

   else
      goto decode_fail;

   /* Successful decode: report it and hand back the advanced
      instruction offset. */
   *decode_ok = True;
   return delta;

   /* Shared failure exit for all opcode groups above. */
  decode_fail:
   *decode_ok = False;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

/* Emit IR realising the "any MMX insn" state change above: zero
   FTOP and mark all eight FP-register tags as in-use (tag = 1). */
static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

/* Emit IR realising the EMMS state change above: zero FTOP and mark
   all eight FP-register tags as empty (tag = 0). */
static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}


/* Read MMX register 0..7; aliases the low 8 bytes of the
   corresponding x87 FP register in guest state. */
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


/* Write MMX register 0..7; e must be an Ity_I64 expression. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}


/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller.

   Decodes one "G := G op E" MMX instruction: maps 'opc' either to a
   native IROp or (via the XXX macro) to a clean C helper, fetches E
   from a register or memory, and writes the result back to G. */
static
ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                const HChar* name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;          /* complement G before use (PANDN) */
   IROp    op    = Iop_INVALID;    /* native IR op, if any */
   void*   hAddr = NULL;           /* else: C helper address ... */
   const HChar* hName = NULL;      /* ... and its name */
   Bool    eLeft = False;          /* True: operands are (E,G) not (G,E) */

   /* Record a C-helper implementation for ops with no IROp. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8; break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op =
Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack/narrow ops: hardware operand order is E:G, hence eLeft */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;  /* PANDN: ~G & E */
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8; break;
      case 0xE3: op = Iop_Avg16Ux4; break;
      case 0xEE: op = Iop_Max16Sx4; break;
      case 0xDE: op = Iop_Max8Ux8; break;
      case 0xEA: op = Iop_Min16Sx4; break;
      case 0xDA: op = Iop_Min8Ux8; break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   /* Fetch G, complementing it first for PANDN. */
   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch E from register or memory, advancing delta accordingly. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64,
mkexpr(addr)); 7146 } 7147 7148 if (eLeft) { 7149 argL = argE; 7150 argR = argG; 7151 } else { 7152 argL = argG; 7153 argR = argE; 7154 } 7155 7156 if (op != Iop_INVALID) { 7157 vassert(hName == NULL); 7158 vassert(hAddr == NULL); 7159 assign(res, binop(op, argL, argR)); 7160 } else { 7161 vassert(hName != NULL); 7162 vassert(hAddr != NULL); 7163 assign( res, 7164 mkIRExprCCall( 7165 Ity_I64, 7166 0/*regparms*/, hName, hAddr, 7167 mkIRExprVec_2( argL, argR ) 7168 ) 7169 ); 7170 } 7171 7172 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 7173 7174 DIP("%s%s %s, %s\n", 7175 name, show_granularity ? nameMMXGran(opc & 3) : "", 7176 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 7177 nameMMXReg(gregLO3ofRM(modrm)) ); 7178 7179 return delta; 7180 } 7181 7182 7183 /* Vector by scalar shift of G by the amount specified at the bottom 7184 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 7185 7186 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi, 7187 Prefix pfx, Long delta, 7188 const HChar* opname, IROp op ) 7189 { 7190 HChar dis_buf[50]; 7191 Int alen, size; 7192 IRTemp addr; 7193 Bool shl, shr, sar; 7194 UChar rm = getUChar(delta); 7195 IRTemp g0 = newTemp(Ity_I64); 7196 IRTemp g1 = newTemp(Ity_I64); 7197 IRTemp amt = newTemp(Ity_I64); 7198 IRTemp amt8 = newTemp(Ity_I8); 7199 7200 if (epartIsReg(rm)) { 7201 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 7202 DIP("%s %s,%s\n", opname, 7203 nameMMXReg(eregLO3ofRM(rm)), 7204 nameMMXReg(gregLO3ofRM(rm)) ); 7205 delta++; 7206 } else { 7207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 7208 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 7209 DIP("%s %s,%s\n", opname, 7210 dis_buf, 7211 nameMMXReg(gregLO3ofRM(rm)) ); 7212 delta += alen; 7213 } 7214 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 7215 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 7216 7217 shl = shr = sar = False; 7218 size = 0; 7219 switch (op) { 7220 case Iop_ShlN16x4: shl = True; size = 32; break; 7221 case Iop_ShlN32x2: shl = True; size = 
32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   /* shl/shr: amounts >= size produce zero; sar: they replicate the
      sign bit (equivalent to shifting by size-1). */
   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkU64(0)
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      vassert(0);
   }

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm.

   The immediate is known at decode time, so the "amount >= lane
   width" saturation is folded statically rather than via an ITE. */

static
ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm = getUChar(delta);
   IRTemp  e0 = newTemp(Ity_I64);
   IRTemp  e1 = newTemp(Ity_I64);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameMMXReg(eregLO3ofRM(rm)) );

   assign( e0, getMMXReg(eregLO3ofRM(rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   }

   /* Static saturation: out-of-range shl/shr yields zero; sar sticks
      at size-1 (sign replication). */
   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkU64(0)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   return delta;
}


/* Completely handle all MMX instructions except emms.

   On success sets *decode_ok and returns the advanced instruction
   offset; on failure clears *decode_ok (returned delta is then
   ignored by the caller). */

static
ULong dis_MMX ( Bool* decode_ok,
                const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
{
   Int   len;
   UChar modrm;
   HChar dis_buf[50];
   UChar opc = getUChar(delta);
   delta++;

   /* dis_MMX handles all insns except emms. */
   do_MMX_preamble();

   switch (opc) {

      case 0x6E:
         if (sz == 4) {
            /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               /* Zero-extend the 32-bit value into the 64-bit MMX reg. */
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         getIReg32(eregOfRexRM(pfx,modrm)) ) );
               DIP("movd %s, %s\n",
                   nameIReg32(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         loadLE(Ity_I32, mkexpr(addr)) ) );
               DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg( gregLO3ofRM(modrm),
                          getIReg64(eregOfRexRM(pfx,modrm)) );
               DIP("movd %s, %s\n",
                   nameIReg64(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0
);
               delta += len;
               putMMXReg( gregLO3ofRM(modrm),
                          loadLE(Ity_I64, mkexpr(addr)) );
               DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else {
            goto mmx_decode_failure;
         }
         break;

      case 0x7E:
         if (sz == 4) {
            /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putIReg32( eregOfRexRM(pfx,modrm),
                          unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
               DIP("movd %s, %s\n",
                   nameMMXReg(gregLO3ofRM(modrm)),
                   nameIReg32(eregOfRexRM(pfx,modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               storeLE( mkexpr(addr),
                        unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
               DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putIReg64( eregOfRexRM(pfx,modrm),
                          getMMXReg(gregLO3ofRM(modrm)) );
               DIP("movd %s, %s\n",
                   nameMMXReg(gregLO3ofRM(modrm)),
                   nameIReg64(eregOfRexRM(pfx,modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               storeLE( mkexpr(addr),
                        getMMXReg(gregLO3ofRM(modrm)) );
               DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
            }
         } else {
            goto mmx_decode_failure;
         }
         break;

      case 0x6F:
         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregLO3ofRM(modrm)),
                nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            delta += len;
            putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
         }
         break;

      case 0x7F:
         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)),
                nameMMXReg(eregLO3ofRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            delta += len;
            storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
         }
         break;

      /* Remaining arithmetic/logical cases all delegate to
         dis_MMXop_regmem_to_reg, which maps opc to the IR op. */
      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
         break;

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
         break;

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
         break;

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
         break;

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
         break;

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
         break;

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
         break;

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
         break;

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         vassert(sz == 4);
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
         break;

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
         break;

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
         break;

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
         break;

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
         break;

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
         break;

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
         break;

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
         break;

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
         break;

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
         break;

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
         break;

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
         break;

      /* Shift-by-register forms delegate to dis_MMX_shiftG_byE. */
#     define SHIFT_BY_REG(_name,_op)                                     \
                delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
                break;

      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3:
gregLO3ofRM(modrm) )); 7678 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7679 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7680 assign( newdata, 7681 binop(Iop_Or64, 7682 binop(Iop_And64, 7683 mkexpr(regD), 7684 mkexpr(mask) ), 7685 binop(Iop_And64, 7686 mkexpr(olddata), 7687 unop(Iop_Not64, mkexpr(mask)))) ); 7688 storeLE( mkexpr(addr), mkexpr(newdata) ); 7689 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7690 nameMMXReg( gregLO3ofRM(modrm) ) ); 7691 break; 7692 } 7693 7694 /* --- MMX decode failure --- */ 7695 default: 7696 mmx_decode_failure: 7697 *decode_ok = False; 7698 return delta; /* ignored */ 7699 7700 } 7701 7702 *decode_ok = True; 7703 return delta; 7704 } 7705 7706 7707 /*------------------------------------------------------------*/ 7708 /*--- More misc arithmetic and other obscure insns. ---*/ 7709 /*------------------------------------------------------------*/ 7710 7711 /* Generate base << amt with vacated places filled with stuff 7712 from xtra. amt guaranteed in 0 .. 63. */ 7713 static 7714 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7715 { 7716 /* if amt == 0 7717 then base 7718 else (base << amt) | (xtra >>u (64-amt)) 7719 */ 7720 return 7721 IRExpr_ITE( 7722 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7723 binop(Iop_Or64, 7724 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7725 binop(Iop_Shr64, mkexpr(xtra), 7726 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7727 ), 7728 mkexpr(base) 7729 ); 7730 } 7731 7732 /* Generate base >>u amt with vacated places filled with stuff 7733 from xtra. amt guaranteed in 0 .. 63. 
*/
static
IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
{
   /* if   amt == 0
      then base
      else (base >>u amt) | (xtra << (64-amt))

      As with shiftL64_with_extras, amt==0 is special-cased to avoid
      generating a shift by 64. */
   return
      IRExpr_ITE(
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
         binop(Iop_Or64,
               binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shl64, mkexpr(xtra),
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
               ),
         mkexpr(base)
      );
}

/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */
static
ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
                        Prefix pfx,
                        Long delta, UChar modrm,
                        Int sz,
                        IRExpr* shift_amt,
                        Bool amt_is_literal,
                        const HChar* shift_amt_txt,
                        Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.   And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpSS    = newTemp(Ity_I8);
   IRTemp tmp64    = IRTemp_INVALID;
   IRTemp res64    = IRTemp_INVALID;
   IRTemp rss64    = IRTemp_INVALID;
   IRTemp resTy    = IRTemp_INVALID;
   IRTemp rssTy    = IRTemp_INVALID;
   /* shift amounts are taken modulo 64 for 64-bit operands, else 32 */
   Int    mask     = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom.  */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), dis_buf);
   }

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
      value (rss64).  rss64 is the value shifted by one less than the
      shift amount; the flags thunk uses it (via
      setFlags_DEP1_DEP2_shift below). */

   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8,
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                        mkU8(mask)));

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(32)) );
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
                       mkU8(32)) );
      }
      else
      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
      }
      else
      if (sz == 2 && left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         ));
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(48)) );
         /* subshift formed by shifting [esrc'0000'0000'0000] */
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64,
                             binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
                                              mkU8(48)),
                             mkexpr(tmpSS)),
                       mkU8(48)) );
      }
      else
      if (sz == 2 && !left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         ));
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64,
                              unop(Iop_16Uto64, mkexpr(esrc)),
                              mkexpr(tmpSS)) );
      }

   } else {

      vassert(sz == 8);
      if (left_shift) {
         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   /* skip the trailing imm8 shift amount, if present */
   if (amt_is_literal) delta++;
   return delta;
}


/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required.
*/

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

/* Mnemonic suffix for the given bit-test operation ("bt" + suffix). */
static const HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}


static
ULong dis_bt_G_E ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op,
                   /*OUT*/Bool* decode_OK )
{
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_rsp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   *decode_OK = True;
   if (epartIsReg(modrm)) {
      /* F2 and F3 are never acceptable. */
      if (haveF2orF3(pfx)) {
         *decode_OK = False;
         return delta;
      }
   } else {
      /* F2 or F3 (but not both) are allowed, provided LOCK is also
         present, and only for the BTC/BTS/BTR cases (not BT). */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
            *decode_OK = False;
            return delta;
         }
      }
   }

   /* bit number, sign-widened to 64 bits */
   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP before at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely.  */
      t_rsp = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And64,
                              mkexpr(t_bitno0),
                              mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg.  */

   /* Now the main sequence. */
   assign( t_addr1,
           binop(Iop_Add64,
                 mkexpr(t_addr0),
                 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_64to8,
                binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(amd64)");
      }
      if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_RIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
         OFFB_CC_DEP1,
         binop(Iop_And64,
               binop(Iop_Shr64,
                     unop(Iop_8Uto64, mkexpr(t_fetched)),
                     mkexpr(t_bitno2)),
               mkU64(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_rsp still points at it. */
      /* only write the reg if actually modifying it; doing otherwise
         zeroes the top half erroneously when doing btl due to
         standard zero-extend rule */
      if (op != BtOpNone)
         putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
      /* undo the 288-byte RSP drop made above */
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
       ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );

   return delta;
}



/* Handle BSF/BSR.  Only v-size seems necessary. */
static
ULong dis_bs_E_G ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, Bool fwds )
{
   Bool   isReg;
   UChar  modrm;
   HChar  dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp src   = newTemp(ty);
   IRTemp dst   = newTemp(ty);
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 8 || sz == 4 || sz == 2);

   modrm = getUChar(delta);
   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIRegE(sz, pfx, modrm) );
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
       nameIRegG(sz, pfx, modrm));

   /* First, widen src to 64 bits if it is not already. */
   assign( src64, widenUto64(mkexpr(src)) );

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.
Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
         OFFB_CC_DEP1,
         IRExpr_ITE( mkexpr(srcB),
                     /* src!=0 */
                     mkU64(0),
                     /* src==0 */
                     mkU64(AMD64G_CC_MASK_Z)
                     )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
      But anyway, amd64 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

          bsf64:  if src == 0 then {dst is unchanged}
                              else Ctz64(src)

          bsr64:  if src == 0 then {dst is unchanged}
                              else 63 - Clz64(src)

          bsf32:  if src == 0 then {dst is unchanged}
                              else Ctz64(32Uto64(src))

          bsr32:  if src == 0 then {dst is unchanged}
                              else 63 - Clz64(32Uto64(src))

          bsf16:  if src == 0 then {dst is unchanged}
                              else Ctz64(32Uto64(16Uto32(src)))

          bsr16:  if src == 0 then {dst is unchanged}
                              else 63 - Clz64(32Uto64(16Uto32(src)))
   */

   /* The main computation, guarding against zero. */
   assign( dst64,
           IRExpr_ITE(
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz64, mkexpr(src64))
                   : binop(Iop_Sub64,
                           mkU64(63),
                           unop(Iop_Clz64, mkexpr(src64))),
              /* src == 0 -- leave dst unchanged */
              widenUto64( getIRegG( sz, pfx, modrm ) )
           )
         );

   if (sz == 2)
      assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
   else
   if (sz == 4)
      assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
   else
      assign( dst, mkexpr(dst64) );

   /* dump result back */
   putIRegG( sz, pfx, modrm, mkexpr(dst) );

   return delta;
}


/* swap rAX with the reg specified by reg and REX.B */
static
void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4 || sz == 8);
   vassert(regLo3 < 8);
   if (sz == 8) {
      assign( t1, getIReg64(R_RAX) );
      assign( t2, getIRegRexB(8, pfx, regLo3) );
      putIReg64( R_RAX, mkexpr(t2) );
      putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
   } else if (sz == 4) {
      assign( t1, getIReg32(R_RAX) );
      assign( t2, getIRegRexB(4, pfx, regLo3) );
      putIReg32( R_RAX, mkexpr(t2) );
      putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
   } else {
      assign( t1, getIReg16(R_RAX) );
      assign( t2, getIRegRexB(2, pfx, regLo3) );
      putIReg16( R_RAX, mkexpr(t2) );
      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
   }
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIRegRAX(sz),
       nameIRegRexB(sz,pfx, regLo3));
}


static
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
                                    -- retain the old O flag
      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
   */
   ULong  mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                       |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I64);
   assign( oldflags, mk_amd64g_calculate_rflags_all() );
   /* Switch the thunk to 'copy' mode: DEP1 directly holds the flags. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or64,
               binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
               binop(Iop_And64,
                     /* %AH is bits 8..15 of %RAX */
                     binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
                     mkU64(mask_SZACP))
              )
   ));
}


static
void codegen_LAHF ( void  )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* rax_with_hole;
   IRExpr* new_byte;
   IRExpr* new_rax;
   ULong   mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                        |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;

   IRTemp  flags = newTemp(Ity_I64);
   assign( flags, mk_amd64g_calculate_rflags_all() );

   /* clear bits 8..15 of RAX (the AH byte) */
   rax_with_hole
      = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
   /* 1<<1: eflags bit 1 always reads as 1 */
   new_byte
      = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
                        mkU64(1<<1));
   new_rax
      = binop(Iop_Or64, rax_with_hole,
                        binop(Iop_Shl64, new_byte, mkU8(8)));
   putIReg64(R_RAX, new_rax);
}


static
ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
                        const VexAbiInfo*  vbi,
                        Prefix       pfx,
                        Int          size,
                        Long         delta0 )
{
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */

   /* Decide whether F2 or F3 are acceptable.  Never for register
      case, but for the memory case, one or the other is OK provided
      LOCK is also present. */
   if (epartIsReg(rm)) {
      if (haveF2orF3(pfx)) {
         *ok = False;
         return delta0;
      }
   } else {
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
            *ok = False;
            return delta0;
         }
      }
   }

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( dest, getIRegE(size, pfx, rm) );
      delta0++;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      putIRegE(size, pfx, rm, mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm),
                               nameIRegE(size,pfx,rm) );
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      /* case 2 */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      delta0 += len;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* case 3 */
      /* src is new value.  acc is expected value.  dest is old value.
Compute success from the output of the IRCAS, and steer the
         new value for RAX accordingly: in case of success, RAX is
         unchanged. */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      delta0 += len;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else vassert(0);

   *ok = True;
   return delta0;
}


/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G
*/
static
ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
                     Prefix        pfx,
                     Int           sz,
                     AMD64Condcode cond,
                     Long          delta0 )
{
   UChar rm  = getUChar(delta0);
   HChar dis_buf[50];
   Int   len;

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);

   if (epartIsReg(rm)) {
      assign( tmps, getIRegE(sz, pfx, rm) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),
                            mkexpr(tmps),
                            mkexpr(tmpd) )
              );
      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            nameIRegE(sz,pfx,rm),
                            nameIRegG(sz,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   /* NOTE(review): the load is done unconditionally, even if cond is
      false -- this matches how cmov from memory is commonly handled
      here, but means the address must be accessible either way. */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),
                            mkexpr(tmps),
                            mkexpr(tmpd) )
              );

      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            dis_buf,
                            nameIRegG(sz,pfx,rm));
      return len+delta0;
   }
}


static
ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
                     const VexAbiInfo* vbi,
                     Prefix pfx, Int sz, Long delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd, getIRegE(sz, pfx, rm) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      putIRegE(sz, pfx, rm, mkexpr(tmpt1));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
      *decode_ok = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
                           mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}

//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
//..
//.. static
//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
//.. {
//..    Int    len;
//..    IRTemp addr;
//..    UChar  rm  = getUChar(delta0);
//..    HChar  dis_buf[50];
//..
//..    if (epartIsReg(rm)) {
//..       putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
//..       DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
//..       return 1+delta0;
//..    } else {
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
//..       DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
//..       return len+delta0;
//..    }
//.. }
//..
//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem).  If
//..    dst is ireg and sz==4, zero out top half of it.  */
//..
//.. static
//.. UInt dis_mov_Sw_Ew ( UChar sorb,
//..                      Int   sz,
//..                      UInt  delta0 )
//.. {
//..    Int    len;
//..    IRTemp addr;
//..    UChar  rm  = getUChar(delta0);
//..    HChar  dis_buf[50];
//..
//..    vassert(sz == 2 || sz == 4);
//..
//..    if (epartIsReg(rm)) {
//..       if (sz == 4)
//..          putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
//..       else
//..          putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
//..
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
//..       return 1+delta0;
//..    } else {
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
//..       return len+delta0;
//..    }
//.. }
//..
//..
//.. static
//.. void dis_push_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//..
//..     assign( t1, getSReg(sreg) );
//..     assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
//..     putIReg(4, R_ESP, mkexpr(ta));
//..     storeLE( mkexpr(ta), mkexpr(t1) );
//..
//..     DIP("pushw %s\n", nameSReg(sreg));
//.. }
//..
//.. static
//.. void dis_pop_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//..
//..     assign( ta, getIReg(4, R_ESP) );
//..     assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
//..
//..     putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
//..     putSReg( sreg, mkexpr(t1) );
//..     DIP("pop %s\n", nameSReg(sreg));
//.. }

/* Translate 'ret $d64': pop the return address off the stack, bump
   RSP past it plus the optional immediate, and jump to the popped
   address. */
static
void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
{
   IRTemp t1 = newTemp(Ity_I64);
   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);
   assign(t1, getIReg64(R_RSP));
   assign(t2, loadLE(Ity_I64,mkexpr(t1)));
   /* 8 for the return address itself, plus d64 for 'ret imm16' forms */
   assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
   putIReg64(R_RSP, mkexpr(t3));
   make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
   jmp_treg(dres, Ijk_Ret, t2);
   vassert(dres->whatNext == Dis_StopHere);
}


/*------------------------------------------------------------*/
/*--- SSE/SSE2/SSE3 helpers                                ---*/
/*------------------------------------------------------------*/

/* Indicates whether the op requires a rounding-mode argument.  Note
   that this covers only vector floating point arithmetic ops, and
   omits the scalar ones that need rounding modes.  Note also that
   inconsistencies here will get picked up later by the IR sanity
   checker, so this isn't correctness-critical. */
static Bool requiresRMode ( IROp op )
{
   switch (op) {
      /* 128 bit ops */
      case Iop_Add32Fx4: case Iop_Sub32Fx4:
      case Iop_Mul32Fx4: case Iop_Div32Fx4:
      case Iop_Add64Fx2: case Iop_Sub64Fx2:
      case Iop_Mul64Fx2: case Iop_Div64Fx2:
      /* 256 bit ops */
      case Iop_Add32Fx8: case Iop_Sub32Fx8:
      case Iop_Mul32Fx8: case Iop_Div32Fx8:
      case Iop_Add64Fx4: case Iop_Sub64Fx4:
      case Iop_Mul64Fx4: case Iop_Div64Fx4:
         return True;
      default:
         break;
   }
   return False;
}


/* Worker function; do not call directly.
   Handles full width G = G `op` E   and   G = (not G) `op` E.
8698 */ 8699 8700 static ULong dis_SSE_E_to_G_all_wrk ( 8701 const VexAbiInfo* vbi, 8702 Prefix pfx, Long delta, 8703 const HChar* opname, IROp op, 8704 Bool invertG 8705 ) 8706 { 8707 HChar dis_buf[50]; 8708 Int alen; 8709 IRTemp addr; 8710 UChar rm = getUChar(delta); 8711 Bool needsRMode = requiresRMode(op); 8712 IRExpr* gpart 8713 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8714 : getXMMReg(gregOfRexRM(pfx,rm)); 8715 if (epartIsReg(rm)) { 8716 putXMMReg( 8717 gregOfRexRM(pfx,rm), 8718 needsRMode 8719 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8720 gpart, 8721 getXMMReg(eregOfRexRM(pfx,rm))) 8722 : binop(op, gpart, 8723 getXMMReg(eregOfRexRM(pfx,rm))) 8724 ); 8725 DIP("%s %s,%s\n", opname, 8726 nameXMMReg(eregOfRexRM(pfx,rm)), 8727 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8728 return delta+1; 8729 } else { 8730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8731 putXMMReg( 8732 gregOfRexRM(pfx,rm), 8733 needsRMode 8734 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8735 gpart, 8736 loadLE(Ity_V128, mkexpr(addr))) 8737 : binop(op, gpart, 8738 loadLE(Ity_V128, mkexpr(addr))) 8739 ); 8740 DIP("%s %s,%s\n", opname, 8741 dis_buf, 8742 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8743 return delta+alen; 8744 } 8745 } 8746 8747 8748 /* All lanes SSE binary operation, G = G `op` E. */ 8749 8750 static 8751 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi, 8752 Prefix pfx, Long delta, 8753 const HChar* opname, IROp op ) 8754 { 8755 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8756 } 8757 8758 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8759 8760 static 8761 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi, 8762 Prefix pfx, Long delta, 8763 const HChar* opname, IROp op ) 8764 { 8765 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8766 } 8767 8768 8769 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. 
/* Lowest 32-bit lane only SSE binary operation, G = G `op` E.
   Returns the updated delta.  The op itself (a F0x4-style op) only
   modifies the low lane, so the rest of G passes through. */

static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRexRM(pfx,rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}


/* Lower 64-bit lane only SSE binary operation, G = G `op` E.
   Same scheme as dis_SSE_E_to_G_lo32 but for F0x2-style ops. */

static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRexRM(pfx,rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}


/* All lanes unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_all (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   if (epartIsReg(rm)) {
      IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRexRM(pfx,rm), res );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRexRM(pfx,rm), res );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}
/* Lowest 32-bit lane only unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_lo32 (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);   /* G as it currently stands */
   IRTemp  oldG1 = newTemp(Ity_V128);   /* G with E's low lane spliced in */

   assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}


/* Lowest 64-bit lane only unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_lo64 (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);   /* G as it currently stands */
   IRTemp  oldG1 = newTemp(Ity_V128);   /* G with E's low lane spliced in */

   assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}


/* SSE integer binary operation:
      G = G `op` E   (eLeft == False)
      G = E `op` G   (eLeft == True)
   eLeft matters for non-commutative ops (subtracts, compares). */
static ULong dis_SSEint_E_to_G(
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op,
                Bool   eLeft
             )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      epart = getXMMReg(eregOfRexRM(pfx,rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += 1;
   } else {
      addr  = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   putXMMReg( gregOfRexRM(pfx,rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}
/* Helper for doing SSE FP comparisons.  False return ==> unhandled.
   This is all a bit of a kludge in that it ignores the subtleties of
   ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
   spec.

   The triple is first computed in terms of the 32Fx4 ops and then
   converted below to match the requested width (sz: 4 or 8 byte
   lanes) and laneage (all_lanes: full vector vs lowest lane only). */
static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
                           /*OUT*/IROp* opP,
                           /*OUT*/Bool* postNotP,
                           UInt imm8, Bool all_lanes, Int sz )
{
   /* imm8 predicates 32 and up are not defined. */
   if (imm8 >= 32) return False;

   /* First, compute a (preSwap, op, postNot) triple from
      the supplied imm8. */
   Bool pre = False;
   IROp op  = Iop_INVALID;
   Bool not = False;

#  define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
   // If you add a case here, add a corresponding test for both VCMPSD_128
   // and VCMPSS_128 in avx-1.c.
   switch (imm8) {
      // "O" = ordered, "U" = unordered
      // "Q" = non-signalling (quiet), "S" = signalling
      //
      //             swap operands?
      //             |
      //             |      cmp op          invert after?
      //             |      |               |
      //             v      v               v
      case 0x0:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
      case 0x1:  XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
      case 0x2:  XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
      case 0x3:  XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
      case 0x4:  XXX(False, Iop_CmpEQ32Fx4, True);  break; // NEQ_UQ
      case 0x5:  XXX(False, Iop_CmpLT32Fx4, True);  break; // NLT_US
      case 0x6:  XXX(False, Iop_CmpLE32Fx4, True);  break; // NLE_US
      case 0x7:  XXX(False, Iop_CmpUN32Fx4, True);  break; // ORD_Q
      case 0x8:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
      case 0x9:  XXX(True,  Iop_CmpLE32Fx4, True);  break; // NGE_US
      /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns] */
      case 0xA:  XXX(True,  Iop_CmpLT32Fx4, True);  break; // NGT_US
      // 0xB  FALSE_OQ
      // 0xC: this isn't really right because it returns all-1s when
      // either operand is a NaN, and it should return all-0s.
      case 0xC:  XXX(False, Iop_CmpEQ32Fx4, True);  break; // NEQ_OQ
      case 0xD:  XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OS
      case 0xE:  XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OS
      // 0xF  TRUE_UQ
      // 0x10 EQ_OS
      case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
      case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
      // 0x13 UNORD_S
      // 0x14 NEQ_US
      // 0x15 NLT_UQ
      case 0x16: XXX(False, Iop_CmpLE32Fx4, True);  break; // NLE_UQ
      // 0x17 ORD_S
      // 0x18 EQ_US
      // 0x19 NGE_UQ
      // 0x1A NGT_UQ
      // 0x1B FALSE_OS
      // 0x1C NEQ_OS
      // 0x1D GE_OQ
      case 0x1E: XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OQ
      // 0x1F TRUE_US
      /* Don't forget to add test cases to VCMPSS_128_<imm8> in
         avx-1.c if new cases turn up. */
      default: break;
   }
#  undef XXX
   if (op == Iop_INVALID) return False;

   /* Now convert the op into one with the same arithmetic but that is
      correct for the width and laneage requirements. */

   /**/ if (sz == 4 && all_lanes) {
      /* Already in the right form; identity mapping kept for
         uniformity with the other three cases. */
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
         default: vassert(0);
      }
   }
   else if (sz == 4 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
         default: vassert(0);
      }
   }
   else {
      vpanic("findSSECmpOp(amd64,guest)");
   }

   *preSwapP = pre; *opP = op; *postNotP = not;
   return True;
}


/* Handles SSE 32F/64F comparisons.  It can fail, in which case it
   returns the original delta to indicate failure. */
*/ 9128 9129 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi, 9130 Prefix pfx, Long delta, 9131 const HChar* opname, Bool all_lanes, Int sz ) 9132 { 9133 Long delta0 = delta; 9134 HChar dis_buf[50]; 9135 Int alen; 9136 UInt imm8; 9137 IRTemp addr; 9138 Bool preSwap = False; 9139 IROp op = Iop_INVALID; 9140 Bool postNot = False; 9141 IRTemp plain = newTemp(Ity_V128); 9142 UChar rm = getUChar(delta); 9143 UShort mask = 0; 9144 vassert(sz == 4 || sz == 8); 9145 if (epartIsReg(rm)) { 9146 imm8 = getUChar(delta+1); 9147 if (imm8 >= 8) return delta0; /* FAIL */ 9148 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9149 if (!ok) return delta0; /* FAIL */ 9150 vassert(!preSwap); /* never needed for imm8 < 8 */ 9151 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 9152 getXMMReg(eregOfRexRM(pfx,rm))) ); 9153 delta += 2; 9154 DIP("%s $%d,%s,%s\n", opname, 9155 (Int)imm8, 9156 nameXMMReg(eregOfRexRM(pfx,rm)), 9157 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9158 } else { 9159 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 9160 imm8 = getUChar(delta+alen); 9161 if (imm8 >= 8) return delta0; /* FAIL */ 9162 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9163 if (!ok) return delta0; /* FAIL */ 9164 vassert(!preSwap); /* never needed for imm8 < 8 */ 9165 assign( plain, 9166 binop( 9167 op, 9168 getXMMReg(gregOfRexRM(pfx,rm)), 9169 all_lanes 9170 ? loadLE(Ity_V128, mkexpr(addr)) 9171 : sz == 8 9172 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 9173 : /*sz==4*/ 9174 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 9175 ) 9176 ); 9177 delta += alen+1; 9178 DIP("%s $%d,%s,%s\n", opname, 9179 (Int)imm8, 9180 dis_buf, 9181 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9182 } 9183 9184 if (postNot && all_lanes) { 9185 putXMMReg( gregOfRexRM(pfx,rm), 9186 unop(Iop_NotV128, mkexpr(plain)) ); 9187 } 9188 else 9189 if (postNot && !all_lanes) { 9190 mask = toUShort(sz==4 ? 
0x000F : 0x00FF); 9191 putXMMReg( gregOfRexRM(pfx,rm), 9192 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 9193 } 9194 else { 9195 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 9196 } 9197 9198 return delta; 9199 } 9200 9201 9202 /* Vector by scalar shift of G by the amount specified at the bottom 9203 of E. */ 9204 9205 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi, 9206 Prefix pfx, Long delta, 9207 const HChar* opname, IROp op ) 9208 { 9209 HChar dis_buf[50]; 9210 Int alen, size; 9211 IRTemp addr; 9212 Bool shl, shr, sar; 9213 UChar rm = getUChar(delta); 9214 IRTemp g0 = newTemp(Ity_V128); 9215 IRTemp g1 = newTemp(Ity_V128); 9216 IRTemp amt = newTemp(Ity_I64); 9217 IRTemp amt8 = newTemp(Ity_I8); 9218 if (epartIsReg(rm)) { 9219 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) ); 9220 DIP("%s %s,%s\n", opname, 9221 nameXMMReg(eregOfRexRM(pfx,rm)), 9222 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9223 delta++; 9224 } else { 9225 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9226 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 9227 DIP("%s %s,%s\n", opname, 9228 dis_buf, 9229 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9230 delta += alen; 9231 } 9232 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9233 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 9234 9235 shl = shr = sar = False; 9236 size = 0; 9237 switch (op) { 9238 case Iop_ShlN16x8: shl = True; size = 32; break; 9239 case Iop_ShlN32x4: shl = True; size = 32; break; 9240 case Iop_ShlN64x2: shl = True; size = 64; break; 9241 case Iop_SarN16x8: sar = True; size = 16; break; 9242 case Iop_SarN32x4: sar = True; size = 32; break; 9243 case Iop_ShrN16x8: shr = True; size = 16; break; 9244 case Iop_ShrN32x4: shr = True; size = 32; break; 9245 case Iop_ShrN64x2: shr = True; size = 64; break; 9246 default: vassert(0); 9247 } 9248 9249 if (shl || shr) { 9250 assign( 9251 g1, 9252 IRExpr_ITE( 9253 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9254 binop(op, mkexpr(g0), mkexpr(amt8)), 9255 mkV128(0x0000) 9256 ) 
9257 ); 9258 } else 9259 if (sar) { 9260 assign( 9261 g1, 9262 IRExpr_ITE( 9263 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9264 binop(op, mkexpr(g0), mkexpr(amt8)), 9265 binop(op, mkexpr(g0), mkU8(size-1)) 9266 ) 9267 ); 9268 } else { 9269 vassert(0); 9270 } 9271 9272 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 9273 return delta; 9274 } 9275 9276 9277 /* Vector by scalar shift of E by an immediate byte. */ 9278 9279 static 9280 ULong dis_SSE_shiftE_imm ( Prefix pfx, 9281 Long delta, const HChar* opname, IROp op ) 9282 { 9283 Bool shl, shr, sar; 9284 UChar rm = getUChar(delta); 9285 IRTemp e0 = newTemp(Ity_V128); 9286 IRTemp e1 = newTemp(Ity_V128); 9287 UChar amt, size; 9288 vassert(epartIsReg(rm)); 9289 vassert(gregLO3ofRM(rm) == 2 9290 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 9291 amt = getUChar(delta+1); 9292 delta += 2; 9293 DIP("%s $%d,%s\n", opname, 9294 (Int)amt, 9295 nameXMMReg(eregOfRexRM(pfx,rm)) ); 9296 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 9297 9298 shl = shr = sar = False; 9299 size = 0; 9300 switch (op) { 9301 case Iop_ShlN16x8: shl = True; size = 16; break; 9302 case Iop_ShlN32x4: shl = True; size = 32; break; 9303 case Iop_ShlN64x2: shl = True; size = 64; break; 9304 case Iop_SarN16x8: sar = True; size = 16; break; 9305 case Iop_SarN32x4: sar = True; size = 32; break; 9306 case Iop_ShrN16x8: shr = True; size = 16; break; 9307 case Iop_ShrN32x4: shr = True; size = 32; break; 9308 case Iop_ShrN64x2: shr = True; size = 64; break; 9309 default: vassert(0); 9310 } 9311 9312 if (shl || shr) { 9313 assign( e1, amt >= size 9314 ? mkV128(0x0000) 9315 : binop(op, mkexpr(e0), mkU8(amt)) 9316 ); 9317 } else 9318 if (sar) { 9319 assign( e1, amt >= size 9320 ? binop(op, mkexpr(e0), mkU8(size-1)) 9321 : binop(op, mkexpr(e0), mkU8(amt)) 9322 ); 9323 } else { 9324 vassert(0); 9325 } 9326 9327 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 9328 return delta; 9329 } 9330 9331 9332 /* Get the current SSE rounding mode. 
*/ 9333 9334 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 9335 { 9336 return 9337 unop( Iop_64to32, 9338 binop( Iop_And64, 9339 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 9340 mkU64(3) )); 9341 } 9342 9343 static void put_sse_roundingmode ( IRExpr* sseround ) 9344 { 9345 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 9346 stmt( IRStmt_Put( OFFB_SSEROUND, 9347 unop(Iop_32Uto64,sseround) ) ); 9348 } 9349 9350 /* Break a V128-bit value up into four 32-bit ints. */ 9351 9352 static void breakupV128to32s ( IRTemp t128, 9353 /*OUTs*/ 9354 IRTemp* t3, IRTemp* t2, 9355 IRTemp* t1, IRTemp* t0 ) 9356 { 9357 IRTemp hi64 = newTemp(Ity_I64); 9358 IRTemp lo64 = newTemp(Ity_I64); 9359 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 9360 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 9361 9362 vassert(t0 && *t0 == IRTemp_INVALID); 9363 vassert(t1 && *t1 == IRTemp_INVALID); 9364 vassert(t2 && *t2 == IRTemp_INVALID); 9365 vassert(t3 && *t3 == IRTemp_INVALID); 9366 9367 *t0 = newTemp(Ity_I32); 9368 *t1 = newTemp(Ity_I32); 9369 *t2 = newTemp(Ity_I32); 9370 *t3 = newTemp(Ity_I32); 9371 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 9372 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 9373 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 9374 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 9375 } 9376 9377 /* Construct a V128-bit value from four 32-bit ints. */ 9378 9379 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2, 9380 IRTemp t1, IRTemp t0 ) 9381 { 9382 return 9383 binop( Iop_64HLtoV128, 9384 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9385 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 9386 ); 9387 } 9388 9389 /* Break a 64-bit value up into four 16-bit ints. 
*/ 9390 9391 static void breakup64to16s ( IRTemp t64, 9392 /*OUTs*/ 9393 IRTemp* t3, IRTemp* t2, 9394 IRTemp* t1, IRTemp* t0 ) 9395 { 9396 IRTemp hi32 = newTemp(Ity_I32); 9397 IRTemp lo32 = newTemp(Ity_I32); 9398 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 9399 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 9400 9401 vassert(t0 && *t0 == IRTemp_INVALID); 9402 vassert(t1 && *t1 == IRTemp_INVALID); 9403 vassert(t2 && *t2 == IRTemp_INVALID); 9404 vassert(t3 && *t3 == IRTemp_INVALID); 9405 9406 *t0 = newTemp(Ity_I16); 9407 *t1 = newTemp(Ity_I16); 9408 *t2 = newTemp(Ity_I16); 9409 *t3 = newTemp(Ity_I16); 9410 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 9411 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 9412 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 9413 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 9414 } 9415 9416 /* Construct a 64-bit value from four 16-bit ints. */ 9417 9418 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 9419 IRTemp t1, IRTemp t0 ) 9420 { 9421 return 9422 binop( Iop_32HLto64, 9423 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 9424 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 9425 ); 9426 } 9427 9428 /* Break a V256-bit value up into four 64-bit ints. */ 9429 9430 static void breakupV256to64s ( IRTemp t256, 9431 /*OUTs*/ 9432 IRTemp* t3, IRTemp* t2, 9433 IRTemp* t1, IRTemp* t0 ) 9434 { 9435 vassert(t0 && *t0 == IRTemp_INVALID); 9436 vassert(t1 && *t1 == IRTemp_INVALID); 9437 vassert(t2 && *t2 == IRTemp_INVALID); 9438 vassert(t3 && *t3 == IRTemp_INVALID); 9439 *t0 = newTemp(Ity_I64); 9440 *t1 = newTemp(Ity_I64); 9441 *t2 = newTemp(Ity_I64); 9442 *t3 = newTemp(Ity_I64); 9443 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) ); 9444 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) ); 9445 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) ); 9446 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) ); 9447 } 9448 9449 /* Break a V256-bit value up into two V128s. 
/* Break a V256-bit value up into two V128s.  t0 gets the low half,
   t1 the high half. */

static void breakupV256toV128s ( IRTemp t256,
                                 /*OUTs*/
                                 IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_V128);
   *t1 = newTemp(Ity_V128);
   assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
   assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
}

/* Break a V256-bit value up into eight 32-bit ints, t0 lowest,
   t7 highest. */

static void breakupV256to32s ( IRTemp t256,
                               /*OUTs*/
                               IRTemp* t7, IRTemp* t6,
                               IRTemp* t5, IRTemp* t4,
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   IRTemp t128_1 = IRTemp_INVALID;
   IRTemp t128_0 = IRTemp_INVALID;
   breakupV256toV128s( t256, &t128_1, &t128_0 );
   breakupV128to32s( t128_1, t7, t6, t5, t4 );
   breakupV128to32s( t128_0, t3, t2, t1, t0 );
}

/* Break a V128-bit value up into two 64-bit ints.  t0 gets the low
   half, t1 the high half. */

static void breakupV128to64s ( IRTemp t128,
                               /*OUTs*/
                               IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_I64);
   *t1 = newTemp(Ity_I64);
   assign( *t0, unop(Iop_V128to64,   mkexpr(t128)) );
   assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
}

/* Construct a V256-bit value from eight 32-bit ints, t7 being the
   most significant lane. */

static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
                               IRTemp t5, IRTemp t4,
                               IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
                    binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
                    binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
   );
}

/* Construct a V256-bit value from four 64-bit ints, t3 being the
   most significant lane. */

static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
             binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
   );
}

/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

   (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);   /* aa's upper two lanes, sign extd */
   IRTemp aalo32s = newTemp(Ity_I64);   /* aa's lower two lanes, sign extd */
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);   /* constant 1 in each 32-bit lane */
   assign(aa, aax);
   assign(bb, bbx);
   /* Widen each 16-bit lane to 32 bits with sign extension, by
      interleaving a lane with itself and arithmetically shifting the
      32-bit result right by 16. */
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   /* Narrow the four 32-bit results back to 16-bit lanes. */
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}

/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns.  Given two 64-bit
   values (aa,bb), computes, for each lane:

   if aa_lane < 0 then - bb_lane
   else if aa_lane > 0 then bb_lane
   else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);   /* lanewise -bb */
   IRTemp negMask  = newTemp(Ity_I64);   /* all-1s in lanes where aa < 0 */
   IRTemp posMask  = newTemp(Ity_I64);   /* all-1s in lanes where aa > 0 */
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   /* Lanes where aa == 0 match neither mask and hence become zero. */
   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );

}
/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

   if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented. */
static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
{
   IRTemp res     = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);   /* lanewise -aa */
   IRTemp negMask = newTemp(Ity_I64);   /* all-1s in lanes where aa < 0 */
   IRTemp posMask = newTemp(Ity_I64);   /* complement of negMask */
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   /* Arithmetic shift by (lane width - 1) smears each lane's sign bit
      across the whole lane, giving the negative-lane mask directly. */
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
                 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
   return res;
}

/* XMM version of math_PABS_MMX: applies it to each 64-bit half. */
static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V128);
   IRTemp aaHi = newTemp(Ity_I64);
   IRTemp aaLo = newTemp(Ity_I64);
   assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
   assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
   assign(res, binop(Iop_64HLtoV128,
                     mkexpr(math_PABS_MMX(aaHi, laneszB)),
                     mkexpr(math_PABS_MMX(aaLo, laneszB))));
   return res;
}

/* Specialisations of math_PABS_XMM, since there's no easy way to do
   partial applications in C :-( */
static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 4);
}

static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 2);
}

static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 1);
}

/* YMM version of math_PABS_XMM: applies it to each 128-bit half. */
static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V256);
   IRTemp aaHi = IRTemp_INVALID;
   IRTemp aaLo = IRTemp_INVALID;
   breakupV256toV128s(aa, &aaHi, &aaLo);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PABS_XMM(aaHi, laneszB)),
                     mkexpr(math_PABS_XMM(aaLo, laneszB))));
   return res;
}

static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 4);
}

static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 2);
}

static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 1);
}

/* Concatenate hi64:lo64 (viewed as a 128-bit value) and extract the
   64 bits starting at byte offset byteShift from the bottom.  Used to
   build PALIGNR results one 64-bit chunk at a time. */
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Long byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

/* PALIGNR: concatenate dV:sV (256 bits) and extract the 128 bits
   starting at byte offset imm8.  Cases are enumerated by which
   64-bit chunks of the concatenation the result straddles; shifts
   of 32 bytes or more produce zero. */
static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp res = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   IRTemp rHi = newTemp(Ity_I64);
   IRTemp rLo = newTemp(Ity_I64);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   if (imm8 == 0) {
      assign( rHi, mkexpr(sHi) );
      assign( rLo, mkexpr(sLo) );
   }
   else if (imm8 >= 1 && imm8 <= 7) {
      assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
      assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
   }
   else if (imm8 == 8) {
      assign( rHi, mkexpr(dLo) );
      assign( rLo, mkexpr(sHi) );
   }
   else if (imm8 >= 9 && imm8 <= 15) {
      assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
      assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
   }
   else if (imm8 == 16) {
      assign( rHi, mkexpr(dHi) );
      assign( rLo, mkexpr(dLo) );
   }
   else if (imm8 >= 17 && imm8 <= 23) {
      assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
      assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly!
   mask must be (alignment - 1), i.e. have all low bits set. */
static
void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
               mkU64(0)),
         Ijk_SigSEGV,
         IRConst_U64(guest_RIP_curr_instr),
         OFFB_RIP
      )
   );
}

static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
}

static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
}

/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
9824 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 9825 9826 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 9827 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 9828 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 9829 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19 9830 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 9831 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 9832 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 9833 9834 DEC FE /1, FF /1 9835 INC FE /0, FF /0 9836 9837 NEG F6 /3, F7 /3 9838 NOT F6 /2, F7 /2 9839 9840 XCHG 86, 87 9841 9842 BTC 0F BB, 0F BA /7 9843 BTR 0F B3, 0F BA /6 9844 BTS 0F AB, 0F BA /5 9845 9846 CMPXCHG 0F B0, 0F B1 9847 CMPXCHG8B 0F C7 /1 9848 9849 XADD 0F C0, 0F C1 9850 9851 ------------------------------ 9852 9853 80 /0 = addb $imm8, rm8 9854 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 9855 82 /0 = addb $imm8, rm8 9856 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 9857 9858 00 = addb r8, rm8 9859 01 = addl r32, rm32 and addw r16, rm16 9860 9861 Same for ADD OR ADC SBB AND SUB XOR 9862 9863 FE /1 = dec rm8 9864 FF /1 = dec rm32 and dec rm16 9865 9866 FE /0 = inc rm8 9867 FF /0 = inc rm32 and inc rm16 9868 9869 F6 /3 = neg rm8 9870 F7 /3 = neg rm32 and neg rm16 9871 9872 F6 /2 = not rm8 9873 F7 /2 = not rm32 and not rm16 9874 9875 0F BB = btcw r16, rm16 and btcl r32, rm32 9876 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32 9877 9878 Same for BTS, BTR 9879 */ 9880 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc ) 9881 { 9882 switch (opc[0]) { 9883 case 0x00: case 0x01: case 0x08: case 0x09: 9884 case 0x10: case 0x11: case 0x18: case 0x19: 9885 case 0x20: case 0x21: case 0x28: case 0x29: 9886 case 0x30: case 0x31: 9887 if (!epartIsReg(opc[1])) 9888 return True; 9889 break; 9890 9891 case 0x80: case 0x81: case 0x82: case 0x83: 9892 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6 9893 && !epartIsReg(opc[1])) 9894 return True; 9895 break; 9896 9897 case 0xFE: case 0xFF: 9898 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1 9899 && 
!epartIsReg(opc[1])) 9900 return True; 9901 break; 9902 9903 case 0xF6: case 0xF7: 9904 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3 9905 && !epartIsReg(opc[1])) 9906 return True; 9907 break; 9908 9909 case 0x86: case 0x87: 9910 if (!epartIsReg(opc[1])) 9911 return True; 9912 break; 9913 9914 case 0x0F: { 9915 switch (opc[1]) { 9916 case 0xBB: case 0xB3: case 0xAB: 9917 if (!epartIsReg(opc[2])) 9918 return True; 9919 break; 9920 case 0xBA: 9921 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7 9922 && !epartIsReg(opc[2])) 9923 return True; 9924 break; 9925 case 0xB0: case 0xB1: 9926 if (!epartIsReg(opc[2])) 9927 return True; 9928 break; 9929 case 0xC7: 9930 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 9931 return True; 9932 break; 9933 case 0xC0: case 0xC1: 9934 if (!epartIsReg(opc[2])) 9935 return True; 9936 break; 9937 default: 9938 break; 9939 } /* switch (opc[1]) */ 9940 break; 9941 } 9942 9943 default: 9944 break; 9945 } /* switch (opc[0]) */ 9946 9947 return False; 9948 } 9949 9950 9951 /*------------------------------------------------------------*/ 9952 /*--- ---*/ 9953 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/ 9954 /*--- ---*/ 9955 /*------------------------------------------------------------*/ 9956 9957 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx, 9958 Long delta, Bool isAvx, UChar opc ) 9959 { 9960 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/); 9961 Int alen = 0; 9962 HChar dis_buf[50]; 9963 IRTemp argL = newTemp(Ity_F64); 9964 IRTemp argR = newTemp(Ity_F64); 9965 UChar modrm = getUChar(delta); 9966 IRTemp addr = IRTemp_INVALID; 9967 if (epartIsReg(modrm)) { 9968 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), 9969 0/*lowest lane*/ ) ); 9970 delta += 1; 9971 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9972 opc==0x2E ? 
"u" : "", 9973 nameXMMReg(eregOfRexRM(pfx,modrm)), 9974 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9975 } else { 9976 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9977 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9978 delta += alen; 9979 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9980 opc==0x2E ? "u" : "", 9981 dis_buf, 9982 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9983 } 9984 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 9985 0/*lowest lane*/ ) ); 9986 9987 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9988 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9989 stmt( IRStmt_Put( 9990 OFFB_CC_DEP1, 9991 binop( Iop_And64, 9992 unop( Iop_32Uto64, 9993 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 9994 mkU64(0x45) 9995 ))); 9996 return delta; 9997 } 9998 9999 10000 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx, 10001 Long delta, Bool isAvx, UChar opc ) 10002 { 10003 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/); 10004 Int alen = 0; 10005 HChar dis_buf[50]; 10006 IRTemp argL = newTemp(Ity_F32); 10007 IRTemp argR = newTemp(Ity_F32); 10008 UChar modrm = getUChar(delta); 10009 IRTemp addr = IRTemp_INVALID; 10010 if (epartIsReg(modrm)) { 10011 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 10012 0/*lowest lane*/ ) ); 10013 delta += 1; 10014 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10015 opc==0x2E ? "u" : "", 10016 nameXMMReg(eregOfRexRM(pfx,modrm)), 10017 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10018 } else { 10019 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10020 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 10021 delta += alen; 10022 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10023 opc==0x2E ? 
"u" : "", 10024 dis_buf, 10025 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10026 } 10027 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 10028 0/*lowest lane*/ ) ); 10029 10030 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10031 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10032 stmt( IRStmt_Put( 10033 OFFB_CC_DEP1, 10034 binop( Iop_And64, 10035 unop( Iop_32Uto64, 10036 binop(Iop_CmpF64, 10037 unop(Iop_F32toF64,mkexpr(argL)), 10038 unop(Iop_F32toF64,mkexpr(argR)))), 10039 mkU64(0x45) 10040 ))); 10041 return delta; 10042 } 10043 10044 10045 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx, 10046 Long delta, Bool writesYmm ) 10047 { 10048 Int order; 10049 Int alen = 0; 10050 HChar dis_buf[50]; 10051 IRTemp sV = newTemp(Ity_V128); 10052 UChar modrm = getUChar(delta); 10053 const HChar* strV = writesYmm ? "v" : ""; 10054 IRTemp addr = IRTemp_INVALID; 10055 if (epartIsReg(modrm)) { 10056 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10057 order = (Int)getUChar(delta+1); 10058 delta += 1+1; 10059 DIP("%spshufd $%d,%s,%s\n", strV, order, 10060 nameXMMReg(eregOfRexRM(pfx,modrm)), 10061 nameXMMReg(gregOfRexRM(pfx,modrm))); 10062 } else { 10063 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10064 1/*byte after the amode*/ ); 10065 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10066 order = (Int)getUChar(delta+alen); 10067 delta += alen+1; 10068 DIP("%spshufd $%d,%s,%s\n", strV, order, 10069 dis_buf, 10070 nameXMMReg(gregOfRexRM(pfx,modrm))); 10071 } 10072 10073 IRTemp s3, s2, s1, s0; 10074 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10075 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10076 10077 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10078 IRTemp dV = newTemp(Ity_V128); 10079 assign(dV, 10080 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 10081 SEL((order>>2)&3), SEL((order>>0)&3) ) 10082 ); 10083 # undef SEL 10084 10085 (writesYmm ? 
putYMMRegLoAndZU : putXMMReg) 10086 (gregOfRexRM(pfx,modrm), mkexpr(dV)); 10087 return delta; 10088 } 10089 10090 10091 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 10092 { 10093 Int order; 10094 Int alen = 0; 10095 HChar dis_buf[50]; 10096 IRTemp sV = newTemp(Ity_V256); 10097 UChar modrm = getUChar(delta); 10098 IRTemp addr = IRTemp_INVALID; 10099 UInt rG = gregOfRexRM(pfx,modrm); 10100 if (epartIsReg(modrm)) { 10101 UInt rE = eregOfRexRM(pfx,modrm); 10102 assign( sV, getYMMReg(rE) ); 10103 order = (Int)getUChar(delta+1); 10104 delta += 1+1; 10105 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG)); 10106 } else { 10107 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10108 1/*byte after the amode*/ ); 10109 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 10110 order = (Int)getUChar(delta+alen); 10111 delta += alen+1; 10112 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG)); 10113 } 10114 10115 IRTemp s[8]; 10116 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 10117 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 10118 &s[3], &s[2], &s[1], &s[0] ); 10119 10120 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)], 10121 s[4 + ((order>>4)&3)], 10122 s[4 + ((order>>2)&3)], 10123 s[4 + ((order>>0)&3)], 10124 s[0 + ((order>>6)&3)], 10125 s[0 + ((order>>4)&3)], 10126 s[0 + ((order>>2)&3)], 10127 s[0 + ((order>>0)&3)] ) ); 10128 return delta; 10129 } 10130 10131 10132 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm ) 10133 { 10134 IRTemp dV = newTemp(Ity_V128); 10135 IRTemp hi64 = newTemp(Ity_I64); 10136 IRTemp lo64 = newTemp(Ity_I64); 10137 IRTemp hi64r = newTemp(Ity_I64); 10138 IRTemp lo64r = newTemp(Ity_I64); 10139 10140 vassert(imm >= 0 && imm <= 255); 10141 if (imm >= 16) { 10142 assign(dV, mkV128(0x0000)); 10143 return dV; 10144 } 10145 10146 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10147 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10148 10149 if (imm == 0) { 10150 assign( 
lo64r, mkexpr(lo64) ); 10151 assign( hi64r, mkexpr(hi64) ); 10152 } 10153 else 10154 if (imm == 8) { 10155 assign( hi64r, mkU64(0) ); 10156 assign( lo64r, mkexpr(hi64) ); 10157 } 10158 else 10159 if (imm > 8) { 10160 assign( hi64r, mkU64(0) ); 10161 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) )); 10162 } else { 10163 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) )); 10164 assign( lo64r, 10165 binop( Iop_Or64, 10166 binop(Iop_Shr64, mkexpr(lo64), 10167 mkU8(8 * imm)), 10168 binop(Iop_Shl64, mkexpr(hi64), 10169 mkU8(8 * (8 - imm)) ) 10170 ) 10171 ); 10172 } 10173 10174 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10175 return dV; 10176 } 10177 10178 10179 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm ) 10180 { 10181 IRTemp dV = newTemp(Ity_V128); 10182 IRTemp hi64 = newTemp(Ity_I64); 10183 IRTemp lo64 = newTemp(Ity_I64); 10184 IRTemp hi64r = newTemp(Ity_I64); 10185 IRTemp lo64r = newTemp(Ity_I64); 10186 10187 vassert(imm >= 0 && imm <= 255); 10188 if (imm >= 16) { 10189 assign(dV, mkV128(0x0000)); 10190 return dV; 10191 } 10192 10193 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10194 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10195 10196 if (imm == 0) { 10197 assign( lo64r, mkexpr(lo64) ); 10198 assign( hi64r, mkexpr(hi64) ); 10199 } 10200 else 10201 if (imm == 8) { 10202 assign( lo64r, mkU64(0) ); 10203 assign( hi64r, mkexpr(lo64) ); 10204 } 10205 else 10206 if (imm > 8) { 10207 assign( lo64r, mkU64(0) ); 10208 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) )); 10209 } else { 10210 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) )); 10211 assign( hi64r, 10212 binop( Iop_Or64, 10213 binop(Iop_Shl64, mkexpr(hi64), 10214 mkU8(8 * imm)), 10215 binop(Iop_Shr64, mkexpr(lo64), 10216 mkU8(8 * (8 - imm)) ) 10217 ) 10218 ); 10219 } 10220 10221 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10222 return dV; 10223 } 10224 10225 10226 static Long dis_CVTxSD2SI 
( const VexAbiInfo* vbi, Prefix pfx, 10227 Long delta, Bool isAvx, UChar opc, Int sz ) 10228 { 10229 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/); 10230 HChar dis_buf[50]; 10231 Int alen = 0; 10232 UChar modrm = getUChar(delta); 10233 IRTemp addr = IRTemp_INVALID; 10234 IRTemp rmode = newTemp(Ity_I32); 10235 IRTemp f64lo = newTemp(Ity_F64); 10236 Bool r2zero = toBool(opc == 0x2C); 10237 10238 if (epartIsReg(modrm)) { 10239 delta += 1; 10240 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10241 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10242 nameXMMReg(eregOfRexRM(pfx,modrm)), 10243 nameIReg(sz, gregOfRexRM(pfx,modrm), 10244 False)); 10245 } else { 10246 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10247 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 10248 delta += alen; 10249 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10250 dis_buf, 10251 nameIReg(sz, gregOfRexRM(pfx,modrm), 10252 False)); 10253 } 10254 10255 if (r2zero) { 10256 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10257 } else { 10258 assign( rmode, get_sse_roundingmode() ); 10259 } 10260 10261 if (sz == 4) { 10262 putIReg32( gregOfRexRM(pfx,modrm), 10263 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 10264 } else { 10265 vassert(sz == 8); 10266 putIReg64( gregOfRexRM(pfx,modrm), 10267 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 10268 } 10269 10270 return delta; 10271 } 10272 10273 10274 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx, 10275 Long delta, Bool isAvx, UChar opc, Int sz ) 10276 { 10277 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/); 10278 HChar dis_buf[50]; 10279 Int alen = 0; 10280 UChar modrm = getUChar(delta); 10281 IRTemp addr = IRTemp_INVALID; 10282 IRTemp rmode = newTemp(Ity_I32); 10283 IRTemp f32lo = newTemp(Ity_F32); 10284 Bool r2zero = toBool(opc == 0x2C); 10285 10286 if (epartIsReg(modrm)) { 10287 delta += 1; 10288 assign(f32lo, 
getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 10289 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10290 nameXMMReg(eregOfRexRM(pfx,modrm)), 10291 nameIReg(sz, gregOfRexRM(pfx,modrm), 10292 False)); 10293 } else { 10294 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10295 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 10296 delta += alen; 10297 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10298 dis_buf, 10299 nameIReg(sz, gregOfRexRM(pfx,modrm), 10300 False)); 10301 } 10302 10303 if (r2zero) { 10304 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10305 } else { 10306 assign( rmode, get_sse_roundingmode() ); 10307 } 10308 10309 if (sz == 4) { 10310 putIReg32( gregOfRexRM(pfx,modrm), 10311 binop( Iop_F64toI32S, 10312 mkexpr(rmode), 10313 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10314 } else { 10315 vassert(sz == 8); 10316 putIReg64( gregOfRexRM(pfx,modrm), 10317 binop( Iop_F64toI64S, 10318 mkexpr(rmode), 10319 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10320 } 10321 10322 return delta; 10323 } 10324 10325 10326 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx, 10327 Long delta, Bool isAvx ) 10328 { 10329 IRTemp addr = IRTemp_INVALID; 10330 Int alen = 0; 10331 HChar dis_buf[50]; 10332 IRTemp f32lo = newTemp(Ity_F32); 10333 IRTemp f32hi = newTemp(Ity_F32); 10334 UChar modrm = getUChar(delta); 10335 UInt rG = gregOfRexRM(pfx,modrm); 10336 if (epartIsReg(modrm)) { 10337 UInt rE = eregOfRexRM(pfx,modrm); 10338 assign( f32lo, getXMMRegLane32F(rE, 0) ); 10339 assign( f32hi, getXMMRegLane32F(rE, 1) ); 10340 delta += 1; 10341 DIP("%scvtps2pd %s,%s\n", 10342 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10343 } else { 10344 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10345 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 10346 assign( f32hi, loadLE(Ity_F32, 10347 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10348 delta += alen; 10349 DIP("%scvtps2pd %s,%s\n", 10350 isAvx ? 
"v" : "", dis_buf, nameXMMReg(rG)); 10351 } 10352 10353 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) ); 10354 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) ); 10355 if (isAvx) 10356 putYMMRegLane128( rG, 1, mkV128(0)); 10357 return delta; 10358 } 10359 10360 10361 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx, 10362 Long delta ) 10363 { 10364 IRTemp addr = IRTemp_INVALID; 10365 Int alen = 0; 10366 HChar dis_buf[50]; 10367 IRTemp f32_0 = newTemp(Ity_F32); 10368 IRTemp f32_1 = newTemp(Ity_F32); 10369 IRTemp f32_2 = newTemp(Ity_F32); 10370 IRTemp f32_3 = newTemp(Ity_F32); 10371 UChar modrm = getUChar(delta); 10372 UInt rG = gregOfRexRM(pfx,modrm); 10373 if (epartIsReg(modrm)) { 10374 UInt rE = eregOfRexRM(pfx,modrm); 10375 assign( f32_0, getXMMRegLane32F(rE, 0) ); 10376 assign( f32_1, getXMMRegLane32F(rE, 1) ); 10377 assign( f32_2, getXMMRegLane32F(rE, 2) ); 10378 assign( f32_3, getXMMRegLane32F(rE, 3) ); 10379 delta += 1; 10380 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 10381 } else { 10382 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10383 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) ); 10384 assign( f32_1, loadLE(Ity_F32, 10385 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10386 assign( f32_2, loadLE(Ity_F32, 10387 binop(Iop_Add64,mkexpr(addr),mkU64(8))) ); 10388 assign( f32_3, loadLE(Ity_F32, 10389 binop(Iop_Add64,mkexpr(addr),mkU64(12))) ); 10390 delta += alen; 10391 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG)); 10392 } 10393 10394 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) ); 10395 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) ); 10396 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) ); 10397 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) ); 10398 return delta; 10399 } 10400 10401 10402 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx, 10403 Long delta, Bool isAvx ) 10404 { 10405 IRTemp addr = IRTemp_INVALID; 10406 Int alen 
= 0; 10407 HChar dis_buf[50]; 10408 UChar modrm = getUChar(delta); 10409 UInt rG = gregOfRexRM(pfx,modrm); 10410 IRTemp argV = newTemp(Ity_V128); 10411 IRTemp rmode = newTemp(Ity_I32); 10412 if (epartIsReg(modrm)) { 10413 UInt rE = eregOfRexRM(pfx,modrm); 10414 assign( argV, getXMMReg(rE) ); 10415 delta += 1; 10416 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10417 nameXMMReg(rE), nameXMMReg(rG)); 10418 } else { 10419 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10420 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10421 delta += alen; 10422 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10423 dis_buf, nameXMMReg(rG) ); 10424 } 10425 10426 assign( rmode, get_sse_roundingmode() ); 10427 IRTemp t0 = newTemp(Ity_F64); 10428 IRTemp t1 = newTemp(Ity_F64); 10429 assign( t0, unop(Iop_ReinterpI64asF64, 10430 unop(Iop_V128to64, mkexpr(argV))) ); 10431 assign( t1, unop(Iop_ReinterpI64asF64, 10432 unop(Iop_V128HIto64, mkexpr(argV))) ); 10433 10434 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) ) 10435 putXMMRegLane32( rG, 3, mkU32(0) ); 10436 putXMMRegLane32( rG, 2, mkU32(0) ); 10437 putXMMRegLane32F( rG, 1, CVT(t1) ); 10438 putXMMRegLane32F( rG, 0, CVT(t0) ); 10439 # undef CVT 10440 if (isAvx) 10441 putYMMRegLane128( rG, 1, mkV128(0) ); 10442 10443 return delta; 10444 } 10445 10446 10447 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 10448 Long delta, Bool isAvx, Bool r2zero ) 10449 { 10450 IRTemp addr = IRTemp_INVALID; 10451 Int alen = 0; 10452 HChar dis_buf[50]; 10453 UChar modrm = getUChar(delta); 10454 IRTemp argV = newTemp(Ity_V128); 10455 IRTemp rmode = newTemp(Ity_I32); 10456 UInt rG = gregOfRexRM(pfx,modrm); 10457 IRTemp t0, t1, t2, t3; 10458 10459 if (epartIsReg(modrm)) { 10460 UInt rE = eregOfRexRM(pfx,modrm); 10461 assign( argV, getXMMReg(rE) ); 10462 delta += 1; 10463 DIP("%scvt%sps2dq %s,%s\n", 10464 isAvx ? "v" : "", r2zero ? 
"t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10465 } else { 10466 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10467 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10468 delta += alen; 10469 DIP("%scvt%sps2dq %s,%s\n", 10470 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10471 } 10472 10473 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10474 : get_sse_roundingmode() ); 10475 t0 = t1 = t2 = t3 = IRTemp_INVALID; 10476 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10477 /* This is less than ideal. If it turns out to be a performance 10478 bottleneck it can be improved. */ 10479 # define CVT(_t) \ 10480 binop( Iop_F64toI32S, \ 10481 mkexpr(rmode), \ 10482 unop( Iop_F32toF64, \ 10483 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10484 10485 putXMMRegLane32( rG, 3, CVT(t3) ); 10486 putXMMRegLane32( rG, 2, CVT(t2) ); 10487 putXMMRegLane32( rG, 1, CVT(t1) ); 10488 putXMMRegLane32( rG, 0, CVT(t0) ); 10489 # undef CVT 10490 if (isAvx) 10491 putYMMRegLane128( rG, 1, mkV128(0) ); 10492 10493 return delta; 10494 } 10495 10496 10497 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 10498 Long delta, Bool r2zero ) 10499 { 10500 IRTemp addr = IRTemp_INVALID; 10501 Int alen = 0; 10502 HChar dis_buf[50]; 10503 UChar modrm = getUChar(delta); 10504 IRTemp argV = newTemp(Ity_V256); 10505 IRTemp rmode = newTemp(Ity_I32); 10506 UInt rG = gregOfRexRM(pfx,modrm); 10507 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10508 10509 if (epartIsReg(modrm)) { 10510 UInt rE = eregOfRexRM(pfx,modrm); 10511 assign( argV, getYMMReg(rE) ); 10512 delta += 1; 10513 DIP("vcvt%sps2dq %s,%s\n", 10514 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG)); 10515 } else { 10516 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10517 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10518 delta += alen; 10519 DIP("vcvt%sps2dq %s,%s\n", 10520 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) ); 10521 } 10522 10523 assign( rmode, r2zero ? 
mkU32((UInt)Irrm_ZERO) 10524 : get_sse_roundingmode() ); 10525 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID; 10526 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10527 /* This is less than ideal. If it turns out to be a performance 10528 bottleneck it can be improved. */ 10529 # define CVT(_t) \ 10530 binop( Iop_F64toI32S, \ 10531 mkexpr(rmode), \ 10532 unop( Iop_F32toF64, \ 10533 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10534 10535 putYMMRegLane32( rG, 7, CVT(t7) ); 10536 putYMMRegLane32( rG, 6, CVT(t6) ); 10537 putYMMRegLane32( rG, 5, CVT(t5) ); 10538 putYMMRegLane32( rG, 4, CVT(t4) ); 10539 putYMMRegLane32( rG, 3, CVT(t3) ); 10540 putYMMRegLane32( rG, 2, CVT(t2) ); 10541 putYMMRegLane32( rG, 1, CVT(t1) ); 10542 putYMMRegLane32( rG, 0, CVT(t0) ); 10543 # undef CVT 10544 10545 return delta; 10546 } 10547 10548 10549 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 10550 Long delta, Bool isAvx, Bool r2zero ) 10551 { 10552 IRTemp addr = IRTemp_INVALID; 10553 Int alen = 0; 10554 HChar dis_buf[50]; 10555 UChar modrm = getUChar(delta); 10556 IRTemp argV = newTemp(Ity_V128); 10557 IRTemp rmode = newTemp(Ity_I32); 10558 UInt rG = gregOfRexRM(pfx,modrm); 10559 IRTemp t0, t1; 10560 10561 if (epartIsReg(modrm)) { 10562 UInt rE = eregOfRexRM(pfx,modrm); 10563 assign( argV, getXMMReg(rE) ); 10564 delta += 1; 10565 DIP("%scvt%spd2dq %s,%s\n", 10566 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10567 } else { 10568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10569 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10570 delta += alen; 10571 DIP("%scvt%spd2dqx %s,%s\n", 10572 isAvx ? "v" : "", r2zero ? 
"t" : "", dis_buf, nameXMMReg(rG) ); 10573 } 10574 10575 if (r2zero) { 10576 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10577 } else { 10578 assign( rmode, get_sse_roundingmode() ); 10579 } 10580 10581 t0 = newTemp(Ity_F64); 10582 t1 = newTemp(Ity_F64); 10583 assign( t0, unop(Iop_ReinterpI64asF64, 10584 unop(Iop_V128to64, mkexpr(argV))) ); 10585 assign( t1, unop(Iop_ReinterpI64asF64, 10586 unop(Iop_V128HIto64, mkexpr(argV))) ); 10587 10588 # define CVT(_t) binop( Iop_F64toI32S, \ 10589 mkexpr(rmode), \ 10590 mkexpr(_t) ) 10591 10592 putXMMRegLane32( rG, 3, mkU32(0) ); 10593 putXMMRegLane32( rG, 2, mkU32(0) ); 10594 putXMMRegLane32( rG, 1, CVT(t1) ); 10595 putXMMRegLane32( rG, 0, CVT(t0) ); 10596 # undef CVT 10597 if (isAvx) 10598 putYMMRegLane128( rG, 1, mkV128(0) ); 10599 10600 return delta; 10601 } 10602 10603 10604 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 10605 Long delta, Bool r2zero ) 10606 { 10607 IRTemp addr = IRTemp_INVALID; 10608 Int alen = 0; 10609 HChar dis_buf[50]; 10610 UChar modrm = getUChar(delta); 10611 IRTemp argV = newTemp(Ity_V256); 10612 IRTemp rmode = newTemp(Ity_I32); 10613 UInt rG = gregOfRexRM(pfx,modrm); 10614 IRTemp t0, t1, t2, t3; 10615 10616 if (epartIsReg(modrm)) { 10617 UInt rE = eregOfRexRM(pfx,modrm); 10618 assign( argV, getYMMReg(rE) ); 10619 delta += 1; 10620 DIP("vcvt%spd2dq %s,%s\n", 10621 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG)); 10622 } else { 10623 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10624 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10625 delta += alen; 10626 DIP("vcvt%spd2dqy %s,%s\n", 10627 r2zero ? 
"t" : "", dis_buf, nameXMMReg(rG) ); 10628 } 10629 10630 if (r2zero) { 10631 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10632 } else { 10633 assign( rmode, get_sse_roundingmode() ); 10634 } 10635 10636 t0 = IRTemp_INVALID; 10637 t1 = IRTemp_INVALID; 10638 t2 = IRTemp_INVALID; 10639 t3 = IRTemp_INVALID; 10640 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 10641 10642 # define CVT(_t) binop( Iop_F64toI32S, \ 10643 mkexpr(rmode), \ 10644 unop( Iop_ReinterpI64asF64, \ 10645 mkexpr(_t) ) ) 10646 10647 putXMMRegLane32( rG, 3, CVT(t3) ); 10648 putXMMRegLane32( rG, 2, CVT(t2) ); 10649 putXMMRegLane32( rG, 1, CVT(t1) ); 10650 putXMMRegLane32( rG, 0, CVT(t0) ); 10651 # undef CVT 10652 putYMMRegLane128( rG, 1, mkV128(0) ); 10653 10654 return delta; 10655 } 10656 10657 10658 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx, 10659 Long delta, Bool isAvx ) 10660 { 10661 IRTemp addr = IRTemp_INVALID; 10662 Int alen = 0; 10663 HChar dis_buf[50]; 10664 UChar modrm = getUChar(delta); 10665 IRTemp argV = newTemp(Ity_V128); 10666 IRTemp rmode = newTemp(Ity_I32); 10667 UInt rG = gregOfRexRM(pfx,modrm); 10668 IRTemp t0, t1, t2, t3; 10669 10670 if (epartIsReg(modrm)) { 10671 UInt rE = eregOfRexRM(pfx,modrm); 10672 assign( argV, getXMMReg(rE) ); 10673 delta += 1; 10674 DIP("%scvtdq2ps %s,%s\n", 10675 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10676 } else { 10677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10678 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10679 delta += alen; 10680 DIP("%scvtdq2ps %s,%s\n", 10681 isAvx ? 
"v" : "", dis_buf, nameXMMReg(rG) ); 10682 } 10683 10684 assign( rmode, get_sse_roundingmode() ); 10685 t0 = IRTemp_INVALID; 10686 t1 = IRTemp_INVALID; 10687 t2 = IRTemp_INVALID; 10688 t3 = IRTemp_INVALID; 10689 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10690 10691 # define CVT(_t) binop( Iop_F64toF32, \ 10692 mkexpr(rmode), \ 10693 unop(Iop_I32StoF64,mkexpr(_t))) 10694 10695 putXMMRegLane32F( rG, 3, CVT(t3) ); 10696 putXMMRegLane32F( rG, 2, CVT(t2) ); 10697 putXMMRegLane32F( rG, 1, CVT(t1) ); 10698 putXMMRegLane32F( rG, 0, CVT(t0) ); 10699 # undef CVT 10700 if (isAvx) 10701 putYMMRegLane128( rG, 1, mkV128(0) ); 10702 10703 return delta; 10704 } 10705 10706 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx, 10707 Long delta ) 10708 { 10709 IRTemp addr = IRTemp_INVALID; 10710 Int alen = 0; 10711 HChar dis_buf[50]; 10712 UChar modrm = getUChar(delta); 10713 IRTemp argV = newTemp(Ity_V256); 10714 IRTemp rmode = newTemp(Ity_I32); 10715 UInt rG = gregOfRexRM(pfx,modrm); 10716 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10717 10718 if (epartIsReg(modrm)) { 10719 UInt rE = eregOfRexRM(pfx,modrm); 10720 assign( argV, getYMMReg(rE) ); 10721 delta += 1; 10722 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 10723 } else { 10724 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10725 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10726 delta += alen; 10727 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) ); 10728 } 10729 10730 assign( rmode, get_sse_roundingmode() ); 10731 t0 = IRTemp_INVALID; 10732 t1 = IRTemp_INVALID; 10733 t2 = IRTemp_INVALID; 10734 t3 = IRTemp_INVALID; 10735 t4 = IRTemp_INVALID; 10736 t5 = IRTemp_INVALID; 10737 t6 = IRTemp_INVALID; 10738 t7 = IRTemp_INVALID; 10739 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10740 10741 # define CVT(_t) binop( Iop_F64toF32, \ 10742 mkexpr(rmode), \ 10743 unop(Iop_I32StoF64,mkexpr(_t))) 10744 10745 putYMMRegLane32F( rG, 7, CVT(t7) ); 10746 putYMMRegLane32F( rG, 6, CVT(t6) 
); 10747 putYMMRegLane32F( rG, 5, CVT(t5) ); 10748 putYMMRegLane32F( rG, 4, CVT(t4) ); 10749 putYMMRegLane32F( rG, 3, CVT(t3) ); 10750 putYMMRegLane32F( rG, 2, CVT(t2) ); 10751 putYMMRegLane32F( rG, 1, CVT(t1) ); 10752 putYMMRegLane32F( rG, 0, CVT(t0) ); 10753 # undef CVT 10754 10755 return delta; 10756 } 10757 10758 10759 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx, 10760 Long delta, Bool isAvx ) 10761 { 10762 UChar modrm = getUChar(delta); 10763 vassert(epartIsReg(modrm)); /* ensured by caller */ 10764 UInt rE = eregOfRexRM(pfx,modrm); 10765 UInt rG = gregOfRexRM(pfx,modrm); 10766 IRTemp t0 = newTemp(Ity_V128); 10767 IRTemp t1 = newTemp(Ity_I32); 10768 assign(t0, getXMMReg(rE)); 10769 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0)))); 10770 putIReg32(rG, mkexpr(t1)); 10771 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE), 10772 nameIReg32(rG)); 10773 delta += 1; 10774 return delta; 10775 } 10776 10777 10778 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx, 10779 Long delta ) 10780 { 10781 UChar modrm = getUChar(delta); 10782 vassert(epartIsReg(modrm)); /* ensured by caller */ 10783 UInt rE = eregOfRexRM(pfx,modrm); 10784 UInt rG = gregOfRexRM(pfx,modrm); 10785 IRTemp t0 = newTemp(Ity_V128); 10786 IRTemp t1 = newTemp(Ity_V128); 10787 IRTemp t2 = newTemp(Ity_I16); 10788 IRTemp t3 = newTemp(Ity_I16); 10789 assign(t0, getYMMRegLane128(rE, 0)); 10790 assign(t1, getYMMRegLane128(rE, 1)); 10791 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0))); 10792 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1))); 10793 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2))); 10794 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 10795 delta += 1; 10796 return delta; 10797 } 10798 10799 10800 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the 10801 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". 
*/
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS.
   xIsH==True gives the UNPCKH (upper-half) variant, False gives
   UNPCKL.  Source (sV) lanes land in the higher position of each
   interleaved pair.  Returns a new temp holding the result. */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res,  xIsH ? mkV128from32s( s3, d3, s2, d2 )
                     : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD.
   xIsH selects which 64-bit lane of each operand is used: the high
   lanes (UNPCKH) or the low lanes (UNPCKL). */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}


/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   /* Steering is per 128-bit half: each half interleaves its own
      src/dst 64-bit lanes, source lane on top. */
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                            mkexpr(s0), mkexpr(d0)));
   return res;
}


/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPS.  imm8's four 2-bit fields
   select, low to high: two 32-bit lanes of dV for the result's low
   half, then two 32-bit lanes of sV for the result's high half. */
static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}


/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.
Hence do the clueless thing and use math_SHUFPS_128
   twice. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   /* Both halves use the whole imm8 -- same steering in each. */
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPD.  imm8 bit 0 picks a 64-bit lane
   of dV for the result's low half, bit 1 picks a lane of sV for the
   high half. */
static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}


/* Does the maths for 256 bit SHUFPD: each 128-bit half is steered
   independently by its own two imm8 bits, so apply math_SHUFPD_128
   twice. */
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPD.  imm8 bits 1:0 select, per
   64-bit lane, whether the result lane comes from sV (bit set) or dV
   (bit clear).  Implemented as AND/AND-NOT/OR against a constant
   byte mask. */
static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   /* mkV128 expands each of the 16 mask bits to a whole byte, so
      0x00FF covers the low 64-bit lane and 0xFF00 the high one. */
   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* Does the maths for 256 bit BLENDPD: two imm8 bits per 128-bit
   half. */
static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV,
          binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPS.  imm8 bits 3:0 select, per
   32-bit lane, source (bit set) vs destination (bit clear).  The
   lookup table maps each 4-bit lane selector to the corresponding
   16-bit byte-mask constant for mkV128 (one nibble per 32-bit
   lane). */
static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* Does the maths for 256 bit BLENDPS: four imm8 bits per 128-bit
   half. */
static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit PBLENDW: one imm8 bit per 16-bit
   lane. */
static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make w be a 16-bit version of imm8, formed by duplicating each
      bit in imm8. */
   Int i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}


/* Does the maths for 128 bit PMULUDQ: unsigned widening multiply of
   the even-numbered (0 and 2) 32-bit lanes, giving two 64-bit
   products. */
static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* Does the maths for 256 bit PMULUDQ, as two independent 128-bit
   PMULUDQs. */
static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULUDQ_128(sHi, dHi)),
                     mkexpr(math_PMULUDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMULDQ: signed widening multiply of the
   even-numbered 32-bit lanes.  NB: the (dV,sV) parameter order
   differs from math_PMULUDQ_128's (sV,dV). */
static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1,
                     &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* Does the maths for 256 bit PMULDQ, as two independent 128-bit
   PMULDQs. */
static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULDQ_128(sHi, dHi)),
                     mkexpr(math_PMULDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMADDWD, by farming each 64-bit half
   out to the MMX helper amd64g_calculate_mmx_pmaddwd via a clean
   ccall. */
static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
   return res;
}


/* Does the maths for 256 bit PMADDWD, as two independent 128-bit
   PMADDWDs. */
static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDWD_128(dHi, sHi)),
                     mkexpr(math_PMADDWD_128(dLo, sLo))));
   return res;
}


/* Does the maths for 128 bit ADDSUBPD: subtract in the low 64-bit
   lane, add in the high one. */
static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1   = newTemp(Ity_I64);
   IRTemp s0   = newTemp(Ity_I64);
   IRTemp rm   = newTemp(Ity_I32);

   /* Compute both the full add and the full sub, then pick one lane
      of each. */
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64, mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}


/* Does the maths for 256 bit ADDSUBPD: subtract in the even 64-bit
   lanes, add in the odd ones. */
static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}


/* Does the maths for 128 bit ADDSUBPS: subtract in the even 32-bit
   lanes, add in the odd ones. */
static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx4, mkexpr(rm),
                       mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}


/* Does the maths for 256 bit ADDSUBPS: subtract in the even 32-bit
   lanes, add in the odd ones, over all eight lanes. */
static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}


/* Handle 128 bit PSHUFLW and PSHUFHW.
*/
/* xIsH==True -> PSHUFHW (permute the high 64 bits, copy the low half
   through); False -> PSHUFLW.  isAvx selects the VEX-encoded form,
   which additionally zeroes the upper YMM lane of the destination
   and prints a 'v' prefix.  Returns the updated delta. */
static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV    = newTemp(Ity_V128);
   dV    = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64,   mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64   : Iop_V128HIto64, mkexpr(sV)) );

   /* Permute the four 16-bit lanes of the mutable half, each imm8
      2-bit field selecting one source lane. */
   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
      ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}


/* Handle 256 bit PSHUFLW and PSHUFHW.
*/
/* xIsH==True -> VPSHUFHW, False -> VPSHUFLW.  The same imm8-driven
   16-bit-lane permutation is applied to the selected 64-bit quarter
   of each 128-bit half; the other quarters pass through unchanged. */
static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sV, s[8], sV64[4], dVhi, dVlo;
   sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   sV   = newTemp(Ity_V256);
   dVhi = newTemp(Ity_I64);
   dVlo = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameYMMReg(rG));
   }

   /* s[7..4] are the mutable 16-bit lanes of the upper 128-bit half,
      s[3..0] those of the lower half. */
   breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
   breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
   breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );

   assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
                              s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
   assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
                              s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
   putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
                                 xIsH ? sV64[2] : dVhi,
                                 xIsH ? dVlo : sV64[1],
                                 xIsH ? sV64[0] : dVlo ) );
   return delta;
}


/* (V)PEXTRW r32, xmm, imm8 -- register source only.  Extracts the
   16-bit lane selected by imm8[2:0] and zero-extends it into the
   32-bit destination.  Returns the unadvanced deltaIN to signal
   decode failure if the source is memory. */
static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long   deltaIN = delta;
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   IRTemp sV      = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   UInt   imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "",
          (Int)imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   /* Each 32-bit chunk holds two candidate 16-bit lanes; imm8's low
      bit picks low/high half, the upper bits pick the chunk. */
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}


/* (V)CVTDQ2PD (128-bit): convert the two low signed 32-bit lanes of
   E (reg or 64-bit memory) to two F64s in G. */
static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp arg64 = newTemp(Ity_I64);
   UInt   rG = gregOfRexRM(pfx,modrm);
   const HChar* mbV = isAvx ?
"v" : ""; 11384 if (epartIsReg(modrm)) { 11385 UInt rE = eregOfRexRM(pfx,modrm); 11386 assign( arg64, getXMMRegLane64(rE, 0) ); 11387 delta += 1; 11388 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 11389 } else { 11390 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11391 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11392 delta += alen; 11393 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 11394 } 11395 putXMMRegLane64F( 11396 rG, 0, 11397 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 11398 ); 11399 putXMMRegLane64F( 11400 rG, 1, 11401 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 11402 ); 11403 if (isAvx) 11404 putYMMRegLane128(rG, 1, mkV128(0)); 11405 return delta; 11406 } 11407 11408 11409 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11410 Long delta, Bool isAvx ) 11411 { 11412 IRTemp addr = IRTemp_INVALID; 11413 Int alen = 0; 11414 HChar dis_buf[50]; 11415 UChar modrm = getUChar(delta); 11416 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11417 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */ 11418 11419 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11420 delta += alen; 11421 11422 /* Fake up a native SSE mxcsr word. The only thing it depends on 11423 is SSEROUND[1:0], so call a clean helper to cook it up. 11424 */ 11425 /* ULong amd64h_create_mxcsr ( ULong sseround ) */ 11426 DIP("%sstmxcsr %s\n", isAvx ? 
"v" : "", dis_buf); 11427 storeLE( 11428 mkexpr(addr), 11429 unop(Iop_64to32, 11430 mkIRExprCCall( 11431 Ity_I64, 0/*regp*/, 11432 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 11433 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 11434 ) 11435 ) 11436 ); 11437 return delta; 11438 } 11439 11440 11441 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11442 Long delta, Bool isAvx ) 11443 { 11444 IRTemp addr = IRTemp_INVALID; 11445 Int alen = 0; 11446 HChar dis_buf[50]; 11447 UChar modrm = getUChar(delta); 11448 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11449 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */ 11450 11451 IRTemp t64 = newTemp(Ity_I64); 11452 IRTemp ew = newTemp(Ity_I32); 11453 11454 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11455 delta += alen; 11456 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11457 11458 /* The only thing we observe in %mxcsr is the rounding mode. 11459 Therefore, pass the 32-bit value (SSE native-format control 11460 word) to a clean helper, getting back a 64-bit value, the 11461 lower half of which is the SSEROUND value to store, and the 11462 upper half of which is the emulation-warning token which may 11463 be generated. 11464 */ 11465 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 11466 assign( t64, mkIRExprCCall( 11467 Ity_I64, 0/*regparms*/, 11468 "amd64g_check_ldmxcsr", 11469 &amd64g_check_ldmxcsr, 11470 mkIRExprVec_1( 11471 unop(Iop_32Uto64, 11472 loadLE(Ity_I32, mkexpr(addr)) 11473 ) 11474 ) 11475 ) 11476 ); 11477 11478 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 11479 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 11480 put_emwarn( mkexpr(ew) ); 11481 /* Finally, if an emulation warning was reported, side-exit to 11482 the next insn, reporting the warning, so that Valgrind's 11483 dispatcher sees the warning. 
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
         Ijk_EmWarn,
         IRConst_U64(guest_RIP_bbstart+delta),
         OFFB_RIP
      )
   );
   return delta;
}


/* Generates the result of PINSRW: v128 with its 16-bit lane number
   imm8 replaced by u16.  imm8 must be in 0..7. */
static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
   /* NOTE(review): imm8 is unsigned, so the >= 0 half of this
      assertion is vacuously true; only the <= 7 part checks
      anything. */
   vassert(imm8 >= 0 && imm8 <= 7);

   // Create a V128 value which has the selected word in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_16Uto64, mkexpr(u16)),
                           mkU8(16 * (imm8 & 3))));
   if (imm8 < 4) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   /* mkV128 takes 2 mask bits per 16-bit lane: clear the selected
      lane of v128, then OR in the shifted replacement word. */
   UShort mask = ~(3 << (imm8 * 2));
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}


/* Does the maths for 128 bit PSADBW, by farming each 64-bit half out
   to the MMX helper amd64g_calculate_mmx_psadbw via clean ccalls. */
static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp s1, s0, d1, d0;
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res,
           binop(Iop_64HLtoV128,
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
   return res;
}


/* Does the maths for 256 bit PSADBW, as two independent 128-bit
   PSADBWs. */
static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSADBW_128(dHi, sHi)),
                     mkexpr(math_PSADBW_128(dLo, sLo))));
   return res;
}


/* (V)MASKMOVDQU: byte-masked store of xmm-G to [RDI] (address-size
   overrides applied via handleAddrOverrides), the mask being the top
   bit of each byte of xmm-E.  Implemented as a read-modify-write of
   the whole 16-byte area: load old data, merge under the mask, store
   back. */
static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx )
{
   IRTemp regD    = newTemp(Ity_V128);
   IRTemp mask    = newTemp(Ity_V128);
   IRTemp olddata = newTemp(Ity_V128);
   IRTemp newdata = newTemp(Ity_V128);
   IRTemp addr    = newTemp(Ity_I64);
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   UInt   rE      = eregOfRexRM(pfx,modrm);

   assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
   assign( regD, getXMMReg( rG ));

   /* Unfortunately can't do the obvious thing with SarN8x16
      here since that can't be re-emitted as SSE2 code - no such
      insn. */
   /* Arithmetic right shift by 7 broadcasts each byte's sign bit
      across the byte, giving 0x00/0xFF per-byte mask lanes. */
   assign( mask,
           binop(Iop_64HLtoV128,
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
                       mkU8(7) ),
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
                       mkU8(7) ) ));
   assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
   assign( newdata, binop(Iop_OrV128,
                          binop(Iop_AndV128,
                                mkexpr(regD),
                                mkexpr(mask) ),
                          binop(Iop_AndV128,
                                mkexpr(olddata),
                                unop(Iop_NotV128, mkexpr(mask)))) );
   storeLE( mkexpr(addr), mkexpr(newdata) );

   delta += 1;
   DIP("%smaskmovdqu %s,%s\n", isAvx ?
"v" : "", 11597 nameXMMReg(rE), nameXMMReg(rG) ); 11598 return delta; 11599 } 11600 11601 11602 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx, 11603 Long delta, Bool isAvx ) 11604 { 11605 UChar modrm = getUChar(delta); 11606 UInt rG = gregOfRexRM(pfx,modrm); 11607 UInt rE = eregOfRexRM(pfx,modrm); 11608 IRTemp t0 = newTemp(Ity_I32); 11609 IRTemp t1 = newTemp(Ity_I32); 11610 IRTemp t2 = newTemp(Ity_I32); 11611 IRTemp t3 = newTemp(Ity_I32); 11612 delta += 1; 11613 assign( t0, binop( Iop_And32, 11614 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)), 11615 mkU32(1) )); 11616 assign( t1, binop( Iop_And32, 11617 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)), 11618 mkU32(2) )); 11619 assign( t2, binop( Iop_And32, 11620 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)), 11621 mkU32(4) )); 11622 assign( t3, binop( Iop_And32, 11623 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)), 11624 mkU32(8) )); 11625 putIReg32( rG, binop(Iop_Or32, 11626 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11627 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 11628 DIP("%smovmskps %s,%s\n", isAvx ? 
"v" : "", 11629 nameXMMReg(rE), nameIReg32(rG)); 11630 return delta; 11631 } 11632 11633 11634 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 11635 { 11636 UChar modrm = getUChar(delta); 11637 UInt rG = gregOfRexRM(pfx,modrm); 11638 UInt rE = eregOfRexRM(pfx,modrm); 11639 IRTemp t0 = newTemp(Ity_I32); 11640 IRTemp t1 = newTemp(Ity_I32); 11641 IRTemp t2 = newTemp(Ity_I32); 11642 IRTemp t3 = newTemp(Ity_I32); 11643 IRTemp t4 = newTemp(Ity_I32); 11644 IRTemp t5 = newTemp(Ity_I32); 11645 IRTemp t6 = newTemp(Ity_I32); 11646 IRTemp t7 = newTemp(Ity_I32); 11647 delta += 1; 11648 assign( t0, binop( Iop_And32, 11649 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)), 11650 mkU32(1) )); 11651 assign( t1, binop( Iop_And32, 11652 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)), 11653 mkU32(2) )); 11654 assign( t2, binop( Iop_And32, 11655 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)), 11656 mkU32(4) )); 11657 assign( t3, binop( Iop_And32, 11658 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)), 11659 mkU32(8) )); 11660 assign( t4, binop( Iop_And32, 11661 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)), 11662 mkU32(16) )); 11663 assign( t5, binop( Iop_And32, 11664 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)), 11665 mkU32(32) )); 11666 assign( t6, binop( Iop_And32, 11667 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)), 11668 mkU32(64) )); 11669 assign( t7, binop( Iop_And32, 11670 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)), 11671 mkU32(128) )); 11672 putIReg32( rG, binop(Iop_Or32, 11673 binop(Iop_Or32, 11674 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11675 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ), 11676 binop(Iop_Or32, 11677 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)), 11678 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) ); 11679 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 11680 return delta; 11681 } 11682 11683 11684 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx, 11685 Long delta, Bool isAvx ) 11686 { 
11687 UChar modrm = getUChar(delta); 11688 UInt rG = gregOfRexRM(pfx,modrm); 11689 UInt rE = eregOfRexRM(pfx,modrm); 11690 IRTemp t0 = newTemp(Ity_I32); 11691 IRTemp t1 = newTemp(Ity_I32); 11692 delta += 1; 11693 assign( t0, binop( Iop_And32, 11694 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 11695 mkU32(1) )); 11696 assign( t1, binop( Iop_And32, 11697 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 11698 mkU32(2) )); 11699 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) ); 11700 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "", 11701 nameXMMReg(rE), nameIReg32(rG)); 11702 return delta; 11703 } 11704 11705 11706 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 11707 { 11708 UChar modrm = getUChar(delta); 11709 UInt rG = gregOfRexRM(pfx,modrm); 11710 UInt rE = eregOfRexRM(pfx,modrm); 11711 IRTemp t0 = newTemp(Ity_I32); 11712 IRTemp t1 = newTemp(Ity_I32); 11713 IRTemp t2 = newTemp(Ity_I32); 11714 IRTemp t3 = newTemp(Ity_I32); 11715 delta += 1; 11716 assign( t0, binop( Iop_And32, 11717 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)), 11718 mkU32(1) )); 11719 assign( t1, binop( Iop_And32, 11720 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)), 11721 mkU32(2) )); 11722 assign( t2, binop( Iop_And32, 11723 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)), 11724 mkU32(4) )); 11725 assign( t3, binop( Iop_And32, 11726 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)), 11727 mkU32(8) )); 11728 putIReg32( rG, binop(Iop_Or32, 11729 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11730 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 11731 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 11732 return delta; 11733 } 11734 11735 11736 /* Note, this also handles SSE(1) insns. 
*/ 11737 __attribute__((noinline)) 11738 static 11739 Long dis_ESC_0F__SSE2 ( Bool* decode_OK, 11740 const VexAbiInfo* vbi, 11741 Prefix pfx, Int sz, Long deltaIN, 11742 DisResult* dres ) 11743 { 11744 IRTemp addr = IRTemp_INVALID; 11745 IRTemp t0 = IRTemp_INVALID; 11746 IRTemp t1 = IRTemp_INVALID; 11747 IRTemp t2 = IRTemp_INVALID; 11748 IRTemp t3 = IRTemp_INVALID; 11749 IRTemp t4 = IRTemp_INVALID; 11750 IRTemp t5 = IRTemp_INVALID; 11751 IRTemp t6 = IRTemp_INVALID; 11752 UChar modrm = 0; 11753 Int alen = 0; 11754 HChar dis_buf[50]; 11755 11756 *decode_OK = False; 11757 11758 Long delta = deltaIN; 11759 UChar opc = getUChar(delta); 11760 delta++; 11761 switch (opc) { 11762 11763 case 0x10: 11764 if (have66noF2noF3(pfx) 11765 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 11766 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 11767 modrm = getUChar(delta); 11768 if (epartIsReg(modrm)) { 11769 putXMMReg( gregOfRexRM(pfx,modrm), 11770 getXMMReg( eregOfRexRM(pfx,modrm) )); 11771 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11772 nameXMMReg(gregOfRexRM(pfx,modrm))); 11773 delta += 1; 11774 } else { 11775 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11776 putXMMReg( gregOfRexRM(pfx,modrm), 11777 loadLE(Ity_V128, mkexpr(addr)) ); 11778 DIP("movupd %s,%s\n", dis_buf, 11779 nameXMMReg(gregOfRexRM(pfx,modrm))); 11780 delta += alen; 11781 } 11782 goto decode_success; 11783 } 11784 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 11785 G (lo half xmm). If E is mem, upper half of G is zeroed out. 11786 If E is reg, upper half of G is unchanged. 
*/ 11787 if (haveF2no66noF3(pfx) 11788 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) { 11789 modrm = getUChar(delta); 11790 if (epartIsReg(modrm)) { 11791 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11792 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 11793 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11794 nameXMMReg(gregOfRexRM(pfx,modrm))); 11795 delta += 1; 11796 } else { 11797 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11798 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 11799 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11800 loadLE(Ity_I64, mkexpr(addr)) ); 11801 DIP("movsd %s,%s\n", dis_buf, 11802 nameXMMReg(gregOfRexRM(pfx,modrm))); 11803 delta += alen; 11804 } 11805 goto decode_success; 11806 } 11807 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 11808 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 11809 if (haveF3no66noF2(pfx) 11810 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11811 modrm = getUChar(delta); 11812 if (epartIsReg(modrm)) { 11813 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 11814 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 11815 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11816 nameXMMReg(gregOfRexRM(pfx,modrm))); 11817 delta += 1; 11818 } else { 11819 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11820 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 11821 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 11822 loadLE(Ity_I32, mkexpr(addr)) ); 11823 DIP("movss %s,%s\n", dis_buf, 11824 nameXMMReg(gregOfRexRM(pfx,modrm))); 11825 delta += alen; 11826 } 11827 goto decode_success; 11828 } 11829 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). 
*/ 11830 if (haveNo66noF2noF3(pfx) 11831 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11832 modrm = getUChar(delta); 11833 if (epartIsReg(modrm)) { 11834 putXMMReg( gregOfRexRM(pfx,modrm), 11835 getXMMReg( eregOfRexRM(pfx,modrm) )); 11836 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11837 nameXMMReg(gregOfRexRM(pfx,modrm))); 11838 delta += 1; 11839 } else { 11840 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11841 putXMMReg( gregOfRexRM(pfx,modrm), 11842 loadLE(Ity_V128, mkexpr(addr)) ); 11843 DIP("movups %s,%s\n", dis_buf, 11844 nameXMMReg(gregOfRexRM(pfx,modrm))); 11845 delta += alen; 11846 } 11847 goto decode_success; 11848 } 11849 break; 11850 11851 case 0x11: 11852 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 11853 or lo half xmm). */ 11854 if (haveF2no66noF3(pfx) 11855 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11856 modrm = getUChar(delta); 11857 if (epartIsReg(modrm)) { 11858 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, 11859 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 11860 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11861 nameXMMReg(eregOfRexRM(pfx,modrm))); 11862 delta += 1; 11863 } else { 11864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11865 storeLE( mkexpr(addr), 11866 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 11867 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11868 dis_buf); 11869 delta += alen; 11870 } 11871 goto decode_success; 11872 } 11873 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 11874 or lo 1/4 xmm). 
*/ 11875 if (haveF3no66noF2(pfx) && sz == 4) { 11876 modrm = getUChar(delta); 11877 if (epartIsReg(modrm)) { 11878 /* fall through, we don't yet have a test case */ 11879 } else { 11880 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11881 storeLE( mkexpr(addr), 11882 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 11883 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11884 dis_buf); 11885 delta += alen; 11886 goto decode_success; 11887 } 11888 } 11889 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 11890 if (have66noF2noF3(pfx) 11891 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 11892 modrm = getUChar(delta); 11893 if (epartIsReg(modrm)) { 11894 putXMMReg( eregOfRexRM(pfx,modrm), 11895 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 11896 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11897 nameXMMReg(eregOfRexRM(pfx,modrm))); 11898 delta += 1; 11899 } else { 11900 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11901 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11902 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11903 dis_buf ); 11904 delta += alen; 11905 } 11906 goto decode_success; 11907 } 11908 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 11909 if (haveNo66noF2noF3(pfx) 11910 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11911 modrm = getUChar(delta); 11912 if (epartIsReg(modrm)) { 11913 /* fall through; awaiting test case */ 11914 } else { 11915 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11916 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11917 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11918 dis_buf ); 11919 delta += alen; 11920 goto decode_success; 11921 } 11922 } 11923 break; 11924 11925 case 0x12: 11926 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 11927 /* Identical to MOVLPS ? 
*/ 11928 if (have66noF2noF3(pfx) 11929 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 11930 modrm = getUChar(delta); 11931 if (epartIsReg(modrm)) { 11932 /* fall through; apparently reg-reg is not possible */ 11933 } else { 11934 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11935 delta += alen; 11936 putXMMRegLane64( gregOfRexRM(pfx,modrm), 11937 0/*lower lane*/, 11938 loadLE(Ity_I64, mkexpr(addr)) ); 11939 DIP("movlpd %s, %s\n", 11940 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 11941 goto decode_success; 11942 } 11943 } 11944 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ 11945 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ 11946 if (haveNo66noF2noF3(pfx) 11947 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11948 modrm = getUChar(delta); 11949 if (epartIsReg(modrm)) { 11950 delta += 1; 11951 putXMMRegLane64( gregOfRexRM(pfx,modrm), 11952 0/*lower lane*/, 11953 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 )); 11954 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11955 nameXMMReg(gregOfRexRM(pfx,modrm))); 11956 } else { 11957 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11958 delta += alen; 11959 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/, 11960 loadLE(Ity_I64, mkexpr(addr)) ); 11961 DIP("movlps %s, %s\n", 11962 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 11963 } 11964 goto decode_success; 11965 } 11966 break; 11967 11968 case 0x13: 11969 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. 
*/ 11970 if (haveNo66noF2noF3(pfx) 11971 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11972 modrm = getUChar(delta); 11973 if (!epartIsReg(modrm)) { 11974 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11975 delta += alen; 11976 storeLE( mkexpr(addr), 11977 getXMMRegLane64( gregOfRexRM(pfx,modrm), 11978 0/*lower lane*/ ) ); 11979 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 11980 dis_buf); 11981 goto decode_success; 11982 } 11983 /* else fall through */ 11984 } 11985 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 11986 /* Identical to MOVLPS ? */ 11987 if (have66noF2noF3(pfx) 11988 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 11989 modrm = getUChar(delta); 11990 if (!epartIsReg(modrm)) { 11991 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11992 delta += alen; 11993 storeLE( mkexpr(addr), 11994 getXMMRegLane64( gregOfRexRM(pfx,modrm), 11995 0/*lower lane*/ ) ); 11996 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 11997 dis_buf); 11998 goto decode_success; 11999 } 12000 /* else fall through */ 12001 } 12002 break; 12003 12004 case 0x14: 12005 case 0x15: 12006 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 12007 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 12008 /* These just appear to be special cases of SHUFPS */ 12009 if (haveNo66noF2noF3(pfx) && sz == 4) { 12010 Bool hi = toBool(opc == 0x15); 12011 IRTemp sV = newTemp(Ity_V128); 12012 IRTemp dV = newTemp(Ity_V128); 12013 modrm = getUChar(delta); 12014 UInt rG = gregOfRexRM(pfx,modrm); 12015 assign( dV, getXMMReg(rG) ); 12016 if (epartIsReg(modrm)) { 12017 UInt rE = eregOfRexRM(pfx,modrm); 12018 assign( sV, getXMMReg(rE) ); 12019 delta += 1; 12020 DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", 12021 nameXMMReg(rE), nameXMMReg(rG)); 12022 } else { 12023 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12024 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12025 delta += alen; 12026 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12027 dis_buf, nameXMMReg(rG)); 12028 } 12029 IRTemp res = math_UNPCKxPS_128( sV, dV, hi ); 12030 putXMMReg( rG, mkexpr(res) ); 12031 goto decode_success; 12032 } 12033 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 12034 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 12035 /* These just appear to be special cases of SHUFPS */ 12036 if (have66noF2noF3(pfx) 12037 && sz == 2 /* could be 8 if rex also present */) { 12038 Bool hi = toBool(opc == 0x15); 12039 IRTemp sV = newTemp(Ity_V128); 12040 IRTemp dV = newTemp(Ity_V128); 12041 modrm = getUChar(delta); 12042 UInt rG = gregOfRexRM(pfx,modrm); 12043 assign( dV, getXMMReg(rG) ); 12044 if (epartIsReg(modrm)) { 12045 UInt rE = eregOfRexRM(pfx,modrm); 12046 assign( sV, getXMMReg(rE) ); 12047 delta += 1; 12048 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12049 nameXMMReg(rE), nameXMMReg(rG)); 12050 } else { 12051 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12052 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12053 delta += alen; 12054 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12055 dis_buf, nameXMMReg(rG)); 12056 } 12057 IRTemp res = math_UNPCKxPD_128( sV, dV, hi ); 12058 putXMMReg( rG, mkexpr(res) ); 12059 goto decode_success; 12060 } 12061 break; 12062 12063 case 0x16: 12064 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 12065 /* These seems identical to MOVHPS. This instruction encoding is 12066 completely crazy. 
*/ 12067 if (have66noF2noF3(pfx) 12068 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12069 modrm = getUChar(delta); 12070 if (epartIsReg(modrm)) { 12071 /* fall through; apparently reg-reg is not possible */ 12072 } else { 12073 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12074 delta += alen; 12075 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12076 loadLE(Ity_I64, mkexpr(addr)) ); 12077 DIP("movhpd %s,%s\n", dis_buf, 12078 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12079 goto decode_success; 12080 } 12081 } 12082 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 12083 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 12084 if (haveNo66noF2noF3(pfx) 12085 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12086 modrm = getUChar(delta); 12087 if (epartIsReg(modrm)) { 12088 delta += 1; 12089 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12090 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 12091 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12092 nameXMMReg(gregOfRexRM(pfx,modrm))); 12093 } else { 12094 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12095 delta += alen; 12096 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12097 loadLE(Ity_I64, mkexpr(addr)) ); 12098 DIP("movhps %s,%s\n", dis_buf, 12099 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12100 } 12101 goto decode_success; 12102 } 12103 break; 12104 12105 case 0x17: 12106 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. 
*/ 12107 if (haveNo66noF2noF3(pfx) 12108 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12109 modrm = getUChar(delta); 12110 if (!epartIsReg(modrm)) { 12111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12112 delta += alen; 12113 storeLE( mkexpr(addr), 12114 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12115 1/*upper lane*/ ) ); 12116 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12117 dis_buf); 12118 goto decode_success; 12119 } 12120 /* else fall through */ 12121 } 12122 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 12123 /* Again, this seems identical to MOVHPS. */ 12124 if (have66noF2noF3(pfx) 12125 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12126 modrm = getUChar(delta); 12127 if (!epartIsReg(modrm)) { 12128 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12129 delta += alen; 12130 storeLE( mkexpr(addr), 12131 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12132 1/*upper lane*/ ) ); 12133 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12134 dis_buf); 12135 goto decode_success; 12136 } 12137 /* else fall through */ 12138 } 12139 break; 12140 12141 case 0x18: 12142 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 12143 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 12144 /* 0F 18 /2 = PREFETCH1 */ 12145 /* 0F 18 /3 = PREFETCH2 */ 12146 if (haveNo66noF2noF3(pfx) 12147 && !epartIsReg(getUChar(delta)) 12148 && gregLO3ofRM(getUChar(delta)) >= 0 12149 && gregLO3ofRM(getUChar(delta)) <= 3) { 12150 const HChar* hintstr = "??"; 12151 12152 modrm = getUChar(delta); 12153 vassert(!epartIsReg(modrm)); 12154 12155 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12156 delta += alen; 12157 12158 switch (gregLO3ofRM(modrm)) { 12159 case 0: hintstr = "nta"; break; 12160 case 1: hintstr = "t0"; break; 12161 case 2: hintstr = "t1"; break; 12162 case 3: hintstr = "t2"; break; 12163 default: vassert(0); 12164 } 12165 12166 DIP("prefetch%s %s\n", hintstr, dis_buf); 12167 goto 
decode_success; 12168 } 12169 break; 12170 12171 case 0x28: 12172 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 12173 if (have66noF2noF3(pfx) 12174 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12175 modrm = getUChar(delta); 12176 if (epartIsReg(modrm)) { 12177 putXMMReg( gregOfRexRM(pfx,modrm), 12178 getXMMReg( eregOfRexRM(pfx,modrm) )); 12179 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12180 nameXMMReg(gregOfRexRM(pfx,modrm))); 12181 delta += 1; 12182 } else { 12183 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12184 gen_SEGV_if_not_16_aligned( addr ); 12185 putXMMReg( gregOfRexRM(pfx,modrm), 12186 loadLE(Ity_V128, mkexpr(addr)) ); 12187 DIP("movapd %s,%s\n", dis_buf, 12188 nameXMMReg(gregOfRexRM(pfx,modrm))); 12189 delta += alen; 12190 } 12191 goto decode_success; 12192 } 12193 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 12194 if (haveNo66noF2noF3(pfx) 12195 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12196 modrm = getUChar(delta); 12197 if (epartIsReg(modrm)) { 12198 putXMMReg( gregOfRexRM(pfx,modrm), 12199 getXMMReg( eregOfRexRM(pfx,modrm) )); 12200 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12201 nameXMMReg(gregOfRexRM(pfx,modrm))); 12202 delta += 1; 12203 } else { 12204 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12205 gen_SEGV_if_not_16_aligned( addr ); 12206 putXMMReg( gregOfRexRM(pfx,modrm), 12207 loadLE(Ity_V128, mkexpr(addr)) ); 12208 DIP("movaps %s,%s\n", dis_buf, 12209 nameXMMReg(gregOfRexRM(pfx,modrm))); 12210 delta += alen; 12211 } 12212 goto decode_success; 12213 } 12214 break; 12215 12216 case 0x29: 12217 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). 
*/ 12218 if (haveNo66noF2noF3(pfx) 12219 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12220 modrm = getUChar(delta); 12221 if (epartIsReg(modrm)) { 12222 putXMMReg( eregOfRexRM(pfx,modrm), 12223 getXMMReg( gregOfRexRM(pfx,modrm) )); 12224 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12225 nameXMMReg(eregOfRexRM(pfx,modrm))); 12226 delta += 1; 12227 } else { 12228 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12229 gen_SEGV_if_not_16_aligned( addr ); 12230 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12231 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12232 dis_buf ); 12233 delta += alen; 12234 } 12235 goto decode_success; 12236 } 12237 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 12238 if (have66noF2noF3(pfx) 12239 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12240 modrm = getUChar(delta); 12241 if (epartIsReg(modrm)) { 12242 putXMMReg( eregOfRexRM(pfx,modrm), 12243 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12244 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12245 nameXMMReg(eregOfRexRM(pfx,modrm))); 12246 delta += 1; 12247 } else { 12248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12249 gen_SEGV_if_not_16_aligned( addr ); 12250 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12251 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12252 dis_buf ); 12253 delta += alen; 12254 } 12255 goto decode_success; 12256 } 12257 break; 12258 12259 case 0x2A: 12260 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 12261 half xmm */ 12262 if (haveNo66noF2noF3(pfx) && sz == 4) { 12263 IRTemp arg64 = newTemp(Ity_I64); 12264 IRTemp rmode = newTemp(Ity_I32); 12265 12266 modrm = getUChar(delta); 12267 do_MMX_preamble(); 12268 if (epartIsReg(modrm)) { 12269 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12270 delta += 1; 12271 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12272 nameXMMReg(gregOfRexRM(pfx,modrm))); 12273 } else 
{ 12274 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12275 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12276 delta += alen; 12277 DIP("cvtpi2ps %s,%s\n", dis_buf, 12278 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12279 } 12280 12281 assign( rmode, get_sse_roundingmode() ); 12282 12283 putXMMRegLane32F( 12284 gregOfRexRM(pfx,modrm), 0, 12285 binop(Iop_F64toF32, 12286 mkexpr(rmode), 12287 unop(Iop_I32StoF64, 12288 unop(Iop_64to32, mkexpr(arg64)) )) ); 12289 12290 putXMMRegLane32F( 12291 gregOfRexRM(pfx,modrm), 1, 12292 binop(Iop_F64toF32, 12293 mkexpr(rmode), 12294 unop(Iop_I32StoF64, 12295 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 12296 12297 goto decode_success; 12298 } 12299 /* F3 0F 2A = CVTSI2SS 12300 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 12301 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 12302 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12303 IRTemp rmode = newTemp(Ity_I32); 12304 assign( rmode, get_sse_roundingmode() ); 12305 modrm = getUChar(delta); 12306 if (sz == 4) { 12307 IRTemp arg32 = newTemp(Ity_I32); 12308 if (epartIsReg(modrm)) { 12309 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12310 delta += 1; 12311 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12312 nameXMMReg(gregOfRexRM(pfx,modrm))); 12313 } else { 12314 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12315 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12316 delta += alen; 12317 DIP("cvtsi2ss %s,%s\n", dis_buf, 12318 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12319 } 12320 putXMMRegLane32F( 12321 gregOfRexRM(pfx,modrm), 0, 12322 binop(Iop_F64toF32, 12323 mkexpr(rmode), 12324 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 12325 } else { 12326 /* sz == 8 */ 12327 IRTemp arg64 = newTemp(Ity_I64); 12328 if (epartIsReg(modrm)) { 12329 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12330 delta += 1; 12331 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12332 nameXMMReg(gregOfRexRM(pfx,modrm))); 12333 } else { 
12334 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12335 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12336 delta += alen; 12337 DIP("cvtsi2ssq %s,%s\n", dis_buf, 12338 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12339 } 12340 putXMMRegLane32F( 12341 gregOfRexRM(pfx,modrm), 0, 12342 binop(Iop_F64toF32, 12343 mkexpr(rmode), 12344 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 12345 } 12346 goto decode_success; 12347 } 12348 /* F2 0F 2A = CVTSI2SD 12349 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 12350 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 12351 */ 12352 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12353 modrm = getUChar(delta); 12354 if (sz == 4) { 12355 IRTemp arg32 = newTemp(Ity_I32); 12356 if (epartIsReg(modrm)) { 12357 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12358 delta += 1; 12359 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12360 nameXMMReg(gregOfRexRM(pfx,modrm))); 12361 } else { 12362 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12363 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12364 delta += alen; 12365 DIP("cvtsi2sdl %s,%s\n", dis_buf, 12366 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12367 } 12368 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12369 unop(Iop_I32StoF64, mkexpr(arg32)) 12370 ); 12371 } else { 12372 /* sz == 8 */ 12373 IRTemp arg64 = newTemp(Ity_I64); 12374 if (epartIsReg(modrm)) { 12375 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12376 delta += 1; 12377 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12378 nameXMMReg(gregOfRexRM(pfx,modrm))); 12379 } else { 12380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12381 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12382 delta += alen; 12383 DIP("cvtsi2sdq %s,%s\n", dis_buf, 12384 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12385 } 12386 putXMMRegLane64F( 12387 gregOfRexRM(pfx,modrm), 12388 0, 12389 binop( Iop_I64StoF64, 12390 get_sse_roundingmode(), 12391 mkexpr(arg64) 
12392 ) 12393 ); 12394 } 12395 goto decode_success; 12396 } 12397 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 12398 xmm(G) */ 12399 if (have66noF2noF3(pfx) && sz == 2) { 12400 IRTemp arg64 = newTemp(Ity_I64); 12401 12402 modrm = getUChar(delta); 12403 if (epartIsReg(modrm)) { 12404 /* Only switch to MMX mode if the source is a MMX register. 12405 This is inconsistent with all other instructions which 12406 convert between XMM and (M64 or MMX), which always switch 12407 to MMX mode even if 64-bit operand is M64 and not MMX. At 12408 least, that's what the Intel docs seem to me to say. 12409 Fixes #210264. */ 12410 do_MMX_preamble(); 12411 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12412 delta += 1; 12413 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12414 nameXMMReg(gregOfRexRM(pfx,modrm))); 12415 } else { 12416 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12417 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12418 delta += alen; 12419 DIP("cvtpi2pd %s,%s\n", dis_buf, 12420 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12421 } 12422 12423 putXMMRegLane64F( 12424 gregOfRexRM(pfx,modrm), 0, 12425 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 12426 ); 12427 12428 putXMMRegLane64F( 12429 gregOfRexRM(pfx,modrm), 1, 12430 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 12431 ); 12432 12433 goto decode_success; 12434 } 12435 break; 12436 12437 case 0x2B: 12438 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 12439 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 12440 if ( (haveNo66noF2noF3(pfx) && sz == 4) 12441 || (have66noF2noF3(pfx) && sz == 2) ) { 12442 modrm = getUChar(delta); 12443 if (!epartIsReg(modrm)) { 12444 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12445 gen_SEGV_if_not_16_aligned( addr ); 12446 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12447 DIP("movntp%s %s,%s\n", sz==2 ? 
"d" : "s", 12448 dis_buf, 12449 nameXMMReg(gregOfRexRM(pfx,modrm))); 12450 delta += alen; 12451 goto decode_success; 12452 } 12453 /* else fall through */ 12454 } 12455 break; 12456 12457 case 0x2C: 12458 case 0x2D: 12459 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 12460 I32 in mmx, according to prevailing SSE rounding mode */ 12461 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 12462 I32 in mmx, rounding towards zero */ 12463 if (haveNo66noF2noF3(pfx) && sz == 4) { 12464 IRTemp dst64 = newTemp(Ity_I64); 12465 IRTemp rmode = newTemp(Ity_I32); 12466 IRTemp f32lo = newTemp(Ity_F32); 12467 IRTemp f32hi = newTemp(Ity_F32); 12468 Bool r2zero = toBool(opc == 0x2C); 12469 12470 do_MMX_preamble(); 12471 modrm = getUChar(delta); 12472 12473 if (epartIsReg(modrm)) { 12474 delta += 1; 12475 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 12476 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 12477 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 12478 nameXMMReg(eregOfRexRM(pfx,modrm)), 12479 nameMMXReg(gregLO3ofRM(modrm))); 12480 } else { 12481 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12482 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 12483 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 12484 mkexpr(addr), 12485 mkU64(4) ))); 12486 delta += alen; 12487 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 12488 dis_buf, 12489 nameMMXReg(gregLO3ofRM(modrm))); 12490 } 12491 12492 if (r2zero) { 12493 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 12494 } else { 12495 assign( rmode, get_sse_roundingmode() ); 12496 } 12497 12498 assign( 12499 dst64, 12500 binop( Iop_32HLto64, 12501 binop( Iop_F64toI32S, 12502 mkexpr(rmode), 12503 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 12504 binop( Iop_F64toI32S, 12505 mkexpr(rmode), 12506 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 12507 ) 12508 ); 12509 12510 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 12511 goto decode_success; 12512 } 12513 /* F3 0F 2D = CVTSS2SI 12514 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 12515 according to prevailing SSE rounding mode 12516 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 12517 according to prevailing SSE rounding mode 12518 */ 12519 /* F3 0F 2C = CVTTSS2SI 12520 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 12521 truncating towards zero 12522 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 12523 truncating towards zero 12524 */ 12525 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12526 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 12527 goto decode_success; 12528 } 12529 /* F2 0F 2D = CVTSD2SI 12530 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 12531 according to prevailing SSE rounding mode 12532 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 12533 according to prevailing SSE rounding mode 12534 */ 12535 /* F2 0F 2C = CVTTSD2SI 12536 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 12537 truncating towards zero 12538 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 12539 truncating towards zero 12540 */ 12541 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12542 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 12543 goto decode_success; 12544 } 12545 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 12546 I32 
in mmx, according to prevailing SSE rounding mode */ 12547 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 12548 I32 in mmx, rounding towards zero */ 12549 if (have66noF2noF3(pfx) && sz == 2) { 12550 IRTemp dst64 = newTemp(Ity_I64); 12551 IRTemp rmode = newTemp(Ity_I32); 12552 IRTemp f64lo = newTemp(Ity_F64); 12553 IRTemp f64hi = newTemp(Ity_F64); 12554 Bool r2zero = toBool(opc == 0x2C); 12555 12556 do_MMX_preamble(); 12557 modrm = getUChar(delta); 12558 12559 if (epartIsReg(modrm)) { 12560 delta += 1; 12561 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 12562 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 12563 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 12564 nameXMMReg(eregOfRexRM(pfx,modrm)), 12565 nameMMXReg(gregLO3ofRM(modrm))); 12566 } else { 12567 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12568 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 12569 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 12570 mkexpr(addr), 12571 mkU64(8) ))); 12572 delta += alen; 12573 DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", 12574 dis_buf, 12575 nameMMXReg(gregLO3ofRM(modrm))); 12576 } 12577 12578 if (r2zero) { 12579 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 12580 } else { 12581 assign( rmode, get_sse_roundingmode() ); 12582 } 12583 12584 assign( 12585 dst64, 12586 binop( Iop_32HLto64, 12587 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 12588 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 12589 ) 12590 ); 12591 12592 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 12593 goto decode_success; 12594 } 12595 break; 12596 12597 case 0x2E: 12598 case 0x2F: 12599 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 12600 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 12601 if (have66noF2noF3(pfx) && sz == 2) { 12602 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 12603 goto decode_success; 12604 } 12605 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 12606 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 12607 if (haveNo66noF2noF3(pfx) && sz == 4) { 12608 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 12609 goto decode_success; 12610 } 12611 break; 12612 12613 case 0x50: 12614 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 12615 to 4 lowest bits of ireg(G) */ 12616 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 12617 && epartIsReg(getUChar(delta))) { 12618 /* sz == 8 is a kludge to handle insns with REX.W redundantly 12619 set to 1, which has been known to happen: 12620 12621 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 12622 12623 20071106: Intel docs say that REX.W isn't redundant: when 12624 present, a 64-bit register is written; when not present, only 12625 the 32-bit half is written. However, testing on a Core2 12626 machine suggests the entire 64 bit register is written 12627 irrespective of the status of REX.W. That could be because 12628 of the default rule that says "if the lower half of a 32-bit 12629 register is written, the upper half is zeroed". 
By using 12630 putIReg32 here we inadvertantly produce the same behaviour as 12631 the Core2, for the same reason -- putIReg32 implements said 12632 rule. 12633 12634 AMD docs give no indication that REX.W is even valid for this 12635 insn. */ 12636 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12637 goto decode_success; 12638 } 12639 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 12640 2 lowest bits of ireg(G) */ 12641 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 12642 /* sz == 8 is a kludge to handle insns with REX.W redundantly 12643 set to 1, which has been known to happen: 12644 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 12645 20071106: see further comments on MOVMSKPS implementation above. 12646 */ 12647 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ ); 12648 goto decode_success; 12649 } 12650 break; 12651 12652 case 0x51: 12653 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 12654 if (haveF3no66noF2(pfx) && sz == 4) { 12655 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12656 "sqrtss", Iop_Sqrt32F0x4 ); 12657 goto decode_success; 12658 } 12659 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 12660 if (haveNo66noF2noF3(pfx) && sz == 4) { 12661 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12662 "sqrtps", Iop_Sqrt32Fx4 ); 12663 goto decode_success; 12664 } 12665 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 12666 if (haveF2no66noF3(pfx) && sz == 4) { 12667 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta, 12668 "sqrtsd", Iop_Sqrt64F0x2 ); 12669 goto decode_success; 12670 } 12671 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 12672 if (have66noF2noF3(pfx) && sz == 2) { 12673 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12674 "sqrtpd", Iop_Sqrt64Fx2 ); 12675 goto decode_success; 12676 } 12677 break; 12678 12679 case 0x52: 12680 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 12681 if (haveF3no66noF2(pfx) && sz == 4) { 
12682 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12683 "rsqrtss", Iop_RSqrtEst32F0x4 ); 12684 goto decode_success; 12685 } 12686 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 12687 if (haveNo66noF2noF3(pfx) && sz == 4) { 12688 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12689 "rsqrtps", Iop_RSqrtEst32Fx4 ); 12690 goto decode_success; 12691 } 12692 break; 12693 12694 case 0x53: 12695 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 12696 if (haveF3no66noF2(pfx) && sz == 4) { 12697 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12698 "rcpss", Iop_RecipEst32F0x4 ); 12699 goto decode_success; 12700 } 12701 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 12702 if (haveNo66noF2noF3(pfx) && sz == 4) { 12703 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12704 "rcpps", Iop_RecipEst32Fx4 ); 12705 goto decode_success; 12706 } 12707 break; 12708 12709 case 0x54: 12710 /* 0F 54 = ANDPS -- G = G and E */ 12711 if (haveNo66noF2noF3(pfx) && sz == 4) { 12712 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 ); 12713 goto decode_success; 12714 } 12715 /* 66 0F 54 = ANDPD -- G = G and E */ 12716 if (have66noF2noF3(pfx) && sz == 2) { 12717 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 ); 12718 goto decode_success; 12719 } 12720 break; 12721 12722 case 0x55: 12723 /* 0F 55 = ANDNPS -- G = (not G) and E */ 12724 if (haveNo66noF2noF3(pfx) && sz == 4) { 12725 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps", 12726 Iop_AndV128 ); 12727 goto decode_success; 12728 } 12729 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 12730 if (have66noF2noF3(pfx) && sz == 2) { 12731 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd", 12732 Iop_AndV128 ); 12733 goto decode_success; 12734 } 12735 break; 12736 12737 case 0x56: 12738 /* 0F 56 = ORPS -- G = G and E */ 12739 if (haveNo66noF2noF3(pfx) && sz == 4) { 12740 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 ); 
12741 goto decode_success; 12742 } 12743 /* 66 0F 56 = ORPD -- G = G and E */ 12744 if (have66noF2noF3(pfx) && sz == 2) { 12745 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 ); 12746 goto decode_success; 12747 } 12748 break; 12749 12750 case 0x57: 12751 /* 66 0F 57 = XORPD -- G = G xor E */ 12752 if (have66noF2noF3(pfx) && sz == 2) { 12753 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 ); 12754 goto decode_success; 12755 } 12756 /* 0F 57 = XORPS -- G = G xor E */ 12757 if (haveNo66noF2noF3(pfx) && sz == 4) { 12758 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 ); 12759 goto decode_success; 12760 } 12761 break; 12762 12763 case 0x58: 12764 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 12765 if (haveNo66noF2noF3(pfx) && sz == 4) { 12766 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 ); 12767 goto decode_success; 12768 } 12769 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 12770 if (haveF3no66noF2(pfx) && sz == 4) { 12771 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 ); 12772 goto decode_success; 12773 } 12774 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 12775 if (haveF2no66noF3(pfx) 12776 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12777 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 ); 12778 goto decode_success; 12779 } 12780 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 12781 if (have66noF2noF3(pfx) 12782 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12783 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 ); 12784 goto decode_success; 12785 } 12786 break; 12787 12788 case 0x59: 12789 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ 12790 if (haveF2no66noF3(pfx) 12791 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12792 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 ); 12793 goto decode_success; 12794 } 12795 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ 
12796 if (haveF3no66noF2(pfx) && sz == 4) { 12797 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 ); 12798 goto decode_success; 12799 } 12800 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 12801 if (haveNo66noF2noF3(pfx) && sz == 4) { 12802 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 ); 12803 goto decode_success; 12804 } 12805 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 12806 if (have66noF2noF3(pfx) 12807 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12808 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 ); 12809 goto decode_success; 12810 } 12811 break; 12812 12813 case 0x5A: 12814 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 12815 F64 in xmm(G). */ 12816 if (haveNo66noF2noF3(pfx) && sz == 4) { 12817 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ ); 12818 goto decode_success; 12819 } 12820 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 12821 low half xmm(G) */ 12822 if (haveF3no66noF2(pfx) && sz == 4) { 12823 IRTemp f32lo = newTemp(Ity_F32); 12824 12825 modrm = getUChar(delta); 12826 if (epartIsReg(modrm)) { 12827 delta += 1; 12828 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 12829 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12830 nameXMMReg(gregOfRexRM(pfx,modrm))); 12831 } else { 12832 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12833 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 12834 delta += alen; 12835 DIP("cvtss2sd %s,%s\n", dis_buf, 12836 nameXMMReg(gregOfRexRM(pfx,modrm))); 12837 } 12838 12839 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12840 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 12841 12842 goto decode_success; 12843 } 12844 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 12845 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 12846 if (haveF2no66noF3(pfx) && sz == 4) { 12847 IRTemp rmode = newTemp(Ity_I32); 12848 IRTemp f64lo = newTemp(Ity_F64); 
12849 12850 modrm = getUChar(delta); 12851 if (epartIsReg(modrm)) { 12852 delta += 1; 12853 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 12854 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12855 nameXMMReg(gregOfRexRM(pfx,modrm))); 12856 } else { 12857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12858 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 12859 delta += alen; 12860 DIP("cvtsd2ss %s,%s\n", dis_buf, 12861 nameXMMReg(gregOfRexRM(pfx,modrm))); 12862 } 12863 12864 assign( rmode, get_sse_roundingmode() ); 12865 putXMMRegLane32F( 12866 gregOfRexRM(pfx,modrm), 0, 12867 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 12868 ); 12869 12870 goto decode_success; 12871 } 12872 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 12873 lo half xmm(G), rounding according to prevailing SSE rounding 12874 mode, and zero upper half */ 12875 /* Note, this is practically identical to CVTPD2DQ. It would have 12876 be nice to merge them together. */ 12877 if (have66noF2noF3(pfx) && sz == 2) { 12878 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12879 goto decode_success; 12880 } 12881 break; 12882 12883 case 0x5B: 12884 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 12885 xmm(G), rounding towards zero */ 12886 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 12887 xmm(G), as per the prevailing rounding mode */ 12888 if ( (have66noF2noF3(pfx) && sz == 2) 12889 || (haveF3no66noF2(pfx) && sz == 4) ) { 12890 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???) 
12891 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero ); 12892 goto decode_success; 12893 } 12894 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 12895 xmm(G) */ 12896 if (haveNo66noF2noF3(pfx) && sz == 4) { 12897 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12898 goto decode_success; 12899 } 12900 break; 12901 12902 case 0x5C: 12903 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 12904 if (haveF3no66noF2(pfx) && sz == 4) { 12905 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 ); 12906 goto decode_success; 12907 } 12908 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 12909 if (haveF2no66noF3(pfx) 12910 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12911 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 ); 12912 goto decode_success; 12913 } 12914 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 12915 if (haveNo66noF2noF3(pfx) && sz == 4) { 12916 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 ); 12917 goto decode_success; 12918 } 12919 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 12920 if (have66noF2noF3(pfx) && sz == 2) { 12921 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 ); 12922 goto decode_success; 12923 } 12924 break; 12925 12926 case 0x5D: 12927 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 12928 if (haveNo66noF2noF3(pfx) && sz == 4) { 12929 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 ); 12930 goto decode_success; 12931 } 12932 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 12933 if (haveF3no66noF2(pfx) && sz == 4) { 12934 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 ); 12935 goto decode_success; 12936 } 12937 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 12938 if (haveF2no66noF3(pfx) && sz == 4) { 12939 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 ); 12940 goto decode_success; 12941 } 12942 /* 66 0F 5D = MINPD -- min 64Fx2 from 
R/M to R */ 12943 if (have66noF2noF3(pfx) && sz == 2) { 12944 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 ); 12945 goto decode_success; 12946 } 12947 break; 12948 12949 case 0x5E: 12950 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 12951 if (haveF2no66noF3(pfx) && sz == 4) { 12952 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 ); 12953 goto decode_success; 12954 } 12955 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 12956 if (haveNo66noF2noF3(pfx) && sz == 4) { 12957 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 ); 12958 goto decode_success; 12959 } 12960 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 12961 if (haveF3no66noF2(pfx) && sz == 4) { 12962 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 ); 12963 goto decode_success; 12964 } 12965 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 12966 if (have66noF2noF3(pfx) && sz == 2) { 12967 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 ); 12968 goto decode_success; 12969 } 12970 break; 12971 12972 case 0x5F: 12973 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 12974 if (haveNo66noF2noF3(pfx) && sz == 4) { 12975 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 ); 12976 goto decode_success; 12977 } 12978 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 12979 if (haveF3no66noF2(pfx) && sz == 4) { 12980 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 ); 12981 goto decode_success; 12982 } 12983 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 12984 if (haveF2no66noF3(pfx) && sz == 4) { 12985 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 ); 12986 goto decode_success; 12987 } 12988 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 12989 if (have66noF2noF3(pfx) && sz == 2) { 12990 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 ); 12991 goto decode_success; 12992 } 12993 break; 12994 12995 case 0x60: 12996 /* 66 0F 60 = 
PUNPCKLBW */ 12997 if (have66noF2noF3(pfx) && sz == 2) { 12998 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 12999 "punpcklbw", 13000 Iop_InterleaveLO8x16, True ); 13001 goto decode_success; 13002 } 13003 break; 13004 13005 case 0x61: 13006 /* 66 0F 61 = PUNPCKLWD */ 13007 if (have66noF2noF3(pfx) && sz == 2) { 13008 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13009 "punpcklwd", 13010 Iop_InterleaveLO16x8, True ); 13011 goto decode_success; 13012 } 13013 break; 13014 13015 case 0x62: 13016 /* 66 0F 62 = PUNPCKLDQ */ 13017 if (have66noF2noF3(pfx) && sz == 2) { 13018 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13019 "punpckldq", 13020 Iop_InterleaveLO32x4, True ); 13021 goto decode_success; 13022 } 13023 break; 13024 13025 case 0x63: 13026 /* 66 0F 63 = PACKSSWB */ 13027 if (have66noF2noF3(pfx) && sz == 2) { 13028 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13029 "packsswb", 13030 Iop_QNarrowBin16Sto8Sx16, True ); 13031 goto decode_success; 13032 } 13033 break; 13034 13035 case 0x64: 13036 /* 66 0F 64 = PCMPGTB */ 13037 if (have66noF2noF3(pfx) && sz == 2) { 13038 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13039 "pcmpgtb", Iop_CmpGT8Sx16, False ); 13040 goto decode_success; 13041 } 13042 break; 13043 13044 case 0x65: 13045 /* 66 0F 65 = PCMPGTW */ 13046 if (have66noF2noF3(pfx) && sz == 2) { 13047 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13048 "pcmpgtw", Iop_CmpGT16Sx8, False ); 13049 goto decode_success; 13050 } 13051 break; 13052 13053 case 0x66: 13054 /* 66 0F 66 = PCMPGTD */ 13055 if (have66noF2noF3(pfx) && sz == 2) { 13056 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13057 "pcmpgtd", Iop_CmpGT32Sx4, False ); 13058 goto decode_success; 13059 } 13060 break; 13061 13062 case 0x67: 13063 /* 66 0F 67 = PACKUSWB */ 13064 if (have66noF2noF3(pfx) && sz == 2) { 13065 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13066 "packuswb", 13067 Iop_QNarrowBin16Sto8Ux16, True ); 13068 goto decode_success; 13069 } 13070 break; 13071 13072 case 0x68: 13073 /* 66 0F 68 = PUNPCKHBW */ 
13074 if (have66noF2noF3(pfx) && sz == 2) { 13075 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13076 "punpckhbw", 13077 Iop_InterleaveHI8x16, True ); 13078 goto decode_success; 13079 } 13080 break; 13081 13082 case 0x69: 13083 /* 66 0F 69 = PUNPCKHWD */ 13084 if (have66noF2noF3(pfx) && sz == 2) { 13085 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13086 "punpckhwd", 13087 Iop_InterleaveHI16x8, True ); 13088 goto decode_success; 13089 } 13090 break; 13091 13092 case 0x6A: 13093 /* 66 0F 6A = PUNPCKHDQ */ 13094 if (have66noF2noF3(pfx) && sz == 2) { 13095 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13096 "punpckhdq", 13097 Iop_InterleaveHI32x4, True ); 13098 goto decode_success; 13099 } 13100 break; 13101 13102 case 0x6B: 13103 /* 66 0F 6B = PACKSSDW */ 13104 if (have66noF2noF3(pfx) && sz == 2) { 13105 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13106 "packssdw", 13107 Iop_QNarrowBin32Sto16Sx8, True ); 13108 goto decode_success; 13109 } 13110 break; 13111 13112 case 0x6C: 13113 /* 66 0F 6C = PUNPCKLQDQ */ 13114 if (have66noF2noF3(pfx) && sz == 2) { 13115 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13116 "punpcklqdq", 13117 Iop_InterleaveLO64x2, True ); 13118 goto decode_success; 13119 } 13120 break; 13121 13122 case 0x6D: 13123 /* 66 0F 6D = PUNPCKHQDQ */ 13124 if (have66noF2noF3(pfx) && sz == 2) { 13125 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13126 "punpckhqdq", 13127 Iop_InterleaveHI64x2, True ); 13128 goto decode_success; 13129 } 13130 break; 13131 13132 case 0x6E: 13133 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 13134 zeroing high 3/4 of xmm. */ 13135 /* or from ireg64/m64 to xmm lo 1/2, 13136 zeroing high 1/2 of xmm. 
*/ 13137 if (have66noF2noF3(pfx)) { 13138 vassert(sz == 2 || sz == 8); 13139 if (sz == 2) sz = 4; 13140 modrm = getUChar(delta); 13141 if (epartIsReg(modrm)) { 13142 delta += 1; 13143 if (sz == 4) { 13144 putXMMReg( 13145 gregOfRexRM(pfx,modrm), 13146 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 13147 ); 13148 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13149 nameXMMReg(gregOfRexRM(pfx,modrm))); 13150 } else { 13151 putXMMReg( 13152 gregOfRexRM(pfx,modrm), 13153 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 13154 ); 13155 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13156 nameXMMReg(gregOfRexRM(pfx,modrm))); 13157 } 13158 } else { 13159 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13160 delta += alen; 13161 putXMMReg( 13162 gregOfRexRM(pfx,modrm), 13163 sz == 4 13164 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 13165 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 13166 ); 13167 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 13168 nameXMMReg(gregOfRexRM(pfx,modrm))); 13169 } 13170 goto decode_success; 13171 } 13172 break; 13173 13174 case 0x6F: 13175 if (have66noF2noF3(pfx) 13176 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13177 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). 
*/ 13178 modrm = getUChar(delta); 13179 if (epartIsReg(modrm)) { 13180 putXMMReg( gregOfRexRM(pfx,modrm), 13181 getXMMReg( eregOfRexRM(pfx,modrm) )); 13182 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13183 nameXMMReg(gregOfRexRM(pfx,modrm))); 13184 delta += 1; 13185 } else { 13186 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13187 gen_SEGV_if_not_16_aligned( addr ); 13188 putXMMReg( gregOfRexRM(pfx,modrm), 13189 loadLE(Ity_V128, mkexpr(addr)) ); 13190 DIP("movdqa %s,%s\n", dis_buf, 13191 nameXMMReg(gregOfRexRM(pfx,modrm))); 13192 delta += alen; 13193 } 13194 goto decode_success; 13195 } 13196 if (haveF3no66noF2(pfx) && sz == 4) { 13197 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 13198 modrm = getUChar(delta); 13199 if (epartIsReg(modrm)) { 13200 putXMMReg( gregOfRexRM(pfx,modrm), 13201 getXMMReg( eregOfRexRM(pfx,modrm) )); 13202 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13203 nameXMMReg(gregOfRexRM(pfx,modrm))); 13204 delta += 1; 13205 } else { 13206 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13207 putXMMReg( gregOfRexRM(pfx,modrm), 13208 loadLE(Ity_V128, mkexpr(addr)) ); 13209 DIP("movdqu %s,%s\n", dis_buf, 13210 nameXMMReg(gregOfRexRM(pfx,modrm))); 13211 delta += alen; 13212 } 13213 goto decode_success; 13214 } 13215 break; 13216 13217 case 0x70: 13218 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 13219 if (have66noF2noF3(pfx) && sz == 2) { 13220 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/); 13221 goto decode_success; 13222 } 13223 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13224 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 13225 if (haveNo66noF2noF3(pfx) && sz == 4) { 13226 Int order; 13227 IRTemp sV, dV, s3, s2, s1, s0; 13228 s3 = s2 = s1 = s0 = IRTemp_INVALID; 13229 sV = newTemp(Ity_I64); 13230 dV = newTemp(Ity_I64); 13231 do_MMX_preamble(); 13232 modrm = getUChar(delta); 13233 if (epartIsReg(modrm)) 
{ 13234 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13235 order = (Int)getUChar(delta+1); 13236 delta += 1+1; 13237 DIP("pshufw $%d,%s,%s\n", order, 13238 nameMMXReg(eregLO3ofRM(modrm)), 13239 nameMMXReg(gregLO3ofRM(modrm))); 13240 } else { 13241 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13242 1/*extra byte after amode*/ ); 13243 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13244 order = (Int)getUChar(delta+alen); 13245 delta += 1+alen; 13246 DIP("pshufw $%d,%s,%s\n", order, 13247 dis_buf, 13248 nameMMXReg(gregLO3ofRM(modrm))); 13249 } 13250 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 13251 # define SEL(n) \ 13252 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 13253 assign(dV, 13254 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 13255 SEL((order>>2)&3), SEL((order>>0)&3) ) 13256 ); 13257 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 13258 # undef SEL 13259 goto decode_success; 13260 } 13261 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 13262 mem) to G(xmm), and copy upper half */ 13263 if (haveF2no66noF3(pfx) && sz == 4) { 13264 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13265 False/*!isAvx*/, False/*!xIsH*/ ); 13266 goto decode_success; 13267 } 13268 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 13269 mem) to G(xmm), and copy lower half */ 13270 if (haveF3no66noF2(pfx) && sz == 4) { 13271 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13272 False/*!isAvx*/, True/*xIsH*/ ); 13273 goto decode_success; 13274 } 13275 break; 13276 13277 case 0x71: 13278 /* 66 0F 71 /2 ib = PSRLW by immediate */ 13279 if (have66noF2noF3(pfx) && sz == 2 13280 && epartIsReg(getUChar(delta)) 13281 && gregLO3ofRM(getUChar(delta)) == 2) { 13282 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 ); 13283 goto decode_success; 13284 } 13285 /* 66 0F 71 /4 ib = PSRAW by immediate */ 13286 if (have66noF2noF3(pfx) && sz == 2 13287 && epartIsReg(getUChar(delta)) 13288 && gregLO3ofRM(getUChar(delta)) == 4) { 13289 delta = dis_SSE_shiftE_imm( 
pfx, delta, "psraw", Iop_SarN16x8 ); 13290 goto decode_success; 13291 } 13292 /* 66 0F 71 /6 ib = PSLLW by immediate */ 13293 if (have66noF2noF3(pfx) && sz == 2 13294 && epartIsReg(getUChar(delta)) 13295 && gregLO3ofRM(getUChar(delta)) == 6) { 13296 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 ); 13297 goto decode_success; 13298 } 13299 break; 13300 13301 case 0x72: 13302 /* 66 0F 72 /2 ib = PSRLD by immediate */ 13303 if (have66noF2noF3(pfx) && sz == 2 13304 && epartIsReg(getUChar(delta)) 13305 && gregLO3ofRM(getUChar(delta)) == 2) { 13306 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 ); 13307 goto decode_success; 13308 } 13309 /* 66 0F 72 /4 ib = PSRAD by immediate */ 13310 if (have66noF2noF3(pfx) && sz == 2 13311 && epartIsReg(getUChar(delta)) 13312 && gregLO3ofRM(getUChar(delta)) == 4) { 13313 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 ); 13314 goto decode_success; 13315 } 13316 /* 66 0F 72 /6 ib = PSLLD by immediate */ 13317 if (have66noF2noF3(pfx) && sz == 2 13318 && epartIsReg(getUChar(delta)) 13319 && gregLO3ofRM(getUChar(delta)) == 6) { 13320 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 ); 13321 goto decode_success; 13322 } 13323 break; 13324 13325 case 0x73: 13326 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 13327 /* note, if mem case ever filled in, 1 byte after amode */ 13328 if (have66noF2noF3(pfx) && sz == 2 13329 && epartIsReg(getUChar(delta)) 13330 && gregLO3ofRM(getUChar(delta)) == 3) { 13331 Int imm = (Int)getUChar(delta+1); 13332 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13333 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 13334 delta += 2; 13335 IRTemp sV = newTemp(Ity_V128); 13336 assign( sV, getXMMReg(reg) ); 13337 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm ))); 13338 goto decode_success; 13339 } 13340 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 13341 /* note, if mem case ever filled in, 1 byte after amode */ 13342 if (have66noF2noF3(pfx) && sz == 2 13343 && 
epartIsReg(getUChar(delta)) 13344 && gregLO3ofRM(getUChar(delta)) == 7) { 13345 Int imm = (Int)getUChar(delta+1); 13346 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13347 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 13348 vassert(imm >= 0 && imm <= 255); 13349 delta += 2; 13350 IRTemp sV = newTemp(Ity_V128); 13351 assign( sV, getXMMReg(reg) ); 13352 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm ))); 13353 goto decode_success; 13354 } 13355 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 13356 if (have66noF2noF3(pfx) && sz == 2 13357 && epartIsReg(getUChar(delta)) 13358 && gregLO3ofRM(getUChar(delta)) == 2) { 13359 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 ); 13360 goto decode_success; 13361 } 13362 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 13363 if (have66noF2noF3(pfx) && sz == 2 13364 && epartIsReg(getUChar(delta)) 13365 && gregLO3ofRM(getUChar(delta)) == 6) { 13366 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 ); 13367 goto decode_success; 13368 } 13369 break; 13370 13371 case 0x74: 13372 /* 66 0F 74 = PCMPEQB */ 13373 if (have66noF2noF3(pfx) && sz == 2) { 13374 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13375 "pcmpeqb", Iop_CmpEQ8x16, False ); 13376 goto decode_success; 13377 } 13378 break; 13379 13380 case 0x75: 13381 /* 66 0F 75 = PCMPEQW */ 13382 if (have66noF2noF3(pfx) && sz == 2) { 13383 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13384 "pcmpeqw", Iop_CmpEQ16x8, False ); 13385 goto decode_success; 13386 } 13387 break; 13388 13389 case 0x76: 13390 /* 66 0F 76 = PCMPEQD */ 13391 if (have66noF2noF3(pfx) && sz == 2) { 13392 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13393 "pcmpeqd", Iop_CmpEQ32x4, False ); 13394 goto decode_success; 13395 } 13396 break; 13397 13398 case 0x7E: 13399 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 13400 G (lo half xmm). Upper half of G is zeroed out. 
*/ 13401 if (haveF3no66noF2(pfx) 13402 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13403 modrm = getUChar(delta); 13404 if (epartIsReg(modrm)) { 13405 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13406 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 13407 /* zero bits 127:64 */ 13408 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 13409 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13410 nameXMMReg(gregOfRexRM(pfx,modrm))); 13411 delta += 1; 13412 } else { 13413 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13414 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 13415 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13416 loadLE(Ity_I64, mkexpr(addr)) ); 13417 DIP("movsd %s,%s\n", dis_buf, 13418 nameXMMReg(gregOfRexRM(pfx,modrm))); 13419 delta += alen; 13420 } 13421 goto decode_success; 13422 } 13423 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 13424 /* or from xmm low 1/2 to ireg64 or m64. */ 13425 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 13426 if (sz == 2) sz = 4; 13427 modrm = getUChar(delta); 13428 if (epartIsReg(modrm)) { 13429 delta += 1; 13430 if (sz == 4) { 13431 putIReg32( eregOfRexRM(pfx,modrm), 13432 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 13433 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13434 nameIReg32(eregOfRexRM(pfx,modrm))); 13435 } else { 13436 putIReg64( eregOfRexRM(pfx,modrm), 13437 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 13438 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13439 nameIReg64(eregOfRexRM(pfx,modrm))); 13440 } 13441 } else { 13442 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13443 delta += alen; 13444 storeLE( mkexpr(addr), 13445 sz == 4 13446 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 13447 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 13448 DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', 13449 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13450 } 13451 goto decode_success; 13452 } 13453 break; 13454 13455 case 0x7F: 13456 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 13457 if (haveF3no66noF2(pfx) && sz == 4) { 13458 modrm = getUChar(delta); 13459 if (epartIsReg(modrm)) { 13460 goto decode_failure; /* awaiting test case */ 13461 delta += 1; 13462 putXMMReg( eregOfRexRM(pfx,modrm), 13463 getXMMReg(gregOfRexRM(pfx,modrm)) ); 13464 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13465 nameXMMReg(eregOfRexRM(pfx,modrm))); 13466 } else { 13467 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13468 delta += alen; 13469 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13470 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13471 } 13472 goto decode_success; 13473 } 13474 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 13475 if (have66noF2noF3(pfx) && sz == 2) { 13476 modrm = getUChar(delta); 13477 if (epartIsReg(modrm)) { 13478 delta += 1; 13479 putXMMReg( eregOfRexRM(pfx,modrm), 13480 getXMMReg(gregOfRexRM(pfx,modrm)) ); 13481 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13482 nameXMMReg(eregOfRexRM(pfx,modrm))); 13483 } else { 13484 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13485 gen_SEGV_if_not_16_aligned( addr ); 13486 delta += alen; 13487 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13488 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13489 } 13490 goto decode_success; 13491 } 13492 break; 13493 13494 case 0xAE: 13495 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 13496 if (haveNo66noF2noF3(pfx) 13497 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 13498 && sz == 4) { 13499 delta += 1; 13500 /* Insert a memory fence. It's sometimes important that these 13501 are carried through to the generated code. 
*/ 13502 stmt( IRStmt_MBE(Imbe_Fence) ); 13503 DIP("sfence\n"); 13504 goto decode_success; 13505 } 13506 /* mindless duplication follows .. */ 13507 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 13508 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 13509 if (haveNo66noF2noF3(pfx) 13510 && epartIsReg(getUChar(delta)) 13511 && (gregLO3ofRM(getUChar(delta)) == 5 13512 || gregLO3ofRM(getUChar(delta)) == 6) 13513 && sz == 4) { 13514 delta += 1; 13515 /* Insert a memory fence. It's sometimes important that these 13516 are carried through to the generated code. */ 13517 stmt( IRStmt_MBE(Imbe_Fence) ); 13518 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m"); 13519 goto decode_success; 13520 } 13521 13522 /* 0F AE /7 = CLFLUSH -- flush cache line */ 13523 if (haveNo66noF2noF3(pfx) 13524 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 13525 && sz == 4) { 13526 13527 /* This is something of a hack. We need to know the size of 13528 the cache line containing addr. Since we don't (easily), 13529 assume 256 on the basis that no real cache would have a 13530 line that big. It's safe to invalidate more stuff than we 13531 need, just inefficient. */ 13532 ULong lineszB = 256ULL; 13533 13534 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13535 delta += alen; 13536 13537 /* Round addr down to the start of the containing block. 
*/ 13538 stmt( IRStmt_Put( 13539 OFFB_CMSTART, 13540 binop( Iop_And64, 13541 mkexpr(addr), 13542 mkU64( ~(lineszB-1) ))) ); 13543 13544 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); 13545 13546 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); 13547 13548 DIP("clflush %s\n", dis_buf); 13549 goto decode_success; 13550 } 13551 13552 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 13553 if (haveNo66noF2noF3(pfx) 13554 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 13555 && sz == 4) { 13556 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/); 13557 goto decode_success; 13558 } 13559 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 13560 if (haveNo66noF2noF3(pfx) 13561 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 13562 && sz == 4) { 13563 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/); 13564 goto decode_success; 13565 } 13566 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. 13567 Note that the presence or absence of REX.W slightly affects the 13568 written format: whether the saved FPU IP and DP pointers are 64 13569 or 32 bits. But the helper function we call simply writes zero 13570 bits in the relevant fields (which are 64 bits regardless of 13571 what REX.W is) and so it's good enough (iow, equally broken) in 13572 both cases. */ 13573 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13574 && !epartIsReg(getUChar(delta)) 13575 && gregOfRexRM(pfx,getUChar(delta)) == 0) { 13576 IRDirty* d; 13577 modrm = getUChar(delta); 13578 vassert(!epartIsReg(modrm)); 13579 13580 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13581 delta += alen; 13582 gen_SEGV_if_not_16_aligned(addr); 13583 13584 DIP("%sfxsave %s\n", sz==8 ? 
"rex64/" : "", dis_buf); 13585 13586 /* Uses dirty helper: 13587 void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*, 13588 ULong ) */ 13589 d = unsafeIRDirty_0_N ( 13590 0/*regparms*/, 13591 "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM", 13592 &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM, 13593 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 13594 ); 13595 13596 /* declare we're writing memory */ 13597 d->mFx = Ifx_Write; 13598 d->mAddr = mkexpr(addr); 13599 d->mSize = 464; /* according to recent Intel docs */ 13600 13601 /* declare we're reading guest state */ 13602 d->nFxState = 6; 13603 vex_bzero(&d->fxState, sizeof(d->fxState)); 13604 13605 d->fxState[0].fx = Ifx_Read; 13606 d->fxState[0].offset = OFFB_FTOP; 13607 d->fxState[0].size = sizeof(UInt); 13608 13609 d->fxState[1].fx = Ifx_Read; 13610 d->fxState[1].offset = OFFB_FPREGS; 13611 d->fxState[1].size = 8 * sizeof(ULong); 13612 13613 d->fxState[2].fx = Ifx_Read; 13614 d->fxState[2].offset = OFFB_FPTAGS; 13615 d->fxState[2].size = 8 * sizeof(UChar); 13616 13617 d->fxState[3].fx = Ifx_Read; 13618 d->fxState[3].offset = OFFB_FPROUND; 13619 d->fxState[3].size = sizeof(ULong); 13620 13621 d->fxState[4].fx = Ifx_Read; 13622 d->fxState[4].offset = OFFB_FC3210; 13623 d->fxState[4].size = sizeof(ULong); 13624 13625 d->fxState[5].fx = Ifx_Read; 13626 d->fxState[5].offset = OFFB_SSEROUND; 13627 d->fxState[5].size = sizeof(ULong); 13628 13629 /* Call the helper. This creates all parts of the in-memory 13630 image except for the XMM[0..15] array, which we do 13631 separately, in order that any undefinedness in the XMM 13632 registers is tracked separately by Memcheck and does not 13633 "infect" the in-memory shadow for the other parts of the 13634 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210, 13635 SSEROUND). */ 13636 stmt( IRStmt_Dirty(d) ); 13637 13638 /* And now the XMMs themselves. 
*/ 13639 UInt xmm; 13640 for (xmm = 0; xmm < 16; xmm++) { 13641 storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)), 13642 getXMMReg(xmm) ); 13643 } 13644 13645 goto decode_success; 13646 } 13647 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. 13648 As with FXSAVE above we ignore the value of REX.W since we're 13649 not bothering with the FPU DP and IP fields. */ 13650 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13651 && !epartIsReg(getUChar(delta)) 13652 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 13653 IRDirty* d; 13654 modrm = getUChar(delta); 13655 vassert(!epartIsReg(modrm)); 13656 13657 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13658 delta += alen; 13659 gen_SEGV_if_not_16_aligned(addr); 13660 13661 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 13662 13663 /* Uses dirty helper: 13664 VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*, 13665 ULong ) 13666 NOTE: 13667 the VexEmNote value is simply ignored 13668 */ 13669 d = unsafeIRDirty_0_N ( 13670 0/*regparms*/, 13671 "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM", 13672 &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM, 13673 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 13674 ); 13675 13676 /* declare we're reading memory */ 13677 d->mFx = Ifx_Read; 13678 d->mAddr = mkexpr(addr); 13679 d->mSize = 464; /* according to recent Intel docs */ 13680 13681 /* declare we're writing guest state */ 13682 d->nFxState = 6; 13683 vex_bzero(&d->fxState, sizeof(d->fxState)); 13684 13685 d->fxState[0].fx = Ifx_Write; 13686 d->fxState[0].offset = OFFB_FTOP; 13687 d->fxState[0].size = sizeof(UInt); 13688 13689 d->fxState[1].fx = Ifx_Write; 13690 d->fxState[1].offset = OFFB_FPREGS; 13691 d->fxState[1].size = 8 * sizeof(ULong); 13692 13693 d->fxState[2].fx = Ifx_Write; 13694 d->fxState[2].offset = OFFB_FPTAGS; 13695 d->fxState[2].size = 8 * sizeof(UChar); 13696 13697 d->fxState[3].fx = Ifx_Write; 13698 d->fxState[3].offset = OFFB_FPROUND; 13699 d->fxState[3].size = 
sizeof(ULong); 13700 13701 d->fxState[4].fx = Ifx_Write; 13702 d->fxState[4].offset = OFFB_FC3210; 13703 d->fxState[4].size = sizeof(ULong); 13704 13705 d->fxState[5].fx = Ifx_Write; 13706 d->fxState[5].offset = OFFB_SSEROUND; 13707 d->fxState[5].size = sizeof(ULong); 13708 13709 /* Call the helper. This reads all parts of the in-memory 13710 image except for the XMM[0..15] array, which we do 13711 separately, in order that any undefinedness in the XMM 13712 registers is tracked separately by Memcheck and does not 13713 "infect" the in-guest-state shadow for the other parts of the 13714 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210, 13715 SSEROUND). */ 13716 stmt( IRStmt_Dirty(d) ); 13717 13718 /* And now the XMMs themselves. */ 13719 UInt xmm; 13720 for (xmm = 0; xmm < 16; xmm++) { 13721 putXMMReg(xmm, loadLE(Ity_V128, 13722 binop(Iop_Add64, mkexpr(addr), 13723 mkU64(160 + xmm * 16)))); 13724 } 13725 13726 goto decode_success; 13727 } 13728 break; 13729 13730 case 0xC2: 13731 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 13732 if (haveNo66noF2noF3(pfx) && sz == 4) { 13733 Long delta0 = delta; 13734 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 13735 if (delta > delta0) goto decode_success; 13736 } 13737 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 13738 if (haveF3no66noF2(pfx) && sz == 4) { 13739 Long delta0 = delta; 13740 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 13741 if (delta > delta0) goto decode_success; 13742 } 13743 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 13744 if (haveF2no66noF3(pfx) && sz == 4) { 13745 Long delta0 = delta; 13746 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 13747 if (delta > delta0) goto decode_success; 13748 } 13749 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 13750 if (have66noF2noF3(pfx) && sz == 2) { 13751 Long delta0 = delta; 13752 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 13753 if (delta > delta0) 
goto decode_success; 13754 } 13755 break; 13756 13757 case 0xC3: 13758 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 13759 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13760 modrm = getUChar(delta); 13761 if (!epartIsReg(modrm)) { 13762 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13763 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 13764 DIP("movnti %s,%s\n", dis_buf, 13765 nameIRegG(sz, pfx, modrm)); 13766 delta += alen; 13767 goto decode_success; 13768 } 13769 /* else fall through */ 13770 } 13771 break; 13772 13773 case 0xC4: 13774 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13775 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13776 put it into the specified lane of mmx(G). */ 13777 if (haveNo66noF2noF3(pfx) 13778 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13779 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 13780 mmx reg. t4 is the new lane value. t5 is the original 13781 mmx value. t6 is the new mmx value. 
*/ 13782 Int lane; 13783 t4 = newTemp(Ity_I16); 13784 t5 = newTemp(Ity_I64); 13785 t6 = newTemp(Ity_I64); 13786 modrm = getUChar(delta); 13787 do_MMX_preamble(); 13788 13789 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 13790 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 13791 13792 if (epartIsReg(modrm)) { 13793 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 13794 delta += 1+1; 13795 lane = getUChar(delta-1); 13796 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13797 nameIReg16(eregOfRexRM(pfx,modrm)), 13798 nameMMXReg(gregLO3ofRM(modrm))); 13799 } else { 13800 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13801 delta += 1+alen; 13802 lane = getUChar(delta-1); 13803 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13804 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13805 dis_buf, 13806 nameMMXReg(gregLO3ofRM(modrm))); 13807 } 13808 13809 switch (lane & 3) { 13810 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 13811 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 13812 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 13813 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 13814 default: vassert(0); 13815 } 13816 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 13817 goto decode_success; 13818 } 13819 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13820 put it into the specified lane of xmm(G). 
*/ 13821 if (have66noF2noF3(pfx) 13822 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13823 Int lane; 13824 t4 = newTemp(Ity_I16); 13825 modrm = getUChar(delta); 13826 UInt rG = gregOfRexRM(pfx,modrm); 13827 if (epartIsReg(modrm)) { 13828 UInt rE = eregOfRexRM(pfx,modrm); 13829 assign(t4, getIReg16(rE)); 13830 delta += 1+1; 13831 lane = getUChar(delta-1); 13832 DIP("pinsrw $%d,%s,%s\n", 13833 (Int)lane, nameIReg16(rE), nameXMMReg(rG)); 13834 } else { 13835 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13836 1/*byte after the amode*/ ); 13837 delta += 1+alen; 13838 lane = getUChar(delta-1); 13839 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13840 DIP("pinsrw $%d,%s,%s\n", 13841 (Int)lane, dis_buf, nameXMMReg(rG)); 13842 } 13843 IRTemp src_vec = newTemp(Ity_V128); 13844 assign(src_vec, getXMMReg(rG)); 13845 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 13846 putXMMReg(rG, mkexpr(res_vec)); 13847 goto decode_success; 13848 } 13849 break; 13850 13851 case 0xC5: 13852 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13853 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 13854 zero-extend of it in ireg(G). 
*/ 13855 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13856 modrm = getUChar(delta); 13857 if (epartIsReg(modrm)) { 13858 IRTemp sV = newTemp(Ity_I64); 13859 t5 = newTemp(Ity_I16); 13860 do_MMX_preamble(); 13861 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 13862 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 13863 switch (getUChar(delta+1) & 3) { 13864 case 0: assign(t5, mkexpr(t0)); break; 13865 case 1: assign(t5, mkexpr(t1)); break; 13866 case 2: assign(t5, mkexpr(t2)); break; 13867 case 3: assign(t5, mkexpr(t3)); break; 13868 default: vassert(0); 13869 } 13870 if (sz == 8) 13871 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 13872 else 13873 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 13874 DIP("pextrw $%d,%s,%s\n", 13875 (Int)getUChar(delta+1), 13876 nameMMXReg(eregLO3ofRM(modrm)), 13877 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 13878 : nameIReg32(gregOfRexRM(pfx,modrm)) 13879 ); 13880 delta += 2; 13881 goto decode_success; 13882 } 13883 /* else fall through */ 13884 /* note, for anyone filling in the mem case: this insn has one 13885 byte after the amode and therefore you must pass 1 as the 13886 last arg to disAMode */ 13887 } 13888 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 13889 zero-extend of it in ireg(G). 
*/ 13890 if (have66noF2noF3(pfx) 13891 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13892 Long delta0 = delta; 13893 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 13894 False/*!isAvx*/ ); 13895 if (delta > delta0) goto decode_success; 13896 /* else fall through -- decoding has failed */ 13897 } 13898 break; 13899 13900 case 0xC6: 13901 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 13902 if (haveNo66noF2noF3(pfx) && sz == 4) { 13903 Int imm8 = 0; 13904 IRTemp sV = newTemp(Ity_V128); 13905 IRTemp dV = newTemp(Ity_V128); 13906 modrm = getUChar(delta); 13907 UInt rG = gregOfRexRM(pfx,modrm); 13908 assign( dV, getXMMReg(rG) ); 13909 if (epartIsReg(modrm)) { 13910 UInt rE = eregOfRexRM(pfx,modrm); 13911 assign( sV, getXMMReg(rE) ); 13912 imm8 = (Int)getUChar(delta+1); 13913 delta += 1+1; 13914 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 13915 } else { 13916 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13917 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13918 imm8 = (Int)getUChar(delta+alen); 13919 delta += 1+alen; 13920 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 13921 } 13922 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 13923 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13924 goto decode_success; 13925 } 13926 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 13927 if (have66noF2noF3(pfx) && sz == 2) { 13928 Int select; 13929 IRTemp sV = newTemp(Ity_V128); 13930 IRTemp dV = newTemp(Ity_V128); 13931 13932 modrm = getUChar(delta); 13933 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13934 13935 if (epartIsReg(modrm)) { 13936 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13937 select = (Int)getUChar(delta+1); 13938 delta += 1+1; 13939 DIP("shufpd $%d,%s,%s\n", select, 13940 nameXMMReg(eregOfRexRM(pfx,modrm)), 13941 nameXMMReg(gregOfRexRM(pfx,modrm))); 13942 } else { 13943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13944 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13945 select 
= getUChar(delta+alen); 13946 delta += 1+alen; 13947 DIP("shufpd $%d,%s,%s\n", select, 13948 dis_buf, 13949 nameXMMReg(gregOfRexRM(pfx,modrm))); 13950 } 13951 13952 IRTemp res = math_SHUFPD_128( sV, dV, select ); 13953 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13954 goto decode_success; 13955 } 13956 break; 13957 13958 case 0xD1: 13959 /* 66 0F D1 = PSRLW by E */ 13960 if (have66noF2noF3(pfx) && sz == 2) { 13961 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 13962 goto decode_success; 13963 } 13964 break; 13965 13966 case 0xD2: 13967 /* 66 0F D2 = PSRLD by E */ 13968 if (have66noF2noF3(pfx) && sz == 2) { 13969 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 13970 goto decode_success; 13971 } 13972 break; 13973 13974 case 0xD3: 13975 /* 66 0F D3 = PSRLQ by E */ 13976 if (have66noF2noF3(pfx) && sz == 2) { 13977 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 13978 goto decode_success; 13979 } 13980 break; 13981 13982 case 0xD4: 13983 /* 66 0F D4 = PADDQ */ 13984 if (have66noF2noF3(pfx) && sz == 2) { 13985 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13986 "paddq", Iop_Add64x2, False ); 13987 goto decode_success; 13988 } 13989 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 13990 /* 0F D4 = PADDQ -- add 64x1 */ 13991 if (haveNo66noF2noF3(pfx) && sz == 4) { 13992 do_MMX_preamble(); 13993 delta = dis_MMXop_regmem_to_reg ( 13994 vbi, pfx, delta, opc, "paddq", False ); 13995 goto decode_success; 13996 } 13997 break; 13998 13999 case 0xD5: 14000 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 14001 if (have66noF2noF3(pfx) && sz == 2) { 14002 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14003 "pmullw", Iop_Mul16x8, False ); 14004 goto decode_success; 14005 } 14006 break; 14007 14008 case 0xD6: 14009 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 14010 hi half). 
*/ 14011 if (haveF3no66noF2(pfx) && sz == 4) { 14012 modrm = getUChar(delta); 14013 if (epartIsReg(modrm)) { 14014 do_MMX_preamble(); 14015 putXMMReg( gregOfRexRM(pfx,modrm), 14016 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 14017 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14018 nameXMMReg(gregOfRexRM(pfx,modrm))); 14019 delta += 1; 14020 goto decode_success; 14021 } 14022 /* apparently no mem case for this insn */ 14023 } 14024 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 14025 or lo half xmm). */ 14026 if (have66noF2noF3(pfx) 14027 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14028 modrm = getUChar(delta); 14029 if (epartIsReg(modrm)) { 14030 /* fall through, awaiting test case */ 14031 /* dst: lo half copied, hi half zeroed */ 14032 } else { 14033 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14034 storeLE( mkexpr(addr), 14035 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 14036 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 14037 delta += alen; 14038 goto decode_success; 14039 } 14040 } 14041 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 14042 if (haveF2no66noF3(pfx) && sz == 4) { 14043 modrm = getUChar(delta); 14044 if (epartIsReg(modrm)) { 14045 do_MMX_preamble(); 14046 putMMXReg( gregLO3ofRM(modrm), 14047 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14048 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14049 nameMMXReg(gregLO3ofRM(modrm))); 14050 delta += 1; 14051 goto decode_success; 14052 } 14053 /* apparently no mem case for this insn */ 14054 } 14055 break; 14056 14057 case 0xD7: 14058 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 14059 lanes in xmm(E), turn them into a byte, and put 14060 zero-extend of it in ireg(G). Doing this directly is just 14061 too cumbersome; give up therefore and call a helper. 
*/ 14062 if (have66noF2noF3(pfx) 14063 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14064 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 14065 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 14066 goto decode_success; 14067 } 14068 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14069 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 14070 mmx(E), turn them into a byte, and put zero-extend of it in 14071 ireg(G). */ 14072 if (haveNo66noF2noF3(pfx) 14073 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14074 modrm = getUChar(delta); 14075 if (epartIsReg(modrm)) { 14076 do_MMX_preamble(); 14077 t0 = newTemp(Ity_I64); 14078 t1 = newTemp(Ity_I32); 14079 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 14080 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); 14081 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); 14082 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14083 nameIReg32(gregOfRexRM(pfx,modrm))); 14084 delta += 1; 14085 goto decode_success; 14086 } 14087 /* else fall through */ 14088 } 14089 break; 14090 14091 case 0xD8: 14092 /* 66 0F D8 = PSUBUSB */ 14093 if (have66noF2noF3(pfx) && sz == 2) { 14094 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14095 "psubusb", Iop_QSub8Ux16, False ); 14096 goto decode_success; 14097 } 14098 break; 14099 14100 case 0xD9: 14101 /* 66 0F D9 = PSUBUSW */ 14102 if (have66noF2noF3(pfx) && sz == 2) { 14103 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14104 "psubusw", Iop_QSub16Ux8, False ); 14105 goto decode_success; 14106 } 14107 break; 14108 14109 case 0xDA: 14110 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14111 /* 0F DA = PMINUB -- 8x8 unsigned min */ 14112 if (haveNo66noF2noF3(pfx) && sz == 4) { 14113 do_MMX_preamble(); 14114 delta = dis_MMXop_regmem_to_reg ( 14115 vbi, pfx, delta, opc, "pminub", False ); 14116 goto decode_success; 14117 } 14118 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 14119 if 
(have66noF2noF3(pfx) && sz == 2) { 14120 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14121 "pminub", Iop_Min8Ux16, False ); 14122 goto decode_success; 14123 } 14124 break; 14125 14126 case 0xDB: 14127 /* 66 0F DB = PAND */ 14128 if (have66noF2noF3(pfx) && sz == 2) { 14129 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 14130 goto decode_success; 14131 } 14132 break; 14133 14134 case 0xDC: 14135 /* 66 0F DC = PADDUSB */ 14136 if (have66noF2noF3(pfx) && sz == 2) { 14137 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14138 "paddusb", Iop_QAdd8Ux16, False ); 14139 goto decode_success; 14140 } 14141 break; 14142 14143 case 0xDD: 14144 /* 66 0F DD = PADDUSW */ 14145 if (have66noF2noF3(pfx) && sz == 2) { 14146 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14147 "paddusw", Iop_QAdd16Ux8, False ); 14148 goto decode_success; 14149 } 14150 break; 14151 14152 case 0xDE: 14153 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14154 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 14155 if (haveNo66noF2noF3(pfx) && sz == 4) { 14156 do_MMX_preamble(); 14157 delta = dis_MMXop_regmem_to_reg ( 14158 vbi, pfx, delta, opc, "pmaxub", False ); 14159 goto decode_success; 14160 } 14161 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 14162 if (have66noF2noF3(pfx) && sz == 2) { 14163 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14164 "pmaxub", Iop_Max8Ux16, False ); 14165 goto decode_success; 14166 } 14167 break; 14168 14169 case 0xDF: 14170 /* 66 0F DF = PANDN */ 14171 if (have66noF2noF3(pfx) && sz == 2) { 14172 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 ); 14173 goto decode_success; 14174 } 14175 break; 14176 14177 case 0xE0: 14178 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14179 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 14180 if (haveNo66noF2noF3(pfx) && sz == 4) { 14181 do_MMX_preamble(); 14182 delta = dis_MMXop_regmem_to_reg ( 14183 vbi, pfx, delta, opc, "pavgb", False ); 14184 goto 
decode_success; 14185 } 14186 /* 66 0F E0 = PAVGB */ 14187 if (have66noF2noF3(pfx) && sz == 2) { 14188 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14189 "pavgb", Iop_Avg8Ux16, False ); 14190 goto decode_success; 14191 } 14192 break; 14193 14194 case 0xE1: 14195 /* 66 0F E1 = PSRAW by E */ 14196 if (have66noF2noF3(pfx) && sz == 2) { 14197 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 14198 goto decode_success; 14199 } 14200 break; 14201 14202 case 0xE2: 14203 /* 66 0F E2 = PSRAD by E */ 14204 if (have66noF2noF3(pfx) && sz == 2) { 14205 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 14206 goto decode_success; 14207 } 14208 break; 14209 14210 case 0xE3: 14211 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14212 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 14213 if (haveNo66noF2noF3(pfx) && sz == 4) { 14214 do_MMX_preamble(); 14215 delta = dis_MMXop_regmem_to_reg ( 14216 vbi, pfx, delta, opc, "pavgw", False ); 14217 goto decode_success; 14218 } 14219 /* 66 0F E3 = PAVGW */ 14220 if (have66noF2noF3(pfx) && sz == 2) { 14221 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14222 "pavgw", Iop_Avg16Ux8, False ); 14223 goto decode_success; 14224 } 14225 break; 14226 14227 case 0xE4: 14228 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14229 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 14230 if (haveNo66noF2noF3(pfx) && sz == 4) { 14231 do_MMX_preamble(); 14232 delta = dis_MMXop_regmem_to_reg ( 14233 vbi, pfx, delta, opc, "pmuluh", False ); 14234 goto decode_success; 14235 } 14236 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 14237 if (have66noF2noF3(pfx) && sz == 2) { 14238 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14239 "pmulhuw", Iop_MulHi16Ux8, False ); 14240 goto decode_success; 14241 } 14242 break; 14243 14244 case 0xE5: 14245 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 14246 if (have66noF2noF3(pfx) 
&& sz == 2) { 14247 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14248 "pmulhw", Iop_MulHi16Sx8, False ); 14249 goto decode_success; 14250 } 14251 break; 14252 14253 case 0xE6: 14254 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14255 lo half xmm(G), and zero upper half, rounding towards zero */ 14256 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14257 lo half xmm(G), according to prevailing rounding mode, and zero 14258 upper half */ 14259 if ( (haveF2no66noF3(pfx) && sz == 4) 14260 || (have66noF2noF3(pfx) && sz == 2) ) { 14261 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 14262 toBool(sz == 2)/*r2zero*/); 14263 goto decode_success; 14264 } 14265 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 14266 F64 in xmm(G) */ 14267 if (haveF3no66noF2(pfx) && sz == 4) { 14268 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 14269 goto decode_success; 14270 } 14271 break; 14272 14273 case 0xE7: 14274 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14275 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 14276 Intel manual does not say anything about the usual business of 14277 the FP reg tags getting trashed whenever an MMX insn happens. 14278 So we just leave them alone. 14279 */ 14280 if (haveNo66noF2noF3(pfx) && sz == 4) { 14281 modrm = getUChar(delta); 14282 if (!epartIsReg(modrm)) { 14283 /* do_MMX_preamble(); Intel docs don't specify this */ 14284 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14285 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 14286 DIP("movntq %s,%s\n", dis_buf, 14287 nameMMXReg(gregLO3ofRM(modrm))); 14288 delta += alen; 14289 goto decode_success; 14290 } 14291 /* else fall through */ 14292 } 14293 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. 
*/ 14294 if (have66noF2noF3(pfx) && sz == 2) { 14295 modrm = getUChar(delta); 14296 if (!epartIsReg(modrm)) { 14297 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14298 gen_SEGV_if_not_16_aligned( addr ); 14299 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14300 DIP("movntdq %s,%s\n", dis_buf, 14301 nameXMMReg(gregOfRexRM(pfx,modrm))); 14302 delta += alen; 14303 goto decode_success; 14304 } 14305 /* else fall through */ 14306 } 14307 break; 14308 14309 case 0xE8: 14310 /* 66 0F E8 = PSUBSB */ 14311 if (have66noF2noF3(pfx) && sz == 2) { 14312 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14313 "psubsb", Iop_QSub8Sx16, False ); 14314 goto decode_success; 14315 } 14316 break; 14317 14318 case 0xE9: 14319 /* 66 0F E9 = PSUBSW */ 14320 if (have66noF2noF3(pfx) && sz == 2) { 14321 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14322 "psubsw", Iop_QSub16Sx8, False ); 14323 goto decode_success; 14324 } 14325 break; 14326 14327 case 0xEA: 14328 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14329 /* 0F EA = PMINSW -- 16x4 signed min */ 14330 if (haveNo66noF2noF3(pfx) && sz == 4) { 14331 do_MMX_preamble(); 14332 delta = dis_MMXop_regmem_to_reg ( 14333 vbi, pfx, delta, opc, "pminsw", False ); 14334 goto decode_success; 14335 } 14336 /* 66 0F EA = PMINSW -- 16x8 signed min */ 14337 if (have66noF2noF3(pfx) && sz == 2) { 14338 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14339 "pminsw", Iop_Min16Sx8, False ); 14340 goto decode_success; 14341 } 14342 break; 14343 14344 case 0xEB: 14345 /* 66 0F EB = POR */ 14346 if (have66noF2noF3(pfx) && sz == 2) { 14347 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 14348 goto decode_success; 14349 } 14350 break; 14351 14352 case 0xEC: 14353 /* 66 0F EC = PADDSB */ 14354 if (have66noF2noF3(pfx) && sz == 2) { 14355 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14356 "paddsb", Iop_QAdd8Sx16, False ); 14357 goto decode_success; 14358 } 14359 break; 14360 14361 case 0xED: 14362 /* 66 0F ED = PADDSW 
*/ 14363 if (have66noF2noF3(pfx) && sz == 2) { 14364 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14365 "paddsw", Iop_QAdd16Sx8, False ); 14366 goto decode_success; 14367 } 14368 break; 14369 14370 case 0xEE: 14371 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14372 /* 0F EE = PMAXSW -- 16x4 signed max */ 14373 if (haveNo66noF2noF3(pfx) && sz == 4) { 14374 do_MMX_preamble(); 14375 delta = dis_MMXop_regmem_to_reg ( 14376 vbi, pfx, delta, opc, "pmaxsw", False ); 14377 goto decode_success; 14378 } 14379 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 14380 if (have66noF2noF3(pfx) && sz == 2) { 14381 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14382 "pmaxsw", Iop_Max16Sx8, False ); 14383 goto decode_success; 14384 } 14385 break; 14386 14387 case 0xEF: 14388 /* 66 0F EF = PXOR */ 14389 if (have66noF2noF3(pfx) && sz == 2) { 14390 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 ); 14391 goto decode_success; 14392 } 14393 break; 14394 14395 case 0xF1: 14396 /* 66 0F F1 = PSLLW by E */ 14397 if (have66noF2noF3(pfx) && sz == 2) { 14398 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 ); 14399 goto decode_success; 14400 } 14401 break; 14402 14403 case 0xF2: 14404 /* 66 0F F2 = PSLLD by E */ 14405 if (have66noF2noF3(pfx) && sz == 2) { 14406 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 ); 14407 goto decode_success; 14408 } 14409 break; 14410 14411 case 0xF3: 14412 /* 66 0F F3 = PSLLQ by E */ 14413 if (have66noF2noF3(pfx) && sz == 2) { 14414 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 ); 14415 goto decode_success; 14416 } 14417 break; 14418 14419 case 0xF4: 14420 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14421 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 14422 half */ 14423 if (have66noF2noF3(pfx) && sz == 2) { 14424 IRTemp sV = newTemp(Ity_V128); 14425 IRTemp dV = newTemp(Ity_V128); 14426 modrm = getUChar(delta); 14427 UInt rG = 
gregOfRexRM(pfx,modrm); 14428 assign( dV, getXMMReg(rG) ); 14429 if (epartIsReg(modrm)) { 14430 UInt rE = eregOfRexRM(pfx,modrm); 14431 assign( sV, getXMMReg(rE) ); 14432 delta += 1; 14433 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14434 } else { 14435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14436 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14437 delta += alen; 14438 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG)); 14439 } 14440 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) ); 14441 goto decode_success; 14442 } 14443 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14444 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14445 0 to form 64-bit result */ 14446 if (haveNo66noF2noF3(pfx) && sz == 4) { 14447 IRTemp sV = newTemp(Ity_I64); 14448 IRTemp dV = newTemp(Ity_I64); 14449 t1 = newTemp(Ity_I32); 14450 t0 = newTemp(Ity_I32); 14451 modrm = getUChar(delta); 14452 14453 do_MMX_preamble(); 14454 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14455 14456 if (epartIsReg(modrm)) { 14457 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14458 delta += 1; 14459 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14460 nameMMXReg(gregLO3ofRM(modrm))); 14461 } else { 14462 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14463 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14464 delta += alen; 14465 DIP("pmuludq %s,%s\n", dis_buf, 14466 nameMMXReg(gregLO3ofRM(modrm))); 14467 } 14468 14469 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 14470 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 14471 putMMXReg( gregLO3ofRM(modrm), 14472 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 14473 goto decode_success; 14474 } 14475 break; 14476 14477 case 0xF5: 14478 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 14479 E(xmm or mem) to G(xmm) */ 14480 if (have66noF2noF3(pfx) && sz == 2) { 14481 IRTemp sV = newTemp(Ity_V128); 14482 IRTemp dV = newTemp(Ity_V128); 14483 modrm = getUChar(delta); 14484 UInt rG = 
gregOfRexRM(pfx,modrm); 14485 if (epartIsReg(modrm)) { 14486 UInt rE = eregOfRexRM(pfx,modrm); 14487 assign( sV, getXMMReg(rE) ); 14488 delta += 1; 14489 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14490 } else { 14491 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14492 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14493 delta += alen; 14494 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG)); 14495 } 14496 assign( dV, getXMMReg(rG) ); 14497 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) ); 14498 goto decode_success; 14499 } 14500 break; 14501 14502 case 0xF6: 14503 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14504 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 14505 if (haveNo66noF2noF3(pfx) && sz == 4) { 14506 do_MMX_preamble(); 14507 delta = dis_MMXop_regmem_to_reg ( 14508 vbi, pfx, delta, opc, "psadbw", False ); 14509 goto decode_success; 14510 } 14511 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 14512 from E(xmm or mem) to G(xmm) */ 14513 if (have66noF2noF3(pfx) && sz == 2) { 14514 IRTemp sV = newTemp(Ity_V128); 14515 IRTemp dV = newTemp(Ity_V128); 14516 modrm = getUChar(delta); 14517 UInt rG = gregOfRexRM(pfx,modrm); 14518 if (epartIsReg(modrm)) { 14519 UInt rE = eregOfRexRM(pfx,modrm); 14520 assign( sV, getXMMReg(rE) ); 14521 delta += 1; 14522 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14523 } else { 14524 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14525 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14526 delta += alen; 14527 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG)); 14528 } 14529 assign( dV, getXMMReg(rG) ); 14530 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) ); 14531 14532 goto decode_success; 14533 } 14534 break; 14535 14536 case 0xF7: 14537 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14538 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 14539 if (haveNo66noF2noF3(pfx) && sz == 4) { 14540 Bool ok = False; 14541 delta = dis_MMX( 
&ok, vbi, pfx, sz, delta-1 ); 14542 if (ok) goto decode_success; 14543 } 14544 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 14545 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) { 14546 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ ); 14547 goto decode_success; 14548 } 14549 break; 14550 14551 case 0xF8: 14552 /* 66 0F F8 = PSUBB */ 14553 if (have66noF2noF3(pfx) && sz == 2) { 14554 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14555 "psubb", Iop_Sub8x16, False ); 14556 goto decode_success; 14557 } 14558 break; 14559 14560 case 0xF9: 14561 /* 66 0F F9 = PSUBW */ 14562 if (have66noF2noF3(pfx) && sz == 2) { 14563 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14564 "psubw", Iop_Sub16x8, False ); 14565 goto decode_success; 14566 } 14567 break; 14568 14569 case 0xFA: 14570 /* 66 0F FA = PSUBD */ 14571 if (have66noF2noF3(pfx) && sz == 2) { 14572 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14573 "psubd", Iop_Sub32x4, False ); 14574 goto decode_success; 14575 } 14576 break; 14577 14578 case 0xFB: 14579 /* 66 0F FB = PSUBQ */ 14580 if (have66noF2noF3(pfx) && sz == 2) { 14581 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14582 "psubq", Iop_Sub64x2, False ); 14583 goto decode_success; 14584 } 14585 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14586 /* 0F FB = PSUBQ -- sub 64x1 */ 14587 if (haveNo66noF2noF3(pfx) && sz == 4) { 14588 do_MMX_preamble(); 14589 delta = dis_MMXop_regmem_to_reg ( 14590 vbi, pfx, delta, opc, "psubq", False ); 14591 goto decode_success; 14592 } 14593 break; 14594 14595 case 0xFC: 14596 /* 66 0F FC = PADDB */ 14597 if (have66noF2noF3(pfx) && sz == 2) { 14598 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14599 "paddb", Iop_Add8x16, False ); 14600 goto decode_success; 14601 } 14602 break; 14603 14604 case 0xFD: 14605 /* 66 0F FD = PADDW */ 14606 if (have66noF2noF3(pfx) && sz == 2) { 14607 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14608 "paddw", Iop_Add16x8, False ); 14609 goto 
decode_success;
      }
      break;

   case 0xFE:
      /* 66 0F FE = PADDD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta, 
                                    "paddd", Iop_Add32x4, False );
         goto decode_success;
      }
      break;

   default:
      goto decode_failure;

   }

  decode_failure:
   /* No rule matched: report failure and leave the instruction
      offset where it was on entry. */
   *decode_OK = False;
   return deltaIN;

  decode_success:
   /* A rule matched: report success and return the advanced
      instruction offset. */
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* MOVDDUP xmm/m64 -> xmm: duplicate the low 64-bit lane of the
   source into both 64-bit lanes of xmm(G).  For the AVX (isAvx)
   form the upper half of the destination YMM register is zeroed as
   well, via putYMMRegLoAndZU.  Returns the updated instruction
   offset. */
static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp d0    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      /* Register source: take the low 64 bits of xmm(E). */
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
      assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
   } else {
      /* Memory source: only 64 bits are read; no alignment check. */
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
   return delta;
}


/* VMOVDDUP ymm/m256 -> ymm: duplicate 64-bit lane 0 into lanes 0 and
   1, and 64-bit lane 2 into lanes 2 and 3, of ymm(G).  The memory
   form reads the two source lanes from addr and addr+16.  Returns
   the updated instruction offset. */
static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp d0    = newTemp(Ity_I64);
   IRTemp d1    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
      assign ( d0, getYMMRegLane64(rE, 0) );
      assign ( d1, getYMMRegLane64(rE, 2) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
                                        mkexpr(addr), mkU64(16))) );
      DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   putYMMRegLane64( rG, 0, mkexpr(d0) );
   putYMMRegLane64( rG, 1, mkexpr(d0) );
   putYMMRegLane64( rG, 2, mkexpr(d1) );
   putYMMRegLane64( rG, 3, mkexpr(d1) );
   return delta;
}


/* MOVSLDUP (isL) / MOVSHDUP (!isL) xmm/m128 -> xmm: duplicate the
   even-numbered (isL) or odd-numbered (!isL) 32-bit lanes of the
   source across each lane pair of xmm(G).  The non-AVX memory form
   requires a 16-aligned operand (SEGV otherwise); the AVX form also
   zeroes the upper YMM half.  Returns the updated instruction
   offset. */
static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V128);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


/* VMOVSLDUP / VMOVSHDUP with a 256-bit operand: the same
   lane-duplication as the 128-bit version, applied independently to
   each 128-bit half.  NOTE(review): unlike the non-AVX 128-bit
   memory form, no alignment check is generated here.  Returns the
   updated instruction offset. */
static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V256);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
   putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
                                : mkV128from32s( s7, s7, s5, s5 ) );
   putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
                                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


/* HADDPS/HSUBPS core: pairwise add (isAdd) or subtract adjacent
   32-bit float lanes of dV and sV.  leftV gathers the even-numbered
   lanes and rightV the odd-numbered lanes of both inputs, so one
   vector op produces all four results.  The rounding mode is faked
   (see XXXROUNDINGFIXME).  Returns a new V128 temp holding the
   result. */
static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;

   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );

   assign( leftV,  mkV128from32s( s2, s0, d2, d0 ) );
   assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}


/* HADDPD/HSUBPD core: pairwise add (isAdd) or subtract the two
   64-bit float lanes of dV and sV, analogously to math_HADDPS_128.
   Rounding mode is faked here too (XXXROUNDINGFIXME). */
static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s1, s0, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   assign( leftV,  binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ?
Iop_Add64Fx2 : Iop_Sub64Fx2, 14811 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 14812 return res; 14813 } 14814 14815 14816 __attribute__((noinline)) 14817 static 14818 Long dis_ESC_0F__SSE3 ( Bool* decode_OK, 14819 const VexAbiInfo* vbi, 14820 Prefix pfx, Int sz, Long deltaIN ) 14821 { 14822 IRTemp addr = IRTemp_INVALID; 14823 UChar modrm = 0; 14824 Int alen = 0; 14825 HChar dis_buf[50]; 14826 14827 *decode_OK = False; 14828 14829 Long delta = deltaIN; 14830 UChar opc = getUChar(delta); 14831 delta++; 14832 switch (opc) { 14833 14834 case 0x12: 14835 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 14836 duplicating some lanes (2:2:0:0). */ 14837 if (haveF3no66noF2(pfx) && sz == 4) { 14838 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14839 True/*isL*/ ); 14840 goto decode_success; 14841 } 14842 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 14843 duplicating some lanes (0:1:0:1). */ 14844 if (haveF2no66noF3(pfx) 14845 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14846 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ ); 14847 goto decode_success; 14848 } 14849 break; 14850 14851 case 0x16: 14852 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 14853 duplicating some lanes (3:3:1:1). */ 14854 if (haveF3no66noF2(pfx) && sz == 4) { 14855 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14856 False/*!isL*/ ); 14857 goto decode_success; 14858 } 14859 break; 14860 14861 case 0x7C: 14862 case 0x7D: 14863 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 14864 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 14865 if (haveF2no66noF3(pfx) && sz == 4) { 14866 IRTemp eV = newTemp(Ity_V128); 14867 IRTemp gV = newTemp(Ity_V128); 14868 Bool isAdd = opc == 0x7C; 14869 const HChar* str = isAdd ? 
"add" : "sub"; 14870 modrm = getUChar(delta); 14871 UInt rG = gregOfRexRM(pfx,modrm); 14872 if (epartIsReg(modrm)) { 14873 UInt rE = eregOfRexRM(pfx,modrm); 14874 assign( eV, getXMMReg(rE) ); 14875 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14876 delta += 1; 14877 } else { 14878 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14879 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14880 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14881 delta += alen; 14882 } 14883 14884 assign( gV, getXMMReg(rG) ); 14885 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) ); 14886 goto decode_success; 14887 } 14888 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 14889 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 14890 if (have66noF2noF3(pfx) && sz == 2) { 14891 IRTemp eV = newTemp(Ity_V128); 14892 IRTemp gV = newTemp(Ity_V128); 14893 Bool isAdd = opc == 0x7C; 14894 const HChar* str = isAdd ? "add" : "sub"; 14895 modrm = getUChar(delta); 14896 UInt rG = gregOfRexRM(pfx,modrm); 14897 if (epartIsReg(modrm)) { 14898 UInt rE = eregOfRexRM(pfx,modrm); 14899 assign( eV, getXMMReg(rE) ); 14900 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14901 delta += 1; 14902 } else { 14903 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14904 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14905 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14906 delta += alen; 14907 } 14908 14909 assign( gV, getXMMReg(rG) ); 14910 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) ); 14911 goto decode_success; 14912 } 14913 break; 14914 14915 case 0xD0: 14916 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). 
*/ 14917 if (have66noF2noF3(pfx) && sz == 2) { 14918 IRTemp eV = newTemp(Ity_V128); 14919 IRTemp gV = newTemp(Ity_V128); 14920 modrm = getUChar(delta); 14921 UInt rG = gregOfRexRM(pfx,modrm); 14922 if (epartIsReg(modrm)) { 14923 UInt rE = eregOfRexRM(pfx,modrm); 14924 assign( eV, getXMMReg(rE) ); 14925 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14926 delta += 1; 14927 } else { 14928 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14929 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14930 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG)); 14931 delta += alen; 14932 } 14933 14934 assign( gV, getXMMReg(rG) ); 14935 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) ); 14936 goto decode_success; 14937 } 14938 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 14939 if (haveF2no66noF3(pfx) && sz == 4) { 14940 IRTemp eV = newTemp(Ity_V128); 14941 IRTemp gV = newTemp(Ity_V128); 14942 modrm = getUChar(delta); 14943 UInt rG = gregOfRexRM(pfx,modrm); 14944 14945 modrm = getUChar(delta); 14946 if (epartIsReg(modrm)) { 14947 UInt rE = eregOfRexRM(pfx,modrm); 14948 assign( eV, getXMMReg(rE) ); 14949 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14950 delta += 1; 14951 } else { 14952 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14953 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14954 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG)); 14955 delta += alen; 14956 } 14957 14958 assign( gV, getXMMReg(rG) ); 14959 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) ); 14960 goto decode_success; 14961 } 14962 break; 14963 14964 case 0xF0: 14965 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). 
*/ 14966 if (haveF2no66noF3(pfx) && sz == 4) { 14967 modrm = getUChar(delta); 14968 if (epartIsReg(modrm)) { 14969 goto decode_failure; 14970 } else { 14971 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14972 putXMMReg( gregOfRexRM(pfx,modrm), 14973 loadLE(Ity_V128, mkexpr(addr)) ); 14974 DIP("lddqu %s,%s\n", dis_buf, 14975 nameXMMReg(gregOfRexRM(pfx,modrm))); 14976 delta += alen; 14977 } 14978 goto decode_success; 14979 } 14980 break; 14981 14982 default: 14983 goto decode_failure; 14984 14985 } 14986 14987 decode_failure: 14988 *decode_OK = False; 14989 return deltaIN; 14990 14991 decode_success: 14992 *decode_OK = True; 14993 return delta; 14994 } 14995 14996 14997 /*------------------------------------------------------------*/ 14998 /*--- ---*/ 14999 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/ 15000 /*--- ---*/ 15001 /*------------------------------------------------------------*/ 15002 15003 static 15004 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ ) 15005 { 15006 IRTemp sHi = newTemp(Ity_I64); 15007 IRTemp sLo = newTemp(Ity_I64); 15008 IRTemp dHi = newTemp(Ity_I64); 15009 IRTemp dLo = newTemp(Ity_I64); 15010 IRTemp rHi = newTemp(Ity_I64); 15011 IRTemp rLo = newTemp(Ity_I64); 15012 IRTemp sevens = newTemp(Ity_I64); 15013 IRTemp mask0x80hi = newTemp(Ity_I64); 15014 IRTemp mask0x80lo = newTemp(Ity_I64); 15015 IRTemp maskBit3hi = newTemp(Ity_I64); 15016 IRTemp maskBit3lo = newTemp(Ity_I64); 15017 IRTemp sAnd7hi = newTemp(Ity_I64); 15018 IRTemp sAnd7lo = newTemp(Ity_I64); 15019 IRTemp permdHi = newTemp(Ity_I64); 15020 IRTemp permdLo = newTemp(Ity_I64); 15021 IRTemp res = newTemp(Ity_V128); 15022 15023 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15024 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15025 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15026 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15027 15028 assign( sevens, mkU64(0x0707070707070707ULL) ); 15029 15030 /* mask0x80hi = Not(SarN8x8(sHi,7)) 15031 
maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
      sAnd7hi = And(sHi,sevens)
      permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                    And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
      rHi = And(permdHi,mask0x80hi)
   */
   assign(
      mask0x80hi,
      unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

   assign(
      maskBit3hi,
      binop(Iop_SarN8x8,
            binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
            mkU8(7)));

   assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

   /* bit 3 of each control byte selects whether the source byte
      comes from the high (dHi) or low (dLo) half of the data */
   assign(
      permdHi,
      binop(
         Iop_Or64,
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
               mkexpr(maskBit3hi)),
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
               unop(Iop_Not64,mkexpr(maskBit3hi))) ));

   assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

   /* And the same for the lower half of the result.  What fun. */

   assign(
      mask0x80lo,
      unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

   assign(
      maskBit3lo,
      binop(Iop_SarN8x8,
            binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
            mkU8(7)));

   assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

   assign(
      permdLo,
      binop(
         Iop_Or64,
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
               mkexpr(maskBit3lo)),
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
               unop(Iop_Not64,mkexpr(maskBit3lo))) ));

   assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

   assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* PSHUFB core, 256-bit: apply math_PSHUFB_XMM independently to each
   128-bit half, as per VPSHUFB semantics. */
static
IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSHUFB_XMM(dHi, sHi)),
                     mkexpr(math_PSHUFB_XMM(dLo, sLo))));
   return res;
}


/* PHADDW/D/SW and PHSUBW/D/SW, 128-bit (and AVX 128-bit) forms:
   horizontal pairwise add/sub.  opc selects lane width and
   operation (see the switch below).  Returns the updated offset. */
static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V128);
   IRTemp dV     = newTemp(Ity_V128);
   IRTemp sHi    = newTemp(Ity_I64);
   IRTemp sLo    = newTemp(Ity_I64);
   IRTemp dHi    = newTemp(Ity_I64);
   IRTemp dLo    = newTemp(Ity_I64);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   /* non-AVX form is destructive: second source is G itself */
   UInt   rV     = isAvx ? getVexNvvvv(pfx) : rG;

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   if (opc == 0x02 || opc == 0x06) {
      /* 32-bit lane variants need 32-bit odd/even gathering */
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getXMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          dis_buf, nameXMMReg(rG));
      delta += alen;
   }

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG,
        binop(Iop_64HLtoV128,
              binop(opV64,
                    binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                    binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
              binop(opV64,
                    binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                    binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
   return delta;
}


/* VPHADDW/D/SW and VPHSUBW/D/SW, 256-bit forms: as dis_PHADD_128 but
   operating on each 128-bit half of the ymm operands. */
static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V256);
   IRTemp dV     = newTemp(Ity_V256);
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   UInt   rV     = getVexNvvvv(pfx);

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   if (opc == 0x02 || opc == 0x06) {
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getYMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
      delta += alen;
   }

   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */

   putYMMReg( rG,
              binop(Iop_V128HLtoV256,
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s3),mkexpr(s2)),
                                binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d3),mkexpr(d2)),
                                binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s1),mkexpr(s0)),
                                binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d1),mkexpr(d0)),
                                binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
   return delta;
}


/* PMADDUBSW core, 128-bit: multiply unsigned bytes of dV by signed
   bytes of sV and add adjacent 16-bit products with signed
   saturation.  Odd/even byte lanes are separated by shift pairs
   (arithmetic for sign-extension, logical for zero-extension). */
static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVoddsSX  = newTemp(Ity_V128);
   IRTemp sVevensSX = newTemp(Ity_V128);
   IRTemp dVoddsZX  = newTemp(Ity_V128);
   IRTemp dVevensZX = newTemp(Ity_V128);
   /* compute dV unsigned x sV signed */
   assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
   assign( sVevensSX, binop(Iop_SarN16x8,
                            binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                            mkU8(8)) );
   assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
   assign( dVevensZX, binop(Iop_ShrN16x8,
                            binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                            mkU8(8)) );

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_QAdd16Sx8,
                      binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
                      binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
                     )
         );
   return res;
}


/* PMADDUBSW core, 256-bit: apply the 128-bit version to each half. */
static
IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDUBSW_128(dHi, sHi)),
                     mkexpr(math_PMADDUBSW_128(dLo, sLo))));
   return res;
}


/* Top-level decoder for the SSSE3 instructions in the 0F 38 escape
   space.  On success *decode_OK is set True and the updated offset
   is returned; otherwise deltaIN is returned unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x00:
      /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pshufb %s,%s\n", dis_buf,
                nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         IRTemp
res = math_PSHUFB_XMM( dV, sV ); 15340 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res)); 15341 goto decode_success; 15342 } 15343 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 15344 if (haveNo66noF2noF3(pfx) && sz == 4) { 15345 IRTemp sV = newTemp(Ity_I64); 15346 IRTemp dV = newTemp(Ity_I64); 15347 15348 modrm = getUChar(delta); 15349 do_MMX_preamble(); 15350 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15351 15352 if (epartIsReg(modrm)) { 15353 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15354 delta += 1; 15355 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15356 nameMMXReg(gregLO3ofRM(modrm))); 15357 } else { 15358 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15359 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15360 delta += alen; 15361 DIP("pshufb %s,%s\n", dis_buf, 15362 nameMMXReg(gregLO3ofRM(modrm))); 15363 } 15364 15365 putMMXReg( 15366 gregLO3ofRM(modrm), 15367 binop( 15368 Iop_And64, 15369 /* permute the lanes */ 15370 binop( 15371 Iop_Perm8x8, 15372 mkexpr(dV), 15373 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 15374 ), 15375 /* mask off lanes which have (index & 0x80) == 0x80 */ 15376 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 15377 ) 15378 ); 15379 goto decode_success; 15380 } 15381 break; 15382 15383 case 0x01: 15384 case 0x02: 15385 case 0x03: 15386 case 0x05: 15387 case 0x06: 15388 case 0x07: 15389 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 15390 G to G (xmm). */ 15391 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 15392 G to G (xmm). */ 15393 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 15394 xmm) and G to G (xmm). */ 15395 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 15396 G to G (xmm). */ 15397 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 15398 G to G (xmm). */ 15399 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 15400 xmm) and G to G (xmm). 
*/ 15401 if (have66noF2noF3(pfx) 15402 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15403 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc ); 15404 goto decode_success; 15405 } 15406 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 15407 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 15408 to G (mmx). */ 15409 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 15410 to G (mmx). */ 15411 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 15412 mmx) and G to G (mmx). */ 15413 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 15414 to G (mmx). */ 15415 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 15416 to G (mmx). */ 15417 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 15418 mmx) and G to G (mmx). */ 15419 if (haveNo66noF2noF3(pfx) && sz == 4) { 15420 const HChar* str = "???"; 15421 IROp opV64 = Iop_INVALID; 15422 IROp opCatO = Iop_CatOddLanes16x4; 15423 IROp opCatE = Iop_CatEvenLanes16x4; 15424 IRTemp sV = newTemp(Ity_I64); 15425 IRTemp dV = newTemp(Ity_I64); 15426 15427 modrm = getUChar(delta); 15428 15429 switch (opc) { 15430 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 15431 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 15432 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 15433 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 15434 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 15435 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 15436 default: vassert(0); 15437 } 15438 if (opc == 0x02 || opc == 0x06) { 15439 opCatO = Iop_InterleaveHI32x2; 15440 opCatE = Iop_InterleaveLO32x2; 15441 } 15442 15443 do_MMX_preamble(); 15444 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15445 15446 if (epartIsReg(modrm)) { 15447 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15448 delta += 1; 15449 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15450 nameMMXReg(gregLO3ofRM(modrm))); 15451 } else { 15452 addr 
= disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15453 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15454 delta += alen; 15455 DIP("ph%s %s,%s\n", str, dis_buf, 15456 nameMMXReg(gregLO3ofRM(modrm))); 15457 } 15458 15459 putMMXReg( 15460 gregLO3ofRM(modrm), 15461 binop(opV64, 15462 binop(opCatE,mkexpr(sV),mkexpr(dV)), 15463 binop(opCatO,mkexpr(sV),mkexpr(dV)) 15464 ) 15465 ); 15466 goto decode_success; 15467 } 15468 break; 15469 15470 case 0x04: 15471 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 15472 Unsigned Bytes (XMM) */ 15473 if (have66noF2noF3(pfx) 15474 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15475 IRTemp sV = newTemp(Ity_V128); 15476 IRTemp dV = newTemp(Ity_V128); 15477 modrm = getUChar(delta); 15478 UInt rG = gregOfRexRM(pfx,modrm); 15479 15480 assign( dV, getXMMReg(rG) ); 15481 15482 if (epartIsReg(modrm)) { 15483 UInt rE = eregOfRexRM(pfx,modrm); 15484 assign( sV, getXMMReg(rE) ); 15485 delta += 1; 15486 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15487 } else { 15488 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15489 gen_SEGV_if_not_16_aligned( addr ); 15490 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15491 delta += alen; 15492 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG)); 15493 } 15494 15495 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) ); 15496 goto decode_success; 15497 } 15498 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 15499 Unsigned Bytes (MMX) */ 15500 if (haveNo66noF2noF3(pfx) && sz == 4) { 15501 IRTemp sV = newTemp(Ity_I64); 15502 IRTemp dV = newTemp(Ity_I64); 15503 IRTemp sVoddsSX = newTemp(Ity_I64); 15504 IRTemp sVevensSX = newTemp(Ity_I64); 15505 IRTemp dVoddsZX = newTemp(Ity_I64); 15506 IRTemp dVevensZX = newTemp(Ity_I64); 15507 15508 modrm = getUChar(delta); 15509 do_MMX_preamble(); 15510 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15511 15512 if (epartIsReg(modrm)) { 15513 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15514 delta += 1; 15515 
DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15516 nameMMXReg(gregLO3ofRM(modrm))); 15517 } else { 15518 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15519 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15520 delta += alen; 15521 DIP("pmaddubsw %s,%s\n", dis_buf, 15522 nameMMXReg(gregLO3ofRM(modrm))); 15523 } 15524 15525 /* compute dV unsigned x sV signed */ 15526 assign( sVoddsSX, 15527 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 15528 assign( sVevensSX, 15529 binop(Iop_SarN16x4, 15530 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 15531 mkU8(8)) ); 15532 assign( dVoddsZX, 15533 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 15534 assign( dVevensZX, 15535 binop(Iop_ShrN16x4, 15536 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 15537 mkU8(8)) ); 15538 15539 putMMXReg( 15540 gregLO3ofRM(modrm), 15541 binop(Iop_QAdd16Sx4, 15542 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 15543 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 15544 ) 15545 ); 15546 goto decode_success; 15547 } 15548 break; 15549 15550 case 0x08: 15551 case 0x09: 15552 case 0x0A: 15553 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 15554 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 15555 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */ 15556 if (have66noF2noF3(pfx) 15557 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15558 IRTemp sV = newTemp(Ity_V128); 15559 IRTemp dV = newTemp(Ity_V128); 15560 IRTemp sHi = newTemp(Ity_I64); 15561 IRTemp sLo = newTemp(Ity_I64); 15562 IRTemp dHi = newTemp(Ity_I64); 15563 IRTemp dLo = newTemp(Ity_I64); 15564 const HChar* str = "???"; 15565 Int laneszB = 0; 15566 15567 switch (opc) { 15568 case 0x08: laneszB = 1; str = "b"; break; 15569 case 0x09: laneszB = 2; str = "w"; break; 15570 case 0x0A: laneszB = 4; str = "d"; break; 15571 default: vassert(0); 15572 } 15573 15574 modrm = getUChar(delta); 15575 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15576 15577 if (epartIsReg(modrm)) { 15578 assign( sV, 
getXMMReg(eregOfRexRM(pfx,modrm)) ); 15579 delta += 1; 15580 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 15581 nameXMMReg(gregOfRexRM(pfx,modrm))); 15582 } else { 15583 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15584 gen_SEGV_if_not_16_aligned( addr ); 15585 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15586 delta += alen; 15587 DIP("psign%s %s,%s\n", str, dis_buf, 15588 nameXMMReg(gregOfRexRM(pfx,modrm))); 15589 } 15590 15591 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15592 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15593 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15594 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15595 15596 putXMMReg( 15597 gregOfRexRM(pfx,modrm), 15598 binop(Iop_64HLtoV128, 15599 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 15600 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 15601 ) 15602 ); 15603 goto decode_success; 15604 } 15605 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ 15606 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ 15607 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ 15608 if (haveNo66noF2noF3(pfx) && sz == 4) { 15609 IRTemp sV = newTemp(Ity_I64); 15610 IRTemp dV = newTemp(Ity_I64); 15611 const HChar* str = "???"; 15612 Int laneszB = 0; 15613 15614 switch (opc) { 15615 case 0x08: laneszB = 1; str = "b"; break; 15616 case 0x09: laneszB = 2; str = "w"; break; 15617 case 0x0A: laneszB = 4; str = "d"; break; 15618 default: vassert(0); 15619 } 15620 15621 modrm = getUChar(delta); 15622 do_MMX_preamble(); 15623 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15624 15625 if (epartIsReg(modrm)) { 15626 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15627 delta += 1; 15628 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15629 nameMMXReg(gregLO3ofRM(modrm))); 15630 } else { 15631 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15632 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15633 delta += alen; 15634 DIP("psign%s %s,%s\n", str, dis_buf, 15635 
nameMMXReg(gregLO3ofRM(modrm))); 15636 } 15637 15638 putMMXReg( 15639 gregLO3ofRM(modrm), 15640 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 15641 ); 15642 goto decode_success; 15643 } 15644 break; 15645 15646 case 0x0B: 15647 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and 15648 Scale (XMM) */ 15649 if (have66noF2noF3(pfx) 15650 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15651 IRTemp sV = newTemp(Ity_V128); 15652 IRTemp dV = newTemp(Ity_V128); 15653 IRTemp sHi = newTemp(Ity_I64); 15654 IRTemp sLo = newTemp(Ity_I64); 15655 IRTemp dHi = newTemp(Ity_I64); 15656 IRTemp dLo = newTemp(Ity_I64); 15657 15658 modrm = getUChar(delta); 15659 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15660 15661 if (epartIsReg(modrm)) { 15662 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15663 delta += 1; 15664 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 15665 nameXMMReg(gregOfRexRM(pfx,modrm))); 15666 } else { 15667 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15668 gen_SEGV_if_not_16_aligned( addr ); 15669 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15670 delta += alen; 15671 DIP("pmulhrsw %s,%s\n", dis_buf, 15672 nameXMMReg(gregOfRexRM(pfx,modrm))); 15673 } 15674 15675 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15676 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15677 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15678 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15679 15680 putXMMReg( 15681 gregOfRexRM(pfx,modrm), 15682 binop(Iop_64HLtoV128, 15683 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 15684 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 15685 ) 15686 ); 15687 goto decode_success; 15688 } 15689 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale 15690 (MMX) */ 15691 if (haveNo66noF2noF3(pfx) && sz == 4) { 15692 IRTemp sV = newTemp(Ity_I64); 15693 IRTemp dV = newTemp(Ity_I64); 15694 15695 modrm = getUChar(delta); 15696 do_MMX_preamble(); 15697 assign( dV, getMMXReg(gregLO3ofRM(modrm)) 
); 15698 15699 if (epartIsReg(modrm)) { 15700 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15701 delta += 1; 15702 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15703 nameMMXReg(gregLO3ofRM(modrm))); 15704 } else { 15705 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15706 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15707 delta += alen; 15708 DIP("pmulhrsw %s,%s\n", dis_buf, 15709 nameMMXReg(gregLO3ofRM(modrm))); 15710 } 15711 15712 putMMXReg( 15713 gregLO3ofRM(modrm), 15714 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) 15715 ); 15716 goto decode_success; 15717 } 15718 break; 15719 15720 case 0x1C: 15721 case 0x1D: 15722 case 0x1E: 15723 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ 15724 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ 15725 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ 15726 if (have66noF2noF3(pfx) 15727 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15728 IRTemp sV = newTemp(Ity_V128); 15729 const HChar* str = "???"; 15730 Int laneszB = 0; 15731 15732 switch (opc) { 15733 case 0x1C: laneszB = 1; str = "b"; break; 15734 case 0x1D: laneszB = 2; str = "w"; break; 15735 case 0x1E: laneszB = 4; str = "d"; break; 15736 default: vassert(0); 15737 } 15738 15739 modrm = getUChar(delta); 15740 if (epartIsReg(modrm)) { 15741 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15742 delta += 1; 15743 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 15744 nameXMMReg(gregOfRexRM(pfx,modrm))); 15745 } else { 15746 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15747 gen_SEGV_if_not_16_aligned( addr ); 15748 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15749 delta += alen; 15750 DIP("pabs%s %s,%s\n", str, dis_buf, 15751 nameXMMReg(gregOfRexRM(pfx,modrm))); 15752 } 15753 15754 putXMMReg( gregOfRexRM(pfx,modrm), 15755 mkexpr(math_PABS_XMM(sV, laneszB)) ); 15756 goto decode_success; 15757 } 15758 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 15759 /* 0F 38 1D = PABSW 
-- Packed Absolute Value 16x4 (MMX) */ 15760 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 15761 if (haveNo66noF2noF3(pfx) && sz == 4) { 15762 IRTemp sV = newTemp(Ity_I64); 15763 const HChar* str = "???"; 15764 Int laneszB = 0; 15765 15766 switch (opc) { 15767 case 0x1C: laneszB = 1; str = "b"; break; 15768 case 0x1D: laneszB = 2; str = "w"; break; 15769 case 0x1E: laneszB = 4; str = "d"; break; 15770 default: vassert(0); 15771 } 15772 15773 modrm = getUChar(delta); 15774 do_MMX_preamble(); 15775 15776 if (epartIsReg(modrm)) { 15777 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15778 delta += 1; 15779 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15780 nameMMXReg(gregLO3ofRM(modrm))); 15781 } else { 15782 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15783 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15784 delta += alen; 15785 DIP("pabs%s %s,%s\n", str, dis_buf, 15786 nameMMXReg(gregLO3ofRM(modrm))); 15787 } 15788 15789 putMMXReg( gregLO3ofRM(modrm), 15790 mkexpr(math_PABS_MMX( sV, laneszB )) ); 15791 goto decode_success; 15792 } 15793 break; 15794 15795 default: 15796 break; 15797 15798 } 15799 15800 //decode_failure: 15801 *decode_OK = False; 15802 return deltaIN; 15803 15804 decode_success: 15805 *decode_OK = True; 15806 return delta; 15807 } 15808 15809 15810 /*------------------------------------------------------------*/ 15811 /*--- ---*/ 15812 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/ 15813 /*--- ---*/ 15814 /*------------------------------------------------------------*/ 15815 15816 __attribute__((noinline)) 15817 static 15818 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK, 15819 const VexAbiInfo* vbi, 15820 Prefix pfx, Int sz, Long deltaIN ) 15821 { 15822 Long d64 = 0; 15823 IRTemp addr = IRTemp_INVALID; 15824 UChar modrm = 0; 15825 Int alen = 0; 15826 HChar dis_buf[50]; 15827 15828 *decode_OK = False; 15829 15830 Long delta = deltaIN; 15831 UChar opc = getUChar(delta); 15832 delta++; 15833 switch (opc) { 15834 
15835 case 0x0F: 15836 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 15837 if (have66noF2noF3(pfx) 15838 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15839 IRTemp sV = newTemp(Ity_V128); 15840 IRTemp dV = newTemp(Ity_V128); 15841 15842 modrm = getUChar(delta); 15843 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15844 15845 if (epartIsReg(modrm)) { 15846 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15847 d64 = (Long)getUChar(delta+1); 15848 delta += 1+1; 15849 DIP("palignr $%d,%s,%s\n", (Int)d64, 15850 nameXMMReg(eregOfRexRM(pfx,modrm)), 15851 nameXMMReg(gregOfRexRM(pfx,modrm))); 15852 } else { 15853 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 15854 gen_SEGV_if_not_16_aligned( addr ); 15855 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15856 d64 = (Long)getUChar(delta+alen); 15857 delta += alen+1; 15858 DIP("palignr $%d,%s,%s\n", (Int)d64, 15859 dis_buf, 15860 nameXMMReg(gregOfRexRM(pfx,modrm))); 15861 } 15862 15863 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 ); 15864 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 15865 goto decode_success; 15866 } 15867 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 15868 if (haveNo66noF2noF3(pfx) && sz == 4) { 15869 IRTemp sV = newTemp(Ity_I64); 15870 IRTemp dV = newTemp(Ity_I64); 15871 IRTemp res = newTemp(Ity_I64); 15872 15873 modrm = getUChar(delta); 15874 do_MMX_preamble(); 15875 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15876 15877 if (epartIsReg(modrm)) { 15878 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15879 d64 = (Long)getUChar(delta+1); 15880 delta += 1+1; 15881 DIP("palignr $%d,%s,%s\n", (Int)d64, 15882 nameMMXReg(eregLO3ofRM(modrm)), 15883 nameMMXReg(gregLO3ofRM(modrm))); 15884 } else { 15885 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 15886 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15887 d64 = (Long)getUChar(delta+alen); 15888 delta += alen+1; 15889 DIP("palignr $%d%s,%s\n", (Int)d64, 15890 dis_buf, 15891 nameMMXReg(gregLO3ofRM(modrm))); 15892 } 15893 15894 if 
(d64 == 0) { 15895 assign( res, mkexpr(sV) ); 15896 } 15897 else if (d64 >= 1 && d64 <= 7) { 15898 assign(res, 15899 binop(Iop_Or64, 15900 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), 15901 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) 15902 ))); 15903 } 15904 else if (d64 == 8) { 15905 assign( res, mkexpr(dV) ); 15906 } 15907 else if (d64 >= 9 && d64 <= 15) { 15908 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); 15909 } 15910 else if (d64 >= 16 && d64 <= 255) { 15911 assign( res, mkU64(0) ); 15912 } 15913 else 15914 vassert(0); 15915 15916 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 15917 goto decode_success; 15918 } 15919 break; 15920 15921 default: 15922 break; 15923 15924 } 15925 15926 //decode_failure: 15927 *decode_OK = False; 15928 return deltaIN; 15929 15930 decode_success: 15931 *decode_OK = True; 15932 return delta; 15933 } 15934 15935 15936 /*------------------------------------------------------------*/ 15937 /*--- ---*/ 15938 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/ 15939 /*--- ---*/ 15940 /*------------------------------------------------------------*/ 15941 15942 __attribute__((noinline)) 15943 static 15944 Long dis_ESC_0F__SSE4 ( Bool* decode_OK, 15945 const VexArchInfo* archinfo, 15946 const VexAbiInfo* vbi, 15947 Prefix pfx, Int sz, Long deltaIN ) 15948 { 15949 IRTemp addr = IRTemp_INVALID; 15950 IRType ty = Ity_INVALID; 15951 UChar modrm = 0; 15952 Int alen = 0; 15953 HChar dis_buf[50]; 15954 15955 *decode_OK = False; 15956 15957 Long delta = deltaIN; 15958 UChar opc = getUChar(delta); 15959 delta++; 15960 switch (opc) { 15961 15962 case 0xB8: 15963 /* F3 0F B8 = POPCNT{W,L,Q} 15964 Count the number of 1 bits in a register 15965 */ 15966 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */ 15967 && (sz == 2 || sz == 4 || sz == 8)) { 15968 /*IRType*/ ty = szToITy(sz); 15969 IRTemp src = newTemp(ty); 15970 modrm = getUChar(delta); 15971 if (epartIsReg(modrm)) { 15972 assign(src, getIRegE(sz, pfx, modrm)); 15973 delta += 
1; 15974 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15975 nameIRegG(sz, pfx, modrm)); 15976 } else { 15977 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 15978 assign(src, loadLE(ty, mkexpr(addr))); 15979 delta += alen; 15980 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf, 15981 nameIRegG(sz, pfx, modrm)); 15982 } 15983 15984 IRTemp result = gen_POPCOUNT(ty, src); 15985 putIRegG(sz, pfx, modrm, mkexpr(result)); 15986 15987 // Update flags. This is pretty lame .. perhaps can do better 15988 // if this turns out to be performance critical. 15989 // O S A C P are cleared. Z is set if SRC == 0. 15990 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15991 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15992 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15993 stmt( IRStmt_Put( OFFB_CC_DEP1, 15994 binop(Iop_Shl64, 15995 unop(Iop_1Uto64, 15996 binop(Iop_CmpEQ64, 15997 widenUto64(mkexpr(src)), 15998 mkU64(0))), 15999 mkU8(AMD64G_CC_SHIFT_Z)))); 16000 16001 goto decode_success; 16002 } 16003 break; 16004 16005 case 0xBC: 16006 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension, 16007 which we can only decode if we're sure this is a BMI1 capable cpu 16008 that supports TZCNT, since otherwise it's BSF, which behaves 16009 differently on zero source. 
*/ 16010 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 16011 && (sz == 2 || sz == 4 || sz == 8) 16012 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) { 16013 /*IRType*/ ty = szToITy(sz); 16014 IRTemp src = newTemp(ty); 16015 modrm = getUChar(delta); 16016 if (epartIsReg(modrm)) { 16017 assign(src, getIRegE(sz, pfx, modrm)); 16018 delta += 1; 16019 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 16020 nameIRegG(sz, pfx, modrm)); 16021 } else { 16022 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 16023 assign(src, loadLE(ty, mkexpr(addr))); 16024 delta += alen; 16025 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf, 16026 nameIRegG(sz, pfx, modrm)); 16027 } 16028 16029 IRTemp res = gen_TZCNT(ty, src); 16030 putIRegG(sz, pfx, modrm, mkexpr(res)); 16031 16032 // Update flags. This is pretty lame .. perhaps can do better 16033 // if this turns out to be performance critical. 16034 // O S A P are cleared. Z is set if RESULT == 0. 16035 // C is set if SRC is zero. 16036 IRTemp src64 = newTemp(Ity_I64); 16037 IRTemp res64 = newTemp(Ity_I64); 16038 assign(src64, widenUto64(mkexpr(src))); 16039 assign(res64, widenUto64(mkexpr(res))); 16040 16041 IRTemp oszacp = newTemp(Ity_I64); 16042 assign( 16043 oszacp, 16044 binop(Iop_Or64, 16045 binop(Iop_Shl64, 16046 unop(Iop_1Uto64, 16047 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 16048 mkU8(AMD64G_CC_SHIFT_Z)), 16049 binop(Iop_Shl64, 16050 unop(Iop_1Uto64, 16051 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 16052 mkU8(AMD64G_CC_SHIFT_C)) 16053 ) 16054 ); 16055 16056 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16057 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16058 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16059 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 16060 16061 goto decode_success; 16062 } 16063 break; 16064 16065 case 0xBD: 16066 /* F3 0F BD -- LZCNT (count leading zeroes. 
An AMD extension, 16067 which we can only decode if we're sure this is an AMD cpu 16068 that supports LZCNT, since otherwise it's BSR, which behaves 16069 differently. Bizarrely, my Sandy Bridge also accepts these 16070 instructions but produces different results. */ 16071 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 16072 && (sz == 2 || sz == 4 || sz == 8) 16073 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) { 16074 /*IRType*/ ty = szToITy(sz); 16075 IRTemp src = newTemp(ty); 16076 modrm = getUChar(delta); 16077 if (epartIsReg(modrm)) { 16078 assign(src, getIRegE(sz, pfx, modrm)); 16079 delta += 1; 16080 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 16081 nameIRegG(sz, pfx, modrm)); 16082 } else { 16083 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 16084 assign(src, loadLE(ty, mkexpr(addr))); 16085 delta += alen; 16086 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 16087 nameIRegG(sz, pfx, modrm)); 16088 } 16089 16090 IRTemp res = gen_LZCNT(ty, src); 16091 putIRegG(sz, pfx, modrm, mkexpr(res)); 16092 16093 // Update flags. This is pretty lame .. perhaps can do better 16094 // if this turns out to be performance critical. 16095 // O S A P are cleared. Z is set if RESULT == 0. 16096 // C is set if SRC is zero. 
16097 IRTemp src64 = newTemp(Ity_I64); 16098 IRTemp res64 = newTemp(Ity_I64); 16099 assign(src64, widenUto64(mkexpr(src))); 16100 assign(res64, widenUto64(mkexpr(res))); 16101 16102 IRTemp oszacp = newTemp(Ity_I64); 16103 assign( 16104 oszacp, 16105 binop(Iop_Or64, 16106 binop(Iop_Shl64, 16107 unop(Iop_1Uto64, 16108 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 16109 mkU8(AMD64G_CC_SHIFT_Z)), 16110 binop(Iop_Shl64, 16111 unop(Iop_1Uto64, 16112 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 16113 mkU8(AMD64G_CC_SHIFT_C)) 16114 ) 16115 ); 16116 16117 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16118 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16119 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16120 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 16121 16122 goto decode_success; 16123 } 16124 break; 16125 16126 default: 16127 break; 16128 16129 } 16130 16131 //decode_failure: 16132 *decode_OK = False; 16133 return deltaIN; 16134 16135 decode_success: 16136 *decode_OK = True; 16137 return delta; 16138 } 16139 16140 16141 /*------------------------------------------------------------*/ 16142 /*--- ---*/ 16143 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/ 16144 /*--- ---*/ 16145 /*------------------------------------------------------------*/ 16146 16147 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG, 16148 IRTemp vec0/*controlling mask*/, 16149 UInt gran, IROp opSAR ) 16150 { 16151 /* The tricky bit is to convert vec0 into a suitable mask, by 16152 copying the most significant bit of each lane into all positions 16153 in the lane. 
*/ 16154 IRTemp sh = newTemp(Ity_I8); 16155 assign(sh, mkU8(8 * gran - 1)); 16156 16157 IRTemp mask = newTemp(Ity_V128); 16158 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh))); 16159 16160 IRTemp notmask = newTemp(Ity_V128); 16161 assign(notmask, unop(Iop_NotV128, mkexpr(mask))); 16162 16163 IRTemp res = newTemp(Ity_V128); 16164 assign(res, binop(Iop_OrV128, 16165 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)), 16166 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask)))); 16167 return res; 16168 } 16169 16170 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG, 16171 IRTemp vec0/*controlling mask*/, 16172 UInt gran, IROp opSAR128 ) 16173 { 16174 /* The tricky bit is to convert vec0 into a suitable mask, by 16175 copying the most significant bit of each lane into all positions 16176 in the lane. */ 16177 IRTemp sh = newTemp(Ity_I8); 16178 assign(sh, mkU8(8 * gran - 1)); 16179 16180 IRTemp vec0Hi = IRTemp_INVALID; 16181 IRTemp vec0Lo = IRTemp_INVALID; 16182 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo ); 16183 16184 IRTemp mask = newTemp(Ity_V256); 16185 assign(mask, binop(Iop_V128HLtoV256, 16186 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)), 16187 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh)))); 16188 16189 IRTemp notmask = newTemp(Ity_V256); 16190 assign(notmask, unop(Iop_NotV256, mkexpr(mask))); 16191 16192 IRTemp res = newTemp(Ity_V256); 16193 assign(res, binop(Iop_OrV256, 16194 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)), 16195 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask)))); 16196 return res; 16197 } 16198 16199 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta, 16200 const HChar *name, UInt gran, IROp opSAR ) 16201 { 16202 IRTemp addr = IRTemp_INVALID; 16203 Int alen = 0; 16204 HChar dis_buf[50]; 16205 UChar modrm = getUChar(delta); 16206 UInt rG = gregOfRexRM(pfx, modrm); 16207 UInt rV = getVexNvvvv(pfx); 16208 UInt rIS4 = 0xFF; /* invalid */ 16209 IRTemp vecE = newTemp(Ity_V128); 16210 IRTemp vecV = newTemp(Ity_V128); 16211 
IRTemp vecIS4 = newTemp(Ity_V128); 16212 if (epartIsReg(modrm)) { 16213 delta++; 16214 UInt rE = eregOfRexRM(pfx, modrm); 16215 assign(vecE, getXMMReg(rE)); 16216 UChar ib = getUChar(delta); 16217 rIS4 = (ib >> 4) & 0xF; 16218 DIP("%s %s,%s,%s,%s\n", 16219 name, nameXMMReg(rIS4), nameXMMReg(rE), 16220 nameXMMReg(rV), nameXMMReg(rG)); 16221 } else { 16222 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16223 delta += alen; 16224 assign(vecE, loadLE(Ity_V128, mkexpr(addr))); 16225 UChar ib = getUChar(delta); 16226 rIS4 = (ib >> 4) & 0xF; 16227 DIP("%s %s,%s,%s,%s\n", 16228 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 16229 } 16230 delta++; 16231 assign(vecV, getXMMReg(rV)); 16232 assign(vecIS4, getXMMReg(rIS4)); 16233 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR ); 16234 putYMMRegLoAndZU( rG, mkexpr(res) ); 16235 return delta; 16236 } 16237 16238 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta, 16239 const HChar *name, UInt gran, IROp opSAR128 ) 16240 { 16241 IRTemp addr = IRTemp_INVALID; 16242 Int alen = 0; 16243 HChar dis_buf[50]; 16244 UChar modrm = getUChar(delta); 16245 UInt rG = gregOfRexRM(pfx, modrm); 16246 UInt rV = getVexNvvvv(pfx); 16247 UInt rIS4 = 0xFF; /* invalid */ 16248 IRTemp vecE = newTemp(Ity_V256); 16249 IRTemp vecV = newTemp(Ity_V256); 16250 IRTemp vecIS4 = newTemp(Ity_V256); 16251 if (epartIsReg(modrm)) { 16252 delta++; 16253 UInt rE = eregOfRexRM(pfx, modrm); 16254 assign(vecE, getYMMReg(rE)); 16255 UChar ib = getUChar(delta); 16256 rIS4 = (ib >> 4) & 0xF; 16257 DIP("%s %s,%s,%s,%s\n", 16258 name, nameYMMReg(rIS4), nameYMMReg(rE), 16259 nameYMMReg(rV), nameYMMReg(rG)); 16260 } else { 16261 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16262 delta += alen; 16263 assign(vecE, loadLE(Ity_V256, mkexpr(addr))); 16264 UChar ib = getUChar(delta); 16265 rIS4 = (ib >> 4) & 0xF; 16266 DIP("%s %s,%s,%s,%s\n", 16267 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), 
nameYMMReg(rG)); 16268 } 16269 delta++; 16270 assign(vecV, getYMMReg(rV)); 16271 assign(vecIS4, getYMMReg(rIS4)); 16272 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 ); 16273 putYMMReg( rG, mkexpr(res) ); 16274 return delta; 16275 } 16276 16277 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign ) 16278 { 16279 /* Set Z=1 iff (vecE & vecG) == 0 16280 Set C=1 iff (vecE & not vecG) == 0 16281 */ 16282 16283 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 16284 16285 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top 16286 and bottom 64-bits together. It relies on this trick: 16287 16288 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence 16289 16290 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly 16291 InterleaveHI64x2([a,b],[a,b]) == [a,a] 16292 16293 and so the OR of the above 2 exprs produces 16294 [a OR b, a OR b], from which we simply take the lower half. 16295 */ 16296 IRTemp and64 = newTemp(Ity_I64); 16297 IRTemp andn64 = newTemp(Ity_I64); 16298 16299 assign(and64, 16300 unop(Iop_V128to64, 16301 binop(Iop_OrV128, 16302 binop(Iop_InterleaveLO64x2, 16303 mkexpr(andV), mkexpr(andV)), 16304 binop(Iop_InterleaveHI64x2, 16305 mkexpr(andV), mkexpr(andV))))); 16306 16307 assign(andn64, 16308 unop(Iop_V128to64, 16309 binop(Iop_OrV128, 16310 binop(Iop_InterleaveLO64x2, 16311 mkexpr(andnV), mkexpr(andnV)), 16312 binop(Iop_InterleaveHI64x2, 16313 mkexpr(andnV), mkexpr(andnV))))); 16314 16315 IRTemp z64 = newTemp(Ity_I64); 16316 IRTemp c64 = newTemp(Ity_I64); 16317 if (sign == 64) { 16318 /* When only interested in the most significant bit, just shift 16319 arithmetically right and negate. */ 16320 assign(z64, 16321 unop(Iop_Not64, 16322 binop(Iop_Sar64, mkexpr(and64), mkU8(63)))); 16323 16324 assign(c64, 16325 unop(Iop_Not64, 16326 binop(Iop_Sar64, mkexpr(andn64), mkU8(63)))); 16327 } else { 16328 if (sign == 32) { 16329 /* When interested in bit 31 and bit 63, mask those bits and 16330 fallthrough into the PTEST handling. 
*/ 16331 IRTemp t0 = newTemp(Ity_I64); 16332 IRTemp t1 = newTemp(Ity_I64); 16333 IRTemp t2 = newTemp(Ity_I64); 16334 assign(t0, mkU64(0x8000000080000000ULL)); 16335 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0))); 16336 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0))); 16337 and64 = t1; 16338 andn64 = t2; 16339 } 16340 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can 16341 slice out the Z and C bits conveniently. We use the standard 16342 trick all-zeroes -> all-zeroes, anything-else -> all-ones 16343 done by "(x | -x) >>s (word-size - 1)". 16344 */ 16345 assign(z64, 16346 unop(Iop_Not64, 16347 binop(Iop_Sar64, 16348 binop(Iop_Or64, 16349 binop(Iop_Sub64, mkU64(0), mkexpr(and64)), 16350 mkexpr(and64)), mkU8(63)))); 16351 16352 assign(c64, 16353 unop(Iop_Not64, 16354 binop(Iop_Sar64, 16355 binop(Iop_Or64, 16356 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)), 16357 mkexpr(andn64)), mkU8(63)))); 16358 } 16359 16360 /* And finally, slice out the Z and C flags and set the flags 16361 thunk to COPY for them. OSAP are set to zero. */ 16362 IRTemp newOSZACP = newTemp(Ity_I64); 16363 assign(newOSZACP, 16364 binop(Iop_Or64, 16365 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)), 16366 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C)))); 16367 16368 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP))); 16369 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16370 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16371 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16372 } 16373 16374 16375 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD. 16376 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. 
*/
static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx, Int sign )
{
   /* Decodes the 128-bit forms of (V)PTEST (sign==0), VTESTPS
      (sign==32) and VTESTPD (sign==64).  Computes E&G and E&~G and
      hands them to finish_xTESTy, which derives Z and C from them. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V128);
   IRTemp vecG  = newTemp(Ity_V128);

   /* Mnemonic pieces: "ptest"/"vptest" vs "vtestps"/"vtestpd". */
   const HChar* mbV    = isAvx ? "v" : "";
   const HChar* mbP    = sign == 0 ? "p" : "";
   const HChar* mbSufx = sign == 0 ? "" : sign == 32 ? "ps" : "pd";

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      delta += 1;
      DIP( "%s%stest%s %s,%s\n", mbV, mbP, mbSufx,
           nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only the legacy (non-AVX) encoding requires 16-alignment. */
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
      delta += alen;
      DIP( "%s%stest%s %s,%s\n", mbV, mbP, mbSufx,
           dis_buf, nameXMMReg(rG) );
   }

   assign(vecG, getXMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV128,
                       mkexpr(vecE),
                       binop(Iop_XorV128, mkexpr(vecG),
                                          mkV128(0xFFFF))));

   finish_xTESTy ( andV, andnV, sign );
   return delta;
}


/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD.
*/
static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Int sign )
{
   /* Decodes the 256-bit forms of VPTEST (sign==0), VTESTPS (sign==32)
      and VTESTPD (sign==64).  The two 128-bit halves of E&G and E&~G
      are OR-ed together and passed to finish_xTESTy, which derives the
      Z and C flags. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V256);
   IRTemp vecG  = newTemp(Ity_V256);

   /* Mnemonic pieces: "vptest" vs "vtestps"/"vtestpd". */
   const HChar* mbP    = sign == 0 ? "p" : "";
   const HChar* mbSufx = sign == 0 ? "" : sign == 32 ? "ps" : "pd";

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      delta += 1;
      DIP( "v%stest%s %s,%s\n", mbP, mbSufx,
           nameYMMReg(rE), nameYMMReg(rG) );
   } else {
      /* AVX-encoded memory operands need not be aligned, so no
         alignment check here. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
      delta += alen;
      DIP( "v%stest%s %s,%s\n", mbP, mbSufx,
           dis_buf, nameYMMReg(rG) );
   }

   assign(vecG, getYMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V256);
   IRTemp andnV = newTemp(Ity_V256);
   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV256,
                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));

   /* Fold the halves: a 256-bit value is all zeroes iff the OR of its
      two 128-bit halves is all zeroes. */
   IRTemp andVhi  = IRTemp_INVALID;
   IRTemp andVlo  = IRTemp_INVALID;
   IRTemp andnVhi = IRTemp_INVALID;
   IRTemp andnVlo = IRTemp_INVALID;
   breakupV256toV128s( andV, &andVhi, &andVlo );
   breakupV256toV128s( andnV, &andnVhi, &andnVlo );

   IRTemp andV128  = newTemp(Ity_V128);
   IRTemp andnV128 = newTemp(Ity_V128);
   assign( andV128,  binop( Iop_OrV128, mkexpr(andVhi),  mkexpr(andVlo) ) );
   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );

   finish_xTESTy ( andV128, andnV128, sign );
   return delta;
}


/* Handles 128 bit
versions of PMOVZXBW and PMOVSXBW. */ 16488 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx, 16489 Long delta, Bool isAvx, Bool xIsZ ) 16490 { 16491 IRTemp addr = IRTemp_INVALID; 16492 Int alen = 0; 16493 HChar dis_buf[50]; 16494 IRTemp srcVec = newTemp(Ity_V128); 16495 UChar modrm = getUChar(delta); 16496 const HChar* mbV = isAvx ? "v" : ""; 16497 const HChar how = xIsZ ? 'z' : 's'; 16498 UInt rG = gregOfRexRM(pfx, modrm); 16499 if ( epartIsReg(modrm) ) { 16500 UInt rE = eregOfRexRM(pfx, modrm); 16501 assign( srcVec, getXMMReg(rE) ); 16502 delta += 1; 16503 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16504 } else { 16505 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16506 assign( srcVec, 16507 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16508 delta += alen; 16509 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16510 } 16511 16512 IRExpr* res 16513 = xIsZ /* do math for either zero or sign extend */ 16514 ? binop( Iop_InterleaveLO8x16, 16515 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) 16516 : binop( Iop_SarN16x8, 16517 binop( Iop_ShlN16x8, 16518 binop( Iop_InterleaveLO8x16, 16519 IRExpr_Const( IRConst_V128(0) ), 16520 mkexpr(srcVec) ), 16521 mkU8(8) ), 16522 mkU8(8) ); 16523 16524 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16525 16526 return delta; 16527 } 16528 16529 16530 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */ 16531 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx, 16532 Long delta, Bool xIsZ ) 16533 { 16534 IRTemp addr = IRTemp_INVALID; 16535 Int alen = 0; 16536 HChar dis_buf[50]; 16537 IRTemp srcVec = newTemp(Ity_V128); 16538 UChar modrm = getUChar(delta); 16539 UChar how = xIsZ ? 
'z' : 's'; 16540 UInt rG = gregOfRexRM(pfx, modrm); 16541 if ( epartIsReg(modrm) ) { 16542 UInt rE = eregOfRexRM(pfx, modrm); 16543 assign( srcVec, getXMMReg(rE) ); 16544 delta += 1; 16545 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16546 } else { 16547 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16548 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) ); 16549 delta += alen; 16550 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16551 } 16552 16553 /* First do zero extend. */ 16554 IRExpr* res 16555 = binop( Iop_V128HLtoV256, 16556 binop( Iop_InterleaveHI8x16, 16557 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 16558 binop( Iop_InterleaveLO8x16, 16559 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 16560 /* And if needed sign extension as well. */ 16561 if (!xIsZ) 16562 res = binop( Iop_SarN16x16, 16563 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) ); 16564 16565 putYMMReg ( rG, res ); 16566 16567 return delta; 16568 } 16569 16570 16571 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx, 16572 Long delta, Bool isAvx, Bool xIsZ ) 16573 { 16574 IRTemp addr = IRTemp_INVALID; 16575 Int alen = 0; 16576 HChar dis_buf[50]; 16577 IRTemp srcVec = newTemp(Ity_V128); 16578 UChar modrm = getUChar(delta); 16579 const HChar* mbV = isAvx ? "v" : ""; 16580 const HChar how = xIsZ ? 
'z' : 's'; 16581 UInt rG = gregOfRexRM(pfx, modrm); 16582 16583 if ( epartIsReg(modrm) ) { 16584 UInt rE = eregOfRexRM(pfx, modrm); 16585 assign( srcVec, getXMMReg(rE) ); 16586 delta += 1; 16587 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16588 } else { 16589 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16590 assign( srcVec, 16591 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16592 delta += alen; 16593 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16594 } 16595 16596 IRExpr* res 16597 = binop( Iop_InterleaveLO16x8, 16598 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ); 16599 if (!xIsZ) 16600 res = binop(Iop_SarN32x4, 16601 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16)); 16602 16603 (isAvx ? putYMMRegLoAndZU : putXMMReg) 16604 ( gregOfRexRM(pfx, modrm), res ); 16605 16606 return delta; 16607 } 16608 16609 16610 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx, 16611 Long delta, Bool xIsZ ) 16612 { 16613 IRTemp addr = IRTemp_INVALID; 16614 Int alen = 0; 16615 HChar dis_buf[50]; 16616 IRTemp srcVec = newTemp(Ity_V128); 16617 UChar modrm = getUChar(delta); 16618 UChar how = xIsZ ? 
'z' : 's'; 16619 UInt rG = gregOfRexRM(pfx, modrm); 16620 16621 if ( epartIsReg(modrm) ) { 16622 UInt rE = eregOfRexRM(pfx, modrm); 16623 assign( srcVec, getXMMReg(rE) ); 16624 delta += 1; 16625 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16626 } else { 16627 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16628 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) ); 16629 delta += alen; 16630 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16631 } 16632 16633 IRExpr* res 16634 = binop( Iop_V128HLtoV256, 16635 binop( Iop_InterleaveHI16x8, 16636 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 16637 binop( Iop_InterleaveLO16x8, 16638 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 16639 if (!xIsZ) 16640 res = binop(Iop_SarN32x8, 16641 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16)); 16642 16643 putYMMReg ( rG, res ); 16644 16645 return delta; 16646 } 16647 16648 16649 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 16650 Long delta, Bool isAvx ) 16651 { 16652 IRTemp addr = IRTemp_INVALID; 16653 Int alen = 0; 16654 HChar dis_buf[50]; 16655 IRTemp srcBytes = newTemp(Ity_I32); 16656 UChar modrm = getUChar(delta); 16657 const HChar* mbV = isAvx ? "v" : ""; 16658 UInt rG = gregOfRexRM(pfx, modrm); 16659 16660 if ( epartIsReg( modrm ) ) { 16661 UInt rE = eregOfRexRM(pfx, modrm); 16662 assign( srcBytes, getXMMRegLane32( rE, 0 ) ); 16663 delta += 1; 16664 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16665 } else { 16666 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16667 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 16668 delta += alen; 16669 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 16670 } 16671 16672 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 16673 ( rG, binop( Iop_64HLtoV128, 16674 unop( Iop_16Sto64, 16675 unop( Iop_32HIto16, mkexpr(srcBytes) ) ), 16676 unop( Iop_16Sto64, 16677 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) ); 16678 return delta; 16679 } 16680 16681 16682 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 16683 { 16684 IRTemp addr = IRTemp_INVALID; 16685 Int alen = 0; 16686 HChar dis_buf[50]; 16687 IRTemp srcBytes = newTemp(Ity_I64); 16688 UChar modrm = getUChar(delta); 16689 UInt rG = gregOfRexRM(pfx, modrm); 16690 IRTemp s3, s2, s1, s0; 16691 s3 = s2 = s1 = s0 = IRTemp_INVALID; 16692 16693 if ( epartIsReg( modrm ) ) { 16694 UInt rE = eregOfRexRM(pfx, modrm); 16695 assign( srcBytes, getXMMRegLane64( rE, 0 ) ); 16696 delta += 1; 16697 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 16698 } else { 16699 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16700 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) ); 16701 delta += alen; 16702 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) ); 16703 } 16704 16705 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 ); 16706 putYMMReg( rG, binop( Iop_V128HLtoV256, 16707 binop( Iop_64HLtoV128, 16708 unop( Iop_16Sto64, mkexpr(s3) ), 16709 unop( Iop_16Sto64, mkexpr(s2) ) ), 16710 binop( Iop_64HLtoV128, 16711 unop( Iop_16Sto64, mkexpr(s1) ), 16712 unop( Iop_16Sto64, mkexpr(s0) ) ) ) ); 16713 return delta; 16714 } 16715 16716 16717 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 16718 Long delta, Bool isAvx ) 16719 { 16720 IRTemp addr = IRTemp_INVALID; 16721 Int alen = 0; 16722 HChar dis_buf[50]; 16723 IRTemp srcVec = newTemp(Ity_V128); 16724 UChar modrm = getUChar(delta); 16725 const HChar* mbV = isAvx ? 
"v" : ""; 16726 UInt rG = gregOfRexRM(pfx, modrm); 16727 16728 if ( epartIsReg( modrm ) ) { 16729 UInt rE = eregOfRexRM(pfx, modrm); 16730 assign( srcVec, getXMMReg(rE) ); 16731 delta += 1; 16732 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16733 } else { 16734 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16735 assign( srcVec, 16736 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 16737 delta += alen; 16738 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 16739 } 16740 16741 IRTemp zeroVec = newTemp( Ity_V128 ); 16742 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16743 16744 (isAvx ? putYMMRegLoAndZU : putXMMReg) 16745 ( rG, binop( Iop_InterleaveLO16x8, 16746 mkexpr(zeroVec), 16747 binop( Iop_InterleaveLO16x8, 16748 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 16749 return delta; 16750 } 16751 16752 16753 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 16754 Long delta ) 16755 { 16756 IRTemp addr = IRTemp_INVALID; 16757 Int alen = 0; 16758 HChar dis_buf[50]; 16759 IRTemp srcVec = newTemp(Ity_V128); 16760 UChar modrm = getUChar(delta); 16761 UInt rG = gregOfRexRM(pfx, modrm); 16762 16763 if ( epartIsReg( modrm ) ) { 16764 UInt rE = eregOfRexRM(pfx, modrm); 16765 assign( srcVec, getXMMReg(rE) ); 16766 delta += 1; 16767 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 16768 } else { 16769 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16770 assign( srcVec, 16771 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16772 delta += alen; 16773 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) ); 16774 } 16775 16776 IRTemp zeroVec = newTemp( Ity_V128 ); 16777 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16778 16779 putYMMReg( rG, binop( Iop_V128HLtoV256, 16780 binop( Iop_InterleaveHI16x8, 16781 mkexpr(zeroVec), 16782 binop( Iop_InterleaveLO16x8, 16783 mkexpr(zeroVec), mkexpr(srcVec) ) ), 16784 binop( Iop_InterleaveLO16x8, 16785 mkexpr(zeroVec), 16786 binop( 
Iop_InterleaveLO16x8, 16787 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 16788 return delta; 16789 } 16790 16791 16792 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */ 16793 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 16794 Long delta, Bool isAvx, Bool xIsZ ) 16795 { 16796 IRTemp addr = IRTemp_INVALID; 16797 Int alen = 0; 16798 HChar dis_buf[50]; 16799 IRTemp srcI64 = newTemp(Ity_I64); 16800 IRTemp srcVec = newTemp(Ity_V128); 16801 UChar modrm = getUChar(delta); 16802 const HChar* mbV = isAvx ? "v" : ""; 16803 const HChar how = xIsZ ? 'z' : 's'; 16804 UInt rG = gregOfRexRM(pfx, modrm); 16805 /* Compute both srcI64 -- the value to expand -- and srcVec -- same 16806 thing in a V128, with arbitrary junk in the top 64 bits. Use 16807 one or both of them and let iropt clean up afterwards (as 16808 usual). */ 16809 if ( epartIsReg(modrm) ) { 16810 UInt rE = eregOfRexRM(pfx, modrm); 16811 assign( srcVec, getXMMReg(rE) ); 16812 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) ); 16813 delta += 1; 16814 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16815 } else { 16816 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16817 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) ); 16818 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) ); 16819 delta += alen; 16820 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16821 } 16822 16823 IRExpr* res 16824 = xIsZ /* do math for either zero or sign extend */ 16825 ? binop( Iop_InterleaveLO32x4, 16826 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) 16827 : binop( Iop_64HLtoV128, 16828 unop( Iop_32Sto64, 16829 unop( Iop_64HIto32, mkexpr(srcI64) ) ), 16830 unop( Iop_32Sto64, 16831 unop( Iop_64to32, mkexpr(srcI64) ) ) ); 16832 16833 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16834 16835 return delta; 16836 } 16837 16838 16839 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. 
*/
static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   /* VPMOVZXDQ / VPMOVSXDQ xmm/m128 -> ymm: widen the four 32-bit
      lanes of the source to four 64-bit lanes of the destination. */
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar modrm = getUChar(delta);
   UChar how = xIsZ ? 'z' : 's';
   UInt rG = gregOfRexRM(pfx, modrm);
   /* NOTE: the previous comment here was copy-pasted from the 128-bit
      variant and referred to a srcI64 temp that does not exist in this
      function; only srcVec is needed. */
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta++;
      DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRExpr* res;
   if (xIsZ) {
      /* Zero extend: interleave the dwords with an all-zero vector. */
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI32x4,
                          IRExpr_Const(IRConst_V128(0)), mkexpr(srcVec) ),
                   binop( Iop_InterleaveLO32x4,
                          IRExpr_Const(IRConst_V128(0)), mkexpr(srcVec) ) );
   } else {
      /* Sign extend: widen each 32-bit lane individually. */
      IRTemp d3 = IRTemp_INVALID, d2 = IRTemp_INVALID,
             d1 = IRTemp_INVALID, d0 = IRTemp_INVALID;
      breakupV128to32s( srcVec, &d3, &d2, &d1, &d0 );
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop(Iop_32Sto64, mkexpr(d3)),
                          unop(Iop_32Sto64, mkexpr(d2)) ),
                   binop( Iop_64HLtoV128,
                          unop(Iop_32Sto64, mkexpr(d1)),
                          unop(Iop_32Sto64, mkexpr(d0)) ) );
   }

   putYMMReg ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD.
*/ 16893 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx, 16894 Long delta, Bool isAvx, Bool xIsZ ) 16895 { 16896 IRTemp addr = IRTemp_INVALID; 16897 Int alen = 0; 16898 HChar dis_buf[50]; 16899 IRTemp srcVec = newTemp(Ity_V128); 16900 UChar modrm = getUChar(delta); 16901 const HChar* mbV = isAvx ? "v" : ""; 16902 const HChar how = xIsZ ? 'z' : 's'; 16903 UInt rG = gregOfRexRM(pfx, modrm); 16904 if ( epartIsReg(modrm) ) { 16905 UInt rE = eregOfRexRM(pfx, modrm); 16906 assign( srcVec, getXMMReg(rE) ); 16907 delta += 1; 16908 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16909 } else { 16910 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16911 assign( srcVec, 16912 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 16913 delta += alen; 16914 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16915 } 16916 16917 IRTemp zeroVec = newTemp(Ity_V128); 16918 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16919 16920 IRExpr* res 16921 = binop(Iop_InterleaveLO8x16, 16922 mkexpr(zeroVec), 16923 binop(Iop_InterleaveLO8x16, 16924 mkexpr(zeroVec), mkexpr(srcVec))); 16925 if (!xIsZ) 16926 res = binop(Iop_SarN32x4, 16927 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24)); 16928 16929 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16930 16931 return delta; 16932 } 16933 16934 16935 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */ 16936 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx, 16937 Long delta, Bool xIsZ ) 16938 { 16939 IRTemp addr = IRTemp_INVALID; 16940 Int alen = 0; 16941 HChar dis_buf[50]; 16942 IRTemp srcVec = newTemp(Ity_V128); 16943 UChar modrm = getUChar(delta); 16944 UChar how = xIsZ ? 
'z' : 's'; 16945 UInt rG = gregOfRexRM(pfx, modrm); 16946 if ( epartIsReg(modrm) ) { 16947 UInt rE = eregOfRexRM(pfx, modrm); 16948 assign( srcVec, getXMMReg(rE) ); 16949 delta += 1; 16950 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16951 } else { 16952 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16953 assign( srcVec, 16954 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16955 delta += alen; 16956 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16957 } 16958 16959 IRTemp zeroVec = newTemp(Ity_V128); 16960 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16961 16962 IRExpr* res 16963 = binop( Iop_V128HLtoV256, 16964 binop(Iop_InterleaveHI8x16, 16965 mkexpr(zeroVec), 16966 binop(Iop_InterleaveLO8x16, 16967 mkexpr(zeroVec), mkexpr(srcVec)) ), 16968 binop(Iop_InterleaveLO8x16, 16969 mkexpr(zeroVec), 16970 binop(Iop_InterleaveLO8x16, 16971 mkexpr(zeroVec), mkexpr(srcVec)) ) ); 16972 if (!xIsZ) 16973 res = binop(Iop_SarN32x8, 16974 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24)); 16975 16976 putYMMReg ( rG, res ); 16977 16978 return delta; 16979 } 16980 16981 16982 /* Handles 128 bit versions of PMOVSXBQ. */ 16983 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 16984 Long delta, Bool isAvx ) 16985 { 16986 IRTemp addr = IRTemp_INVALID; 16987 Int alen = 0; 16988 HChar dis_buf[50]; 16989 IRTemp srcBytes = newTemp(Ity_I16); 16990 UChar modrm = getUChar(delta); 16991 const HChar* mbV = isAvx ? "v" : ""; 16992 UInt rG = gregOfRexRM(pfx, modrm); 16993 if ( epartIsReg(modrm) ) { 16994 UInt rE = eregOfRexRM(pfx, modrm); 16995 assign( srcBytes, getXMMRegLane16( rE, 0 ) ); 16996 delta += 1; 16997 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16998 } else { 16999 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17000 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) ); 17001 delta += alen; 17002 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 17003 } 17004 17005 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 17006 ( rG, binop( Iop_64HLtoV128, 17007 unop( Iop_8Sto64, 17008 unop( Iop_16HIto8, mkexpr(srcBytes) ) ), 17009 unop( Iop_8Sto64, 17010 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) ); 17011 return delta; 17012 } 17013 17014 17015 /* Handles 256 bit versions of PMOVSXBQ. */ 17016 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 17017 Long delta ) 17018 { 17019 IRTemp addr = IRTemp_INVALID; 17020 Int alen = 0; 17021 HChar dis_buf[50]; 17022 IRTemp srcBytes = newTemp(Ity_I32); 17023 UChar modrm = getUChar(delta); 17024 UInt rG = gregOfRexRM(pfx, modrm); 17025 if ( epartIsReg(modrm) ) { 17026 UInt rE = eregOfRexRM(pfx, modrm); 17027 assign( srcBytes, getXMMRegLane32( rE, 0 ) ); 17028 delta += 1; 17029 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 17030 } else { 17031 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17032 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 17033 delta += alen; 17034 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) ); 17035 } 17036 17037 putYMMReg 17038 ( rG, binop( Iop_V128HLtoV256, 17039 binop( Iop_64HLtoV128, 17040 unop( Iop_8Sto64, 17041 unop( Iop_16HIto8, 17042 unop( Iop_32HIto16, 17043 mkexpr(srcBytes) ) ) ), 17044 unop( Iop_8Sto64, 17045 unop( Iop_16to8, 17046 unop( Iop_32HIto16, 17047 mkexpr(srcBytes) ) ) ) ), 17048 binop( Iop_64HLtoV128, 17049 unop( Iop_8Sto64, 17050 unop( Iop_16HIto8, 17051 unop( Iop_32to16, 17052 mkexpr(srcBytes) ) ) ), 17053 unop( Iop_8Sto64, 17054 unop( Iop_16to8, 17055 unop( Iop_32to16, 17056 mkexpr(srcBytes) ) ) ) ) ) ); 17057 return delta; 17058 } 17059 17060 17061 /* Handles 128 bit versions of PMOVZXBQ. */ 17062 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 17063 Long delta, Bool isAvx ) 17064 { 17065 IRTemp addr = IRTemp_INVALID; 17066 Int alen = 0; 17067 HChar dis_buf[50]; 17068 IRTemp srcVec = newTemp(Ity_V128); 17069 UChar modrm = getUChar(delta); 17070 const HChar* mbV = isAvx ? 
"v" : ""; 17071 UInt rG = gregOfRexRM(pfx, modrm); 17072 if ( epartIsReg(modrm) ) { 17073 UInt rE = eregOfRexRM(pfx, modrm); 17074 assign( srcVec, getXMMReg(rE) ); 17075 delta += 1; 17076 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 17077 } else { 17078 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17079 assign( srcVec, 17080 unop( Iop_32UtoV128, 17081 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) )))); 17082 delta += alen; 17083 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 17084 } 17085 17086 IRTemp zeroVec = newTemp(Ity_V128); 17087 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 17088 17089 (isAvx ? putYMMRegLoAndZU : putXMMReg) 17090 ( rG, binop( Iop_InterleaveLO8x16, 17091 mkexpr(zeroVec), 17092 binop( Iop_InterleaveLO8x16, 17093 mkexpr(zeroVec), 17094 binop( Iop_InterleaveLO8x16, 17095 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 17096 return delta; 17097 } 17098 17099 17100 /* Handles 256 bit versions of PMOVZXBQ. */ 17101 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 17102 Long delta ) 17103 { 17104 IRTemp addr = IRTemp_INVALID; 17105 Int alen = 0; 17106 HChar dis_buf[50]; 17107 IRTemp srcVec = newTemp(Ity_V128); 17108 UChar modrm = getUChar(delta); 17109 UInt rG = gregOfRexRM(pfx, modrm); 17110 if ( epartIsReg(modrm) ) { 17111 UInt rE = eregOfRexRM(pfx, modrm); 17112 assign( srcVec, getXMMReg(rE) ); 17113 delta += 1; 17114 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 17115 } else { 17116 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17117 assign( srcVec, 17118 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ))); 17119 delta += alen; 17120 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) ); 17121 } 17122 17123 IRTemp zeroVec = newTemp(Ity_V128); 17124 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 17125 17126 putYMMReg 17127 ( rG, binop( Iop_V128HLtoV256, 17128 binop( Iop_InterleaveHI8x16, 17129 mkexpr(zeroVec), 17130 binop( Iop_InterleaveLO8x16, 17131 
mkexpr(zeroVec), 17132 binop( Iop_InterleaveLO8x16, 17133 mkexpr(zeroVec), mkexpr(srcVec) ) ) ), 17134 binop( Iop_InterleaveLO8x16, 17135 mkexpr(zeroVec), 17136 binop( Iop_InterleaveLO8x16, 17137 mkexpr(zeroVec), 17138 binop( Iop_InterleaveLO8x16, 17139 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) 17140 ) ); 17141 return delta; 17142 } 17143 17144 17145 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx, 17146 Long delta, Bool isAvx ) 17147 { 17148 IRTemp addr = IRTemp_INVALID; 17149 Int alen = 0; 17150 HChar dis_buf[50]; 17151 UChar modrm = getUChar(delta); 17152 const HChar* mbV = isAvx ? "v" : ""; 17153 IRTemp sV = newTemp(Ity_V128); 17154 IRTemp sHi = newTemp(Ity_I64); 17155 IRTemp sLo = newTemp(Ity_I64); 17156 IRTemp dLo = newTemp(Ity_I64); 17157 UInt rG = gregOfRexRM(pfx,modrm); 17158 if (epartIsReg(modrm)) { 17159 UInt rE = eregOfRexRM(pfx,modrm); 17160 assign( sV, getXMMReg(rE) ); 17161 delta += 1; 17162 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 17163 } else { 17164 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17165 if (!isAvx) 17166 gen_SEGV_if_not_16_aligned(addr); 17167 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 17168 delta += alen; 17169 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG)); 17170 } 17171 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 17172 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 17173 assign( dLo, mkIRExprCCall( 17174 Ity_I64, 0/*regparms*/, 17175 "amd64g_calculate_sse_phminposuw", 17176 &amd64g_calculate_sse_phminposuw, 17177 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) ) 17178 )); 17179 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 17180 (rG, unop(Iop_64UtoV128, mkexpr(dLo))); 17181 return delta; 17182 } 17183 17184 17185 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx, 17186 Long delta, Bool isAvx, UChar opc ) 17187 { 17188 IRTemp addr = IRTemp_INVALID; 17189 Int alen = 0; 17190 HChar dis_buf[50]; 17191 UChar modrm = getUChar(delta); 17192 UInt rG = gregOfRexRM(pfx, modrm); 17193 UInt regNoL = 0; 17194 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG; 17195 17196 /* This is a nasty kludge. We need to pass 2 x V128 to the 17197 helper. Since we can't do that, use a dirty 17198 helper to compute the results directly from the XMM regs in 17199 the guest state. That means for the memory case, we need to 17200 move the left operand into a pseudo-register (XMM16, let's 17201 call it). */ 17202 if (epartIsReg(modrm)) { 17203 regNoL = eregOfRexRM(pfx, modrm); 17204 delta += 1; 17205 } else { 17206 regNoL = 16; /* use XMM16 as an intermediary */ 17207 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17208 /* alignment check needed ???? */ 17209 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 17210 delta += alen; 17211 } 17212 17213 void* fn = &amd64g_dirtyhelper_AES; 17214 const HChar* nm = "amd64g_dirtyhelper_AES"; 17215 17216 /* Round up the arguments. Note that this is a kludge -- the 17217 use of mkU64 rather than mkIRExpr_HWord implies the 17218 assumption that the host's word size is 64-bit. */ 17219 UInt gstOffD = ymmGuestRegOffset(rG); 17220 UInt gstOffL = regNoL == 16 ? 
OFFB_YMM16 : ymmGuestRegOffset(regNoL); 17221 UInt gstOffR = ymmGuestRegOffset(regNoR); 17222 IRExpr* opc4 = mkU64(opc); 17223 IRExpr* gstOffDe = mkU64(gstOffD); 17224 IRExpr* gstOffLe = mkU64(gstOffL); 17225 IRExpr* gstOffRe = mkU64(gstOffR); 17226 IRExpr** args 17227 = mkIRExprVec_5( IRExpr_BBPTR(), opc4, gstOffDe, gstOffLe, gstOffRe ); 17228 17229 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args ); 17230 /* It's not really a dirty call, but we can't use the clean helper 17231 mechanism here for the very lame reason that we can't pass 2 x 17232 V128s by value to a helper. Hence this roundabout scheme. */ 17233 d->nFxState = 2; 17234 vex_bzero(&d->fxState, sizeof(d->fxState)); 17235 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes 17236 the second for !isAvx or the third for isAvx. 17237 AESIMC (0xDB) reads the first register, and writes the second. */ 17238 d->fxState[0].fx = Ifx_Read; 17239 d->fxState[0].offset = gstOffL; 17240 d->fxState[0].size = sizeof(U128); 17241 d->fxState[1].offset = gstOffR; 17242 d->fxState[1].size = sizeof(U128); 17243 if (opc == 0xDB) 17244 d->fxState[1].fx = Ifx_Write; 17245 else if (!isAvx || rG == regNoR) 17246 d->fxState[1].fx = Ifx_Modify; 17247 else { 17248 d->fxState[1].fx = Ifx_Read; 17249 d->nFxState++; 17250 d->fxState[2].fx = Ifx_Write; 17251 d->fxState[2].offset = gstOffD; 17252 d->fxState[2].size = sizeof(U128); 17253 } 17254 17255 stmt( IRStmt_Dirty(d) ); 17256 { 17257 const HChar* opsuf; 17258 switch (opc) { 17259 case 0xDC: opsuf = "enc"; break; 17260 case 0XDD: opsuf = "enclast"; break; 17261 case 0xDE: opsuf = "dec"; break; 17262 case 0xDF: opsuf = "declast"; break; 17263 case 0xDB: opsuf = "imc"; break; 17264 default: vassert(0); 17265 } 17266 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf, 17267 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)), 17268 nameXMMReg(regNoR), 17269 (isAvx && opc != 0xDB) ? "," : "", 17270 (isAvx && opc != 0xDB) ? 
nameXMMReg(rG) : ""); 17271 } 17272 if (isAvx) 17273 putYMMRegLane128( rG, 1, mkV128(0) ); 17274 return delta; 17275 } 17276 17277 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx, 17278 Long delta, Bool isAvx ) 17279 { 17280 IRTemp addr = IRTemp_INVALID; 17281 Int alen = 0; 17282 HChar dis_buf[50]; 17283 UChar modrm = getUChar(delta); 17284 UInt regNoL = 0; 17285 UInt regNoR = gregOfRexRM(pfx, modrm); 17286 UChar imm = 0; 17287 17288 /* This is a nasty kludge. See AESENC et al. instructions. */ 17289 modrm = getUChar(delta); 17290 if (epartIsReg(modrm)) { 17291 regNoL = eregOfRexRM(pfx, modrm); 17292 imm = getUChar(delta+1); 17293 delta += 1+1; 17294 } else { 17295 regNoL = 16; /* use XMM16 as an intermediary */ 17296 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17297 /* alignment check ???? . */ 17298 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 17299 imm = getUChar(delta+alen); 17300 delta += alen+1; 17301 } 17302 17303 /* Who ya gonna call? Presumably not Ghostbusters. */ 17304 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST; 17305 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST"; 17306 17307 /* Round up the arguments. Note that this is a kludge -- the 17308 use of mkU64 rather than mkIRExpr_HWord implies the 17309 assumption that the host's word size is 64-bit. */ 17310 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL); 17311 UInt gstOffR = ymmGuestRegOffset(regNoR); 17312 17313 IRExpr* imme = mkU64(imm & 0xFF); 17314 IRExpr* gstOffLe = mkU64(gstOffL); 17315 IRExpr* gstOffRe = mkU64(gstOffR); 17316 IRExpr** args 17317 = mkIRExprVec_4( IRExpr_BBPTR(), imme, gstOffLe, gstOffRe ); 17318 17319 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args ); 17320 /* It's not really a dirty call, but we can't use the clean helper 17321 mechanism here for the very lame reason that we can't pass 2 x 17322 V128s by value to a helper. Hence this roundabout scheme. 
*/ 17323 d->nFxState = 2; 17324 vex_bzero(&d->fxState, sizeof(d->fxState)); 17325 d->fxState[0].fx = Ifx_Read; 17326 d->fxState[0].offset = gstOffL; 17327 d->fxState[0].size = sizeof(U128); 17328 d->fxState[1].fx = Ifx_Write; 17329 d->fxState[1].offset = gstOffR; 17330 d->fxState[1].size = sizeof(U128); 17331 stmt( IRStmt_Dirty(d) ); 17332 17333 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm, 17334 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)), 17335 nameXMMReg(regNoR)); 17336 if (isAvx) 17337 putYMMRegLane128( regNoR, 1, mkV128(0) ); 17338 return delta; 17339 } 17340 17341 17342 __attribute__((noinline)) 17343 static 17344 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK, 17345 const VexAbiInfo* vbi, 17346 Prefix pfx, Int sz, Long deltaIN ) 17347 { 17348 IRTemp addr = IRTemp_INVALID; 17349 UChar modrm = 0; 17350 Int alen = 0; 17351 HChar dis_buf[50]; 17352 17353 *decode_OK = False; 17354 17355 Long delta = deltaIN; 17356 UChar opc = getUChar(delta); 17357 delta++; 17358 switch (opc) { 17359 17360 case 0x10: 17361 case 0x14: 17362 case 0x15: 17363 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran) 17364 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran) 17365 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran) 17366 Blend at various granularities, with XMM0 (implicit operand) 17367 providing the controlling mask. 
17368 */ 17369 if (have66noF2noF3(pfx) && sz == 2) { 17370 modrm = getUChar(delta); 17371 17372 const HChar* nm = NULL; 17373 UInt gran = 0; 17374 IROp opSAR = Iop_INVALID; 17375 switch (opc) { 17376 case 0x10: 17377 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16; 17378 break; 17379 case 0x14: 17380 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4; 17381 break; 17382 case 0x15: 17383 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2; 17384 break; 17385 } 17386 vassert(nm); 17387 17388 IRTemp vecE = newTemp(Ity_V128); 17389 IRTemp vecG = newTemp(Ity_V128); 17390 IRTemp vec0 = newTemp(Ity_V128); 17391 17392 if ( epartIsReg(modrm) ) { 17393 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 17394 delta += 1; 17395 DIP( "%s %s,%s\n", nm, 17396 nameXMMReg( eregOfRexRM(pfx, modrm) ), 17397 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17398 } else { 17399 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17400 gen_SEGV_if_not_16_aligned( addr ); 17401 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 17402 delta += alen; 17403 DIP( "%s %s,%s\n", nm, 17404 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17405 } 17406 17407 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 17408 assign(vec0, getXMMReg(0)); 17409 17410 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR ); 17411 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res)); 17412 17413 goto decode_success; 17414 } 17415 break; 17416 17417 case 0x17: 17418 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128 17419 Logical compare (set ZF and CF from AND/ANDN of the operands) */ 17420 if (have66noF2noF3(pfx) 17421 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 17422 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 ); 17423 goto decode_success; 17424 } 17425 break; 17426 17427 case 0x20: 17428 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64 17429 Packed Move with Sign Extend from Byte to Word (XMM) */ 17430 if (have66noF2noF3(pfx) && sz == 2) { 17431 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 17432 
False/*!isAvx*/, False/*!xIsZ*/ ); 17433 goto decode_success; 17434 } 17435 break; 17436 17437 case 0x21: 17438 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32 17439 Packed Move with Sign Extend from Byte to DWord (XMM) */ 17440 if (have66noF2noF3(pfx) && sz == 2) { 17441 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 17442 False/*!isAvx*/, False/*!xIsZ*/ ); 17443 goto decode_success; 17444 } 17445 break; 17446 17447 case 0x22: 17448 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16 17449 Packed Move with Sign Extend from Byte to QWord (XMM) */ 17450 if (have66noF2noF3(pfx) && sz == 2) { 17451 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17452 goto decode_success; 17453 } 17454 break; 17455 17456 case 0x23: 17457 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64 17458 Packed Move with Sign Extend from Word to DWord (XMM) */ 17459 if (have66noF2noF3(pfx) && sz == 2) { 17460 delta = dis_PMOVxXWD_128(vbi, pfx, delta, 17461 False/*!isAvx*/, False/*!xIsZ*/); 17462 goto decode_success; 17463 } 17464 break; 17465 17466 case 0x24: 17467 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32 17468 Packed Move with Sign Extend from Word to QWord (XMM) */ 17469 if (have66noF2noF3(pfx) && sz == 2) { 17470 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17471 goto decode_success; 17472 } 17473 break; 17474 17475 case 0x25: 17476 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64 17477 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */ 17478 if (have66noF2noF3(pfx) && sz == 2) { 17479 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 17480 False/*!isAvx*/, False/*!xIsZ*/ ); 17481 goto decode_success; 17482 } 17483 break; 17484 17485 case 0x28: 17486 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes 17487 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper 17488 64-bit half */ 17489 /* This is a really poor translation -- could be improved if 17490 performance critical. It's a copy-paste of PMULUDQ, too. 
*/ 17491 if (have66noF2noF3(pfx) && sz == 2) { 17492 IRTemp sV = newTemp(Ity_V128); 17493 IRTemp dV = newTemp(Ity_V128); 17494 modrm = getUChar(delta); 17495 UInt rG = gregOfRexRM(pfx,modrm); 17496 assign( dV, getXMMReg(rG) ); 17497 if (epartIsReg(modrm)) { 17498 UInt rE = eregOfRexRM(pfx,modrm); 17499 assign( sV, getXMMReg(rE) ); 17500 delta += 1; 17501 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 17502 } else { 17503 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17504 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 17505 delta += alen; 17506 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG)); 17507 } 17508 17509 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) ); 17510 goto decode_success; 17511 } 17512 break; 17513 17514 case 0x29: 17515 /* 66 0F 38 29 = PCMPEQQ 17516 64x2 equality comparison */ 17517 if (have66noF2noF3(pfx) && sz == 2) { 17518 /* FIXME: this needs an alignment check */ 17519 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 17520 "pcmpeqq", Iop_CmpEQ64x2, False ); 17521 goto decode_success; 17522 } 17523 break; 17524 17525 case 0x2A: 17526 /* 66 0F 38 2A = MOVNTDQA 17527 "non-temporal" "streaming" load 17528 Handle like MOVDQA but only memory operand is allowed */ 17529 if (have66noF2noF3(pfx) && sz == 2) { 17530 modrm = getUChar(delta); 17531 if (!epartIsReg(modrm)) { 17532 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17533 gen_SEGV_if_not_16_aligned( addr ); 17534 putXMMReg( gregOfRexRM(pfx,modrm), 17535 loadLE(Ity_V128, mkexpr(addr)) ); 17536 DIP("movntdqa %s,%s\n", dis_buf, 17537 nameXMMReg(gregOfRexRM(pfx,modrm))); 17538 delta += alen; 17539 goto decode_success; 17540 } 17541 } 17542 break; 17543 17544 case 0x2B: 17545 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128 17546 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */ 17547 if (have66noF2noF3(pfx) && sz == 2) { 17548 17549 modrm = getUChar(delta); 17550 17551 IRTemp argL = newTemp(Ity_V128); 17552 IRTemp argR = newTemp(Ity_V128); 17553 17554 if ( epartIsReg(modrm) 
) { 17555 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 17556 delta += 1; 17557 DIP( "packusdw %s,%s\n", 17558 nameXMMReg( eregOfRexRM(pfx, modrm) ), 17559 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17560 } else { 17561 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17562 gen_SEGV_if_not_16_aligned( addr ); 17563 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 17564 delta += alen; 17565 DIP( "packusdw %s,%s\n", 17566 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17567 } 17568 17569 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 17570 17571 putXMMReg( gregOfRexRM(pfx, modrm), 17572 binop( Iop_QNarrowBin32Sto16Ux8, 17573 mkexpr(argL), mkexpr(argR)) ); 17574 17575 goto decode_success; 17576 } 17577 break; 17578 17579 case 0x30: 17580 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64 17581 Packed Move with Zero Extend from Byte to Word (XMM) */ 17582 if (have66noF2noF3(pfx) && sz == 2) { 17583 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 17584 False/*!isAvx*/, True/*xIsZ*/ ); 17585 goto decode_success; 17586 } 17587 break; 17588 17589 case 0x31: 17590 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32 17591 Packed Move with Zero Extend from Byte to DWord (XMM) */ 17592 if (have66noF2noF3(pfx) && sz == 2) { 17593 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 17594 False/*!isAvx*/, True/*xIsZ*/ ); 17595 goto decode_success; 17596 } 17597 break; 17598 17599 case 0x32: 17600 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16 17601 Packed Move with Zero Extend from Byte to QWord (XMM) */ 17602 if (have66noF2noF3(pfx) && sz == 2) { 17603 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17604 goto decode_success; 17605 } 17606 break; 17607 17608 case 0x33: 17609 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64 17610 Packed Move with Zero Extend from Word to DWord (XMM) */ 17611 if (have66noF2noF3(pfx) && sz == 2) { 17612 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 17613 False/*!isAvx*/, True/*xIsZ*/ ); 17614 goto decode_success; 17615 } 17616 break; 17617 17618 case 
0x34:
         /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
            Packed Move with Zero Extend from Word to QWord (XMM) */
         if (have66noF2noF3(pfx) && sz == 2) {
            delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
            goto decode_success;
         }
         break;

      case 0x35:
         /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
            Packed Move with Zero Extend from DWord to QWord (XMM) */
         if (have66noF2noF3(pfx) && sz == 2) {
            delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                      False/*!isAvx*/, True/*xIsZ*/ );
            goto decode_success;
         }
         break;

      case 0x37:
         /* 66 0F 38 37 = PCMPGTQ
            64x2 comparison (signed, presumably; the Intel docs don't say :-)
         */
         if (have66noF2noF3(pfx) && sz == 2) {
            /* FIXME: this needs an alignment check */
            delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                       "pcmpgtq", Iop_CmpGT64Sx2, False );
            goto decode_success;
         }
         break;

      case 0x38:
      case 0x3C:
         /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128   8Sx16 (signed) min
            66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128   8Sx16 (signed) max
         */
         if (have66noF2noF3(pfx) && sz == 2) {
            /* FIXME: this needs an alignment check */
            Bool isMAX = opc == 0x3C;
            delta = dis_SSEint_E_to_G(
                       vbi, pfx, delta,
                       isMAX ? "pmaxsb" : "pminsb",
                       isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
                       False
                    );
            goto decode_success;
         }
         break;

      case 0x39:
      case 0x3D:
         /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
            Minimum of Packed Signed Double Word Integers (XMM)
            66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
            Maximum of Packed Signed Double Word Integers (XMM)
         */
         if (have66noF2noF3(pfx) && sz == 2) {
            /* FIXME: this needs an alignment check */
            Bool isMAX = opc == 0x3D;
            delta = dis_SSEint_E_to_G(
                       vbi, pfx, delta,
                       isMAX ? "pmaxsd" : "pminsd",
                       isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
                       False
                    );
            goto decode_success;
         }
         break;

      case 0x3A:
      case 0x3E:
         /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
            Minimum of Packed Unsigned Word Integers (XMM)
            66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
            Maximum of Packed Unsigned Word Integers (XMM)
         */
         if (have66noF2noF3(pfx) && sz == 2) {
            /* FIXME: this needs an alignment check */
            Bool isMAX = opc == 0x3E;
            delta = dis_SSEint_E_to_G(
                       vbi, pfx, delta,
                       isMAX ? "pmaxuw" : "pminuw",
                       isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
                       False
                    );
            goto decode_success;
         }
         break;

      case 0x3B:
      case 0x3F:
         /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
            Minimum of Packed Unsigned Doubleword Integers (XMM)
            66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
            Maximum of Packed Unsigned Doubleword Integers (XMM)
         */
         if (have66noF2noF3(pfx) && sz == 2) {
            /* FIXME: this needs an alignment check */
            Bool isMAX = opc == 0x3F;
            delta = dis_SSEint_E_to_G(
                       vbi, pfx, delta,
                       isMAX ? "pmaxud" : "pminud",
                       isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
                       False
                    );
            goto decode_success;
         }
         break;

      case 0x40:
         /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
            32x4 integer multiply from xmm2/m128 to xmm1 */
         if (have66noF2noF3(pfx) && sz == 2) {

            modrm = getUChar(delta);

            IRTemp argL = newTemp(Ity_V128);
            IRTemp argR = newTemp(Ity_V128);

            if ( epartIsReg(modrm) ) {
               assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
               delta += 1;
               DIP( "pmulld %s,%s\n",
                    nameXMMReg( eregOfRexRM(pfx, modrm) ),
                    nameXMMReg( gregOfRexRM(pfx, modrm) ) );
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               /* Memory operand of PMULLD must be 16-aligned. */
               gen_SEGV_if_not_16_aligned( addr );
               assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
               delta += alen;
               DIP( "pmulld %s,%s\n",
                    dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
            }

            assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

            putXMMReg( gregOfRexRM(pfx, modrm),
                       binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );

            goto decode_success;
         }
         break;

      case 0x41:
         /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
            Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
         if (have66noF2noF3(pfx) && sz == 2) {
            delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
            goto decode_success;
         }
         break;

      case 0xDC:
      case 0xDD:
      case 0xDE:
      case 0xDF:
      case 0xDB:
         /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
                     DD /r = AESENCLAST xmm1, xmm2/m128
                     DE /r = AESDEC xmm1, xmm2/m128
                     DF /r = AESDECLAST xmm1, xmm2/m128

                     DB /r = AESIMC xmm1, xmm2/m128 */
         if (have66noF2noF3(pfx) && sz == 2) {
            delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
            goto decode_success;
         }
         break;

      case 0xF0:
      case 0xF1:
         /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
            F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
            The decoding on this is a bit unusual.
         */
         if (haveF2noF3(pfx)
             && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
            modrm = getUChar(delta);

            /* Opcode F0 is the fixed 8-bit form; F1 keeps whatever
               operand size the prefixes selected (16/32/64). */
            if (opc == 0xF0)
               sz = 1;
            else
               vassert(sz == 2 || sz == 4 || sz == 8);

            IRType tyE = szToITy(sz);
            IRTemp valE = newTemp(tyE);

            if (epartIsReg(modrm)) {
               assign(valE, getIRegE(sz, pfx, modrm));
               delta += 1;
               /* NOTE(review): the mnemonic is printed as "crc32b" for
                  all operand sizes, not just the byte form — presumably
                  a disassembly-printing oversight; verify before
                  relying on DIP output. */
               DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
                   nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign(valE, loadLE(tyE, mkexpr(addr)));
               delta += alen;
               DIP("crc32b %s,%s\n", dis_buf,
                   nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
            }

            /* Somewhat funny getting/putting of the crc32 value, in
               order to ensure that it turns into 64-bit gets and puts.
               However, mask off the upper 32 bits so as to not get
               memcheck false +ves around the helper call.
            */
            IRTemp valG0 = newTemp(Ity_I64);
            assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
                                mkU64(0xFFFFFFFF)));

            /* Pick the size-specific clean helper. */
            const HChar* nm = NULL;
            void* fn = NULL;
            switch (sz) {
               case 1: nm = "amd64g_calc_crc32b";
                       fn = &amd64g_calc_crc32b; break;
               case 2: nm = "amd64g_calc_crc32w";
                       fn = &amd64g_calc_crc32w; break;
               case 4: nm = "amd64g_calc_crc32l";
                       fn = &amd64g_calc_crc32l; break;
               case 8: nm = "amd64g_calc_crc32q";
                       fn = &amd64g_calc_crc32q; break;
            }
            vassert(nm && fn);
            IRTemp valG1 = newTemp(Ity_I64);
            assign(valG1,
                   mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
                                 mkIRExprVec_2(mkexpr(valG0),
                                               widenUto64(mkexpr(valE)))));

            /* CRC32 always writes the 32-bit form of the dest reg. */
            putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
            goto decode_success;
         }
         break;

      default:
         break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F3A__SSE4                   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Decode PEXTRW (66 0F 3A 15): extract one 16-bit lane, selected by
   imm8[2:0], from the G (xmm) register into a 32-bit GPR (zero
   extended) or a 16-bit memory destination. */
static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);
   Int    alen  = 0;
   HChar  dis_buf[50];
   UInt   rG    = gregOfRexRM(pfx,modrm);
   Int    imm8_20;
   IRTemp xmm_vec = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   /* "v" mnemonic prefix when decoding the AVX (VEX-encoded) form. */
   const HChar* mbV = isAvx ?
"v" : ""; 17887 17888 vassert(0==getRexW(pfx)); /* ensured by caller */ 17889 assign( xmm_vec, getXMMReg(rG) ); 17890 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17891 17892 if ( epartIsReg( modrm ) ) { 17893 imm8_20 = (Int)(getUChar(delta+1) & 7); 17894 } else { 17895 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17896 imm8_20 = (Int)(getUChar(delta+alen) & 7); 17897 } 17898 17899 switch (imm8_20) { 17900 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break; 17901 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break; 17902 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break; 17903 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break; 17904 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break; 17905 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break; 17906 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break; 17907 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break; 17908 default: vassert(0); 17909 } 17910 17911 if ( epartIsReg( modrm ) ) { 17912 UInt rE = eregOfRexRM(pfx,modrm); 17913 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) ); 17914 delta += 1+1; 17915 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, 17916 nameXMMReg( rG ), nameIReg32( rE ) ); 17917 } else { 17918 storeLE( mkexpr(addr), mkexpr(d16) ); 17919 delta += alen+1; 17920 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf ); 17921 } 17922 return delta; 17923 } 17924 17925 17926 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx, 17927 Long delta, Bool isAvx ) 17928 { 17929 IRTemp addr = IRTemp_INVALID; 17930 IRTemp t0 = IRTemp_INVALID; 17931 IRTemp t1 = IRTemp_INVALID; 17932 IRTemp t2 = IRTemp_INVALID; 17933 IRTemp t3 = IRTemp_INVALID; 17934 UChar modrm = 0; 17935 Int alen = 0; 17936 HChar dis_buf[50]; 17937 17938 Int imm8_10; 17939 IRTemp xmm_vec = newTemp(Ity_V128); 17940 IRTemp src_dword = newTemp(Ity_I32); 17941 const HChar* mbV = isAvx ? 
"v" : ""; 17942 17943 vassert(0==getRexW(pfx)); /* ensured by caller */ 17944 modrm = getUChar(delta); 17945 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 17946 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17947 17948 if ( epartIsReg( modrm ) ) { 17949 imm8_10 = (Int)(getUChar(delta+1) & 3); 17950 } else { 17951 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17952 imm8_10 = (Int)(getUChar(delta+alen) & 3); 17953 } 17954 17955 switch ( imm8_10 ) { 17956 case 0: assign( src_dword, mkexpr(t0) ); break; 17957 case 1: assign( src_dword, mkexpr(t1) ); break; 17958 case 2: assign( src_dword, mkexpr(t2) ); break; 17959 case 3: assign( src_dword, mkexpr(t3) ); break; 17960 default: vassert(0); 17961 } 17962 17963 if ( epartIsReg( modrm ) ) { 17964 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 17965 delta += 1+1; 17966 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10, 17967 nameXMMReg( gregOfRexRM(pfx, modrm) ), 17968 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 17969 } else { 17970 storeLE( mkexpr(addr), mkexpr(src_dword) ); 17971 delta += alen+1; 17972 DIP( "%spextrd $%d, %s,%s\n", mbV, 17973 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 17974 } 17975 return delta; 17976 } 17977 17978 17979 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx, 17980 Long delta, Bool isAvx ) 17981 { 17982 IRTemp addr = IRTemp_INVALID; 17983 UChar modrm = 0; 17984 Int alen = 0; 17985 HChar dis_buf[50]; 17986 17987 Int imm8_0; 17988 IRTemp xmm_vec = newTemp(Ity_V128); 17989 IRTemp src_qword = newTemp(Ity_I64); 17990 const HChar* mbV = isAvx ? 
"v" : ""; 17991 17992 vassert(1==getRexW(pfx)); /* ensured by caller */ 17993 modrm = getUChar(delta); 17994 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 17995 17996 if ( epartIsReg( modrm ) ) { 17997 imm8_0 = (Int)(getUChar(delta+1) & 1); 17998 } else { 17999 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18000 imm8_0 = (Int)(getUChar(delta+alen) & 1); 18001 } 18002 18003 switch ( imm8_0 ) { 18004 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); 18005 break; 18006 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); 18007 break; 18008 default: vassert(0); 18009 } 18010 18011 if ( epartIsReg( modrm ) ) { 18012 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) ); 18013 delta += 1+1; 18014 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0, 18015 nameXMMReg( gregOfRexRM(pfx, modrm) ), 18016 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 18017 } else { 18018 storeLE( mkexpr(addr), mkexpr(src_qword) ); 18019 delta += alen+1; 18020 DIP( "%spextrq $%d, %s,%s\n", mbV, 18021 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 18022 } 18023 return delta; 18024 } 18025 18026 static IRExpr* math_CTZ32(IRExpr *exp) 18027 { 18028 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */ 18029 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp))); 18030 } 18031 18032 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR, 18033 Long delta, UChar opc, UChar imm, 18034 HChar dis_buf[]) 18035 { 18036 /* We only handle PCMPISTRI for now */ 18037 vassert((opc & 0x03) == 0x03); 18038 /* And only an immediate byte of 0x38 or 0x3A */ 18039 vassert((imm & ~0x02) == 0x38); 18040 18041 /* FIXME: Is this correct when RegNoL == 16 ? 
*/
   IRTemp argL = newTemp(Ity_V128);
   assign(argL, getXMMReg(regNoL));
   IRTemp argR = newTemp(Ity_V128);
   assign(argR, getXMMReg(regNoR));

   /* zmaskL/zmaskR: 16-bit masks with a 1 in each byte lane that is
      zero (the C-string terminator) in argL/argR respectively. */
   IRTemp zmaskL = newTemp(Ity_I32);
   assign(zmaskL, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
   IRTemp zmaskR = newTemp(Ity_I32);
   assign(zmaskR, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));

   /* We want validL = ~(zmaskL | -zmaskL)

      But this formulation kills memcheck's validity tracking when any
      bits above the first "1" are invalid.  So reformulate as:

      validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
   */

   IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));

   /* Generate a bool expression which is zero iff the original is
      zero.  Do this carefully so memcheck can propagate validity bits
      correctly.
   */
   IRTemp zmaskL_zero = newTemp(Ity_I1);
   assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));

   IRTemp validL = newTemp(Ity_I32);
   assign(validL, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskL_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzL),
                                   mkU32(0)),
                        mkU32(1)));

   /* And similarly for validR. */
   IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
   IRTemp zmaskR_zero = newTemp(Ity_I1);
   assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
   IRTemp validR = newTemp(Ity_I32);
   assign(validR, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskR_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzR),
                                   mkU32(0)),
                        mkU32(1)));

   /* Do the actual comparison. */
   IRExpr *boolResII = unop(Iop_16Uto32,
                            unop(Iop_GetMSBs8x16,
                                 binop(Iop_CmpEQ8x16, mkexpr(argL),
                                                      mkexpr(argR))));

   /* Compute boolresII & validL & validR (i.e., if both valid, use
      comparison result) */
   IRExpr *intRes1_a = binop(Iop_And32, boolResII,
                             binop(Iop_And32,
                                   mkexpr(validL), mkexpr(validR)));

   /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
   IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
                                             mkexpr(validL), mkexpr(validR)));
   /* Otherwise, zero. */
   IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
                           binop(Iop_Or32, intRes1_a, intRes1_b));

   /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
      result. */
   IRTemp intRes2 = newTemp(Ity_I32);
   assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
                         binop(Iop_Xor32, intRes1, mkexpr(validL))));

   /* If the 0x40 bit were set in imm=0x3A, we would return the index
      of the msb.  Since it is clear, we return the index of the
      lsb.  (OR-ing in bit 16 guarantees ctz yields 16 when intRes2
      is zero, matching the architected "no match" result.) */
   IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
                                     mkexpr(intRes2), mkU32(0x10000)));

   /* And thats our rcx. */
   putIReg32(R_RCX, newECX);

   /* Now for the condition codes...
*/

   /* C == 0 iff intRes2 == 0 */
   IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
                                     mkU32(0)),
                               mkU32(1 << AMD64G_CC_SHIFT_C),
                               mkU32(0));
   /* Z == 1 iff any in argL is 0 */
   IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_Z),
                               mkU32(0));
   /* S == 1 iff any in argR is 0 */
   IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_S),
                               mkU32(0));
   /* O == IntRes2[0] */
   IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
                                          mkU32(0x01)),
                         mkU8(AMD64G_CC_SHIFT_O));

   /* Put them all together: flags thunk is set to OP_COPY with the
      assembled OSZACP bits in DEP1. */
   IRTemp cc = newTemp(Ity_I64);
   assign(cc, widenUto64(binop(Iop_Or32,
                               binop(Iop_Or32, c_bit, z_bit),
                               binop(Iop_Or32, s_bit, o_bit))));
   stmt(IRStmt_Put(OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY)));
   stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
   stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
   stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));

   return delta;
}

/* Decode the PCMPxSTRx family (PCMPISTRI/PCMPISTRM/PCMPESTRI/
   PCMPESTRM).  This can fail, in which case it returns the original
   (unchanged) delta. */
static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc )
{
   Long   delta0  = delta;
   UInt   isISTRx = opc & 2;        /* implicit-length ("I") variant? */
   UInt   isxSTRM = (opc & 1) ^ 1;  /* mask-producing ("M") variant? */
   UInt   regNoL  = 0;
   UInt   regNoR  = 0;
   UChar  imm     = 0;
   IRTemp addr    = IRTemp_INVALID;
   Int    alen    = 0;
   HChar  dis_buf[50];

   /* This is a nasty kludge.  We need to pass 2 x V128 to the helper
      (which is clean).  Since we can't do that, use a dirty helper to
      compute the results directly from the XMM regs in the guest
      state.  That means for the memory case, we need to move the left
      operand into a pseudo-register (XMM16, let's call it). */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      regNoR = gregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      regNoR = gregOfRexRM(pfx, modrm);
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* No alignment check; I guess that makes sense, given that
         these insns are for dealing with C style strings. */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
      itself. */
   if (regNoL == 16) {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, dis_buf, nameXMMReg(regNoR));
   } else {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
   }

   /* Handle special case(s): PCMPISTRI $0x3A gets a faster, inline
      IR expansion. */
   if (imm == 0x3A && isISTRx && !isxSTRM) {
      return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
                                opc, imm, dis_buf);
   }

   /* Now we know the XMM reg numbers for the operands, and the
      immediate byte.  Is it one we can actually handle? Throw out any
      cases for which the helper function has not been verified. */
   switch (imm) {
      case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x12: case 0x14: case 0x1A:
      case 0x30: case 0x34: case 0x38: case 0x3A:
      case 0x40: case 0x44: case 0x46: case 0x4A:
         break;
      // the 16-bit character versions of the above
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13: case 0x1B:
      case 0x39: case 0x3B:
      case 0x45: case 0x4B:
         break;
      default:
         return delta0; /*FAIL*/
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*        fn = &amd64g_dirtyhelper_PCMPxSTRx;
   const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";

   /* Round up the arguments.  Note that this is a kludge -- the use
      of mkU64 rather than mkIRExpr_HWord implies the assumption that
      the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
   IRExpr*  gstOffLe     = mkU64(gstOffL);
   IRExpr*  gstOffRe     = mkU64(gstOffR);
   /* Explicit-length ("E") variants also consume RDX/RAX. */
   IRExpr*  edxIN        = isISTRx ? mkU64(0) : getIRegRDX(8);
   IRExpr*  eaxIN        = isISTRx ? mkU64(0) : getIRegRAX(8);
   IRExpr** args
      = mkIRExprVec_6( IRExpr_BBPTR(),
                       opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );

   IRTemp   resT = newTemp(Ity_I64);
   IRDirty* d    = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme.
*/
   /* Declare which guest-state slices the helper touches, so the
      instrumenters see accurate read/write sets. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Read;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (isxSTRM) {
      /* Declare that the helper writes XMM0. */
      d->nFxState = 3;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = ymmGuestRegOffset(0);
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );

   /* Now resT[15:0] holds the new OSZACP values, so the condition
      codes must be updated.  And for a xSTRI case, resT[31:16] holds
      the new ECX value, so stash that too. */
   if (!isxSTRM) {
      putIReg64(R_RCX, binop(Iop_And64,
                             binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
                             mkU64(0xFFFF)));
   }

   /* Zap the upper half of the dest reg as per AVX conventions. */
   if (isxSTRM && isAvx)
      putYMMRegLane128(/*YMM*/0, 1, mkV128(0));

   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
   ));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}


/* PINSRB core: return v128 with byte lane imm8 (0..15) replaced by u8. */
static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 15);

   // Create a V128 value which has the selected byte in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   /* mkV128 takes a 16-bit mask, one bit per byte lane; clear just
      the target lane of v128 and OR the new byte in. */
   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}


/* PINSRD core: return v128 with 32-bit lane imm8 (0..3) replaced by u32. */
static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   switch (imm8) {
      case 3:  mask = 0x0FFF;
               assign(withZs, mkV128from32s(u32, z32, z32, z32));
               break;
      case 2:  mask = 0xF0FF;
               assign(withZs, mkV128from32s(z32, u32, z32, z32));
               break;
      case 1:  mask = 0xFF0F;
               assign(withZs, mkV128from32s(z32, z32, u32, z32));
               break;
      case 0:  mask = 0xFFF0;
               assign(withZs, mkV128from32s(z32, z32, z32, u32));
               break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


/* PINSRQ core: return v128 with 64-bit half imm8 (0..1) replaced by u64. */
static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


/* INSERTPS core: place toInsertD into the dst lane selected by
   imm8[5:4], then zero every lane whose bit is set in imm8[3:0]. */
static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}


/* Decode PEXTRB: extract the byte lane selected by imm8 from the G
   register into a 64-bit GPR (zero extended) or a byte in memory. */
static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);
   IRTemp shr_lane = newTemp(Ity_I32);
   /* "v" mnemonic prefix when decoding the AVX (VEX-encoded) form. */
   const HChar* mbV = isAvx ?
"v" : ""; 18413 UChar modrm = getUChar(delta); 18414 IRTemp t3, t2, t1, t0; 18415 Int imm8; 18416 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 18417 t3 = t2 = t1 = t0 = IRTemp_INVALID; 18418 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 18419 18420 if ( epartIsReg( modrm ) ) { 18421 imm8 = (Int)getUChar(delta+1); 18422 } else { 18423 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18424 imm8 = (Int)getUChar(delta+alen); 18425 } 18426 switch ( (imm8 >> 2) & 3 ) { 18427 case 0: assign( sel_lane, mkexpr(t0) ); break; 18428 case 1: assign( sel_lane, mkexpr(t1) ); break; 18429 case 2: assign( sel_lane, mkexpr(t2) ); break; 18430 case 3: assign( sel_lane, mkexpr(t3) ); break; 18431 default: vassert(0); 18432 } 18433 assign( shr_lane, 18434 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 18435 18436 if ( epartIsReg( modrm ) ) { 18437 putIReg64( eregOfRexRM(pfx,modrm), 18438 unop( Iop_32Uto64, 18439 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 18440 delta += 1+1; 18441 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8, 18442 nameXMMReg( gregOfRexRM(pfx, modrm) ), 18443 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 18444 } else { 18445 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 18446 delta += alen+1; 18447 DIP( "%spextrb $%d,%s,%s\n", mbV, 18448 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 18449 } 18450 18451 return delta; 18452 } 18453 18454 18455 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18456 { 18457 vassert(imm8 < 256); 18458 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 18459 IRTemp and_vec = newTemp(Ity_V128); 18460 IRTemp sum_vec = newTemp(Ity_V128); 18461 IRTemp rm = newTemp(Ity_I32); 18462 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18463 assign( and_vec, binop( Iop_AndV128, 18464 triop( Iop_Mul64Fx2, 18465 mkexpr(rm), 18466 mkexpr(dst_vec), mkexpr(src_vec) ), 18467 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 18468 18469 assign( sum_vec, binop( 
Iop_Add64F0x2, 18470 binop( Iop_InterleaveHI64x2, 18471 mkexpr(and_vec), mkexpr(and_vec) ), 18472 binop( Iop_InterleaveLO64x2, 18473 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 18474 IRTemp res = newTemp(Ity_V128); 18475 assign(res, binop( Iop_AndV128, 18476 binop( Iop_InterleaveLO64x2, 18477 mkexpr(sum_vec), mkexpr(sum_vec) ), 18478 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 18479 return res; 18480 } 18481 18482 18483 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18484 { 18485 vassert(imm8 < 256); 18486 IRTemp tmp_prod_vec = newTemp(Ity_V128); 18487 IRTemp prod_vec = newTemp(Ity_V128); 18488 IRTemp sum_vec = newTemp(Ity_V128); 18489 IRTemp rm = newTemp(Ity_I32); 18490 IRTemp v3, v2, v1, v0; 18491 v3 = v2 = v1 = v0 = IRTemp_INVALID; 18492 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 18493 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 18494 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 18495 0xFFFF }; 18496 18497 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18498 assign( tmp_prod_vec, 18499 binop( Iop_AndV128, 18500 triop( Iop_Mul32Fx4, 18501 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ), 18502 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 18503 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 18504 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) ); 18505 18506 assign( sum_vec, triop( Iop_Add32Fx4, 18507 mkexpr(rm), 18508 binop( Iop_InterleaveHI32x4, 18509 mkexpr(prod_vec), mkexpr(prod_vec) ), 18510 binop( Iop_InterleaveLO32x4, 18511 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 18512 18513 IRTemp res = newTemp(Ity_V128); 18514 assign( res, binop( Iop_AndV128, 18515 triop( Iop_Add32Fx4, 18516 mkexpr(rm), 18517 binop( Iop_InterleaveHI32x4, 18518 mkexpr(sum_vec), mkexpr(sum_vec) ), 18519 binop( Iop_InterleaveLO32x4, 18520 mkexpr(sum_vec), mkexpr(sum_vec) ) ), 18521 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 18522 return res; 18523 } 18524 18525 18526 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt 
imm8 ) 18527 { 18528 /* Mask out bits of the operands we don't need. This isn't 18529 strictly necessary, but it does ensure Memcheck doesn't 18530 give us any false uninitialised value errors as a 18531 result. */ 18532 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 }; 18533 UShort dst_mask[2] = { 0x07FF, 0x7FF0 }; 18534 18535 IRTemp src_maskV = newTemp(Ity_V128); 18536 IRTemp dst_maskV = newTemp(Ity_V128); 18537 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] )); 18538 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] )); 18539 18540 IRTemp src_masked = newTemp(Ity_V128); 18541 IRTemp dst_masked = newTemp(Ity_V128); 18542 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV))); 18543 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV))); 18544 18545 /* Generate 4 64 bit values that we can hand to a clean helper */ 18546 IRTemp sHi = newTemp(Ity_I64); 18547 IRTemp sLo = newTemp(Ity_I64); 18548 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) ); 18549 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) ); 18550 18551 IRTemp dHi = newTemp(Ity_I64); 18552 IRTemp dLo = newTemp(Ity_I64); 18553 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) ); 18554 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) ); 18555 18556 /* Compute halves of the result separately */ 18557 IRTemp resHi = newTemp(Ity_I64); 18558 IRTemp resLo = newTemp(Ity_I64); 18559 18560 IRExpr** argsHi 18561 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 18562 mkU64( 0x80 | (imm8 & 7) )); 18563 IRExpr** argsLo 18564 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 18565 mkU64( 0x00 | (imm8 & 7) )); 18566 18567 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/, 18568 "amd64g_calc_mpsadbw", 18569 &amd64g_calc_mpsadbw, argsHi )); 18570 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/, 18571 "amd64g_calc_mpsadbw", 18572 &amd64g_calc_mpsadbw, argsLo )); 18573 18574 IRTemp res = newTemp(Ity_V128); 18575 
assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))); 18576 return res; 18577 } 18578 18579 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx, 18580 Long delta, Bool isAvx ) 18581 { 18582 IRTemp addr = IRTemp_INVALID; 18583 Int alen = 0; 18584 HChar dis_buf[50]; 18585 UChar modrm = getUChar(delta); 18586 Int imm8_10; 18587 IRTemp xmm_vec = newTemp(Ity_V128); 18588 IRTemp src_dword = newTemp(Ity_I32); 18589 UInt rG = gregOfRexRM(pfx,modrm); 18590 IRTemp t3, t2, t1, t0; 18591 t3 = t2 = t1 = t0 = IRTemp_INVALID; 18592 18593 assign( xmm_vec, getXMMReg( rG ) ); 18594 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 18595 18596 if ( epartIsReg( modrm ) ) { 18597 imm8_10 = (Int)(getUChar(delta+1) & 3); 18598 } else { 18599 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18600 imm8_10 = (Int)(getUChar(delta+alen) & 3); 18601 } 18602 18603 switch ( imm8_10 ) { 18604 case 0: assign( src_dword, mkexpr(t0) ); break; 18605 case 1: assign( src_dword, mkexpr(t1) ); break; 18606 case 2: assign( src_dword, mkexpr(t2) ); break; 18607 case 3: assign( src_dword, mkexpr(t3) ); break; 18608 default: vassert(0); 18609 } 18610 18611 if ( epartIsReg( modrm ) ) { 18612 UInt rE = eregOfRexRM(pfx,modrm); 18613 putIReg32( rE, mkexpr(src_dword) ); 18614 delta += 1+1; 18615 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 18616 nameXMMReg( rG ), nameIReg32( rE ) ); 18617 } else { 18618 storeLE( mkexpr(addr), mkexpr(src_dword) ); 18619 delta += alen+1; 18620 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 18621 nameXMMReg( rG ), dis_buf ); 18622 } 18623 18624 return delta; 18625 } 18626 18627 18628 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 ) 18629 { 18630 IRTemp t0 = newTemp(Ity_I64); 18631 IRTemp t1 = newTemp(Ity_I64); 18632 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, 18633 mkexpr(dV))); 18634 assign(t1, unop((imm8&16) ? 
Iop_V128HIto64 : Iop_V128to64,
              mkexpr(sV)));

   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);

   IRExpr** args;

   /* Last arg selects lo (0) or hi (1) 64 bits of the 128-bit
      carry-less product. */
   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
   assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));
   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
   assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
   return res;
}


/* Decoder for the 66 0F 3A xx (SSE4) opcode space.  On failure,
   *decode_OK is False and deltaIN is returned unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
                          const VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long delta = deltaIN;
   UChar opc = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x08:
      /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         IRTemp src0 = newTemp(Ity_F32);
         IRTemp src1 = newTemp(Ity_F32);
         IRTemp src2 = newTemp(Ity_F32);
         IRTemp src3 = newTemp(Ity_F32);
         IRTemp res0 = newTemp(Ity_F32);
         IRTemp res1 = newTemp(Ity_F32);
         IRTemp res2 = newTemp(Ity_F32);
         IRTemp res3 = newTemp(Ity_F32);
         IRTemp rm   = newTemp(Ity_I32);
         Int    imm  = 0;

         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            assign( src0,
                    getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
            assign( src1,
                    getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
            assign( src2,
                    getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
            assign( src3,
                    getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
            imm = getUChar(delta+1);
            /* Only imm values 0..15 are handled. */
            if (imm & ~15) goto decode_failure;
            delta += 1+1;
            DIP( "roundps $%d,%s,%s\n",
                 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
                      nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            gen_SEGV_if_not_16_aligned(addr);
            assign( src0, loadLE(Ity_F32,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
            assign( src1, loadLE(Ity_F32,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
            assign( src2, loadLE(Ity_F32,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
            assign( src3, loadLE(Ity_F32,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
            imm = getUChar(delta+alen);
            if (imm & ~15) goto decode_failure;
            delta += alen+1;
            DIP( "roundps $%d,%s,%s\n",
                 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         /* (imm & 3) contains an Intel-encoded rounding mode.  Because
            that encoding is the same as the encoding for IRRoundingMode,
            we can use that value directly in the IR as a rounding
            mode.  imm bit 2 set means "use MXCSR.RC" instead. */
         assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));

         assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
         assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
         assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
         assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );

         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );

         goto decode_success;
      }
      break;

   case 0x09:
      /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         IRTemp src0 = newTemp(Ity_F64);
         IRTemp src1 = newTemp(Ity_F64);
         IRTemp res0 = newTemp(Ity_F64);
         IRTemp res1 = newTemp(Ity_F64);
         IRTemp rm   = newTemp(Ity_I32);
         Int    imm  = 0;

         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            assign( src0,
                    getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
            assign( src1,
                    getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
            imm = getUChar(delta+1);
            if (imm & ~15) goto decode_failure;
            delta += 1+1;
            DIP( "roundpd $%d,%s,%s\n",
                 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
                      nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            gen_SEGV_if_not_16_aligned(addr);
            assign( src0, loadLE(Ity_F64,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
            assign( src1, loadLE(Ity_F64,
                                 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
            imm = getUChar(delta+alen);
            if (imm & ~15) goto decode_failure;
            delta += alen+1;
            DIP( "roundpd $%d,%s,%s\n",
                 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         /* (imm & 3) contains an
Intel-encoded rounding mode. Because 18782 that encoding is the same as the encoding for IRRoundingMode, 18783 we can use that value directly in the IR as a rounding 18784 mode. */ 18785 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 18786 18787 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) ); 18788 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) ); 18789 18790 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 18791 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 18792 18793 goto decode_success; 18794 } 18795 break; 18796 18797 case 0x0A: 18798 case 0x0B: 18799 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 18800 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 18801 */ 18802 if (have66noF2noF3(pfx) && sz == 2) { 18803 18804 Bool isD = opc == 0x0B; 18805 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 18806 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 18807 Int imm = 0; 18808 18809 modrm = getUChar(delta); 18810 18811 if (epartIsReg(modrm)) { 18812 assign( src, 18813 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 18814 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 18815 imm = getUChar(delta+1); 18816 if (imm & ~15) goto decode_failure; 18817 delta += 1+1; 18818 DIP( "rounds%c $%d,%s,%s\n", 18819 isD ? 'd' : 's', 18820 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 18821 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18822 } else { 18823 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18824 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 18825 imm = getUChar(delta+alen); 18826 if (imm & ~15) goto decode_failure; 18827 delta += alen+1; 18828 DIP( "rounds%c $%d,%s,%s\n", 18829 isD ? 'd' : 's', 18830 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18831 } 18832 18833 /* (imm & 3) contains an Intel-encoded rounding mode. 
Because 18834 that encoding is the same as the encoding for IRRoundingMode, 18835 we can use that value directly in the IR as a rounding 18836 mode. */ 18837 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 18838 (imm & 4) ? get_sse_roundingmode() 18839 : mkU32(imm & 3), 18840 mkexpr(src)) ); 18841 18842 if (isD) 18843 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 18844 else 18845 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 18846 18847 goto decode_success; 18848 } 18849 break; 18850 18851 case 0x0C: 18852 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8 18853 Blend Packed Single Precision Floating-Point Values (XMM) */ 18854 if (have66noF2noF3(pfx) && sz == 2) { 18855 18856 Int imm8; 18857 IRTemp dst_vec = newTemp(Ity_V128); 18858 IRTemp src_vec = newTemp(Ity_V128); 18859 18860 modrm = getUChar(delta); 18861 18862 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18863 18864 if ( epartIsReg( modrm ) ) { 18865 imm8 = (Int)getUChar(delta+1); 18866 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18867 delta += 1+1; 18868 DIP( "blendps $%d, %s,%s\n", imm8, 18869 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18870 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18871 } else { 18872 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18873 1/* imm8 is 1 byte after the amode */ ); 18874 gen_SEGV_if_not_16_aligned( addr ); 18875 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18876 imm8 = (Int)getUChar(delta+alen); 18877 delta += alen+1; 18878 DIP( "blendpd $%d, %s,%s\n", 18879 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18880 } 18881 18882 putXMMReg( gregOfRexRM(pfx, modrm), 18883 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) ); 18884 goto decode_success; 18885 } 18886 break; 18887 18888 case 0x0D: 18889 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8 18890 Blend Packed Double Precision Floating-Point Values (XMM) */ 18891 if (have66noF2noF3(pfx) && sz == 2) { 18892 18893 Int imm8; 18894 IRTemp 
dst_vec = newTemp(Ity_V128); 18895 IRTemp src_vec = newTemp(Ity_V128); 18896 18897 modrm = getUChar(delta); 18898 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18899 18900 if ( epartIsReg( modrm ) ) { 18901 imm8 = (Int)getUChar(delta+1); 18902 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18903 delta += 1+1; 18904 DIP( "blendpd $%d, %s,%s\n", imm8, 18905 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18906 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18907 } else { 18908 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18909 1/* imm8 is 1 byte after the amode */ ); 18910 gen_SEGV_if_not_16_aligned( addr ); 18911 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18912 imm8 = (Int)getUChar(delta+alen); 18913 delta += alen+1; 18914 DIP( "blendpd $%d, %s,%s\n", 18915 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18916 } 18917 18918 putXMMReg( gregOfRexRM(pfx, modrm), 18919 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) ); 18920 goto decode_success; 18921 } 18922 break; 18923 18924 case 0x0E: 18925 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8 18926 Blend Packed Words (XMM) */ 18927 if (have66noF2noF3(pfx) && sz == 2) { 18928 18929 Int imm8; 18930 IRTemp dst_vec = newTemp(Ity_V128); 18931 IRTemp src_vec = newTemp(Ity_V128); 18932 18933 modrm = getUChar(delta); 18934 18935 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18936 18937 if ( epartIsReg( modrm ) ) { 18938 imm8 = (Int)getUChar(delta+1); 18939 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18940 delta += 1+1; 18941 DIP( "pblendw $%d, %s,%s\n", imm8, 18942 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18943 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18944 } else { 18945 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18946 1/* imm8 is 1 byte after the amode */ ); 18947 gen_SEGV_if_not_16_aligned( addr ); 18948 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18949 imm8 = (Int)getUChar(delta+alen); 18950 delta += alen+1; 18951 DIP( "pblendw $%d, %s,%s\n", 18952 
imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18953 } 18954 18955 putXMMReg( gregOfRexRM(pfx, modrm), 18956 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) ); 18957 goto decode_success; 18958 } 18959 break; 18960 18961 case 0x14: 18962 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8 18963 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. 18964 (XMM) */ 18965 if (have66noF2noF3(pfx) && sz == 2) { 18966 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 18967 goto decode_success; 18968 } 18969 break; 18970 18971 case 0x15: 18972 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8 18973 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. 18974 (XMM) */ 18975 if (have66noF2noF3(pfx) && sz == 2) { 18976 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ ); 18977 goto decode_success; 18978 } 18979 break; 18980 18981 case 0x16: 18982 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8 18983 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM) 18984 Note that this insn has the same opcodes as PEXTRQ, but 18985 here the REX.W bit is _not_ present */ 18986 if (have66noF2noF3(pfx) 18987 && sz == 2 /* REX.W is _not_ present */) { 18988 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ ); 18989 goto decode_success; 18990 } 18991 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8 18992 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM) 18993 Note that this insn has the same opcodes as PEXTRD, but 18994 here the REX.W bit is present */ 18995 if (have66noF2noF3(pfx) 18996 && sz == 8 /* REX.W is present */) { 18997 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/); 18998 goto decode_success; 18999 } 19000 break; 19001 19002 case 0x17: 19003 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract 19004 float from xmm reg and store in gen.reg or mem. This is 19005 identical to PEXTRD, except that REX.W appears to be ignored. 
19006 */ 19007 if (have66noF2noF3(pfx) 19008 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 19009 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ ); 19010 goto decode_success; 19011 } 19012 break; 19013 19014 case 0x20: 19015 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 19016 Extract byte from r32/m8 and insert into xmm1 */ 19017 if (have66noF2noF3(pfx) && sz == 2) { 19018 Int imm8; 19019 IRTemp new8 = newTemp(Ity_I8); 19020 modrm = getUChar(delta); 19021 UInt rG = gregOfRexRM(pfx, modrm); 19022 if ( epartIsReg( modrm ) ) { 19023 UInt rE = eregOfRexRM(pfx,modrm); 19024 imm8 = (Int)(getUChar(delta+1) & 0xF); 19025 assign( new8, unop(Iop_32to8, getIReg32(rE)) ); 19026 delta += 1+1; 19027 DIP( "pinsrb $%d,%s,%s\n", imm8, 19028 nameIReg32(rE), nameXMMReg(rG) ); 19029 } else { 19030 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19031 imm8 = (Int)(getUChar(delta+alen) & 0xF); 19032 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) ); 19033 delta += alen+1; 19034 DIP( "pinsrb $%d,%s,%s\n", 19035 imm8, dis_buf, nameXMMReg(rG) ); 19036 } 19037 IRTemp src_vec = newTemp(Ity_V128); 19038 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) )); 19039 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 ); 19040 putXMMReg( rG, mkexpr(res) ); 19041 goto decode_success; 19042 } 19043 break; 19044 19045 case 0x21: 19046 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1 19047 Insert Packed Single Precision Floating-Point Value (XMM) */ 19048 if (have66noF2noF3(pfx) && sz == 2) { 19049 UInt imm8; 19050 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 19051 const IRTemp inval = IRTemp_INVALID; 19052 19053 modrm = getUChar(delta); 19054 UInt rG = gregOfRexRM(pfx, modrm); 19055 19056 if ( epartIsReg( modrm ) ) { 19057 UInt rE = eregOfRexRM(pfx, modrm); 19058 IRTemp vE = newTemp(Ity_V128); 19059 assign( vE, getXMMReg(rE) ); 19060 IRTemp dsE[4] = { inval, inval, inval, inval }; 19061 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 19062 imm8 
= getUChar(delta+1); 19063 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 19064 delta += 1+1; 19065 DIP( "insertps $%u, %s,%s\n", 19066 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19067 } else { 19068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19069 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 19070 imm8 = getUChar(delta+alen); 19071 delta += alen+1; 19072 DIP( "insertps $%u, %s,%s\n", 19073 imm8, dis_buf, nameXMMReg(rG) ); 19074 } 19075 19076 IRTemp vG = newTemp(Ity_V128); 19077 assign( vG, getXMMReg(rG) ); 19078 19079 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) ); 19080 goto decode_success; 19081 } 19082 break; 19083 19084 case 0x22: 19085 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 19086 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 19087 if (have66noF2noF3(pfx) 19088 && sz == 2 /* REX.W is NOT present */) { 19089 Int imm8_10; 19090 IRTemp src_u32 = newTemp(Ity_I32); 19091 modrm = getUChar(delta); 19092 UInt rG = gregOfRexRM(pfx, modrm); 19093 19094 if ( epartIsReg( modrm ) ) { 19095 UInt rE = eregOfRexRM(pfx,modrm); 19096 imm8_10 = (Int)(getUChar(delta+1) & 3); 19097 assign( src_u32, getIReg32( rE ) ); 19098 delta += 1+1; 19099 DIP( "pinsrd $%d, %s,%s\n", 19100 imm8_10, nameIReg32(rE), nameXMMReg(rG) ); 19101 } else { 19102 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19103 imm8_10 = (Int)(getUChar(delta+alen) & 3); 19104 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 19105 delta += alen+1; 19106 DIP( "pinsrd $%d, %s,%s\n", 19107 imm8_10, dis_buf, nameXMMReg(rG) ); 19108 } 19109 19110 IRTemp src_vec = newTemp(Ity_V128); 19111 assign(src_vec, getXMMReg( rG )); 19112 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 19113 putXMMReg( rG, mkexpr(res_vec) ); 19114 goto decode_success; 19115 } 19116 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 19117 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 19118 if (have66noF2noF3(pfx) 19119 && sz == 8 /* REX.W is 
present */) { 19120 Int imm8_0; 19121 IRTemp src_u64 = newTemp(Ity_I64); 19122 modrm = getUChar(delta); 19123 UInt rG = gregOfRexRM(pfx, modrm); 19124 19125 if ( epartIsReg( modrm ) ) { 19126 UInt rE = eregOfRexRM(pfx,modrm); 19127 imm8_0 = (Int)(getUChar(delta+1) & 1); 19128 assign( src_u64, getIReg64( rE ) ); 19129 delta += 1+1; 19130 DIP( "pinsrq $%d, %s,%s\n", 19131 imm8_0, nameIReg64(rE), nameXMMReg(rG) ); 19132 } else { 19133 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19134 imm8_0 = (Int)(getUChar(delta+alen) & 1); 19135 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 19136 delta += alen+1; 19137 DIP( "pinsrq $%d, %s,%s\n", 19138 imm8_0, dis_buf, nameXMMReg(rG) ); 19139 } 19140 19141 IRTemp src_vec = newTemp(Ity_V128); 19142 assign(src_vec, getXMMReg( rG )); 19143 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 19144 putXMMReg( rG, mkexpr(res_vec) ); 19145 goto decode_success; 19146 } 19147 break; 19148 19149 case 0x40: 19150 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 19151 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 19152 if (have66noF2noF3(pfx) && sz == 2) { 19153 modrm = getUChar(delta); 19154 Int imm8; 19155 IRTemp src_vec = newTemp(Ity_V128); 19156 IRTemp dst_vec = newTemp(Ity_V128); 19157 UInt rG = gregOfRexRM(pfx, modrm); 19158 assign( dst_vec, getXMMReg( rG ) ); 19159 if ( epartIsReg( modrm ) ) { 19160 UInt rE = eregOfRexRM(pfx, modrm); 19161 imm8 = (Int)getUChar(delta+1); 19162 assign( src_vec, getXMMReg(rE) ); 19163 delta += 1+1; 19164 DIP( "dpps $%d, %s,%s\n", 19165 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19166 } else { 19167 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19168 1/* imm8 is 1 byte after the amode */ ); 19169 gen_SEGV_if_not_16_aligned( addr ); 19170 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19171 imm8 = (Int)getUChar(delta+alen); 19172 delta += alen+1; 19173 DIP( "dpps $%d, %s,%s\n", 19174 imm8, dis_buf, nameXMMReg(rG) ); 19175 } 19176 IRTemp res = 
math_DPPS_128( src_vec, dst_vec, imm8 ); 19177 putXMMReg( rG, mkexpr(res) ); 19178 goto decode_success; 19179 } 19180 break; 19181 19182 case 0x41: 19183 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 19184 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 19185 if (have66noF2noF3(pfx) && sz == 2) { 19186 modrm = getUChar(delta); 19187 Int imm8; 19188 IRTemp src_vec = newTemp(Ity_V128); 19189 IRTemp dst_vec = newTemp(Ity_V128); 19190 UInt rG = gregOfRexRM(pfx, modrm); 19191 assign( dst_vec, getXMMReg( rG ) ); 19192 if ( epartIsReg( modrm ) ) { 19193 UInt rE = eregOfRexRM(pfx, modrm); 19194 imm8 = (Int)getUChar(delta+1); 19195 assign( src_vec, getXMMReg(rE) ); 19196 delta += 1+1; 19197 DIP( "dppd $%d, %s,%s\n", 19198 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19199 } else { 19200 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19201 1/* imm8 is 1 byte after the amode */ ); 19202 gen_SEGV_if_not_16_aligned( addr ); 19203 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19204 imm8 = (Int)getUChar(delta+alen); 19205 delta += alen+1; 19206 DIP( "dppd $%d, %s,%s\n", 19207 imm8, dis_buf, nameXMMReg(rG) ); 19208 } 19209 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 ); 19210 putXMMReg( rG, mkexpr(res) ); 19211 goto decode_success; 19212 } 19213 break; 19214 19215 case 0x42: 19216 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8 19217 Multiple Packed Sums of Absolule Difference (XMM) */ 19218 if (have66noF2noF3(pfx) && sz == 2) { 19219 Int imm8; 19220 IRTemp src_vec = newTemp(Ity_V128); 19221 IRTemp dst_vec = newTemp(Ity_V128); 19222 modrm = getUChar(delta); 19223 UInt rG = gregOfRexRM(pfx, modrm); 19224 19225 assign( dst_vec, getXMMReg(rG) ); 19226 19227 if ( epartIsReg( modrm ) ) { 19228 UInt rE = eregOfRexRM(pfx, modrm); 19229 19230 imm8 = (Int)getUChar(delta+1); 19231 assign( src_vec, getXMMReg(rE) ); 19232 delta += 1+1; 19233 DIP( "mpsadbw $%d, %s,%s\n", imm8, 19234 nameXMMReg(rE), nameXMMReg(rG) ); 19235 } else { 19236 addr = 
disAMode( &alen, vbi, pfx, delta, dis_buf, 19237 1/* imm8 is 1 byte after the amode */ ); 19238 gen_SEGV_if_not_16_aligned( addr ); 19239 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19240 imm8 = (Int)getUChar(delta+alen); 19241 delta += alen+1; 19242 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) ); 19243 } 19244 19245 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) ); 19246 goto decode_success; 19247 } 19248 break; 19249 19250 case 0x44: 19251 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 19252 * Carry-less multiplication of selected XMM quadwords into XMM 19253 * registers (a.k.a multiplication of polynomials over GF(2)) 19254 */ 19255 if (have66noF2noF3(pfx) && sz == 2) { 19256 19257 Int imm8; 19258 IRTemp svec = newTemp(Ity_V128); 19259 IRTemp dvec = newTemp(Ity_V128); 19260 modrm = getUChar(delta); 19261 UInt rG = gregOfRexRM(pfx, modrm); 19262 19263 assign( dvec, getXMMReg(rG) ); 19264 19265 if ( epartIsReg( modrm ) ) { 19266 UInt rE = eregOfRexRM(pfx, modrm); 19267 imm8 = (Int)getUChar(delta+1); 19268 assign( svec, getXMMReg(rE) ); 19269 delta += 1+1; 19270 DIP( "pclmulqdq $%d, %s,%s\n", imm8, 19271 nameXMMReg(rE), nameXMMReg(rG) ); 19272 } else { 19273 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19274 1/* imm8 is 1 byte after the amode */ ); 19275 gen_SEGV_if_not_16_aligned( addr ); 19276 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) ); 19277 imm8 = (Int)getUChar(delta+alen); 19278 delta += alen+1; 19279 DIP( "pclmulqdq $%d, %s,%s\n", 19280 imm8, dis_buf, nameXMMReg(rG) ); 19281 } 19282 19283 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) ); 19284 goto decode_success; 19285 } 19286 break; 19287 19288 case 0x60: 19289 case 0x61: 19290 case 0x62: 19291 case 0x63: 19292 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1 19293 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1 19294 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1 19295 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1 
19296 (selected special cases that actually occur in glibc, 19297 not by any means a complete implementation.) 19298 */ 19299 if (have66noF2noF3(pfx) && sz == 2) { 19300 Long delta0 = delta; 19301 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc ); 19302 if (delta > delta0) goto decode_success; 19303 /* else fall though; dis_PCMPxSTRx failed to decode it */ 19304 } 19305 break; 19306 19307 case 0xDF: 19308 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */ 19309 if (have66noF2noF3(pfx) && sz == 2) { 19310 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ ); 19311 goto decode_success; 19312 } 19313 break; 19314 19315 default: 19316 break; 19317 19318 } 19319 19320 decode_failure: 19321 *decode_OK = False; 19322 return deltaIN; 19323 19324 decode_success: 19325 *decode_OK = True; 19326 return delta; 19327 } 19328 19329 19330 /*------------------------------------------------------------*/ 19331 /*--- ---*/ 19332 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/ 19333 /*--- ---*/ 19334 /*------------------------------------------------------------*/ 19335 19336 __attribute__((noinline)) 19337 static 19338 Long dis_ESC_NONE ( 19339 /*MB_OUT*/DisResult* dres, 19340 /*MB_OUT*/Bool* expect_CAS, 19341 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 19342 Bool resteerCisOk, 19343 void* callback_opaque, 19344 const VexArchInfo* archinfo, 19345 const VexAbiInfo* vbi, 19346 Prefix pfx, Int sz, Long deltaIN 19347 ) 19348 { 19349 Long d64 = 0; 19350 UChar abyte = 0; 19351 IRTemp addr = IRTemp_INVALID; 19352 IRTemp t1 = IRTemp_INVALID; 19353 IRTemp t2 = IRTemp_INVALID; 19354 IRTemp t3 = IRTemp_INVALID; 19355 IRTemp t4 = IRTemp_INVALID; 19356 IRTemp t5 = IRTemp_INVALID; 19357 IRType ty = Ity_INVALID; 19358 UChar modrm = 0; 19359 Int am_sz = 0; 19360 Int d_sz = 0; 19361 Int alen = 0; 19362 HChar dis_buf[50]; 19363 19364 Long delta = deltaIN; 19365 UChar opc = getUChar(delta); delta++; 19366 19367 /* delta now points at the modrm byte. 
In most of the cases that 19368 follow, neither the F2 nor F3 prefixes are allowed. However, 19369 for some basic arithmetic operations we have to allow F2/XACQ or 19370 F3/XREL in the case where the destination is memory and the LOCK 19371 prefix is also present. Do this check by looking at the modrm 19372 byte but not advancing delta over it. */ 19373 /* By default, F2 and F3 are not allowed, so let's start off with 19374 that setting. */ 19375 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 19376 { UChar tmp_modrm = getUChar(delta); 19377 switch (opc) { 19378 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */ 19379 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */ 19380 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */ 19381 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */ 19382 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */ 19383 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */ 19384 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */ 19385 if (!epartIsReg(tmp_modrm) 19386 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 19387 /* dst is mem, and we have F2 or F3 but not both */ 19388 validF2orF3 = True; 19389 } 19390 break; 19391 default: 19392 break; 19393 } 19394 } 19395 19396 /* Now, in the switch below, for the opc values examined by the 19397 switch above, use validF2orF3 rather than looking at pfx 19398 directly. 
*/ 19399 switch (opc) { 19400 19401 case 0x00: /* ADD Gb,Eb */ 19402 if (!validF2orF3) goto decode_failure; 19403 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19404 return delta; 19405 case 0x01: /* ADD Gv,Ev */ 19406 if (!validF2orF3) goto decode_failure; 19407 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19408 return delta; 19409 19410 case 0x02: /* ADD Eb,Gb */ 19411 if (haveF2orF3(pfx)) goto decode_failure; 19412 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19413 return delta; 19414 case 0x03: /* ADD Ev,Gv */ 19415 if (haveF2orF3(pfx)) goto decode_failure; 19416 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19417 return delta; 19418 19419 case 0x04: /* ADD Ib, AL */ 19420 if (haveF2orF3(pfx)) goto decode_failure; 19421 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 19422 return delta; 19423 case 0x05: /* ADD Iv, eAX */ 19424 if (haveF2orF3(pfx)) goto decode_failure; 19425 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); 19426 return delta; 19427 19428 case 0x08: /* OR Gb,Eb */ 19429 if (!validF2orF3) goto decode_failure; 19430 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19431 return delta; 19432 case 0x09: /* OR Gv,Ev */ 19433 if (!validF2orF3) goto decode_failure; 19434 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19435 return delta; 19436 19437 case 0x0A: /* OR Eb,Gb */ 19438 if (haveF2orF3(pfx)) goto decode_failure; 19439 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19440 return delta; 19441 case 0x0B: /* OR Ev,Gv */ 19442 if (haveF2orF3(pfx)) goto decode_failure; 19443 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19444 return delta; 19445 19446 case 0x0C: /* OR Ib, AL */ 19447 if (haveF2orF3(pfx)) goto decode_failure; 19448 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 19449 return 
delta; 19450 case 0x0D: /* OR Iv, eAX */ 19451 if (haveF2orF3(pfx)) goto decode_failure; 19452 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 19453 return delta; 19454 19455 case 0x10: /* ADC Gb,Eb */ 19456 if (!validF2orF3) goto decode_failure; 19457 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19458 return delta; 19459 case 0x11: /* ADC Gv,Ev */ 19460 if (!validF2orF3) goto decode_failure; 19461 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19462 return delta; 19463 19464 case 0x12: /* ADC Eb,Gb */ 19465 if (haveF2orF3(pfx)) goto decode_failure; 19466 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19467 return delta; 19468 case 0x13: /* ADC Ev,Gv */ 19469 if (haveF2orF3(pfx)) goto decode_failure; 19470 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19471 return delta; 19472 19473 case 0x14: /* ADC Ib, AL */ 19474 if (haveF2orF3(pfx)) goto decode_failure; 19475 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 19476 return delta; 19477 case 0x15: /* ADC Iv, eAX */ 19478 if (haveF2orF3(pfx)) goto decode_failure; 19479 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 19480 return delta; 19481 19482 case 0x18: /* SBB Gb,Eb */ 19483 if (!validF2orF3) goto decode_failure; 19484 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19485 return delta; 19486 case 0x19: /* SBB Gv,Ev */ 19487 if (!validF2orF3) goto decode_failure; 19488 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 19489 return delta; 19490 19491 case 0x1A: /* SBB Eb,Gb */ 19492 if (haveF2orF3(pfx)) goto decode_failure; 19493 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19494 return delta; 19495 case 0x1B: /* SBB Ev,Gv */ 19496 if (haveF2orF3(pfx)) goto decode_failure; 19497 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 19498 return delta; 19499 19500 
case 0x1C: /* SBB Ib, AL */ 19501 if (haveF2orF3(pfx)) goto decode_failure; 19502 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 19503 return delta; 19504 case 0x1D: /* SBB Iv, eAX */ 19505 if (haveF2orF3(pfx)) goto decode_failure; 19506 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 19507 return delta; 19508 19509 case 0x20: /* AND Gb,Eb */ 19510 if (!validF2orF3) goto decode_failure; 19511 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19512 return delta; 19513 case 0x21: /* AND Gv,Ev */ 19514 if (!validF2orF3) goto decode_failure; 19515 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19516 return delta; 19517 19518 case 0x22: /* AND Eb,Gb */ 19519 if (haveF2orF3(pfx)) goto decode_failure; 19520 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19521 return delta; 19522 case 0x23: /* AND Ev,Gv */ 19523 if (haveF2orF3(pfx)) goto decode_failure; 19524 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19525 return delta; 19526 19527 case 0x24: /* AND Ib, AL */ 19528 if (haveF2orF3(pfx)) goto decode_failure; 19529 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 19530 return delta; 19531 case 0x25: /* AND Iv, eAX */ 19532 if (haveF2orF3(pfx)) goto decode_failure; 19533 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 19534 return delta; 19535 19536 case 0x28: /* SUB Gb,Eb */ 19537 if (!validF2orF3) goto decode_failure; 19538 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 19539 return delta; 19540 case 0x29: /* SUB Gv,Ev */ 19541 if (!validF2orF3) goto decode_failure; 19542 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 19543 return delta; 19544 19545 case 0x2A: /* SUB Eb,Gb */ 19546 if (haveF2orF3(pfx)) goto decode_failure; 19547 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 19548 return delta; 19549 case 0x2B: /* SUB 
Ev,Gv */ 19550 if (haveF2orF3(pfx)) goto decode_failure; 19551 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 19552 return delta; 19553 19554 case 0x2C: /* SUB Ib, AL */ 19555 if (haveF2orF3(pfx)) goto decode_failure; 19556 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 19557 return delta; 19558 case 0x2D: /* SUB Iv, eAX */ 19559 if (haveF2orF3(pfx)) goto decode_failure; 19560 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 19561 return delta; 19562 19563 case 0x30: /* XOR Gb,Eb */ 19564 if (!validF2orF3) goto decode_failure; 19565 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 19566 return delta; 19567 case 0x31: /* XOR Gv,Ev */ 19568 if (!validF2orF3) goto decode_failure; 19569 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 19570 return delta; 19571 19572 case 0x32: /* XOR Eb,Gb */ 19573 if (haveF2orF3(pfx)) goto decode_failure; 19574 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 19575 return delta; 19576 case 0x33: /* XOR Ev,Gv */ 19577 if (haveF2orF3(pfx)) goto decode_failure; 19578 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 19579 return delta; 19580 19581 case 0x34: /* XOR Ib, AL */ 19582 if (haveF2orF3(pfx)) goto decode_failure; 19583 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 19584 return delta; 19585 case 0x35: /* XOR Iv, eAX */ 19586 if (haveF2orF3(pfx)) goto decode_failure; 19587 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 19588 return delta; 19589 19590 case 0x38: /* CMP Gb,Eb */ 19591 if (haveF2orF3(pfx)) goto decode_failure; 19592 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 19593 return delta; 19594 case 0x39: /* CMP Gv,Ev */ 19595 if (haveF2orF3(pfx)) goto decode_failure; 19596 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 19597 return delta; 19598 19599 case 0x3A: /* CMP Eb,Gb 
*/ 19600 if (haveF2orF3(pfx)) goto decode_failure; 19601 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 19602 return delta; 19603 case 0x3B: /* CMP Ev,Gv */ 19604 if (haveF2orF3(pfx)) goto decode_failure; 19605 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 19606 return delta; 19607 19608 case 0x3C: /* CMP Ib, AL */ 19609 if (haveF2orF3(pfx)) goto decode_failure; 19610 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 19611 return delta; 19612 case 0x3D: /* CMP Iv, eAX */ 19613 if (haveF2orF3(pfx)) goto decode_failure; 19614 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 19615 return delta; 19616 19617 case 0x50: /* PUSH eAX */ 19618 case 0x51: /* PUSH eCX */ 19619 case 0x52: /* PUSH eDX */ 19620 case 0x53: /* PUSH eBX */ 19621 case 0x55: /* PUSH eBP */ 19622 case 0x56: /* PUSH eSI */ 19623 case 0x57: /* PUSH eDI */ 19624 case 0x54: /* PUSH eSP */ 19625 /* This is the Right Way, in that the value to be pushed is 19626 established before %rsp is changed, so that pushq %rsp 19627 correctly pushes the old value. */ 19628 if (haveF2orF3(pfx)) goto decode_failure; 19629 vassert(sz == 2 || sz == 4 || sz == 8); 19630 if (sz == 4) 19631 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ 19632 ty = sz==2 ? 
Ity_I16 : Ity_I64; 19633 t1 = newTemp(ty); 19634 t2 = newTemp(Ity_I64); 19635 assign(t1, getIRegRexB(sz, pfx, opc-0x50)); 19636 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); 19637 putIReg64(R_RSP, mkexpr(t2) ); 19638 storeLE(mkexpr(t2),mkexpr(t1)); 19639 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); 19640 return delta; 19641 19642 case 0x58: /* POP eAX */ 19643 case 0x59: /* POP eCX */ 19644 case 0x5A: /* POP eDX */ 19645 case 0x5B: /* POP eBX */ 19646 case 0x5D: /* POP eBP */ 19647 case 0x5E: /* POP eSI */ 19648 case 0x5F: /* POP eDI */ 19649 case 0x5C: /* POP eSP */ 19650 if (haveF2orF3(pfx)) goto decode_failure; 19651 vassert(sz == 2 || sz == 4 || sz == 8); 19652 if (sz == 4) 19653 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 19654 t1 = newTemp(szToITy(sz)); 19655 t2 = newTemp(Ity_I64); 19656 assign(t2, getIReg64(R_RSP)); 19657 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 19658 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 19659 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 19660 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 19661 return delta; 19662 19663 case 0x63: /* MOVSX */ 19664 if (haveF2orF3(pfx)) goto decode_failure; 19665 if (haveREX(pfx) && 1==getRexW(pfx)) { 19666 vassert(sz == 8); 19667 /* movsx r/m32 to r64 */ 19668 modrm = getUChar(delta); 19669 if (epartIsReg(modrm)) { 19670 delta++; 19671 putIRegG(8, pfx, modrm, 19672 unop(Iop_32Sto64, 19673 getIRegE(4, pfx, modrm))); 19674 DIP("movslq %s,%s\n", 19675 nameIRegE(4, pfx, modrm), 19676 nameIRegG(8, pfx, modrm)); 19677 return delta; 19678 } else { 19679 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19680 delta += alen; 19681 putIRegG(8, pfx, modrm, 19682 unop(Iop_32Sto64, 19683 loadLE(Ity_I32, mkexpr(addr)))); 19684 DIP("movslq %s,%s\n", dis_buf, 19685 nameIRegG(8, pfx, modrm)); 19686 return delta; 19687 } 19688 } else { 19689 goto decode_failure; 19690 } 19691 19692 case 0x68: /* PUSH Iv */ 19693 if 
(haveF2orF3(pfx)) goto decode_failure; 19694 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 19695 if (sz == 4) sz = 8; 19696 d64 = getSDisp(imin(4,sz),delta); 19697 delta += imin(4,sz); 19698 goto do_push_I; 19699 19700 case 0x69: /* IMUL Iv, Ev, Gv */ 19701 if (haveF2orF3(pfx)) goto decode_failure; 19702 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 19703 return delta; 19704 19705 case 0x6A: /* PUSH Ib, sign-extended to sz */ 19706 if (haveF2orF3(pfx)) goto decode_failure; 19707 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 19708 if (sz == 4) sz = 8; 19709 d64 = getSDisp8(delta); delta += 1; 19710 goto do_push_I; 19711 do_push_I: 19712 ty = szToITy(sz); 19713 t1 = newTemp(Ity_I64); 19714 t2 = newTemp(ty); 19715 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 19716 putIReg64(R_RSP, mkexpr(t1) ); 19717 /* stop mkU16 asserting if d32 is a negative 16-bit number 19718 (bug #132813) */ 19719 if (ty == Ity_I16) 19720 d64 &= 0xFFFF; 19721 storeLE( mkexpr(t1), mkU(ty,d64) ); 19722 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 19723 return delta; 19724 19725 case 0x6B: /* IMUL Ib, Ev, Gv */ 19726 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 19727 return delta; 19728 19729 case 0x70: 19730 case 0x71: 19731 case 0x72: /* JBb/JNAEb (jump below) */ 19732 case 0x73: /* JNBb/JAEb (jump not below) */ 19733 case 0x74: /* JZb/JEb (jump zero) */ 19734 case 0x75: /* JNZb/JNEb (jump not zero) */ 19735 case 0x76: /* JBEb/JNAb (jump below or equal) */ 19736 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 19737 case 0x78: /* JSb (jump negative) */ 19738 case 0x79: /* JSb (jump not negative) */ 19739 case 0x7A: /* JP (jump parity even) */ 19740 case 0x7B: /* JNP/JPO (jump parity odd) */ 19741 case 0x7C: /* JLb/JNGEb (jump less) */ 19742 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 19743 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 19744 case 0x7F: { /* JGb/JNLEb (jump greater) */ 19745 Long jmpDelta; 19746 const HChar* 
comment = ""; 19747 if (haveF3(pfx)) goto decode_failure; 19748 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 19749 jmpDelta = getSDisp8(delta); 19750 vassert(-128 <= jmpDelta && jmpDelta < 128); 19751 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; 19752 delta++; 19753 if (resteerCisOk 19754 && vex_control.guest_chase_cond 19755 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19756 && jmpDelta < 0 19757 && resteerOkFn( callback_opaque, (Addr64)d64) ) { 19758 /* Speculation: assume this backward branch is taken. So we 19759 need to emit a side-exit to the insn following this one, 19760 on the negation of the condition, and continue at the 19761 branch target address (d64). If we wind up back at the 19762 first instruction of the trace, just stop; it's better to 19763 let the IR loop unroller handle that case. */ 19764 stmt( IRStmt_Exit( 19765 mk_amd64g_calculate_condition( 19766 (AMD64Condcode)(1 ^ (opc - 0x70))), 19767 Ijk_Boring, 19768 IRConst_U64(guest_RIP_bbstart+delta), 19769 OFFB_RIP ) ); 19770 dres->whatNext = Dis_ResteerC; 19771 dres->continueAt = d64; 19772 comment = "(assumed taken)"; 19773 } 19774 else 19775 if (resteerCisOk 19776 && vex_control.guest_chase_cond 19777 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19778 && jmpDelta >= 0 19779 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 19780 /* Speculation: assume this forward branch is not taken. So 19781 we need to emit a side-exit to d64 (the dest) and continue 19782 disassembling at the insn immediately following this 19783 one. */ 19784 stmt( IRStmt_Exit( 19785 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 19786 Ijk_Boring, 19787 IRConst_U64(d64), 19788 OFFB_RIP ) ); 19789 dres->whatNext = Dis_ResteerC; 19790 dres->continueAt = guest_RIP_bbstart+delta; 19791 comment = "(assumed not taken)"; 19792 } 19793 else { 19794 /* Conservative default translation - end the block at this 19795 point. 
*/ 19796 jcc_01( dres, (AMD64Condcode)(opc - 0x70), 19797 guest_RIP_bbstart+delta, d64 ); 19798 vassert(dres->whatNext == Dis_StopHere); 19799 } 19800 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); 19801 return delta; 19802 } 19803 19804 case 0x80: /* Grp1 Ib,Eb */ 19805 modrm = getUChar(delta); 19806 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow 19807 just one for the mem case and also require LOCK in this case. 19808 Note that this erroneously allows XACQ/XREL on CMP since we 19809 don't check the subopcode here. No big deal. */ 19810 if (epartIsReg(modrm) && haveF2orF3(pfx)) 19811 goto decode_failure; 19812 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 19813 goto decode_failure; 19814 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 19815 goto decode_failure; 19816 am_sz = lengthAMode(pfx,delta); 19817 sz = 1; 19818 d_sz = 1; 19819 d64 = getSDisp8(delta + am_sz); 19820 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19821 return delta; 19822 19823 case 0x81: /* Grp1 Iv,Ev */ 19824 modrm = getUChar(delta); 19825 /* Same comment as for case 0x80 just above. 
*/ 19826 if (epartIsReg(modrm) && haveF2orF3(pfx)) 19827 goto decode_failure; 19828 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 19829 goto decode_failure; 19830 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 19831 goto decode_failure; 19832 am_sz = lengthAMode(pfx,delta); 19833 d_sz = imin(sz,4); 19834 d64 = getSDisp(d_sz, delta + am_sz); 19835 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19836 return delta; 19837 19838 case 0x83: /* Grp1 Ib,Ev */ 19839 if (haveF2orF3(pfx)) goto decode_failure; 19840 modrm = getUChar(delta); 19841 am_sz = lengthAMode(pfx,delta); 19842 d_sz = 1; 19843 d64 = getSDisp8(delta + am_sz); 19844 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19845 return delta; 19846 19847 case 0x84: /* TEST Eb,Gb */ 19848 if (haveF2orF3(pfx)) goto decode_failure; 19849 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 19850 return delta; 19851 19852 case 0x85: /* TEST Ev,Gv */ 19853 if (haveF2orF3(pfx)) goto decode_failure; 19854 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 19855 return delta; 19856 19857 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 19858 prefix. Therefore, generate CAS regardless of the presence or 19859 otherwise of a LOCK prefix. */ 19860 case 0x86: /* XCHG Gb,Eb */ 19861 sz = 1; 19862 /* Fall through ... */ 19863 case 0x87: /* XCHG Gv,Ev */ 19864 modrm = getUChar(delta); 19865 /* Check whether F2 or F3 are allowable. For the mem case, one 19866 or the othter but not both are. We don't care about the 19867 presence of LOCK in this case -- XCHG is unusual in this 19868 respect. 
*/ 19869 if (haveF2orF3(pfx)) { 19870 if (epartIsReg(modrm)) { 19871 goto decode_failure; 19872 } else { 19873 if (haveF2andF3(pfx)) 19874 goto decode_failure; 19875 } 19876 } 19877 ty = szToITy(sz); 19878 t1 = newTemp(ty); t2 = newTemp(ty); 19879 if (epartIsReg(modrm)) { 19880 assign(t1, getIRegE(sz, pfx, modrm)); 19881 assign(t2, getIRegG(sz, pfx, modrm)); 19882 putIRegG(sz, pfx, modrm, mkexpr(t1)); 19883 putIRegE(sz, pfx, modrm, mkexpr(t2)); 19884 delta++; 19885 DIP("xchg%c %s, %s\n", 19886 nameISize(sz), nameIRegG(sz, pfx, modrm), 19887 nameIRegE(sz, pfx, modrm)); 19888 } else { 19889 *expect_CAS = True; 19890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19891 assign( t1, loadLE(ty, mkexpr(addr)) ); 19892 assign( t2, getIRegG(sz, pfx, modrm) ); 19893 casLE( mkexpr(addr), 19894 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 19895 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 19896 delta += alen; 19897 DIP("xchg%c %s, %s\n", nameISize(sz), 19898 nameIRegG(sz, pfx, modrm), dis_buf); 19899 } 19900 return delta; 19901 19902 case 0x88: { /* MOV Gb,Eb */ 19903 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */ 19904 Bool ok = True; 19905 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok); 19906 if (!ok) goto decode_failure; 19907 return delta; 19908 } 19909 19910 case 0x89: { /* MOV Gv,Ev */ 19911 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. 
*/ 19912 Bool ok = True; 19913 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok); 19914 if (!ok) goto decode_failure; 19915 return delta; 19916 } 19917 19918 case 0x8A: /* MOV Eb,Gb */ 19919 if (haveF2orF3(pfx)) goto decode_failure; 19920 delta = dis_mov_E_G(vbi, pfx, 1, delta); 19921 return delta; 19922 19923 case 0x8B: /* MOV Ev,Gv */ 19924 if (haveF2orF3(pfx)) goto decode_failure; 19925 delta = dis_mov_E_G(vbi, pfx, sz, delta); 19926 return delta; 19927 19928 case 0x8D: /* LEA M,Gv */ 19929 if (haveF2orF3(pfx)) goto decode_failure; 19930 if (sz != 4 && sz != 8) 19931 goto decode_failure; 19932 modrm = getUChar(delta); 19933 if (epartIsReg(modrm)) 19934 goto decode_failure; 19935 /* NOTE! this is the one place where a segment override prefix 19936 has no effect on the address calculation. Therefore we clear 19937 any segment override bits in pfx. */ 19938 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 19939 delta += alen; 19940 /* This is a hack. But it isn't clear that really doing the 19941 calculation at 32 bits is really worth it. Hence for leal, 19942 do the full 64-bit calculation and then truncate it. */ 19943 putIRegG( sz, pfx, modrm, 19944 sz == 4 19945 ? unop(Iop_64to32, mkexpr(addr)) 19946 : mkexpr(addr) 19947 ); 19948 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 19949 nameIRegG(sz,pfx,modrm)); 19950 return delta; 19951 19952 case 0x8F: { /* POPQ m64 / POPW m16 */ 19953 Int len; 19954 UChar rm; 19955 /* There is no encoding for 32-bit pop in 64-bit mode. 19956 So sz==4 actually means sz==8. 
*/ 19957 if (haveF2orF3(pfx)) goto decode_failure; 19958 vassert(sz == 2 || sz == 4 19959 || /* tolerate redundant REX.W, see #210481 */ sz == 8); 19960 if (sz == 4) sz = 8; 19961 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 19962 19963 rm = getUChar(delta); 19964 19965 /* make sure this instruction is correct POP */ 19966 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) 19967 goto decode_failure; 19968 /* and has correct size */ 19969 vassert(sz == 8); 19970 19971 t1 = newTemp(Ity_I64); 19972 t3 = newTemp(Ity_I64); 19973 assign( t1, getIReg64(R_RSP) ); 19974 assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); 19975 19976 /* Increase RSP; must be done before the STORE. Intel manual 19977 says: If the RSP register is used as a base register for 19978 addressing a destination operand in memory, the POP 19979 instruction computes the effective address of the operand 19980 after it increments the RSP register. */ 19981 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); 19982 19983 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 19984 storeLE( mkexpr(addr), mkexpr(t3) ); 19985 19986 DIP("popl %s\n", dis_buf); 19987 19988 delta += len; 19989 return delta; 19990 } 19991 19992 case 0x90: /* XCHG eAX,eAX */ 19993 /* detect and handle F3 90 (rep nop) specially */ 19994 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 19995 DIP("rep nop (P4 pause)\n"); 19996 /* "observe" the hint. The Vex client needs to be careful not 19997 to cause very long delays as a result, though. */ 19998 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta); 19999 vassert(dres->whatNext == Dis_StopHere); 20000 return delta; 20001 } 20002 /* detect and handle NOPs specially */ 20003 if (/* F2/F3 probably change meaning completely */ 20004 !haveF2orF3(pfx) 20005 /* If REX.B is 1, we're not exchanging rAX with itself */ 20006 && getRexB(pfx)==0 ) { 20007 DIP("nop\n"); 20008 return delta; 20009 } 20010 /* else fall through to normal case. 
*/ 20011 case 0x91: /* XCHG rAX,rCX */ 20012 case 0x92: /* XCHG rAX,rDX */ 20013 case 0x93: /* XCHG rAX,rBX */ 20014 case 0x94: /* XCHG rAX,rSP */ 20015 case 0x95: /* XCHG rAX,rBP */ 20016 case 0x96: /* XCHG rAX,rSI */ 20017 case 0x97: /* XCHG rAX,rDI */ 20018 /* guard against mutancy */ 20019 if (haveF2orF3(pfx)) goto decode_failure; 20020 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 20021 return delta; 20022 20023 case 0x98: /* CBW */ 20024 if (haveF2orF3(pfx)) goto decode_failure; 20025 if (sz == 8) { 20026 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 20027 DIP(/*"cdqe\n"*/"cltq"); 20028 return delta; 20029 } 20030 if (sz == 4) { 20031 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 20032 DIP("cwtl\n"); 20033 return delta; 20034 } 20035 if (sz == 2) { 20036 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 20037 DIP("cbw\n"); 20038 return delta; 20039 } 20040 goto decode_failure; 20041 20042 case 0x99: /* CWD/CDQ/CQO */ 20043 if (haveF2orF3(pfx)) goto decode_failure; 20044 vassert(sz == 2 || sz == 4 || sz == 8); 20045 ty = szToITy(sz); 20046 putIRegRDX( sz, 20047 binop(mkSizedOp(ty,Iop_Sar8), 20048 getIRegRAX(sz), 20049 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 20050 DIP(sz == 2 ? "cwd\n" 20051 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 20052 : "cqo\n")); 20053 return delta; 20054 20055 case 0x9B: /* FWAIT (X87 insn) */ 20056 /* ignore? */ 20057 DIP("fwait\n"); 20058 return delta; 20059 20060 case 0x9C: /* PUSHF */ { 20061 /* Note. There is no encoding for a 32-bit pushf in 64-bit 20062 mode. So sz==4 actually means sz==8. */ 20063 /* 24 July 06: has also been seen with a redundant REX prefix, 20064 so must also allow sz==8. 
*/ 20065 if (haveF2orF3(pfx)) goto decode_failure; 20066 vassert(sz == 2 || sz == 4 || sz == 8); 20067 if (sz == 4) sz = 8; 20068 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20069 20070 t1 = newTemp(Ity_I64); 20071 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 20072 putIReg64(R_RSP, mkexpr(t1) ); 20073 20074 t2 = newTemp(Ity_I64); 20075 assign( t2, mk_amd64g_calculate_rflags_all() ); 20076 20077 /* Patch in the D flag. This can simply be a copy of bit 10 of 20078 baseBlock[OFFB_DFLAG]. */ 20079 t3 = newTemp(Ity_I64); 20080 assign( t3, binop(Iop_Or64, 20081 mkexpr(t2), 20082 binop(Iop_And64, 20083 IRExpr_Get(OFFB_DFLAG,Ity_I64), 20084 mkU64(1<<10))) 20085 ); 20086 20087 /* And patch in the ID flag. */ 20088 t4 = newTemp(Ity_I64); 20089 assign( t4, binop(Iop_Or64, 20090 mkexpr(t3), 20091 binop(Iop_And64, 20092 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 20093 mkU8(21)), 20094 mkU64(1<<21))) 20095 ); 20096 20097 /* And patch in the AC flag too. */ 20098 t5 = newTemp(Ity_I64); 20099 assign( t5, binop(Iop_Or64, 20100 mkexpr(t4), 20101 binop(Iop_And64, 20102 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 20103 mkU8(18)), 20104 mkU64(1<<18))) 20105 ); 20106 20107 /* if sz==2, the stored value needs to be narrowed. */ 20108 if (sz == 2) 20109 storeLE( mkexpr(t1), unop(Iop_32to16, 20110 unop(Iop_64to32,mkexpr(t5))) ); 20111 else 20112 storeLE( mkexpr(t1), mkexpr(t5) ); 20113 20114 DIP("pushf%c\n", nameISize(sz)); 20115 return delta; 20116 } 20117 20118 case 0x9D: /* POPF */ 20119 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 20120 So sz==4 actually means sz==8. 
*/ 20121 if (haveF2orF3(pfx)) goto decode_failure; 20122 vassert(sz == 2 || sz == 4); 20123 if (sz == 4) sz = 8; 20124 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20125 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 20126 assign(t2, getIReg64(R_RSP)); 20127 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 20128 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 20129 /* t1 is the flag word. Mask out everything except OSZACP and 20130 set the flags thunk to AMD64G_CC_OP_COPY. */ 20131 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20132 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20133 stmt( IRStmt_Put( OFFB_CC_DEP1, 20134 binop(Iop_And64, 20135 mkexpr(t1), 20136 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 20137 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 20138 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 20139 ) 20140 ) 20141 ); 20142 20143 /* Also need to set the D flag, which is held in bit 10 of t1. 20144 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. 
*/ 20145 stmt( IRStmt_Put( 20146 OFFB_DFLAG, 20147 IRExpr_ITE( 20148 unop(Iop_64to1, 20149 binop(Iop_And64, 20150 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 20151 mkU64(1))), 20152 mkU64(0xFFFFFFFFFFFFFFFFULL), 20153 mkU64(1))) 20154 ); 20155 20156 /* And set the ID flag */ 20157 stmt( IRStmt_Put( 20158 OFFB_IDFLAG, 20159 IRExpr_ITE( 20160 unop(Iop_64to1, 20161 binop(Iop_And64, 20162 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 20163 mkU64(1))), 20164 mkU64(1), 20165 mkU64(0))) 20166 ); 20167 20168 /* And set the AC flag too */ 20169 stmt( IRStmt_Put( 20170 OFFB_ACFLAG, 20171 IRExpr_ITE( 20172 unop(Iop_64to1, 20173 binop(Iop_And64, 20174 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 20175 mkU64(1))), 20176 mkU64(1), 20177 mkU64(0))) 20178 ); 20179 20180 DIP("popf%c\n", nameISize(sz)); 20181 return delta; 20182 20183 case 0x9E: /* SAHF */ 20184 codegen_SAHF(); 20185 DIP("sahf\n"); 20186 return delta; 20187 20188 case 0x9F: /* LAHF */ 20189 codegen_LAHF(); 20190 DIP("lahf\n"); 20191 return delta; 20192 20193 case 0xA0: /* MOV Ob,AL */ 20194 if (have66orF2orF3(pfx)) goto decode_failure; 20195 sz = 1; 20196 /* Fall through ... */ 20197 case 0xA1: /* MOV Ov,eAX */ 20198 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20199 goto decode_failure; 20200 d64 = getDisp64(delta); 20201 delta += 8; 20202 ty = szToITy(sz); 20203 addr = newTemp(Ity_I64); 20204 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20205 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 20206 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 20207 segRegTxt(pfx), d64, 20208 nameIRegRAX(sz)); 20209 return delta; 20210 20211 case 0xA2: /* MOV AL,Ob */ 20212 if (have66orF2orF3(pfx)) goto decode_failure; 20213 sz = 1; 20214 /* Fall through ... 
*/ 20215 case 0xA3: /* MOV eAX,Ov */ 20216 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20217 goto decode_failure; 20218 d64 = getDisp64(delta); 20219 delta += 8; 20220 ty = szToITy(sz); 20221 addr = newTemp(Ity_I64); 20222 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20223 storeLE( mkexpr(addr), getIRegRAX(sz) ); 20224 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 20225 segRegTxt(pfx), d64); 20226 return delta; 20227 20228 case 0xA4: 20229 case 0xA5: 20230 /* F3 A4: rep movsb */ 20231 if (haveF3(pfx) && !haveF2(pfx)) { 20232 if (opc == 0xA4) 20233 sz = 1; 20234 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz, 20235 guest_RIP_curr_instr, 20236 guest_RIP_bbstart+delta, "rep movs", pfx ); 20237 dres->whatNext = Dis_StopHere; 20238 return delta; 20239 } 20240 /* A4: movsb */ 20241 if (!haveF3(pfx) && !haveF2(pfx)) { 20242 if (opc == 0xA4) 20243 sz = 1; 20244 dis_string_op( dis_MOVS, sz, "movs", pfx ); 20245 return delta; 20246 } 20247 goto decode_failure; 20248 20249 case 0xA6: 20250 case 0xA7: 20251 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 20252 if (haveF3(pfx) && !haveF2(pfx)) { 20253 if (opc == 0xA6) 20254 sz = 1; 20255 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz, 20256 guest_RIP_curr_instr, 20257 guest_RIP_bbstart+delta, "repe cmps", pfx ); 20258 dres->whatNext = Dis_StopHere; 20259 return delta; 20260 } 20261 goto decode_failure; 20262 20263 case 0xAA: 20264 case 0xAB: 20265 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 20266 if (haveF3(pfx) && !haveF2(pfx)) { 20267 if (opc == 0xAA) 20268 sz = 1; 20269 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz, 20270 guest_RIP_curr_instr, 20271 guest_RIP_bbstart+delta, "rep stos", pfx ); 20272 vassert(dres->whatNext == Dis_StopHere); 20273 return delta; 20274 } 20275 /* AA/AB: stosb/stos{w,l,q} */ 20276 if (!haveF3(pfx) && !haveF2(pfx)) { 20277 if (opc == 0xAA) 20278 sz = 1; 20279 dis_string_op( dis_STOS, sz, "stos", pfx ); 20280 return delta; 20281 } 20282 goto decode_failure; 20283 20284 case 
0xA8: /* TEST Ib, AL */ 20285 if (haveF2orF3(pfx)) goto decode_failure; 20286 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 20287 return delta; 20288 case 0xA9: /* TEST Iv, eAX */ 20289 if (haveF2orF3(pfx)) goto decode_failure; 20290 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 20291 return delta; 20292 20293 case 0xAC: /* LODS, no REP prefix */ 20294 case 0xAD: 20295 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx ); 20296 return delta; 20297 20298 case 0xAE: 20299 case 0xAF: 20300 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 20301 if (haveF2(pfx) && !haveF3(pfx)) { 20302 if (opc == 0xAE) 20303 sz = 1; 20304 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz, 20305 guest_RIP_curr_instr, 20306 guest_RIP_bbstart+delta, "repne scas", pfx ); 20307 vassert(dres->whatNext == Dis_StopHere); 20308 return delta; 20309 } 20310 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 20311 if (!haveF2(pfx) && haveF3(pfx)) { 20312 if (opc == 0xAE) 20313 sz = 1; 20314 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz, 20315 guest_RIP_curr_instr, 20316 guest_RIP_bbstart+delta, "repe scas", pfx ); 20317 vassert(dres->whatNext == Dis_StopHere); 20318 return delta; 20319 } 20320 /* AE/AF: scasb/scas{w,l,q} */ 20321 if (!haveF2(pfx) && !haveF3(pfx)) { 20322 if (opc == 0xAE) 20323 sz = 1; 20324 dis_string_op( dis_SCAS, sz, "scas", pfx ); 20325 return delta; 20326 } 20327 goto decode_failure; 20328 20329 /* XXXX be careful here with moves to AH/BH/CH/DH */ 20330 case 0xB0: /* MOV imm,AL */ 20331 case 0xB1: /* MOV imm,CL */ 20332 case 0xB2: /* MOV imm,DL */ 20333 case 0xB3: /* MOV imm,BL */ 20334 case 0xB4: /* MOV imm,AH */ 20335 case 0xB5: /* MOV imm,CH */ 20336 case 0xB6: /* MOV imm,DH */ 20337 case 0xB7: /* MOV imm,BH */ 20338 if (haveF2orF3(pfx)) goto decode_failure; 20339 d64 = getUChar(delta); 20340 delta += 1; 20341 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 20342 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 20343 return delta; 
20344 20345 case 0xB8: /* MOV imm,eAX */ 20346 case 0xB9: /* MOV imm,eCX */ 20347 case 0xBA: /* MOV imm,eDX */ 20348 case 0xBB: /* MOV imm,eBX */ 20349 case 0xBC: /* MOV imm,eSP */ 20350 case 0xBD: /* MOV imm,eBP */ 20351 case 0xBE: /* MOV imm,eSI */ 20352 case 0xBF: /* MOV imm,eDI */ 20353 /* This is the one-and-only place where 64-bit literals are 20354 allowed in the instruction stream. */ 20355 if (haveF2orF3(pfx)) goto decode_failure; 20356 if (sz == 8) { 20357 d64 = getDisp64(delta); 20358 delta += 8; 20359 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 20360 DIP("movabsq $%lld,%s\n", (Long)d64, 20361 nameIRegRexB(8,pfx,opc-0xB8)); 20362 } else { 20363 d64 = getSDisp(imin(4,sz),delta); 20364 delta += imin(4,sz); 20365 putIRegRexB(sz, pfx, opc-0xB8, 20366 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20367 DIP("mov%c $%lld,%s\n", nameISize(sz), 20368 (Long)d64, 20369 nameIRegRexB(sz,pfx,opc-0xB8)); 20370 } 20371 return delta; 20372 20373 case 0xC0: { /* Grp2 Ib,Eb */ 20374 Bool decode_OK = True; 20375 if (haveF2orF3(pfx)) goto decode_failure; 20376 modrm = getUChar(delta); 20377 am_sz = lengthAMode(pfx,delta); 20378 d_sz = 1; 20379 d64 = getUChar(delta + am_sz); 20380 sz = 1; 20381 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20382 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20383 if (!decode_OK) goto decode_failure; 20384 return delta; 20385 } 20386 20387 case 0xC1: { /* Grp2 Ib,Ev */ 20388 Bool decode_OK = True; 20389 if (haveF2orF3(pfx)) goto decode_failure; 20390 modrm = getUChar(delta); 20391 am_sz = lengthAMode(pfx,delta); 20392 d_sz = 1; 20393 d64 = getUChar(delta + am_sz); 20394 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20395 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20396 if (!decode_OK) goto decode_failure; 20397 return delta; 20398 } 20399 20400 case 0xC2: /* RET imm16 */ 20401 if (have66orF3(pfx)) goto decode_failure; 20402 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ 20403 d64 = getUDisp16(delta); 20404 delta += 2; 20405 dis_ret(dres, vbi, d64); 20406 DIP("ret $%lld\n", d64); 20407 return delta; 20408 20409 case 0xC3: /* RET */ 20410 if (have66(pfx)) goto decode_failure; 20411 /* F3 is acceptable on AMD. */ 20412 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20413 dis_ret(dres, vbi, 0); 20414 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 20415 return delta; 20416 20417 case 0xC6: /* C6 /0 = MOV Ib,Eb */ 20418 sz = 1; 20419 goto maybe_do_Mov_I_E; 20420 case 0xC7: /* C7 /0 = MOV Iv,Ev */ 20421 goto maybe_do_Mov_I_E; 20422 maybe_do_Mov_I_E: 20423 modrm = getUChar(delta); 20424 if (gregLO3ofRM(modrm) == 0) { 20425 if (epartIsReg(modrm)) { 20426 /* Neither F2 nor F3 are allowable. */ 20427 if (haveF2orF3(pfx)) goto decode_failure; 20428 delta++; /* mod/rm byte */ 20429 d64 = getSDisp(imin(4,sz),delta); 20430 delta += imin(4,sz); 20431 putIRegE(sz, pfx, modrm, 20432 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20433 DIP("mov%c $%lld, %s\n", nameISize(sz), 20434 (Long)d64, 20435 nameIRegE(sz,pfx,modrm)); 20436 } else { 20437 if (haveF2(pfx)) goto decode_failure; 20438 /* F3(XRELEASE) is allowable here */ 20439 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 20440 /*xtra*/imin(4,sz) ); 20441 delta += alen; 20442 d64 = getSDisp(imin(4,sz),delta); 20443 delta += imin(4,sz); 20444 storeLE(mkexpr(addr), 20445 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20446 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 20447 } 20448 return delta; 20449 } 20450 /* BEGIN HACKY SUPPORT FOR xbegin */ 20451 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4 20452 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20453 delta++; /* mod/rm byte */ 20454 d64 = getSDisp(4,delta); 20455 delta += 4; 20456 guest_RIP_next_mustcheck = True; 20457 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 20458 Addr64 failAddr = guest_RIP_bbstart + delta + d64; 20459 /* EAX contains the failure status code. 
Bit 3 is "Set if an 20460 internal buffer overflowed", which seems like the 20461 least-bogus choice we can make here. */ 20462 putIRegRAX(4, mkU32(1<<3)); 20463 /* And jump to the fail address. */ 20464 jmp_lit(dres, Ijk_Boring, failAddr); 20465 vassert(dres->whatNext == Dis_StopHere); 20466 DIP("xbeginq 0x%llx\n", failAddr); 20467 return delta; 20468 } 20469 /* END HACKY SUPPORT FOR xbegin */ 20470 /* BEGIN HACKY SUPPORT FOR xabort */ 20471 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1 20472 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20473 delta++; /* mod/rm byte */ 20474 abyte = getUChar(delta); delta++; 20475 /* There is never a real transaction in progress, so do nothing. */ 20476 DIP("xabort $%d", (Int)abyte); 20477 return delta; 20478 } 20479 /* END HACKY SUPPORT FOR xabort */ 20480 goto decode_failure; 20481 20482 case 0xC8: /* ENTER */ 20483 /* Same comments re operand size as for LEAVE below apply. 20484 Also, only handles the case "enter $imm16, $0"; other cases 20485 for the second operand (nesting depth) are not handled. */ 20486 if (sz != 4) 20487 goto decode_failure; 20488 d64 = getUDisp16(delta); 20489 delta += 2; 20490 vassert(d64 >= 0 && d64 <= 0xFFFF); 20491 if (getUChar(delta) != 0) 20492 goto decode_failure; 20493 delta++; 20494 /* Intel docs seem to suggest: 20495 push rbp 20496 temp = rsp 20497 rbp = temp 20498 rsp = rsp - imm16 20499 */ 20500 t1 = newTemp(Ity_I64); 20501 assign(t1, getIReg64(R_RBP)); 20502 t2 = newTemp(Ity_I64); 20503 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 20504 putIReg64(R_RSP, mkexpr(t2)); 20505 storeLE(mkexpr(t2), mkexpr(t1)); 20506 putIReg64(R_RBP, mkexpr(t2)); 20507 if (d64 > 0) { 20508 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64))); 20509 } 20510 DIP("enter $%u, $0\n", (UInt)d64); 20511 return delta; 20512 20513 case 0xC9: /* LEAVE */ 20514 /* In 64-bit mode this defaults to a 64-bit operand size. There 20515 is no way to encode a 32-bit variant. 
Hence sz==4 but we do 20516 it as if sz=8. */ 20517 if (sz != 4) 20518 goto decode_failure; 20519 t1 = newTemp(Ity_I64); 20520 t2 = newTemp(Ity_I64); 20521 assign(t1, getIReg64(R_RBP)); 20522 /* First PUT RSP looks redundant, but need it because RSP must 20523 always be up-to-date for Memcheck to work... */ 20524 putIReg64(R_RSP, mkexpr(t1)); 20525 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 20526 putIReg64(R_RBP, mkexpr(t2)); 20527 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 20528 DIP("leave\n"); 20529 return delta; 20530 20531 case 0xCC: /* INT 3 */ 20532 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta); 20533 vassert(dres->whatNext == Dis_StopHere); 20534 DIP("int $0x3\n"); 20535 return delta; 20536 20537 case 0xD0: { /* Grp2 1,Eb */ 20538 Bool decode_OK = True; 20539 if (haveF2orF3(pfx)) goto decode_failure; 20540 modrm = getUChar(delta); 20541 am_sz = lengthAMode(pfx,delta); 20542 d_sz = 0; 20543 d64 = 1; 20544 sz = 1; 20545 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20546 mkU8(d64), NULL, &decode_OK ); 20547 if (!decode_OK) goto decode_failure; 20548 return delta; 20549 } 20550 20551 case 0xD1: { /* Grp2 1,Ev */ 20552 Bool decode_OK = True; 20553 if (haveF2orF3(pfx)) goto decode_failure; 20554 modrm = getUChar(delta); 20555 am_sz = lengthAMode(pfx,delta); 20556 d_sz = 0; 20557 d64 = 1; 20558 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20559 mkU8(d64), NULL, &decode_OK ); 20560 if (!decode_OK) goto decode_failure; 20561 return delta; 20562 } 20563 20564 case 0xD2: { /* Grp2 CL,Eb */ 20565 Bool decode_OK = True; 20566 if (haveF2orF3(pfx)) goto decode_failure; 20567 modrm = getUChar(delta); 20568 am_sz = lengthAMode(pfx,delta); 20569 d_sz = 0; 20570 sz = 1; 20571 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20572 getIRegCL(), "%cl", &decode_OK ); 20573 if (!decode_OK) goto decode_failure; 20574 return delta; 20575 } 20576 20577 case 0xD3: { /* Grp2 CL,Ev */ 20578 Bool decode_OK = True; 20579 if 
(haveF2orF3(pfx)) goto decode_failure; 20580 modrm = getUChar(delta); 20581 am_sz = lengthAMode(pfx,delta); 20582 d_sz = 0; 20583 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20584 getIRegCL(), "%cl", &decode_OK ); 20585 if (!decode_OK) goto decode_failure; 20586 return delta; 20587 } 20588 20589 case 0xD8: /* X87 instructions */ 20590 case 0xD9: 20591 case 0xDA: 20592 case 0xDB: 20593 case 0xDC: 20594 case 0xDD: 20595 case 0xDE: 20596 case 0xDF: { 20597 Bool redundantREXWok = False; 20598 20599 if (haveF2orF3(pfx)) 20600 goto decode_failure; 20601 20602 /* kludge to tolerate redundant rex.w prefixes (should do this 20603 properly one day) */ 20604 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 20605 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 20606 redundantREXWok = True; 20607 20608 Bool size_OK = False; 20609 if ( sz == 4 ) 20610 size_OK = True; 20611 else if ( sz == 8 ) 20612 size_OK = redundantREXWok; 20613 else if ( sz == 2 ) { 20614 int mod_rm = getUChar(delta+0); 20615 int reg = gregLO3ofRM(mod_rm); 20616 /* The HotSpot JVM uses these */ 20617 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ || 20618 reg == 4 /* FNSAVE */ || 20619 reg == 6 /* FRSTOR */ ) ) 20620 size_OK = True; 20621 } 20622 /* AMD manual says 0x66 size override is ignored, except where 20623 it is meaningful */ 20624 if (!size_OK) 20625 goto decode_failure; 20626 20627 Bool decode_OK = False; 20628 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 20629 if (!decode_OK) 20630 goto decode_failure; 20631 20632 return delta; 20633 } 20634 20635 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 20636 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 20637 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 20638 { /* The docs say this uses rCX as a count depending on the 20639 address size override, not the operand one. 
*/ 20640 IRExpr* zbit = NULL; 20641 IRExpr* count = NULL; 20642 IRExpr* cond = NULL; 20643 const HChar* xtra = NULL; 20644 20645 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 20646 /* So at this point we've rejected any variants which appear to 20647 be governed by the usual operand-size modifiers. Hence only 20648 the address size prefix can have an effect. It changes the 20649 size from 64 (default) to 32. */ 20650 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 20651 delta++; 20652 if (haveASO(pfx)) { 20653 /* 64to32 of 64-bit get is merely a get-put improvement 20654 trick. */ 20655 putIReg32(R_RCX, binop(Iop_Sub32, 20656 unop(Iop_64to32, getIReg64(R_RCX)), 20657 mkU32(1))); 20658 } else { 20659 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 20660 } 20661 20662 /* This is correct, both for 32- and 64-bit versions. If we're 20663 doing a 32-bit dec and the result is zero then the default 20664 zero extension rule will cause the upper 32 bits to be zero 20665 too. Hence a 64-bit check against zero is OK. */ 20666 count = getIReg64(R_RCX); 20667 cond = binop(Iop_CmpNE64, count, mkU64(0)); 20668 switch (opc) { 20669 case 0xE2: 20670 xtra = ""; 20671 break; 20672 case 0xE1: 20673 xtra = "e"; 20674 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 20675 cond = mkAnd1(cond, zbit); 20676 break; 20677 case 0xE0: 20678 xtra = "ne"; 20679 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 20680 cond = mkAnd1(cond, zbit); 20681 break; 20682 default: 20683 vassert(0); 20684 } 20685 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) ); 20686 20687 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64); 20688 return delta; 20689 } 20690 20691 case 0xE3: 20692 /* JRCXZ or JECXZ, depending address size override. 
*/ 20693 if (have66orF2orF3(pfx)) goto decode_failure; 20694 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 20695 delta++; 20696 if (haveASO(pfx)) { 20697 /* 32-bit */ 20698 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 20699 unop(Iop_32Uto64, getIReg32(R_RCX)), 20700 mkU64(0)), 20701 Ijk_Boring, 20702 IRConst_U64(d64), 20703 OFFB_RIP 20704 )); 20705 DIP("jecxz 0x%llx\n", d64); 20706 } else { 20707 /* 64-bit */ 20708 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 20709 getIReg64(R_RCX), 20710 mkU64(0)), 20711 Ijk_Boring, 20712 IRConst_U64(d64), 20713 OFFB_RIP 20714 )); 20715 DIP("jrcxz 0x%llx\n", d64); 20716 } 20717 return delta; 20718 20719 case 0xE4: /* IN imm8, AL */ 20720 sz = 1; 20721 t1 = newTemp(Ity_I64); 20722 abyte = getUChar(delta); delta++; 20723 assign(t1, mkU64( abyte & 0xFF )); 20724 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 20725 goto do_IN; 20726 case 0xE5: /* IN imm8, eAX */ 20727 if (!(sz == 2 || sz == 4)) goto decode_failure; 20728 t1 = newTemp(Ity_I64); 20729 abyte = getUChar(delta); delta++; 20730 assign(t1, mkU64( abyte & 0xFF )); 20731 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 20732 goto do_IN; 20733 case 0xEC: /* IN %DX, AL */ 20734 sz = 1; 20735 t1 = newTemp(Ity_I64); 20736 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 20737 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 20738 nameIRegRAX(sz)); 20739 goto do_IN; 20740 case 0xED: /* IN %DX, eAX */ 20741 if (!(sz == 2 || sz == 4)) goto decode_failure; 20742 t1 = newTemp(Ity_I64); 20743 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 20744 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 20745 nameIRegRAX(sz)); 20746 goto do_IN; 20747 do_IN: { 20748 /* At this point, sz indicates the width, and t1 is a 64-bit 20749 value giving port number. 
*/ 20750 IRDirty* d; 20751 if (haveF2orF3(pfx)) goto decode_failure; 20752 vassert(sz == 1 || sz == 2 || sz == 4); 20753 ty = szToITy(sz); 20754 t2 = newTemp(Ity_I64); 20755 d = unsafeIRDirty_1_N( 20756 t2, 20757 0/*regparms*/, 20758 "amd64g_dirtyhelper_IN", 20759 &amd64g_dirtyhelper_IN, 20760 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 20761 ); 20762 /* do the call, dumping the result in t2. */ 20763 stmt( IRStmt_Dirty(d) ); 20764 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 20765 return delta; 20766 } 20767 20768 case 0xE6: /* OUT AL, imm8 */ 20769 sz = 1; 20770 t1 = newTemp(Ity_I64); 20771 abyte = getUChar(delta); delta++; 20772 assign( t1, mkU64( abyte & 0xFF ) ); 20773 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 20774 goto do_OUT; 20775 case 0xE7: /* OUT eAX, imm8 */ 20776 if (!(sz == 2 || sz == 4)) goto decode_failure; 20777 t1 = newTemp(Ity_I64); 20778 abyte = getUChar(delta); delta++; 20779 assign( t1, mkU64( abyte & 0xFF ) ); 20780 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 20781 goto do_OUT; 20782 case 0xEE: /* OUT AL, %DX */ 20783 sz = 1; 20784 t1 = newTemp(Ity_I64); 20785 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 20786 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 20787 nameIRegRDX(2)); 20788 goto do_OUT; 20789 case 0xEF: /* OUT eAX, %DX */ 20790 if (!(sz == 2 || sz == 4)) goto decode_failure; 20791 t1 = newTemp(Ity_I64); 20792 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 20793 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 20794 nameIRegRDX(2)); 20795 goto do_OUT; 20796 do_OUT: { 20797 /* At this point, sz indicates the width, and t1 is a 64-bit 20798 value giving port number. 
*/ 20799 IRDirty* d; 20800 if (haveF2orF3(pfx)) goto decode_failure; 20801 vassert(sz == 1 || sz == 2 || sz == 4); 20802 ty = szToITy(sz); 20803 d = unsafeIRDirty_0_N( 20804 0/*regparms*/, 20805 "amd64g_dirtyhelper_OUT", 20806 &amd64g_dirtyhelper_OUT, 20807 mkIRExprVec_3( mkexpr(t1), 20808 widenUto64( getIRegRAX(sz) ), 20809 mkU64(sz) ) 20810 ); 20811 stmt( IRStmt_Dirty(d) ); 20812 return delta; 20813 } 20814 20815 case 0xE8: /* CALL J4 */ 20816 if (haveF3(pfx)) goto decode_failure; 20817 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20818 d64 = getSDisp32(delta); delta += 4; 20819 d64 += (guest_RIP_bbstart+delta); 20820 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 20821 t1 = newTemp(Ity_I64); 20822 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 20823 putIReg64(R_RSP, mkexpr(t1)); 20824 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 20825 t2 = newTemp(Ity_I64); 20826 assign(t2, mkU64((Addr64)d64)); 20827 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 20828 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 20829 /* follow into the call target. */ 20830 dres->whatNext = Dis_ResteerU; 20831 dres->continueAt = d64; 20832 } else { 20833 jmp_lit(dres, Ijk_Call, d64); 20834 vassert(dres->whatNext == Dis_StopHere); 20835 } 20836 DIP("call 0x%llx\n",d64); 20837 return delta; 20838 20839 case 0xE9: /* Jv (jump, 16/32 offset) */ 20840 if (haveF3(pfx)) goto decode_failure; 20841 if (sz != 4) 20842 goto decode_failure; /* JRS added 2004 July 11 */ 20843 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ 20844 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); 20845 delta += sz; 20846 if (resteerOkFn(callback_opaque, (Addr64)d64)) { 20847 dres->whatNext = Dis_ResteerU; 20848 dres->continueAt = d64; 20849 } else { 20850 jmp_lit(dres, Ijk_Boring, d64); 20851 vassert(dres->whatNext == Dis_StopHere); 20852 } 20853 DIP("jmp 0x%llx\n", d64); 20854 return delta; 20855 20856 case 0xEB: /* Jb (jump, byte offset) */ 20857 if (haveF3(pfx)) goto decode_failure; 20858 if (sz != 4) 20859 goto decode_failure; /* JRS added 2004 July 11 */ 20860 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20861 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 20862 delta++; 20863 if (resteerOkFn(callback_opaque, (Addr64)d64)) { 20864 dres->whatNext = Dis_ResteerU; 20865 dres->continueAt = d64; 20866 } else { 20867 jmp_lit(dres, Ijk_Boring, d64); 20868 vassert(dres->whatNext == Dis_StopHere); 20869 } 20870 DIP("jmp-8 0x%llx\n", d64); 20871 return delta; 20872 20873 case 0xF5: /* CMC */ 20874 case 0xF8: /* CLC */ 20875 case 0xF9: /* STC */ 20876 t1 = newTemp(Ity_I64); 20877 t2 = newTemp(Ity_I64); 20878 assign( t1, mk_amd64g_calculate_rflags_all() ); 20879 switch (opc) { 20880 case 0xF5: 20881 assign( t2, binop(Iop_Xor64, mkexpr(t1), 20882 mkU64(AMD64G_CC_MASK_C))); 20883 DIP("cmc\n"); 20884 break; 20885 case 0xF8: 20886 assign( t2, binop(Iop_And64, mkexpr(t1), 20887 mkU64(~AMD64G_CC_MASK_C))); 20888 DIP("clc\n"); 20889 break; 20890 case 0xF9: 20891 assign( t2, binop(Iop_Or64, mkexpr(t1), 20892 mkU64(AMD64G_CC_MASK_C))); 20893 DIP("stc\n"); 20894 break; 20895 default: 20896 vpanic("disInstr(x64)(cmc/clc/stc)"); 20897 } 20898 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20899 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20900 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) )); 20901 /* Set NDEP even though it isn't used. This makes redundant-PUT 20902 elimination of previous stores to this field work better. 
*/ 20903 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 20904 return delta; 20905 20906 case 0xF6: { /* Grp3 Eb */ 20907 Bool decode_OK = True; 20908 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20909 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 20910 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 20911 if (!decode_OK) goto decode_failure; 20912 return delta; 20913 } 20914 20915 case 0xF7: { /* Grp3 Ev */ 20916 Bool decode_OK = True; 20917 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20918 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 20919 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 20920 if (!decode_OK) goto decode_failure; 20921 return delta; 20922 } 20923 20924 case 0xFC: /* CLD */ 20925 if (haveF2orF3(pfx)) goto decode_failure; 20926 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); 20927 DIP("cld\n"); 20928 return delta; 20929 20930 case 0xFD: /* STD */ 20931 if (haveF2orF3(pfx)) goto decode_failure; 20932 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); 20933 DIP("std\n"); 20934 return delta; 20935 20936 case 0xFE: { /* Grp4 Eb */ 20937 Bool decode_OK = True; 20938 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20939 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */ 20940 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 20941 if (!decode_OK) goto decode_failure; 20942 return delta; 20943 } 20944 20945 case 0xFF: { /* Grp5 Ev */ 20946 Bool decode_OK = True; 20947 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20948 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */ 20949 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK ); 20950 if (!decode_OK) goto decode_failure; 20951 return delta; 20952 } 20953 20954 default: 20955 break; 20956 20957 } 20958 20959 decode_failure: 20960 return deltaIN; /* fail */ 20961 } 20962 20963 20964 /*------------------------------------------------------------*/ 20965 /*--- ---*/ 20966 /*--- Top-level post-escape 
decoders: dis_ESC_0F ---*/ 20967 /*--- ---*/ 20968 /*------------------------------------------------------------*/ 20969 20970 static IRTemp math_BSWAP ( IRTemp t1, IRType ty ) 20971 { 20972 IRTemp t2 = newTemp(ty); 20973 if (ty == Ity_I64) { 20974 IRTemp m8 = newTemp(Ity_I64); 20975 IRTemp s8 = newTemp(Ity_I64); 20976 IRTemp m16 = newTemp(Ity_I64); 20977 IRTemp s16 = newTemp(Ity_I64); 20978 IRTemp m32 = newTemp(Ity_I64); 20979 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); 20980 assign( s8, 20981 binop(Iop_Or64, 20982 binop(Iop_Shr64, 20983 binop(Iop_And64,mkexpr(t1),mkexpr(m8)), 20984 mkU8(8)), 20985 binop(Iop_And64, 20986 binop(Iop_Shl64,mkexpr(t1),mkU8(8)), 20987 mkexpr(m8)) 20988 ) 20989 ); 20990 20991 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); 20992 assign( s16, 20993 binop(Iop_Or64, 20994 binop(Iop_Shr64, 20995 binop(Iop_And64,mkexpr(s8),mkexpr(m16)), 20996 mkU8(16)), 20997 binop(Iop_And64, 20998 binop(Iop_Shl64,mkexpr(s8),mkU8(16)), 20999 mkexpr(m16)) 21000 ) 21001 ); 21002 21003 assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); 21004 assign( t2, 21005 binop(Iop_Or64, 21006 binop(Iop_Shr64, 21007 binop(Iop_And64,mkexpr(s16),mkexpr(m32)), 21008 mkU8(32)), 21009 binop(Iop_And64, 21010 binop(Iop_Shl64,mkexpr(s16),mkU8(32)), 21011 mkexpr(m32)) 21012 ) 21013 ); 21014 return t2; 21015 } 21016 if (ty == Ity_I32) { 21017 assign( t2, 21018 binop( 21019 Iop_Or32, 21020 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 21021 binop( 21022 Iop_Or32, 21023 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 21024 mkU32(0x00FF0000)), 21025 binop(Iop_Or32, 21026 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 21027 mkU32(0x0000FF00)), 21028 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 21029 mkU32(0x000000FF) ) 21030 ))) 21031 ); 21032 return t2; 21033 } 21034 if (ty == Ity_I16) { 21035 assign(t2, 21036 binop(Iop_Or16, 21037 binop(Iop_Shl16, mkexpr(t1), mkU8(8)), 21038 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) )); 21039 return t2; 21040 } 21041 vassert(0); 21042 
/*NOTREACHED*/ 21043 return IRTemp_INVALID; 21044 } 21045 21046 21047 __attribute__((noinline)) 21048 static 21049 Long dis_ESC_0F ( 21050 /*MB_OUT*/DisResult* dres, 21051 /*MB_OUT*/Bool* expect_CAS, 21052 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 21053 Bool resteerCisOk, 21054 void* callback_opaque, 21055 const VexArchInfo* archinfo, 21056 const VexAbiInfo* vbi, 21057 Prefix pfx, Int sz, Long deltaIN 21058 ) 21059 { 21060 Long d64 = 0; 21061 IRTemp addr = IRTemp_INVALID; 21062 IRTemp t1 = IRTemp_INVALID; 21063 IRTemp t2 = IRTemp_INVALID; 21064 UChar modrm = 0; 21065 Int am_sz = 0; 21066 Int alen = 0; 21067 HChar dis_buf[50]; 21068 21069 /* In the first switch, look for ordinary integer insns. */ 21070 Long delta = deltaIN; 21071 UChar opc = getUChar(delta); 21072 delta++; 21073 switch (opc) { /* first switch */ 21074 21075 case 0x01: 21076 { 21077 modrm = getUChar(delta); 21078 /* 0F 01 /0 -- SGDT */ 21079 /* 0F 01 /1 -- SIDT */ 21080 if (!epartIsReg(modrm) 21081 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) { 21082 /* This is really revolting, but ... since each processor 21083 (core) only has one IDT and one GDT, just let the guest 21084 see it (pass-through semantics). I can't see any way to 21085 construct a faked-up value, so don't bother to try. 
*/ 21086 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21087 delta += alen; 21088 switch (gregLO3ofRM(modrm)) { 21089 case 0: DIP("sgdt %s\n", dis_buf); break; 21090 case 1: DIP("sidt %s\n", dis_buf); break; 21091 default: vassert(0); /*NOTREACHED*/ 21092 } 21093 IRDirty* d = unsafeIRDirty_0_N ( 21094 0/*regparms*/, 21095 "amd64g_dirtyhelper_SxDT", 21096 &amd64g_dirtyhelper_SxDT, 21097 mkIRExprVec_2( mkexpr(addr), 21098 mkU64(gregLO3ofRM(modrm)) ) 21099 ); 21100 /* declare we're writing memory */ 21101 d->mFx = Ifx_Write; 21102 d->mAddr = mkexpr(addr); 21103 d->mSize = 6; 21104 stmt( IRStmt_Dirty(d) ); 21105 return delta; 21106 } 21107 /* 0F 01 D0 = XGETBV */ 21108 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21109 delta += 1; 21110 DIP("xgetbv\n"); 21111 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I 21112 am not sure if that translates in to SEGV or to something 21113 else, in user space. */ 21114 t1 = newTemp(Ity_I32); 21115 assign( t1, getIReg32(R_RCX) ); 21116 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)), 21117 Ijk_SigSEGV, 21118 IRConst_U64(guest_RIP_curr_instr), 21119 OFFB_RIP 21120 )); 21121 putIRegRAX(4, mkU32(7)); 21122 putIRegRDX(4, mkU32(0)); 21123 return delta; 21124 } 21125 /* BEGIN HACKY SUPPORT FOR xend */ 21126 /* 0F 01 D5 = XEND */ 21127 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21128 /* We are never in an transaction (xbegin immediately aborts). 21129 So this just always generates a General Protection Fault. 
*/ 21130 delta += 1; 21131 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta); 21132 vassert(dres->whatNext == Dis_StopHere); 21133 DIP("xend\n"); 21134 return delta; 21135 } 21136 /* END HACKY SUPPORT FOR xend */ 21137 /* BEGIN HACKY SUPPORT FOR xtest */ 21138 /* 0F 01 D6 = XTEST */ 21139 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21140 /* Sets ZF because there never is a transaction, and all 21141 CF, OF, SF, PF and AF are always cleared by xtest. */ 21142 delta += 1; 21143 DIP("xtest\n"); 21144 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21145 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21146 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) )); 21147 /* Set NDEP even though it isn't used. This makes redundant-PUT 21148 elimination of previous stores to this field work better. */ 21149 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21150 return delta; 21151 } 21152 /* END HACKY SUPPORT FOR xtest */ 21153 /* 0F 01 F9 = RDTSCP */ 21154 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) { 21155 delta += 1; 21156 /* Uses dirty helper: 21157 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* ) 21158 declared to wr rax, rcx, rdx 21159 */ 21160 const HChar* fName = "amd64g_dirtyhelper_RDTSCP"; 21161 void* fAddr = &amd64g_dirtyhelper_RDTSCP; 21162 IRDirty* d 21163 = unsafeIRDirty_0_N ( 0/*regparms*/, 21164 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21165 /* declare guest state effects */ 21166 d->nFxState = 3; 21167 vex_bzero(&d->fxState, sizeof(d->fxState)); 21168 d->fxState[0].fx = Ifx_Write; 21169 d->fxState[0].offset = OFFB_RAX; 21170 d->fxState[0].size = 8; 21171 d->fxState[1].fx = Ifx_Write; 21172 d->fxState[1].offset = OFFB_RCX; 21173 d->fxState[1].size = 8; 21174 d->fxState[2].fx = Ifx_Write; 21175 d->fxState[2].offset = OFFB_RDX; 21176 d->fxState[2].size = 8; 21177 /* execute the dirty call, side-effecting guest state */ 21178 stmt( IRStmt_Dirty(d) ); 21179 /* RDTSCP is a serialising insn. 
So, just in case someone is 21180 using it as a memory fence ... */ 21181 stmt( IRStmt_MBE(Imbe_Fence) ); 21182 DIP("rdtscp\n"); 21183 return delta; 21184 } 21185 /* else decode failed */ 21186 break; 21187 } 21188 21189 case 0x05: /* SYSCALL */ 21190 guest_RIP_next_mustcheck = True; 21191 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 21192 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) ); 21193 /* It's important that all guest state is up-to-date 21194 at this point. So we declare an end-of-block here, which 21195 forces any cached guest state to be flushed. */ 21196 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed); 21197 vassert(dres->whatNext == Dis_StopHere); 21198 DIP("syscall\n"); 21199 return delta; 21200 21201 case 0x0B: /* UD2 */ 21202 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 21203 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr); 21204 vassert(dres->whatNext == Dis_StopHere); 21205 DIP("ud2\n"); 21206 return delta; 21207 21208 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ 21209 /* 0F 0D /1 -- prefetchw mem8 */ 21210 if (have66orF2orF3(pfx)) goto decode_failure; 21211 modrm = getUChar(delta); 21212 if (epartIsReg(modrm)) goto decode_failure; 21213 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 21214 goto decode_failure; 21215 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21216 delta += alen; 21217 switch (gregLO3ofRM(modrm)) { 21218 case 0: DIP("prefetch %s\n", dis_buf); break; 21219 case 1: DIP("prefetchw %s\n", dis_buf); break; 21220 default: vassert(0); /*NOTREACHED*/ 21221 } 21222 return delta; 21223 21224 case 0x1F: 21225 if (haveF2orF3(pfx)) goto decode_failure; 21226 modrm = getUChar(delta); 21227 if (epartIsReg(modrm)) goto decode_failure; 21228 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21229 delta += alen; 21230 DIP("nop%c %s\n", nameISize(sz), dis_buf); 21231 return delta; 21232 21233 case 0x31: { /* RDTSC */ 21234 IRTemp val = newTemp(Ity_I64); 21235 IRExpr** args = mkIRExprVec_0(); 
21236 IRDirty* d = unsafeIRDirty_1_N ( 21237 val, 21238 0/*regparms*/, 21239 "amd64g_dirtyhelper_RDTSC", 21240 &amd64g_dirtyhelper_RDTSC, 21241 args 21242 ); 21243 if (have66orF2orF3(pfx)) goto decode_failure; 21244 /* execute the dirty call, dumping the result in val. */ 21245 stmt( IRStmt_Dirty(d) ); 21246 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); 21247 putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); 21248 DIP("rdtsc\n"); 21249 return delta; 21250 } 21251 21252 case 0x40: 21253 case 0x41: 21254 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 21255 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 21256 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 21257 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 21258 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 21259 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 21260 case 0x48: /* CMOVSb (cmov negative) */ 21261 case 0x49: /* CMOVSb (cmov not negative) */ 21262 case 0x4A: /* CMOVP (cmov parity even) */ 21263 case 0x4B: /* CMOVNP (cmov parity odd) */ 21264 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 21265 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 21266 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 21267 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 21268 if (haveF2orF3(pfx)) goto decode_failure; 21269 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); 21270 return delta; 21271 21272 case 0x80: 21273 case 0x81: 21274 case 0x82: /* JBb/JNAEb (jump below) */ 21275 case 0x83: /* JNBb/JAEb (jump not below) */ 21276 case 0x84: /* JZb/JEb (jump zero) */ 21277 case 0x85: /* JNZb/JNEb (jump not zero) */ 21278 case 0x86: /* JBEb/JNAb (jump below or equal) */ 21279 case 0x87: /* JNBEb/JAb (jump not below or equal) */ 21280 case 0x88: /* JSb (jump negative) */ 21281 case 0x89: /* JSb (jump not negative) */ 21282 case 0x8A: /* JP (jump parity even) */ 21283 case 0x8B: /* JNP/JPO (jump parity odd) */ 21284 case 0x8C: /* JLb/JNGEb (jump less) */ 21285 case 0x8D: /* 
JGEb/JNLb (jump greater or equal) */ 21286 case 0x8E: /* JLEb/JNGb (jump less or equal) */ 21287 case 0x8F: { /* JGb/JNLEb (jump greater) */ 21288 Long jmpDelta; 21289 const HChar* comment = ""; 21290 if (haveF3(pfx)) goto decode_failure; 21291 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 21292 jmpDelta = getSDisp32(delta); 21293 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta; 21294 delta += 4; 21295 if (resteerCisOk 21296 && vex_control.guest_chase_cond 21297 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21298 && jmpDelta < 0 21299 && resteerOkFn( callback_opaque, (Addr64)d64) ) { 21300 /* Speculation: assume this backward branch is taken. So 21301 we need to emit a side-exit to the insn following this 21302 one, on the negation of the condition, and continue at 21303 the branch target address (d64). If we wind up back at 21304 the first instruction of the trace, just stop; it's 21305 better to let the IR loop unroller handle that case. */ 21306 stmt( IRStmt_Exit( 21307 mk_amd64g_calculate_condition( 21308 (AMD64Condcode)(1 ^ (opc - 0x80))), 21309 Ijk_Boring, 21310 IRConst_U64(guest_RIP_bbstart+delta), 21311 OFFB_RIP 21312 )); 21313 dres->whatNext = Dis_ResteerC; 21314 dres->continueAt = d64; 21315 comment = "(assumed taken)"; 21316 } 21317 else 21318 if (resteerCisOk 21319 && vex_control.guest_chase_cond 21320 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21321 && jmpDelta >= 0 21322 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 21323 /* Speculation: assume this forward branch is not taken. 21324 So we need to emit a side-exit to d64 (the dest) and 21325 continue disassembling at the insn immediately 21326 following this one. 
*/ 21327 stmt( IRStmt_Exit( 21328 mk_amd64g_calculate_condition((AMD64Condcode) 21329 (opc - 0x80)), 21330 Ijk_Boring, 21331 IRConst_U64(d64), 21332 OFFB_RIP 21333 )); 21334 dres->whatNext = Dis_ResteerC; 21335 dres->continueAt = guest_RIP_bbstart+delta; 21336 comment = "(assumed not taken)"; 21337 } 21338 else { 21339 /* Conservative default translation - end the block at 21340 this point. */ 21341 jcc_01( dres, (AMD64Condcode)(opc - 0x80), 21342 guest_RIP_bbstart+delta, d64 ); 21343 vassert(dres->whatNext == Dis_StopHere); 21344 } 21345 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment); 21346 return delta; 21347 } 21348 21349 case 0x90: 21350 case 0x91: 21351 case 0x92: /* set-Bb/set-NAEb (set if below) */ 21352 case 0x93: /* set-NBb/set-AEb (set if not below) */ 21353 case 0x94: /* set-Zb/set-Eb (set if zero) */ 21354 case 0x95: /* set-NZb/set-NEb (set if not zero) */ 21355 case 0x96: /* set-BEb/set-NAb (set if below or equal) */ 21356 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */ 21357 case 0x98: /* set-Sb (set if negative) */ 21358 case 0x99: /* set-Sb (set if not negative) */ 21359 case 0x9A: /* set-P (set if parity even) */ 21360 case 0x9B: /* set-NP (set if parity odd) */ 21361 case 0x9C: /* set-Lb/set-NGEb (set if less) */ 21362 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */ 21363 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */ 21364 case 0x9F: /* set-Gb/set-NLEb (set if greater) */ 21365 if (haveF2orF3(pfx)) goto decode_failure; 21366 t1 = newTemp(Ity_I8); 21367 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) ); 21368 modrm = getUChar(delta); 21369 if (epartIsReg(modrm)) { 21370 delta++; 21371 putIRegE(1, pfx, modrm, mkexpr(t1)); 21372 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), 21373 nameIRegE(1,pfx,modrm)); 21374 } else { 21375 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21376 delta += alen; 21377 storeLE( mkexpr(addr), mkexpr(t1) ); 21378 DIP("set%s %s\n", 
name_AMD64Condcode(opc-0x90), dis_buf); 21379 } 21380 return delta; 21381 21382 case 0x1A: 21383 case 0x1B: { /* Future MPX instructions, currently NOPs. 21384 BNDMK b, m F3 0F 1B 21385 BNDCL b, r/m F3 0F 1A 21386 BNDCU b, r/m F2 0F 1A 21387 BNDCN b, r/m F2 0F 1B 21388 BNDMOV b, b/m 66 0F 1A 21389 BNDMOV b/m, b 66 0F 1B 21390 BNDLDX b, mib 0F 1A 21391 BNDSTX mib, b 0F 1B */ 21392 21393 /* All instructions have two operands. One operand is always the 21394 bnd register number (bnd0-bnd3, other register numbers are 21395 ignored when MPX isn't enabled, but should generate an 21396 exception if MPX is enabled) given by gregOfRexRM. The other 21397 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded 21398 address, all of which can be decoded by using either 21399 eregOfRexRM or disAMode. */ 21400 21401 modrm = getUChar(delta); 21402 int bnd = gregOfRexRM(pfx,modrm); 21403 const HChar *oper; 21404 if (epartIsReg(modrm)) { 21405 oper = nameIReg64 (eregOfRexRM(pfx,modrm)); 21406 delta += 1; 21407 } else { 21408 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21409 delta += alen; 21410 oper = dis_buf; 21411 } 21412 21413 if (haveF3no66noF2 (pfx)) { 21414 if (opc == 0x1B) { 21415 DIP ("bndmk %s, %%bnd%d\n", oper, bnd); 21416 } else /* opc == 0x1A */ { 21417 DIP ("bndcl %s, %%bnd%d\n", oper, bnd); 21418 } 21419 } else if (haveF2no66noF3 (pfx)) { 21420 if (opc == 0x1A) { 21421 DIP ("bndcu %s, %%bnd%d\n", oper, bnd); 21422 } else /* opc == 0x1B */ { 21423 DIP ("bndcn %s, %%bnd%d\n", oper, bnd); 21424 } 21425 } else if (have66noF2noF3 (pfx)) { 21426 if (opc == 0x1A) { 21427 DIP ("bndmov %s, %%bnd%d\n", oper, bnd); 21428 } else /* opc == 0x1B */ { 21429 DIP ("bndmov %%bnd%d, %s\n", bnd, oper); 21430 } 21431 } else if (haveNo66noF2noF3 (pfx)) { 21432 if (opc == 0x1A) { 21433 DIP ("bndldx %s, %%bnd%d\n", oper, bnd); 21434 } else /* opc == 0x1B */ { 21435 DIP ("bndstx %%bnd%d, %s\n", bnd, oper); 21436 } 21437 } else goto decode_failure; 21438 21439 return delta; 21440 
} 21441 21442 case 0xA2: { /* CPUID */ 21443 /* Uses dirty helper: 21444 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) 21445 declared to mod rax, wr rbx, rcx, rdx 21446 */ 21447 IRDirty* d = NULL; 21448 const HChar* fName = NULL; 21449 void* fAddr = NULL; 21450 21451 /* JRS 2014-11-11: this a really horrible temp kludge to work 21452 around the fact that the Yosemite (OSX 10.10) 21453 /usr/lib/system/libdyld.dylib expects XSAVE/XRSTOR to be 21454 implemented, because amd64g_dirtyhelper_CPUID_avx_and_cx16 21455 claims they are supported, but so far they aren't. So cause 21456 it to fall back to a simpler CPU. The cleaner approach of 21457 setting CPUID(eax=1).OSXSAVE=0 and .XSAVE=0 isn't desirable 21458 since it will (per the official Intel guidelines) lead to 21459 software concluding that AVX isn't supported. 21460 21461 This is also a kludge in that putting these ifdefs here checks 21462 the build (host) architecture, when really we're checking the 21463 guest architecture. */ 21464 Bool this_is_yosemite = False; 21465 # if defined(VGP_amd64_darwin) && DARWIN_VERS == DARWIN_10_10 21466 this_is_yosemite = True; 21467 # endif 21468 21469 if (haveF2orF3(pfx)) goto decode_failure; 21470 /* This isn't entirely correct, CPUID should depend on the VEX 21471 capabilities, not on the underlying CPU. See bug #324882. 
*/ 21472 if (!this_is_yosemite && 21473 (archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21474 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) && 21475 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21476 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16"; 21477 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16; 21478 /* This is a Core-i5-2300-like machine */ 21479 } 21480 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21481 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) { 21482 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; 21483 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; 21484 /* This is a Core-i5-670-like machine */ 21485 } 21486 else { 21487 /* Give a CPUID for at least a baseline machine, SSE2 21488 only, and no CX16 */ 21489 fName = "amd64g_dirtyhelper_CPUID_baseline"; 21490 fAddr = &amd64g_dirtyhelper_CPUID_baseline; 21491 } 21492 21493 vassert(fName); vassert(fAddr); 21494 d = unsafeIRDirty_0_N ( 0/*regparms*/, 21495 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21496 /* declare guest state effects */ 21497 d->nFxState = 4; 21498 vex_bzero(&d->fxState, sizeof(d->fxState)); 21499 d->fxState[0].fx = Ifx_Modify; 21500 d->fxState[0].offset = OFFB_RAX; 21501 d->fxState[0].size = 8; 21502 d->fxState[1].fx = Ifx_Write; 21503 d->fxState[1].offset = OFFB_RBX; 21504 d->fxState[1].size = 8; 21505 d->fxState[2].fx = Ifx_Modify; 21506 d->fxState[2].offset = OFFB_RCX; 21507 d->fxState[2].size = 8; 21508 d->fxState[3].fx = Ifx_Write; 21509 d->fxState[3].offset = OFFB_RDX; 21510 d->fxState[3].size = 8; 21511 /* execute the dirty call, side-effecting guest state */ 21512 stmt( IRStmt_Dirty(d) ); 21513 /* CPUID is a serialising insn. So, just in case someone is 21514 using it as a memory fence ... */ 21515 stmt( IRStmt_MBE(Imbe_Fence) ); 21516 DIP("cpuid\n"); 21517 return delta; 21518 } 21519 21520 case 0xA3: { /* BT Gv,Ev */ 21521 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. 
*/ 21522 Bool ok = True; 21523 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21524 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok ); 21525 if (!ok) goto decode_failure; 21526 return delta; 21527 } 21528 21529 case 0xA4: /* SHLDv imm8,Gv,Ev */ 21530 modrm = getUChar(delta); 21531 d64 = delta + lengthAMode(pfx, delta); 21532 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 21533 delta = dis_SHLRD_Gv_Ev ( 21534 vbi, pfx, delta, modrm, sz, 21535 mkU8(getUChar(d64)), True, /* literal */ 21536 dis_buf, True /* left */ ); 21537 return delta; 21538 21539 case 0xA5: /* SHLDv %cl,Gv,Ev */ 21540 modrm = getUChar(delta); 21541 delta = dis_SHLRD_Gv_Ev ( 21542 vbi, pfx, delta, modrm, sz, 21543 getIRegCL(), False, /* not literal */ 21544 "%cl", True /* left */ ); 21545 return delta; 21546 21547 case 0xAB: { /* BTS Gv,Ev */ 21548 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21549 Bool ok = True; 21550 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21551 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok ); 21552 if (!ok) goto decode_failure; 21553 return delta; 21554 } 21555 21556 case 0xAC: /* SHRDv imm8,Gv,Ev */ 21557 modrm = getUChar(delta); 21558 d64 = delta + lengthAMode(pfx, delta); 21559 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 21560 delta = dis_SHLRD_Gv_Ev ( 21561 vbi, pfx, delta, modrm, sz, 21562 mkU8(getUChar(d64)), True, /* literal */ 21563 dis_buf, False /* right */ ); 21564 return delta; 21565 21566 case 0xAD: /* SHRDv %cl,Gv,Ev */ 21567 modrm = getUChar(delta); 21568 delta = dis_SHLRD_Gv_Ev ( 21569 vbi, pfx, delta, modrm, sz, 21570 getIRegCL(), False, /* not literal */ 21571 "%cl", False /* right */); 21572 return delta; 21573 21574 case 0xAF: /* IMUL Ev, Gv */ 21575 if (haveF2orF3(pfx)) goto decode_failure; 21576 delta = dis_mul_E_G ( vbi, pfx, sz, delta ); 21577 return delta; 21578 21579 case 0xB0: { /* CMPXCHG Gb,Eb */ 21580 Bool ok = True; 21581 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are 
allowable. */ 21582 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); 21583 if (!ok) goto decode_failure; 21584 return delta; 21585 } 21586 21587 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */ 21588 Bool ok = True; 21589 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */ 21590 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; 21591 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); 21592 if (!ok) goto decode_failure; 21593 return delta; 21594 } 21595 21596 case 0xB3: { /* BTR Gv,Ev */ 21597 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21598 Bool ok = True; 21599 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21600 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok ); 21601 if (!ok) goto decode_failure; 21602 return delta; 21603 } 21604 21605 case 0xB6: /* MOVZXb Eb,Gv */ 21606 if (haveF2orF3(pfx)) goto decode_failure; 21607 if (sz != 2 && sz != 4 && sz != 8) 21608 goto decode_failure; 21609 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False ); 21610 return delta; 21611 21612 case 0xB7: /* MOVZXw Ew,Gv */ 21613 if (haveF2orF3(pfx)) goto decode_failure; 21614 if (sz != 4 && sz != 8) 21615 goto decode_failure; 21616 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False ); 21617 return delta; 21618 21619 case 0xBA: { /* Grp8 Ib,Ev */ 21620 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */ 21621 Bool decode_OK = False; 21622 modrm = getUChar(delta); 21623 am_sz = lengthAMode(pfx,delta); 21624 d64 = getSDisp8(delta + am_sz); 21625 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 21626 &decode_OK ); 21627 if (!decode_OK) 21628 goto decode_failure; 21629 return delta; 21630 } 21631 21632 case 0xBB: { /* BTC Gv,Ev */ 21633 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. 
*/ 21634 Bool ok = False; 21635 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21636 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok ); 21637 if (!ok) goto decode_failure; 21638 return delta; 21639 } 21640 21641 case 0xBC: /* BSF Gv,Ev */ 21642 if (!haveF2orF3(pfx) 21643 || (haveF3noF2(pfx) 21644 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) { 21645 /* no-F2 no-F3 0F BC = BSF 21646 or F3 0F BC = REP; BSF on older CPUs. */ 21647 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 21648 return delta; 21649 } 21650 /* Fall through, since F3 0F BC is TZCNT, and needs to 21651 be handled by dis_ESC_0F__SSE4. */ 21652 break; 21653 21654 case 0xBD: /* BSR Gv,Ev */ 21655 if (!haveF2orF3(pfx) 21656 || (haveF3noF2(pfx) 21657 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) { 21658 /* no-F2 no-F3 0F BD = BSR 21659 or F3 0F BD = REP; BSR on older CPUs. */ 21660 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 21661 return delta; 21662 } 21663 /* Fall through, since F3 0F BD is LZCNT, and needs to 21664 be handled by dis_ESC_0F__SSE4. 
*/ 21665 break; 21666 21667 case 0xBE: /* MOVSXb Eb,Gv */ 21668 if (haveF2orF3(pfx)) goto decode_failure; 21669 if (sz != 2 && sz != 4 && sz != 8) 21670 goto decode_failure; 21671 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True ); 21672 return delta; 21673 21674 case 0xBF: /* MOVSXw Ew,Gv */ 21675 if (haveF2orF3(pfx)) goto decode_failure; 21676 if (sz != 4 && sz != 8) 21677 goto decode_failure; 21678 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True ); 21679 return delta; 21680 21681 case 0xC0: { /* XADD Gb,Eb */ 21682 Bool decode_OK = False; 21683 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta ); 21684 if (!decode_OK) 21685 goto decode_failure; 21686 return delta; 21687 } 21688 21689 case 0xC1: { /* XADD Gv,Ev */ 21690 Bool decode_OK = False; 21691 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); 21692 if (!decode_OK) 21693 goto decode_failure; 21694 return delta; 21695 } 21696 21697 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ 21698 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; 21699 IRTemp expdHi = newTemp(elemTy); 21700 IRTemp expdLo = newTemp(elemTy); 21701 IRTemp dataHi = newTemp(elemTy); 21702 IRTemp dataLo = newTemp(elemTy); 21703 IRTemp oldHi = newTemp(elemTy); 21704 IRTemp oldLo = newTemp(elemTy); 21705 IRTemp flags_old = newTemp(Ity_I64); 21706 IRTemp flags_new = newTemp(Ity_I64); 21707 IRTemp success = newTemp(Ity_I1); 21708 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; 21709 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; 21710 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; 21711 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0); 21712 IRTemp expdHi64 = newTemp(Ity_I64); 21713 IRTemp expdLo64 = newTemp(Ity_I64); 21714 21715 /* Translate this using a DCAS, even if there is no LOCK 21716 prefix. Life is too short to bother with generating two 21717 different translations for the with/without-LOCK-prefix 21718 cases. */ 21719 *expect_CAS = True; 21720 21721 /* Decode, and generate address. 
*/ 21722 if (have66(pfx)) goto decode_failure; 21723 if (sz != 4 && sz != 8) goto decode_failure; 21724 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 21725 goto decode_failure; 21726 modrm = getUChar(delta); 21727 if (epartIsReg(modrm)) goto decode_failure; 21728 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 21729 if (haveF2orF3(pfx)) { 21730 /* Since the e-part is memory only, F2 or F3 (one or the 21731 other) is acceptable if LOCK is also present. But only 21732 for cmpxchg8b. */ 21733 if (sz == 8) goto decode_failure; 21734 if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure; 21735 } 21736 21737 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21738 delta += alen; 21739 21740 /* cmpxchg16b requires an alignment check. */ 21741 if (sz == 8) 21742 gen_SEGV_if_not_16_aligned( addr ); 21743 21744 /* Get the expected and new values. */ 21745 assign( expdHi64, getIReg64(R_RDX) ); 21746 assign( expdLo64, getIReg64(R_RAX) ); 21747 21748 /* These are the correctly-sized expected and new values. 21749 However, we also get expdHi64/expdLo64 above as 64-bits 21750 regardless, because we will need them later in the 32-bit 21751 case (paradoxically). */ 21752 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) 21753 : mkexpr(expdHi64) ); 21754 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 21755 : mkexpr(expdLo64) ); 21756 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 21757 assign( dataLo, sz==4 ? 
getIReg32(R_RBX) : getIReg64(R_RBX) ); 21758 21759 /* Do the DCAS */ 21760 stmt( IRStmt_CAS( 21761 mkIRCAS( oldHi, oldLo, 21762 Iend_LE, mkexpr(addr), 21763 mkexpr(expdHi), mkexpr(expdLo), 21764 mkexpr(dataHi), mkexpr(dataLo) 21765 ))); 21766 21767 /* success when oldHi:oldLo == expdHi:expdLo */ 21768 assign( success, 21769 binop(opCasCmpEQ, 21770 binop(opOR, 21771 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 21772 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 21773 ), 21774 zero 21775 )); 21776 21777 /* If the DCAS is successful, that is to say oldHi:oldLo == 21778 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 21779 which is where they came from originally. Both the actual 21780 contents of these two regs, and any shadow values, are 21781 unchanged. If the DCAS fails then we're putting into 21782 RDX:RAX the value seen in memory. */ 21783 /* Now of course there's a complication in the 32-bit case 21784 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 21785 unchanged; but if we use the same scheme as in the 64-bit 21786 case, we get hit by the standard rule that a write to the 21787 bottom 32 bits of an integer register zeros the upper 32 21788 bits. And so the upper halves of RDX and RAX mysteriously 21789 become zero. So we have to stuff back in the original 21790 64-bit values which we previously stashed in 21791 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 21792 /* It's just _so_ much fun ... */ 21793 putIRegRDX( 8, 21794 IRExpr_ITE( mkexpr(success), 21795 mkexpr(expdHi64), 21796 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 21797 : mkexpr(oldHi) 21798 )); 21799 putIRegRAX( 8, 21800 IRExpr_ITE( mkexpr(success), 21801 mkexpr(expdLo64), 21802 sz == 4 ? 
unop(Iop_32Uto64, mkexpr(oldLo)) 21803 : mkexpr(oldLo) 21804 )); 21805 21806 /* Copy the success bit into the Z flag and leave the others 21807 unchanged */ 21808 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); 21809 assign( 21810 flags_new, 21811 binop(Iop_Or64, 21812 binop(Iop_And64, mkexpr(flags_old), 21813 mkU64(~AMD64G_CC_MASK_Z)), 21814 binop(Iop_Shl64, 21815 binop(Iop_And64, 21816 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), 21817 mkU8(AMD64G_CC_SHIFT_Z)) )); 21818 21819 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21820 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 21821 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21822 /* Set NDEP even though it isn't used. This makes 21823 redundant-PUT elimination of previous stores to this field 21824 work better. */ 21825 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21826 21827 /* Sheesh. Aren't you glad it was me and not you that had to 21828 write and validate all this grunge? */ 21829 21830 DIP("cmpxchg8b %s\n", dis_buf); 21831 return delta; 21832 } 21833 21834 case 0xC8: /* BSWAP %eax */ 21835 case 0xC9: 21836 case 0xCA: 21837 case 0xCB: 21838 case 0xCC: 21839 case 0xCD: 21840 case 0xCE: 21841 case 0xCF: /* BSWAP %edi */ 21842 if (haveF2orF3(pfx)) goto decode_failure; 21843 /* According to the AMD64 docs, this insn can have size 4 or 21844 8. 
*/ 21845 if (sz == 4) { 21846 t1 = newTemp(Ity_I32); 21847 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 21848 t2 = math_BSWAP( t1, Ity_I32 ); 21849 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 21850 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 21851 return delta; 21852 } 21853 if (sz == 8) { 21854 t1 = newTemp(Ity_I64); 21855 t2 = newTemp(Ity_I64); 21856 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 21857 t2 = math_BSWAP( t1, Ity_I64 ); 21858 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 21859 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 21860 return delta; 21861 } 21862 goto decode_failure; 21863 21864 default: 21865 break; 21866 21867 } /* first switch */ 21868 21869 21870 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */ 21871 /* In the second switch, pick off MMX insns. */ 21872 21873 if (!have66orF2orF3(pfx)) { 21874 /* So there's no SIMD prefix. */ 21875 21876 vassert(sz == 4 || sz == 8); 21877 21878 switch (opc) { /* second switch */ 21879 21880 case 0x71: 21881 case 0x72: 21882 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 21883 21884 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 21885 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 21886 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 21887 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 21888 21889 case 0xFC: 21890 case 0xFD: 21891 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 21892 21893 case 0xEC: 21894 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21895 21896 case 0xDC: 21897 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21898 21899 case 0xF8: 21900 case 0xF9: 21901 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 21902 21903 case 0xE8: 21904 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21905 21906 case 0xD8: 21907 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21908 21909 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 21910 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 21911 21912 case 
0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 21913 21914 case 0x74: 21915 case 0x75: 21916 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 21917 21918 case 0x64: 21919 case 0x65: 21920 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 21921 21922 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 21923 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 21924 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 21925 21926 case 0x68: 21927 case 0x69: 21928 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 21929 21930 case 0x60: 21931 case 0x61: 21932 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21933 21934 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 21935 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 21936 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 21937 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 21938 21939 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21940 case 0xF2: 21941 case 0xF3: 21942 21943 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21944 case 0xD2: 21945 case 0xD3: 21946 21947 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 21948 case 0xE2: { 21949 Bool decode_OK = False; 21950 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN ); 21951 if (decode_OK) 21952 return delta; 21953 goto decode_failure; 21954 } 21955 21956 default: 21957 break; 21958 } /* second switch */ 21959 21960 } 21961 21962 /* A couple of MMX corner cases */ 21963 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) { 21964 if (sz != 4) 21965 goto decode_failure; 21966 do_EMMS_preamble(); 21967 DIP("{f}emms\n"); 21968 return delta; 21969 } 21970 21971 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */ 21972 /* Perhaps it's an SSE or SSE2 instruction. We can try this 21973 without checking the guest hwcaps because SSE2 is a baseline 21974 facility in 64 bit mode. 
 */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE3 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE4 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE4 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

  decode_failure:
   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F38         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Decode an instruction in the 0F 38 escape space.  Directly handles
   only MOVBE; everything else is offered to the SupSSE3 and SSE4
   sub-decoders below.  Returns the updated delta on a successful
   decode, or deltaIN unchanged to signal failure. */
__attribute__((noinline))
static
Long dis_ESC_0F38 (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0xF0:   /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
   case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
      /* MOVBE is memory-only; a register E-part breaks out to the
         sub-decoders below. */
      if (!haveF2orF3(pfx) && !haveVEX(pfx)
          && (sz == 2 || sz == 4 || sz == 8)) {
         IRTemp addr  = IRTemp_INVALID;
         UChar  modrm = 0;
         Int    alen  = 0;
         HChar  dis_buf[50];
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) break;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         IRType ty  = szToITy(sz);
         IRTemp src = newTemp(ty);
         if (opc == 0xF0) { /* LOAD */
            assign(src, loadLE(ty, mkexpr(addr)));
            IRTemp dst = math_BSWAP(src, ty);
            putIRegG(sz, pfx, modrm, mkexpr(dst));
            DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
         } else { /* STORE */
            assign(src, getIRegG(sz, pfx, modrm));
            IRTemp dst = math_BSWAP(src, ty);
            storeLE(mkexpr(addr), mkexpr(dst));
            DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
         }
         return delta;
      }
      /* else fall through; maybe one of the decoders below knows what
         it is. */
      break;
   }

   default:
      break;

   }

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /*decode_failure:*/
   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F3A         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Decode an instruction in the 0F 3A escape space.  Handles nothing
   directly; everything is offered to the SupSSE3 and SSE4
   sub-decoders.  Returns deltaIN unchanged on decode failure. */
__attribute__((noinline))
static
Long dis_ESC_0F3A (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   default:
      break;

   }

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately.
 */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F__VEX      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Handle a VEX_NDS 128-bit (3-address) insn with any SIMD prefix,
   WIG: result = vvvv-register `op` E(reg-or-mem), written to G's XMM
   lane with the upper YMM lane zeroed.  Exactly one of 'op' and
   'opFn' must be supplied (the other Iop_INVALID/NULL).  Sets
   *uses_vvvv and returns the updated delta. */
/* FIXME: common up with the _256_ version below? */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        /* The actual operation.  Use either 'op' or 'opfn',
           but not both. */
        IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
        Bool invertLeftArg,
        Bool swapArgs
     )
{
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   UInt   rSL   = getVexNvvvv(pfx);
   IRTemp tSL   = newTemp(Ity_V128);
   IRTemp tSR   = newTemp(Ity_V128);
   IRTemp addr  = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen  = 0;
   vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);

   /* Left source is the vvvv register, optionally bitwise-inverted
      (used by e.g. ANDN-style operations). */
   assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
                             : getXMMReg(rSL));

   if (epartIsReg(modrm)) {
      UInt rSR = eregOfRexRM(pfx, modrm);
      delta += 1;
      assign(tSR, getXMMReg(rSR));
      DIP("%s %s,%s,%s\n",
          name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
      DIP("%s %s,%s,%s\n",
          name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
   }

   IRTemp res = IRTemp_INVALID;
   if (op != Iop_INVALID) {
      vassert(opFn == NULL);
      res = newTemp(Ity_V128);
      if (requiresRMode(op)) {
         IRTemp rm = newTemp(Ity_I32);
         assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
         assign(res, swapArgs
                        ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
                        : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
      } else {
         assign(res, swapArgs
                        ? binop(op, mkexpr(tSR), mkexpr(tSL))
                        : binop(op, mkexpr(tSL), mkexpr(tSR)));
      }
   } else {
      vassert(opFn != NULL);
      res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
   }

   putYMMRegLoAndZU(rD, mkexpr(res));

   *uses_vvvv = True;
   return delta;
}


/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
   for the operation, no inversion of the left arg, and no swapping of
   args. */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IROp op
     )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
}


/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
   generator to compute the result, no inversion of the left
   arg, and no swapping of args.
*/ 22235 static 22236 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex ( 22237 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi, 22238 Prefix pfx, Long delta, const HChar* name, 22239 IRTemp(*opFn)(IRTemp,IRTemp) 22240 ) 22241 { 22242 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22243 uses_vvvv, vbi, pfx, delta, name, 22244 Iop_INVALID, opFn, False, False ); 22245 } 22246 22247 22248 /* Vector by scalar shift of V by the amount specified at the bottom 22249 of E. */ 22250 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi, 22251 Prefix pfx, Long delta, 22252 const HChar* opname, IROp op ) 22253 { 22254 HChar dis_buf[50]; 22255 Int alen, size; 22256 IRTemp addr; 22257 Bool shl, shr, sar; 22258 UChar modrm = getUChar(delta); 22259 UInt rG = gregOfRexRM(pfx,modrm); 22260 UInt rV = getVexNvvvv(pfx);; 22261 IRTemp g0 = newTemp(Ity_V128); 22262 IRTemp g1 = newTemp(Ity_V128); 22263 IRTemp amt = newTemp(Ity_I64); 22264 IRTemp amt8 = newTemp(Ity_I8); 22265 if (epartIsReg(modrm)) { 22266 UInt rE = eregOfRexRM(pfx,modrm); 22267 assign( amt, getXMMRegLane64(rE, 0) ); 22268 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22269 nameXMMReg(rV), nameXMMReg(rG) ); 22270 delta++; 22271 } else { 22272 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22273 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22274 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 22275 delta += alen; 22276 } 22277 assign( g0, getXMMReg(rV) ); 22278 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22279 22280 shl = shr = sar = False; 22281 size = 0; 22282 switch (op) { 22283 case Iop_ShlN16x8: shl = True; size = 32; break; 22284 case Iop_ShlN32x4: shl = True; size = 32; break; 22285 case Iop_ShlN64x2: shl = True; size = 64; break; 22286 case Iop_SarN16x8: sar = True; size = 16; break; 22287 case Iop_SarN32x4: sar = True; size = 32; break; 22288 case Iop_ShrN16x8: shr = True; size = 16; break; 22289 case Iop_ShrN32x4: shr = True; size = 32; break; 22290 case Iop_ShrN64x2: shr = True; 
size = 64; break; 22291 default: vassert(0); 22292 } 22293 22294 if (shl || shr) { 22295 assign( 22296 g1, 22297 IRExpr_ITE( 22298 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22299 binop(op, mkexpr(g0), mkexpr(amt8)), 22300 mkV128(0x0000) 22301 ) 22302 ); 22303 } else 22304 if (sar) { 22305 assign( 22306 g1, 22307 IRExpr_ITE( 22308 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22309 binop(op, mkexpr(g0), mkexpr(amt8)), 22310 binop(op, mkexpr(g0), mkU8(size-1)) 22311 ) 22312 ); 22313 } else { 22314 vassert(0); 22315 } 22316 22317 putYMMRegLoAndZU( rG, mkexpr(g1) ); 22318 return delta; 22319 } 22320 22321 22322 /* Vector by scalar shift of V by the amount specified at the bottom 22323 of E. */ 22324 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi, 22325 Prefix pfx, Long delta, 22326 const HChar* opname, IROp op ) 22327 { 22328 HChar dis_buf[50]; 22329 Int alen, size; 22330 IRTemp addr; 22331 Bool shl, shr, sar; 22332 UChar modrm = getUChar(delta); 22333 UInt rG = gregOfRexRM(pfx,modrm); 22334 UInt rV = getVexNvvvv(pfx);; 22335 IRTemp g0 = newTemp(Ity_V256); 22336 IRTemp g1 = newTemp(Ity_V256); 22337 IRTemp amt = newTemp(Ity_I64); 22338 IRTemp amt8 = newTemp(Ity_I8); 22339 if (epartIsReg(modrm)) { 22340 UInt rE = eregOfRexRM(pfx,modrm); 22341 assign( amt, getXMMRegLane64(rE, 0) ); 22342 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22343 nameYMMReg(rV), nameYMMReg(rG) ); 22344 delta++; 22345 } else { 22346 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22347 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22348 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 22349 delta += alen; 22350 } 22351 assign( g0, getYMMReg(rV) ); 22352 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22353 22354 shl = shr = sar = False; 22355 size = 0; 22356 switch (op) { 22357 case Iop_ShlN16x16: shl = True; size = 32; break; 22358 case Iop_ShlN32x8: shl = True; size = 32; break; 22359 case Iop_ShlN64x4: shl = True; size = 64; break; 22360 case 
Iop_SarN16x16: sar = True; size = 16; break; 22361 case Iop_SarN32x8: sar = True; size = 32; break; 22362 case Iop_ShrN16x16: shr = True; size = 16; break; 22363 case Iop_ShrN32x8: shr = True; size = 32; break; 22364 case Iop_ShrN64x4: shr = True; size = 64; break; 22365 default: vassert(0); 22366 } 22367 22368 if (shl || shr) { 22369 assign( 22370 g1, 22371 IRExpr_ITE( 22372 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22373 binop(op, mkexpr(g0), mkexpr(amt8)), 22374 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0)) 22375 ) 22376 ); 22377 } else 22378 if (sar) { 22379 assign( 22380 g1, 22381 IRExpr_ITE( 22382 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22383 binop(op, mkexpr(g0), mkexpr(amt8)), 22384 binop(op, mkexpr(g0), mkU8(size-1)) 22385 ) 22386 ); 22387 } else { 22388 vassert(0); 22389 } 22390 22391 putYMMReg( rG, mkexpr(g1) ); 22392 return delta; 22393 } 22394 22395 22396 /* Vector by vector shift of V by the amount specified at the bottom 22397 of E. Vector by vector shifts are defined for all shift amounts, 22398 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts 22399 anyway). */ 22400 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi, 22401 Prefix pfx, Long delta, 22402 const HChar* opname, IROp op, Bool isYMM ) 22403 { 22404 HChar dis_buf[50]; 22405 Int alen, size, i; 22406 IRTemp addr; 22407 UChar modrm = getUChar(delta); 22408 UInt rG = gregOfRexRM(pfx,modrm); 22409 UInt rV = getVexNvvvv(pfx);; 22410 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128); 22411 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128); 22412 IRTemp amts[8], sVs[8], res[8]; 22413 if (epartIsReg(modrm)) { 22414 UInt rE = eregOfRexRM(pfx,modrm); 22415 assign( amt, isYMM ? 
                  getYMMReg(rE) : getXMMReg(rE) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
             nameYMMReg(rV), nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
             nameXMMReg(rV), nameXMMReg(rG) );
      }
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG) );
      }
      delta += alen;
   }
   assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );

   /* Lane width implied by the scalar shift op. */
   size = 0;
   switch (op) {
      case Iop_Shl32: size = 32; break;
      case Iop_Shl64: size = 64; break;
      case Iop_Sar32: size = 32; break;
      case Iop_Shr32: size = 32; break;
      case Iop_Shr64: size = 64; break;
      default: vassert(0);
   }

   /* Break the source vector and the per-lane amounts into scalar
      temps; slots beyond the operand width stay IRTemp_INVALID. */
   for (i = 0; i < 8; i++) {
      sVs[i]  = IRTemp_INVALID;
      amts[i] = IRTemp_INVALID;
   }
   switch (size) {
      case 32:
         if (isYMM) {
            breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
                                  &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
                                   &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         }
         break;
      case 64:
         if (isYMM) {
            breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to64s( sV, &sVs[1], &sVs[0] );
            breakupV128to64s( amt, &amts[1], &amts[0] );
         }
         break;
      default: vassert(0);
   }
   /* Per lane: an amount >= the lane width yields 0 for shl/shr and
      all sign bits for sar (the Sar32 special-case below). */
   for (i = 0; i < 8; i++)
      if (sVs[i] != IRTemp_INVALID) {
         res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
         assign( res[i],
                 IRExpr_ITE(
                    binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
                          mkexpr(amts[i]),
                          size == 32 ? mkU32(size) : mkU64(size)),
                    binop(op, mkexpr(sVs[i]),
                              unop(size == 32 ? Iop_32to8 : Iop_64to8,
                                   mkexpr(amts[i]))),
                    op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
                                    : size == 32 ? mkU32(0) : mkU64(0)
              ));
      }
   /* Write back; for the XMM form the upper YMM lanes are zeroed. */
   switch (size) {
      case 32:
         for (i = 0; i < 8; i++)
            putYMMRegLane32( rG, i, (i < 4 || isYMM)
                                    ? mkexpr(res[i]) : mkU32(0) );
         break;
      case 64:
         for (i = 0; i < 4; i++)
            putYMMRegLane64( rG, i, (i < 2 || isYMM)
                                    ? mkexpr(res[i]) : mkU64(0) );
         break;
      default: vassert(0);
   }

   return delta;
}


/* Vector by scalar shift of E into V, by an immediate byte.  Modified
   version of dis_SSE_shiftE_imm. */
static
Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
                                 Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V128);
   IRTemp  e1   = newTemp(Ity_V128);
   UInt    rD   = getVexNvvvv(pfx);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s,%s\n", opname,
       (Int)amt,
       nameXMMReg(eregOfRexRM(pfx,rm)),
       nameXMMReg(rD));
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case
Iop_ShrN32x4: shr = True; size = 32; break; 22541 case Iop_ShrN64x2: shr = True; size = 64; break; 22542 default: vassert(0); 22543 } 22544 22545 if (shl || shr) { 22546 assign( e1, amt >= size 22547 ? mkV128(0x0000) 22548 : binop(op, mkexpr(e0), mkU8(amt)) 22549 ); 22550 } else 22551 if (sar) { 22552 assign( e1, amt >= size 22553 ? binop(op, mkexpr(e0), mkU8(size-1)) 22554 : binop(op, mkexpr(e0), mkU8(amt)) 22555 ); 22556 } else { 22557 vassert(0); 22558 } 22559 22560 putYMMRegLoAndZU( rD, mkexpr(e1) ); 22561 return delta; 22562 } 22563 22564 22565 /* Vector by scalar shift of E into V, by an immediate byte. Modified 22566 version of dis_AVX128_shiftE_to_V_imm. */ 22567 static 22568 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx, 22569 Long delta, const HChar* opname, IROp op ) 22570 { 22571 Bool shl, shr, sar; 22572 UChar rm = getUChar(delta); 22573 IRTemp e0 = newTemp(Ity_V256); 22574 IRTemp e1 = newTemp(Ity_V256); 22575 UInt rD = getVexNvvvv(pfx); 22576 UChar amt, size; 22577 vassert(epartIsReg(rm)); 22578 vassert(gregLO3ofRM(rm) == 2 22579 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 22580 amt = getUChar(delta+1); 22581 delta += 2; 22582 DIP("%s $%d,%s,%s\n", opname, 22583 (Int)amt, 22584 nameYMMReg(eregOfRexRM(pfx,rm)), 22585 nameYMMReg(rD)); 22586 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) ); 22587 22588 shl = shr = sar = False; 22589 size = 0; 22590 switch (op) { 22591 case Iop_ShlN16x16: shl = True; size = 16; break; 22592 case Iop_ShlN32x8: shl = True; size = 32; break; 22593 case Iop_ShlN64x4: shl = True; size = 64; break; 22594 case Iop_SarN16x16: sar = True; size = 16; break; 22595 case Iop_SarN32x8: sar = True; size = 32; break; 22596 case Iop_ShrN16x16: shr = True; size = 16; break; 22597 case Iop_ShrN32x8: shr = True; size = 32; break; 22598 case Iop_ShrN64x4: shr = True; size = 64; break; 22599 default: vassert(0); 22600 } 22601 22602 22603 if (shl || shr) { 22604 assign( e1, amt >= size 22605 ? 
binop(Iop_V128HLtoV256, mkV128(0), mkV128(0)) 22606 : binop(op, mkexpr(e0), mkU8(amt)) 22607 ); 22608 } else 22609 if (sar) { 22610 assign( e1, amt >= size 22611 ? binop(op, mkexpr(e0), mkU8(size-1)) 22612 : binop(op, mkexpr(e0), mkU8(amt)) 22613 ); 22614 } else { 22615 vassert(0); 22616 } 22617 22618 putYMMReg( rD, mkexpr(e1) ); 22619 return delta; 22620 } 22621 22622 22623 /* Lower 64-bit lane only AVX128 binary operation: 22624 G[63:0] = V[63:0] `op` E[63:0] 22625 G[127:64] = V[127:64] 22626 G[255:128] = 0. 22627 The specified op must be of the 64F0x2 kind, so that it 22628 copies the upper half of the left operand to the result. 22629 */ 22630 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv, 22631 const VexAbiInfo* vbi, 22632 Prefix pfx, Long delta, 22633 const HChar* opname, IROp op ) 22634 { 22635 HChar dis_buf[50]; 22636 Int alen; 22637 IRTemp addr; 22638 UChar rm = getUChar(delta); 22639 UInt rG = gregOfRexRM(pfx,rm); 22640 UInt rV = getVexNvvvv(pfx); 22641 IRExpr* vpart = getXMMReg(rV); 22642 if (epartIsReg(rm)) { 22643 UInt rE = eregOfRexRM(pfx,rm); 22644 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) ); 22645 DIP("%s %s,%s,%s\n", opname, 22646 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22647 delta = delta+1; 22648 } else { 22649 /* We can only do a 64-bit memory read, so the upper half of the 22650 E operand needs to be made simply of zeroes. 
      */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   /* VEX-encoded ops zero the upper 128 bits of the destination. */
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 64-bit lane only AVX128 unary operation:
      G[63:0]    = op(E[63:0])
      G[127:64]  = V[127:64]
      G[255:128] = 0
   The specified op must be of the 64F0x2 kind, so that it
   copies the upper half of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm  = getUChar(delta);
   UInt    rG  = gregOfRexRM(pfx,rm);
   UInt    rV  = getVexNvvvv(pfx);
   IRTemp  e64 = newTemp(Ity_I64);

   /* Fetch E[63:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e64, getXMMRegLane64(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e64, loadLE(Ity_I64, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }

   /* Create a value 'arg' as V[127:64]++E[63:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo64,
                getXMMReg(rV), mkexpr(e64)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 32-bit lane only AVX128 unary operation:
      G[31:0]    = op(E[31:0])
      G[127:32]  = V[127:32]
      G[255:128] = 0
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm  = getUChar(delta);
   UInt    rG  = gregOfRexRM(pfx,rm);
   UInt    rV  = getVexNvvvv(pfx);
   IRTemp  e32 = newTemp(Ity_I32);

   /* Fetch E[31:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e32, getXMMRegLane32(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e32, loadLE(Ity_I32, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }

   /* Create a value 'arg' as V[127:32]++E[31:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo32,
                getXMMReg(rV), mkexpr(e32)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 32-bit lane only AVX128 binary operation:
      G[31:0]    = V[31:0] `op` E[31:0]
      G[127:32]  = V[127:32]
      G[255:128] = 0.
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the left operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
                                       const VexAbiInfo* vbi,
                                       Prefix pfx, Long delta,
                                       const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRExpr* vpart = getXMMReg(rV);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}


/* All-lanes AVX128 binary operation:
      G[127:0] = V[127:0] `op` E[127:0]
      G[255:128] = 0.
*/
static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
                                  const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, opname, op,
             NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
   );
}


/* Handles AVX128 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure.
*/
static
Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname, Bool all_lanes, Int sz )
{
   vassert(sz == 4 || sz == 8);
   Long    deltaIN = delta;
   HChar   dis_buf[50];
   Int     alen;
   UInt    imm8;
   IRTemp  addr;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getUChar(delta);
   UInt    rG      = gregOfRexRM(pfx, rm);
   UInt    rV      = getVexNvvvv(pfx);
   IRTemp argL     = newTemp(Ity_V128);
   IRTemp argR     = newTemp(Ity_V128);

   assign(argL, getXMMReg(rV));
   if (epartIsReg(rm)) {
      imm8 = getUChar(delta+1);
      /* findSSECmpOp maps the imm8 predicate onto an IROp plus
         optional argument-swap and result-negation fixups; it can
         reject unimplemented predicates. */
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      UInt rE = eregOfRexRM(pfx,rm);
      assign(argR, getXMMReg(rE));
      delta += 1+1;
      DIP("%s $%d,%s,%s,%s\n",
          opname, (Int)imm8,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      /* Scalar forms only read 4 or 8 bytes from memory; the rest of
         the comparison operand is zero-filled. */
      assign(argR,
             all_lanes   ? loadLE(Ity_V128, mkexpr(addr))
             : sz == 8   ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
             : /*sz==4*/   unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
      delta += alen+1;
      DIP("%s $%d,%s,%s,%s\n",
          opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }

   assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
                         : binop(op, mkexpr(argL), mkexpr(argR)));

   if (all_lanes) {
      /* This is simple: just invert the result, if necessary, and
         have done. */
      if (postNot) {
         putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else
   if (!preSwap) {
      /* More complex.  It's a one-lane-only, hence need to possibly
         invert only that one lane.  But at least the other lanes are
         correctly "in" the result, having been copied from the left
         operand (argL). */
      if (postNot) {
         IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
         putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
                                                  mask) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else {
      /* This is the most complex case.  One-lane-only, but the args
         were swapped.  So we have to possibly invert the bottom lane,
         and (definitely) we have to copy the upper lane(s) from argL
         since, due to the swapping, what's currently there is from
         argR, which is not correct. */
      IRTemp res     = newTemp(Ity_V128);
      IRTemp mask    = newTemp(Ity_V128);
      IRTemp notMask = newTemp(Ity_V128);
      assign(mask,    mkV128(sz==4 ? 0x000F : 0x00FF));
      assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
      if (postNot) {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128,
                            unop(Iop_NotV128, mkexpr(plain)),
                            mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      } else {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128,
                            mkexpr(plain),
                            mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      }
      putYMMRegLoAndZU( rG, mkexpr(res) );
   }

   *uses_vvvv = True;
   return delta;
}


/* Handles AVX256 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure.
*/
static
Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname, Int sz )
{
   vassert(sz == 4 || sz == 8);
   Long    deltaIN = delta;
   HChar   dis_buf[50];
   Int     alen;
   UInt    imm8;
   IRTemp  addr;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V256);
   UChar   rm      = getUChar(delta);
   UInt    rG      = gregOfRexRM(pfx, rm);
   UInt    rV      = getVexNvvvv(pfx);
   IRTemp argL     = newTemp(Ity_V256);
   IRTemp argR     = newTemp(Ity_V256);
   IRTemp argLhi   = IRTemp_INVALID;
   IRTemp argLlo   = IRTemp_INVALID;
   IRTemp argRhi   = IRTemp_INVALID;
   IRTemp argRlo   = IRTemp_INVALID;

   assign(argL, getYMMReg(rV));
   if (epartIsReg(rm)) {
      imm8 = getUChar(delta+1);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
                             True/*all_lanes*/, sz);
      if (!ok) return deltaIN; /* FAIL */
      UInt rE = eregOfRexRM(pfx,rm);
      assign(argR, getYMMReg(rE));
      delta += 1+1;
      DIP("%s $%d,%s,%s,%s\n",
          opname, (Int)imm8,
          nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
                             True/*all_lanes*/, sz);
      if (!ok) return deltaIN; /* FAIL */
      assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen+1;
      DIP("%s $%d,%s,%s,%s\n",
          opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }

   /* Do the comparison on the two 128-bit halves separately, applying
      any required argument swap by selecting which source feeds which
      side of the split. */
   breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
   breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
   assign(plain, binop( Iop_V128HLtoV256,
                        binop(op, mkexpr(argLhi), mkexpr(argRhi)),
                        binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );

   /* This is simple: just invert the result, if necessary, and
      have done. */
   if (postNot) {
      putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
   } else {
      putYMMReg( rG, mkexpr(plain) );
   }

   *uses_vvvv = True;
   return delta;
}


/* Handles AVX128 unary E-to-G all-lanes operations. */
static
Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname,
                               IRTemp (*opFn)(IRTemp) )
{
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   /* NOTE(review): this newTemp is immediately discarded by the
      'res = opFn(arg)' reassignment below; it only allocates an
      unused temp. */
   IRTemp res  = newTemp(Ity_V128);
   IRTemp arg  = newTemp(Ity_V128);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getXMMReg(rE));
      delta += 1;
      DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V128, mkexpr(addr)));
      delta += alen;
      DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
   }
   res = opFn(arg);
   putYMMRegLoAndZU( rG, mkexpr(res) );
   *uses_vvvv = False;
   return delta;
}


/* Handles AVX128 unary E-to-G all-lanes operations.
*/
static
Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
                                   const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   IRTemp arg  = newTemp(Ity_V128);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getXMMReg(rE));
      delta += 1;
      DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V128, mkexpr(addr)));
      delta += alen;
      DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
   }
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   /* XXXROUNDINGFIXME */
   IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
                           : unop(op, mkexpr(arg));
   putYMMRegLoAndZU( rG, res );
   *uses_vvvv = False;
   return delta;
}


/* FIXME: common up with the _128_ version above? */
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        /* The actual operation.  Use either 'op' or 'opfn',
           but not both. */
        IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
        Bool invertLeftArg,
        Bool swapArgs
     )
{
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   UInt   rSL   = getVexNvvvv(pfx);
   IRTemp tSL   = newTemp(Ity_V256);
   IRTemp tSR   = newTemp(Ity_V256);
   IRTemp addr  = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen  = 0;
   vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);

   assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
                             : getYMMReg(rSL));

   if (epartIsReg(modrm)) {
      UInt rSR = eregOfRexRM(pfx, modrm);
      delta += 1;
      assign(tSR, getYMMReg(rSR));
      DIP("%s %s,%s,%s\n",
          name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
      DIP("%s %s,%s,%s\n",
          name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
   }

   IRTemp res = IRTemp_INVALID;
   if (op != Iop_INVALID) {
      vassert(opFn == NULL);
      res = newTemp(Ity_V256);
      if (requiresRMode(op)) {
         IRTemp rm = newTemp(Ity_I32);
         assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
         assign(res, swapArgs
                        ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
                        : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
      } else {
         assign(res, swapArgs
                        ? binop(op, mkexpr(tSR), mkexpr(tSL))
                        : binop(op, mkexpr(tSL), mkexpr(tSR)));
      }
   } else {
      vassert(opFn != NULL);
      res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
   }

   putYMMReg(rD, mkexpr(res));

   *uses_vvvv = True;
   return delta;
}


/* All-lanes AVX256 binary operation:
      G[255:0] = V[255:0] `op` E[255:0]
*/
static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
                                  const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, opname, op,
             NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
   );
}


/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
   for the operation, no inversion of the left arg, and no swapping of
   args.
*/
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IROp op
     )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
}


/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
   generator to compute the result, no inversion of the left
   arg, and no swapping of args. */
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IRTemp(*opFn)(IRTemp,IRTemp)
     )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name,
             Iop_INVALID, opFn, False, False );
}


/* Handles AVX256 unary E-to-G all-lanes operations. */
static
Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname,
                               IRTemp (*opFn)(IRTemp) )
{
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   /* NOTE(review): this newTemp is immediately discarded by the
      'res = opFn(arg)' reassignment below; it only allocates an
      unused temp. */
   IRTemp res  = newTemp(Ity_V256);
   IRTemp arg  = newTemp(Ity_V256);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getYMMReg(rE));
      delta += 1;
      DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V256, mkexpr(addr)));
      delta += alen;
      DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
   }
   res = opFn(arg);
   putYMMReg( rG, mkexpr(res) );
   *uses_vvvv = False;
   return delta;
}


/* Handles AVX256 unary E-to-G all-lanes operations. */
static
Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
                                   const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar  dis_buf[50];
   Int    alen;
   IRTemp addr;
   IRTemp arg  = newTemp(Ity_V256);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getYMMReg(rE));
      delta += 1;
      DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V256, mkexpr(addr)));
      delta += alen;
      DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
   }
   putYMMReg( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = False;
   return delta;
}


/* The use of ReinterpF64asI64 is ugly.  Surely could do better if we
   had a variant of Iop_64x4toV256 that took F64s as args instead.
*/
static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp sV    = newTemp(Ity_V128);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      delta += 1;
      DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
   }
   /* Widen each of the four I32 source lanes to an F64 lane.  The
      I32->F64 conversion is exact, so no rounding mode is needed. */
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRExpr* res
      = IRExpr_Qop(
           Iop_64x4toV256,
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
        );
   putYMMReg(rG, res);
   return delta;
}


static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode,
get_sse_roundingmode() ); 23306 IRTemp t3, t2, t1, t0; 23307 t3 = t2 = t1 = t0 = IRTemp_INVALID; 23308 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 23309 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \ 23310 unop(Iop_ReinterpI64asF64, mkexpr(_t)) ) 23311 putXMMRegLane32F( rG, 3, CVT(t3) ); 23312 putXMMRegLane32F( rG, 2, CVT(t2) ); 23313 putXMMRegLane32F( rG, 1, CVT(t1) ); 23314 putXMMRegLane32F( rG, 0, CVT(t0) ); 23315 # undef CVT 23316 putYMMRegLane128( rG, 1, mkV128(0) ); 23317 return delta; 23318 } 23319 23320 23321 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRType tR, IROp op ) 23322 { 23323 IRTemp tLhi, tLlo, tRhi, tRlo; 23324 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID; 23325 IRTemp res = newTemp(Ity_V256); 23326 breakupV256toV128s( tL, &tLhi, &tLlo ); 23327 breakupV256toV128s( tR, &tRhi, &tRlo ); 23328 assign( res, binop( Iop_V128HLtoV256, 23329 binop( op, mkexpr(tRhi), mkexpr(tLhi) ), 23330 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) ); 23331 return res; 23332 } 23333 23334 23335 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR ) 23336 { 23337 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 ); 23338 } 23339 23340 23341 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR ) 23342 { 23343 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 ); 23344 } 23345 23346 23347 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR ) 23348 { 23349 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 ); 23350 } 23351 23352 23353 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR ) 23354 { 23355 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 ); 23356 } 23357 23358 23359 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR ) 23360 { 23361 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 ); 23362 } 23363 23364 23365 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR ) 23366 { 23367 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 ); 23368 } 23369 23370 23371 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp 
tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
}


static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
}


/* The VPACK* helpers below reuse math_VPUNPCK_YMM despite its name:
   they only need its per-128-bit-lane split/apply/recombine scheme,
   with a narrowing op instead of an interleaving one. */
static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
}


static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
}


static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
}


static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
}


__attribute__((noinline))
static
Long dis_ESC_0F__VEX (
        /*MB_OUT*/DisResult* dres,
        /*OUT*/   Bool*      uses_vvvv,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   *uses_vvvv = False;

   switch (opc) {

   case 0x10:
      /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
      /* Move 64 bits from E (mem only) to G (lo half xmm).
         Bits 255-64 of the dest are zeroed out. */
      if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt   rG   = gregOfRexRM(pfx,modrm);
         IRTemp z128 = newTemp(Ity_V128);
         assign(z128, mkV128(0));
         putXMMReg( rG, mkexpr(z128) );
         /* FIXME: ALIGNMENT CHECK?
*/ 23442 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 23443 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23444 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG)); 23445 delta += alen; 23446 goto decode_success; 23447 } 23448 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 23449 /* Reg form. */ 23450 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 23451 UChar modrm = getUChar(delta); 23452 UInt rG = gregOfRexRM(pfx, modrm); 23453 UInt rE = eregOfRexRM(pfx, modrm); 23454 UInt rV = getVexNvvvv(pfx); 23455 delta++; 23456 DIP("vmovsd %s,%s,%s\n", 23457 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23458 IRTemp res = newTemp(Ity_V128); 23459 assign(res, binop(Iop_64HLtoV128, 23460 getXMMRegLane64(rV, 1), 23461 getXMMRegLane64(rE, 0))); 23462 putYMMRegLoAndZU(rG, mkexpr(res)); 23463 *uses_vvvv = True; 23464 goto decode_success; 23465 } 23466 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23467 /* Move 32 bits from E (mem only) to G (lo half xmm). 23468 Bits 255-32 of the dest are zeroed out. */ 23469 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 23470 UChar modrm = getUChar(delta); 23471 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23472 UInt rG = gregOfRexRM(pfx,modrm); 23473 IRTemp z128 = newTemp(Ity_V128); 23474 assign(z128, mkV128(0)); 23475 putXMMReg( rG, mkexpr(z128) ); 23476 /* FIXME: ALIGNMENT CHECK? */ 23477 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) ); 23478 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23479 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG)); 23480 delta += alen; 23481 goto decode_success; 23482 } 23483 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23484 /* Reg form. 
*/ 23485 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 23486 UChar modrm = getUChar(delta); 23487 UInt rG = gregOfRexRM(pfx, modrm); 23488 UInt rE = eregOfRexRM(pfx, modrm); 23489 UInt rV = getVexNvvvv(pfx); 23490 delta++; 23491 DIP("vmovss %s,%s,%s\n", 23492 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23493 IRTemp res = newTemp(Ity_V128); 23494 assign( res, binop( Iop_64HLtoV128, 23495 getXMMRegLane64(rV, 1), 23496 binop(Iop_32HLto64, 23497 getXMMRegLane32(rV, 1), 23498 getXMMRegLane32(rE, 0)) ) ); 23499 putYMMRegLoAndZU(rG, mkexpr(res)); 23500 *uses_vvvv = True; 23501 goto decode_success; 23502 } 23503 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */ 23504 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23505 UChar modrm = getUChar(delta); 23506 UInt rG = gregOfRexRM(pfx, modrm); 23507 if (epartIsReg(modrm)) { 23508 UInt rE = eregOfRexRM(pfx,modrm); 23509 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23510 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23511 delta += 1; 23512 } else { 23513 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23514 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23515 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG)); 23516 delta += alen; 23517 } 23518 goto decode_success; 23519 } 23520 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */ 23521 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23522 UChar modrm = getUChar(delta); 23523 UInt rG = gregOfRexRM(pfx, modrm); 23524 if (epartIsReg(modrm)) { 23525 UInt rE = eregOfRexRM(pfx,modrm); 23526 putYMMReg( rG, getYMMReg( rE )); 23527 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23528 delta += 1; 23529 } else { 23530 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23531 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23532 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG)); 23533 delta += alen; 23534 } 23535 goto decode_success; 23536 } 23537 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */ 23538 if 
(haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23539 UChar modrm = getUChar(delta); 23540 UInt rG = gregOfRexRM(pfx, modrm); 23541 if (epartIsReg(modrm)) { 23542 UInt rE = eregOfRexRM(pfx,modrm); 23543 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23544 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23545 delta += 1; 23546 } else { 23547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23548 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23549 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG)); 23550 delta += alen; 23551 } 23552 goto decode_success; 23553 } 23554 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */ 23555 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23556 UChar modrm = getUChar(delta); 23557 UInt rG = gregOfRexRM(pfx, modrm); 23558 if (epartIsReg(modrm)) { 23559 UInt rE = eregOfRexRM(pfx,modrm); 23560 putYMMReg( rG, getYMMReg( rE )); 23561 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23562 delta += 1; 23563 } else { 23564 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23565 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23566 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG)); 23567 delta += alen; 23568 } 23569 goto decode_success; 23570 } 23571 break; 23572 23573 case 0x11: 23574 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */ 23575 /* Move 64 bits from G (low half xmm) to mem only. */ 23576 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 23577 UChar modrm = getUChar(delta); 23578 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23579 UInt rG = gregOfRexRM(pfx,modrm); 23580 /* FIXME: ALIGNMENT CHECK? */ 23581 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0)); 23582 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf); 23583 delta += alen; 23584 goto decode_success; 23585 } 23586 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */ 23587 /* Reg form. 
      */
      if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx, modrm);
         UInt rE = eregOfRexRM(pfx, modrm);
         UInt rV = getVexNvvvv(pfx);
         delta++;
         DIP("vmovsd %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         /* Result = hi64 of first source (V) : lo64 of second source (E);
            upper lanes of the destination ymm are zeroed. */
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           getXMMRegLane64(rE, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
      /* Move 32 bits from G (low 1/4 xmm) to mem only. */
      if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt rG = gregOfRexRM(pfx,modrm);
         /* FIXME: ALIGNMENT CHECK? */
         storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
         DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
         delta += alen;
         goto decode_success;
      }
      /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
      /* Reg form.
*/ 23618 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 23619 UChar modrm = getUChar(delta); 23620 UInt rG = gregOfRexRM(pfx, modrm); 23621 UInt rE = eregOfRexRM(pfx, modrm); 23622 UInt rV = getVexNvvvv(pfx); 23623 delta++; 23624 DIP("vmovss %s,%s,%s\n", 23625 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23626 IRTemp res = newTemp(Ity_V128); 23627 assign( res, binop( Iop_64HLtoV128, 23628 getXMMRegLane64(rV, 1), 23629 binop(Iop_32HLto64, 23630 getXMMRegLane32(rV, 1), 23631 getXMMRegLane32(rE, 0)) ) ); 23632 putYMMRegLoAndZU(rG, mkexpr(res)); 23633 *uses_vvvv = True; 23634 goto decode_success; 23635 } 23636 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */ 23637 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23638 UChar modrm = getUChar(delta); 23639 UInt rG = gregOfRexRM(pfx,modrm); 23640 if (epartIsReg(modrm)) { 23641 UInt rE = eregOfRexRM(pfx,modrm); 23642 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 23643 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 23644 delta += 1; 23645 } else { 23646 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23647 storeLE( mkexpr(addr), getXMMReg(rG) ); 23648 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf); 23649 delta += alen; 23650 } 23651 goto decode_success; 23652 } 23653 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */ 23654 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23655 UChar modrm = getUChar(delta); 23656 UInt rG = gregOfRexRM(pfx,modrm); 23657 if (epartIsReg(modrm)) { 23658 UInt rE = eregOfRexRM(pfx,modrm); 23659 putYMMReg( rE, getYMMReg(rG) ); 23660 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 23661 delta += 1; 23662 } else { 23663 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23664 storeLE( mkexpr(addr), getYMMReg(rG) ); 23665 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf); 23666 delta += alen; 23667 } 23668 goto decode_success; 23669 } 23670 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */ 23671 if (haveNo66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 23672 UChar modrm = getUChar(delta); 23673 UInt rG = gregOfRexRM(pfx,modrm); 23674 if (epartIsReg(modrm)) { 23675 UInt rE = eregOfRexRM(pfx,modrm); 23676 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 23677 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 23678 delta += 1; 23679 } else { 23680 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23681 storeLE( mkexpr(addr), getXMMReg(rG) ); 23682 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf); 23683 delta += alen; 23684 } 23685 goto decode_success; 23686 } 23687 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */ 23688 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23689 UChar modrm = getUChar(delta); 23690 UInt rG = gregOfRexRM(pfx,modrm); 23691 if (epartIsReg(modrm)) { 23692 UInt rE = eregOfRexRM(pfx,modrm); 23693 putYMMReg( rE, getYMMReg(rG) ); 23694 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 23695 delta += 1; 23696 } else { 23697 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23698 storeLE( mkexpr(addr), getYMMReg(rG) ); 23699 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf); 23700 delta += alen; 23701 } 23702 goto decode_success; 23703 } 23704 break; 23705 23706 case 0x12: 23707 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */ 23708 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23709 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ ); 23710 goto decode_success; 23711 } 23712 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */ 23713 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23714 delta = dis_MOVDDUP_256( vbi, pfx, delta ); 23715 goto decode_success; 23716 } 23717 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */ 23718 /* Insn only exists in reg form */ 23719 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23720 && epartIsReg(getUChar(delta))) { 23721 UChar modrm = getUChar(delta); 23722 UInt rG = gregOfRexRM(pfx, modrm); 23723 UInt rE = eregOfRexRM(pfx, modrm); 23724 UInt rV = getVexNvvvv(pfx); 23725 
delta++; 23726 DIP("vmovhlps %s,%s,%s\n", 23727 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23728 IRTemp res = newTemp(Ity_V128); 23729 assign(res, binop(Iop_64HLtoV128, 23730 getXMMRegLane64(rV, 1), 23731 getXMMRegLane64(rE, 1))); 23732 putYMMRegLoAndZU(rG, mkexpr(res)); 23733 *uses_vvvv = True; 23734 goto decode_success; 23735 } 23736 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */ 23737 /* Insn exists only in mem form, it appears. */ 23738 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */ 23739 /* Insn exists only in mem form, it appears. */ 23740 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23741 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23742 UChar modrm = getUChar(delta); 23743 UInt rG = gregOfRexRM(pfx, modrm); 23744 UInt rV = getVexNvvvv(pfx); 23745 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23746 delta += alen; 23747 DIP("vmovlpd %s,%s,%s\n", 23748 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23749 IRTemp res = newTemp(Ity_V128); 23750 assign(res, binop(Iop_64HLtoV128, 23751 getXMMRegLane64(rV, 1), 23752 loadLE(Ity_I64, mkexpr(addr)))); 23753 putYMMRegLoAndZU(rG, mkexpr(res)); 23754 *uses_vvvv = True; 23755 goto decode_success; 23756 } 23757 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */ 23758 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 23759 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 23760 True/*isL*/ ); 23761 goto decode_success; 23762 } 23763 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */ 23764 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 23765 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ ); 23766 goto decode_success; 23767 } 23768 break; 23769 23770 case 0x13: 23771 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */ 23772 /* Insn exists only in mem form, it appears. */ 23773 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */ 23774 /* Insn exists only in mem form, it appears. 
*/ 23775 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23776 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23777 UChar modrm = getUChar(delta); 23778 UInt rG = gregOfRexRM(pfx, modrm); 23779 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23780 delta += alen; 23781 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0)); 23782 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf); 23783 goto decode_success; 23784 } 23785 break; 23786 23787 case 0x14: 23788 case 0x15: 23789 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */ 23790 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */ 23791 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23792 Bool hi = opc == 0x15; 23793 UChar modrm = getUChar(delta); 23794 UInt rG = gregOfRexRM(pfx,modrm); 23795 UInt rV = getVexNvvvv(pfx); 23796 IRTemp eV = newTemp(Ity_V128); 23797 IRTemp vV = newTemp(Ity_V128); 23798 assign( vV, getXMMReg(rV) ); 23799 if (epartIsReg(modrm)) { 23800 UInt rE = eregOfRexRM(pfx,modrm); 23801 assign( eV, getXMMReg(rE) ); 23802 delta += 1; 23803 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23804 nameXMMReg(rE), nameXMMReg(rG)); 23805 } else { 23806 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23807 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23808 delta += alen; 23809 DIP("vunpck%sps %s,%s\n", hi ? 
"h" : "l", 23810 dis_buf, nameXMMReg(rG)); 23811 } 23812 IRTemp res = math_UNPCKxPS_128( eV, vV, hi ); 23813 putYMMRegLoAndZU( rG, mkexpr(res) ); 23814 *uses_vvvv = True; 23815 goto decode_success; 23816 } 23817 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */ 23818 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */ 23819 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23820 Bool hi = opc == 0x15; 23821 UChar modrm = getUChar(delta); 23822 UInt rG = gregOfRexRM(pfx,modrm); 23823 UInt rV = getVexNvvvv(pfx); 23824 IRTemp eV = newTemp(Ity_V256); 23825 IRTemp vV = newTemp(Ity_V256); 23826 assign( vV, getYMMReg(rV) ); 23827 if (epartIsReg(modrm)) { 23828 UInt rE = eregOfRexRM(pfx,modrm); 23829 assign( eV, getYMMReg(rE) ); 23830 delta += 1; 23831 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23832 nameYMMReg(rE), nameYMMReg(rG)); 23833 } else { 23834 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23835 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23836 delta += alen; 23837 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23838 dis_buf, nameYMMReg(rG)); 23839 } 23840 IRTemp res = math_UNPCKxPS_256( eV, vV, hi ); 23841 putYMMReg( rG, mkexpr(res) ); 23842 *uses_vvvv = True; 23843 goto decode_success; 23844 } 23845 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */ 23846 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */ 23847 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23848 Bool hi = opc == 0x15; 23849 UChar modrm = getUChar(delta); 23850 UInt rG = gregOfRexRM(pfx,modrm); 23851 UInt rV = getVexNvvvv(pfx); 23852 IRTemp eV = newTemp(Ity_V128); 23853 IRTemp vV = newTemp(Ity_V128); 23854 assign( vV, getXMMReg(rV) ); 23855 if (epartIsReg(modrm)) { 23856 UInt rE = eregOfRexRM(pfx,modrm); 23857 assign( eV, getXMMReg(rE) ); 23858 delta += 1; 23859 DIP("vunpck%spd %s,%s\n", hi ? 
"h" : "l", 23860 nameXMMReg(rE), nameXMMReg(rG)); 23861 } else { 23862 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23863 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23864 delta += alen; 23865 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 23866 dis_buf, nameXMMReg(rG)); 23867 } 23868 IRTemp res = math_UNPCKxPD_128( eV, vV, hi ); 23869 putYMMRegLoAndZU( rG, mkexpr(res) ); 23870 *uses_vvvv = True; 23871 goto decode_success; 23872 } 23873 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */ 23874 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */ 23875 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23876 Bool hi = opc == 0x15; 23877 UChar modrm = getUChar(delta); 23878 UInt rG = gregOfRexRM(pfx,modrm); 23879 UInt rV = getVexNvvvv(pfx); 23880 IRTemp eV = newTemp(Ity_V256); 23881 IRTemp vV = newTemp(Ity_V256); 23882 assign( vV, getYMMReg(rV) ); 23883 if (epartIsReg(modrm)) { 23884 UInt rE = eregOfRexRM(pfx,modrm); 23885 assign( eV, getYMMReg(rE) ); 23886 delta += 1; 23887 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 23888 nameYMMReg(rE), nameYMMReg(rG)); 23889 } else { 23890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23891 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23892 delta += alen; 23893 DIP("vunpck%spd %s,%s\n", hi ? 
"h" : "l", 23894 dis_buf, nameYMMReg(rG)); 23895 } 23896 IRTemp res = math_UNPCKxPD_256( eV, vV, hi ); 23897 putYMMReg( rG, mkexpr(res) ); 23898 *uses_vvvv = True; 23899 goto decode_success; 23900 } 23901 break; 23902 23903 case 0x16: 23904 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */ 23905 /* Insn only exists in reg form */ 23906 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23907 && epartIsReg(getUChar(delta))) { 23908 UChar modrm = getUChar(delta); 23909 UInt rG = gregOfRexRM(pfx, modrm); 23910 UInt rE = eregOfRexRM(pfx, modrm); 23911 UInt rV = getVexNvvvv(pfx); 23912 delta++; 23913 DIP("vmovlhps %s,%s,%s\n", 23914 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23915 IRTemp res = newTemp(Ity_V128); 23916 assign(res, binop(Iop_64HLtoV128, 23917 getXMMRegLane64(rE, 0), 23918 getXMMRegLane64(rV, 0))); 23919 putYMMRegLoAndZU(rG, mkexpr(res)); 23920 *uses_vvvv = True; 23921 goto decode_success; 23922 } 23923 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */ 23924 /* Insn exists only in mem form, it appears. */ 23925 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */ 23926 /* Insn exists only in mem form, it appears. */ 23927 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23928 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23929 UChar modrm = getUChar(delta); 23930 UInt rG = gregOfRexRM(pfx, modrm); 23931 UInt rV = getVexNvvvv(pfx); 23932 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23933 delta += alen; 23934 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 
'd' : 's', 23935 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23936 IRTemp res = newTemp(Ity_V128); 23937 assign(res, binop(Iop_64HLtoV128, 23938 loadLE(Ity_I64, mkexpr(addr)), 23939 getXMMRegLane64(rV, 0))); 23940 putYMMRegLoAndZU(rG, mkexpr(res)); 23941 *uses_vvvv = True; 23942 goto decode_success; 23943 } 23944 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */ 23945 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 23946 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 23947 False/*!isL*/ ); 23948 goto decode_success; 23949 } 23950 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */ 23951 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 23952 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ ); 23953 goto decode_success; 23954 } 23955 break; 23956 23957 case 0x17: 23958 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */ 23959 /* Insn exists only in mem form, it appears. */ 23960 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */ 23961 /* Insn exists only in mem form, it appears. */ 23962 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23963 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23964 UChar modrm = getUChar(delta); 23965 UInt rG = gregOfRexRM(pfx, modrm); 23966 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23967 delta += alen; 23968 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1)); 23969 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 23970 nameXMMReg(rG), dis_buf); 23971 goto decode_success; 23972 } 23973 break; 23974 23975 case 0x28: 23976 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */ 23977 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23978 UChar modrm = getUChar(delta); 23979 UInt rG = gregOfRexRM(pfx, modrm); 23980 if (epartIsReg(modrm)) { 23981 UInt rE = eregOfRexRM(pfx,modrm); 23982 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23983 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23984 delta += 1; 23985 } else { 23986 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23987 gen_SEGV_if_not_16_aligned( addr ); 23988 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23989 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG)); 23990 delta += alen; 23991 } 23992 goto decode_success; 23993 } 23994 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */ 23995 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23996 UChar modrm = getUChar(delta); 23997 UInt rG = gregOfRexRM(pfx, modrm); 23998 if (epartIsReg(modrm)) { 23999 UInt rE = eregOfRexRM(pfx,modrm); 24000 putYMMReg( rG, getYMMReg( rE )); 24001 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 24002 delta += 1; 24003 } else { 24004 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24005 gen_SEGV_if_not_32_aligned( addr ); 24006 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 24007 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG)); 24008 delta += alen; 24009 } 24010 goto decode_success; 24011 } 24012 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */ 24013 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24014 UChar modrm = getUChar(delta); 24015 UInt rG = gregOfRexRM(pfx, modrm); 24016 if (epartIsReg(modrm)) { 24017 UInt rE = eregOfRexRM(pfx,modrm); 24018 putYMMRegLoAndZU( rG, getXMMReg( rE )); 24019 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 24020 delta += 1; 24021 } else { 24022 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24023 gen_SEGV_if_not_16_aligned( 
addr ); 24024 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 24025 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG)); 24026 delta += alen; 24027 } 24028 goto decode_success; 24029 } 24030 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */ 24031 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24032 UChar modrm = getUChar(delta); 24033 UInt rG = gregOfRexRM(pfx, modrm); 24034 if (epartIsReg(modrm)) { 24035 UInt rE = eregOfRexRM(pfx,modrm); 24036 putYMMReg( rG, getYMMReg( rE )); 24037 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 24038 delta += 1; 24039 } else { 24040 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24041 gen_SEGV_if_not_32_aligned( addr ); 24042 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 24043 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG)); 24044 delta += alen; 24045 } 24046 goto decode_success; 24047 } 24048 break; 24049 24050 case 0x29: 24051 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */ 24052 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24053 UChar modrm = getUChar(delta); 24054 UInt rG = gregOfRexRM(pfx,modrm); 24055 if (epartIsReg(modrm)) { 24056 UInt rE = eregOfRexRM(pfx,modrm); 24057 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24058 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24059 delta += 1; 24060 } else { 24061 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24062 gen_SEGV_if_not_16_aligned( addr ); 24063 storeLE( mkexpr(addr), getXMMReg(rG) ); 24064 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf ); 24065 delta += alen; 24066 } 24067 goto decode_success; 24068 } 24069 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */ 24070 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24071 UChar modrm = getUChar(delta); 24072 UInt rG = gregOfRexRM(pfx,modrm); 24073 if (epartIsReg(modrm)) { 24074 UInt rE = eregOfRexRM(pfx,modrm); 24075 putYMMReg( rE, getYMMReg(rG) ); 24076 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24077 delta += 1; 24078 } else { 24079 addr = 
disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24080 gen_SEGV_if_not_32_aligned( addr ); 24081 storeLE( mkexpr(addr), getYMMReg(rG) ); 24082 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf ); 24083 delta += alen; 24084 } 24085 goto decode_success; 24086 } 24087 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */ 24088 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24089 UChar modrm = getUChar(delta); 24090 UInt rG = gregOfRexRM(pfx,modrm); 24091 if (epartIsReg(modrm)) { 24092 UInt rE = eregOfRexRM(pfx,modrm); 24093 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24094 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24095 delta += 1; 24096 goto decode_success; 24097 } else { 24098 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24099 gen_SEGV_if_not_16_aligned( addr ); 24100 storeLE( mkexpr(addr), getXMMReg(rG) ); 24101 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf ); 24102 delta += alen; 24103 goto decode_success; 24104 } 24105 } 24106 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */ 24107 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24108 UChar modrm = getUChar(delta); 24109 UInt rG = gregOfRexRM(pfx,modrm); 24110 if (epartIsReg(modrm)) { 24111 UInt rE = eregOfRexRM(pfx,modrm); 24112 putYMMReg( rE, getYMMReg(rG) ); 24113 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24114 delta += 1; 24115 goto decode_success; 24116 } else { 24117 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24118 gen_SEGV_if_not_32_aligned( addr ); 24119 storeLE( mkexpr(addr), getYMMReg(rG) ); 24120 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf ); 24121 delta += alen; 24122 goto decode_success; 24123 } 24124 } 24125 break; 24126 24127 case 0x2A: { 24128 IRTemp rmode = newTemp(Ity_I32); 24129 assign( rmode, get_sse_roundingmode() ); 24130 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */ 24131 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24132 UChar modrm = getUChar(delta); 24133 UInt rV = getVexNvvvv(pfx); 24134 UInt rD = 
gregOfRexRM(pfx, modrm); 24135 IRTemp arg32 = newTemp(Ity_I32); 24136 if (epartIsReg(modrm)) { 24137 UInt rS = eregOfRexRM(pfx,modrm); 24138 assign( arg32, getIReg32(rS) ); 24139 delta += 1; 24140 DIP("vcvtsi2sdl %s,%s,%s\n", 24141 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24142 } else { 24143 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24144 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24145 delta += alen; 24146 DIP("vcvtsi2sdl %s,%s,%s\n", 24147 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24148 } 24149 putXMMRegLane64F( rD, 0, 24150 unop(Iop_I32StoF64, mkexpr(arg32))); 24151 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24152 putYMMRegLane128( rD, 1, mkV128(0) ); 24153 *uses_vvvv = True; 24154 goto decode_success; 24155 } 24156 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */ 24157 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24158 UChar modrm = getUChar(delta); 24159 UInt rV = getVexNvvvv(pfx); 24160 UInt rD = gregOfRexRM(pfx, modrm); 24161 IRTemp arg64 = newTemp(Ity_I64); 24162 if (epartIsReg(modrm)) { 24163 UInt rS = eregOfRexRM(pfx,modrm); 24164 assign( arg64, getIReg64(rS) ); 24165 delta += 1; 24166 DIP("vcvtsi2sdq %s,%s,%s\n", 24167 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24168 } else { 24169 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24170 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24171 delta += alen; 24172 DIP("vcvtsi2sdq %s,%s,%s\n", 24173 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24174 } 24175 putXMMRegLane64F( rD, 0, 24176 binop( Iop_I64StoF64, 24177 get_sse_roundingmode(), 24178 mkexpr(arg64)) ); 24179 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24180 putYMMRegLane128( rD, 1, mkV128(0) ); 24181 *uses_vvvv = True; 24182 goto decode_success; 24183 } 24184 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */ 24185 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24186 UChar modrm = getUChar(delta); 24187 UInt rV = getVexNvvvv(pfx); 24188 UInt rD = 
gregOfRexRM(pfx, modrm); 24189 IRTemp arg64 = newTemp(Ity_I64); 24190 if (epartIsReg(modrm)) { 24191 UInt rS = eregOfRexRM(pfx,modrm); 24192 assign( arg64, getIReg64(rS) ); 24193 delta += 1; 24194 DIP("vcvtsi2ssq %s,%s,%s\n", 24195 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24196 } else { 24197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24198 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24199 delta += alen; 24200 DIP("vcvtsi2ssq %s,%s,%s\n", 24201 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24202 } 24203 putXMMRegLane32F( rD, 0, 24204 binop(Iop_F64toF32, 24205 mkexpr(rmode), 24206 binop(Iop_I64StoF64, mkexpr(rmode), 24207 mkexpr(arg64)) ) ); 24208 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24209 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24210 putYMMRegLane128( rD, 1, mkV128(0) ); 24211 *uses_vvvv = True; 24212 goto decode_success; 24213 } 24214 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */ 24215 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24216 UChar modrm = getUChar(delta); 24217 UInt rV = getVexNvvvv(pfx); 24218 UInt rD = gregOfRexRM(pfx, modrm); 24219 IRTemp arg32 = newTemp(Ity_I32); 24220 if (epartIsReg(modrm)) { 24221 UInt rS = eregOfRexRM(pfx,modrm); 24222 assign( arg32, getIReg32(rS) ); 24223 delta += 1; 24224 DIP("vcvtsi2ssl %s,%s,%s\n", 24225 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24226 } else { 24227 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24228 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24229 delta += alen; 24230 DIP("vcvtsi2ssl %s,%s,%s\n", 24231 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24232 } 24233 putXMMRegLane32F( rD, 0, 24234 binop(Iop_F64toF32, 24235 mkexpr(rmode), 24236 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 24237 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24238 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24239 putYMMRegLane128( rD, 1, mkV128(0) ); 24240 *uses_vvvv = True; 24241 goto decode_success; 24242 } 24243 break; 24244 } 24245 
24246 case 0x2B: 24247 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */ 24248 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */ 24249 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24250 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24251 UChar modrm = getUChar(delta); 24252 UInt rS = gregOfRexRM(pfx, modrm); 24253 IRTemp tS = newTemp(Ity_V128); 24254 assign(tS, getXMMReg(rS)); 24255 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24256 delta += alen; 24257 gen_SEGV_if_not_16_aligned(addr); 24258 storeLE(mkexpr(addr), mkexpr(tS)); 24259 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's', 24260 nameXMMReg(rS), dis_buf); 24261 goto decode_success; 24262 } 24263 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */ 24264 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */ 24265 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24266 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) { 24267 UChar modrm = getUChar(delta); 24268 UInt rS = gregOfRexRM(pfx, modrm); 24269 IRTemp tS = newTemp(Ity_V256); 24270 assign(tS, getYMMReg(rS)); 24271 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24272 delta += alen; 24273 gen_SEGV_if_not_32_aligned(addr); 24274 storeLE(mkexpr(addr), mkexpr(tS)); 24275 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 24276 nameYMMReg(rS), dis_buf); 24277 goto decode_success; 24278 } 24279 break; 24280 24281 case 0x2C: 24282 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */ 24283 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24284 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24285 goto decode_success; 24286 } 24287 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */ 24288 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24289 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24290 goto decode_success; 24291 } 24292 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */ 24293 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24294 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24295 goto decode_success; 24296 } 24297 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */ 24298 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24299 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24300 goto decode_success; 24301 } 24302 break; 24303 24304 case 0x2D: 24305 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */ 24306 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24307 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24308 goto decode_success; 24309 } 24310 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */ 24311 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24312 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24313 goto decode_success; 24314 } 24315 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */ 24316 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24317 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24318 goto decode_success; 24319 } 24320 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */ 24321 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24322 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24323 goto decode_success; 24324 } 24325 break; 24326 24327 case 0x2E: 24328 case 0x2F: 
      /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
      /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
      if (have66noF2noF3(pfx)) {
         delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
      /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
      if (haveNo66noF2noF3(pfx)) {
         delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      break;

   case 0x50:
      /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPD_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPS_256( vbi, pfx, delta );
         goto decode_success;
      }
      break;

   case 0x51:
      /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
      /* (scalar-single: the memory operand is 32 bits, not 64) */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
         goto decode_success;
      }
      /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
         goto decode_success;
      }
      /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
         goto decode_success;
      }
      /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
         goto decode_success;
      }
      /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
         goto decode_success;
      }
      /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
         goto decode_success;
      }
      break;

   case 0x52:
      /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
      /* (scalar-single: the memory operand is 32 bits, not 64) */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtss",
                    Iop_RSqrtEst32F0x4 );
         goto decode_success;
      }
      /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
         goto decode_success;
      }
      /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
         goto decode_success;
      }
      break;

   case 0x53:
      /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
24429 if (haveF3no66noF2(pfx)) { 24430 delta = dis_AVX128_E_V_to_G_lo32_unary( 24431 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 ); 24432 goto decode_success; 24433 } 24434 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */ 24435 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24436 delta = dis_AVX128_E_to_G_unary_all( 24437 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 ); 24438 goto decode_success; 24439 } 24440 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */ 24441 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24442 delta = dis_AVX256_E_to_G_unary_all( 24443 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 ); 24444 goto decode_success; 24445 } 24446 break; 24447 24448 case 0x54: 24449 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24450 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */ 24451 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24452 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24453 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 ); 24454 goto decode_success; 24455 } 24456 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24457 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */ 24458 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24459 delta = dis_AVX256_E_V_to_G( 24460 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 ); 24461 goto decode_success; 24462 } 24463 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */ 24464 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24465 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24466 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 ); 24467 goto decode_success; 24468 } 24469 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */ 24470 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24471 delta = dis_AVX256_E_V_to_G( 24472 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 ); 24473 goto decode_success; 24474 } 24475 break; 24476 24477 case 0x55: 24478 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */ 24479 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */ 24480 if 
(have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24481 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24482 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128, 24483 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24484 goto decode_success; 24485 } 24486 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */ 24487 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24488 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24489 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256, 24490 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24491 goto decode_success; 24492 } 24493 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */ 24494 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24495 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24496 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128, 24497 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24498 goto decode_success; 24499 } 24500 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */ 24501 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24502 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24503 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256, 24504 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24505 goto decode_success; 24506 } 24507 break; 24508 24509 case 0x56: 24510 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24511 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */ 24512 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24513 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24514 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 ); 24515 goto decode_success; 24516 } 24517 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24518 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */ 24519 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24520 delta = dis_AVX256_E_V_to_G( 24521 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 ); 24522 goto decode_success; 24523 } 24524 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24525 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */ 24526 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24527 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24528 
uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 ); 24529 goto decode_success; 24530 } 24531 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24532 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */ 24533 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24534 delta = dis_AVX256_E_V_to_G( 24535 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 ); 24536 goto decode_success; 24537 } 24538 break; 24539 24540 case 0x57: 24541 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 24542 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */ 24543 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24544 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24545 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 ); 24546 goto decode_success; 24547 } 24548 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 24549 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */ 24550 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24551 delta = dis_AVX256_E_V_to_G( 24552 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 ); 24553 goto decode_success; 24554 } 24555 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 24556 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */ 24557 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24558 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24559 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 ); 24560 goto decode_success; 24561 } 24562 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 24563 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */ 24564 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24565 delta = dis_AVX256_E_V_to_G( 24566 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 ); 24567 goto decode_success; 24568 } 24569 break; 24570 24571 case 0x58: 24572 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */ 24573 if (haveF2no66noF3(pfx)) { 24574 delta = dis_AVX128_E_V_to_G_lo64( 24575 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 ); 24576 goto decode_success; 24577 } 24578 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */ 24579 if (haveF3no66noF2(pfx)) { 24580 delta = dis_AVX128_E_V_to_G_lo32( 
24581 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 ); 24582 goto decode_success; 24583 } 24584 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */ 24585 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24586 delta = dis_AVX128_E_V_to_G( 24587 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 ); 24588 goto decode_success; 24589 } 24590 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */ 24591 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24592 delta = dis_AVX256_E_V_to_G( 24593 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 ); 24594 goto decode_success; 24595 } 24596 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */ 24597 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24598 delta = dis_AVX128_E_V_to_G( 24599 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 ); 24600 goto decode_success; 24601 } 24602 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */ 24603 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24604 delta = dis_AVX256_E_V_to_G( 24605 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 ); 24606 goto decode_success; 24607 } 24608 break; 24609 24610 case 0x59: 24611 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */ 24612 if (haveF2no66noF3(pfx)) { 24613 delta = dis_AVX128_E_V_to_G_lo64( 24614 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 ); 24615 goto decode_success; 24616 } 24617 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */ 24618 if (haveF3no66noF2(pfx)) { 24619 delta = dis_AVX128_E_V_to_G_lo32( 24620 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 ); 24621 goto decode_success; 24622 } 24623 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */ 24624 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24625 delta = dis_AVX128_E_V_to_G( 24626 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 ); 24627 goto decode_success; 24628 } 24629 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */ 24630 if 
(haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24631 delta = dis_AVX256_E_V_to_G( 24632 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 ); 24633 goto decode_success; 24634 } 24635 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */ 24636 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24637 delta = dis_AVX128_E_V_to_G( 24638 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 ); 24639 goto decode_success; 24640 } 24641 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */ 24642 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24643 delta = dis_AVX256_E_V_to_G( 24644 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 ); 24645 goto decode_success; 24646 } 24647 break; 24648 24649 case 0x5A: 24650 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */ 24651 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24652 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ ); 24653 goto decode_success; 24654 } 24655 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */ 24656 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24657 delta = dis_CVTPS2PD_256( vbi, pfx, delta ); 24658 goto decode_success; 24659 } 24660 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */ 24661 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24662 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ ); 24663 goto decode_success; 24664 } 24665 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */ 24666 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24667 delta = dis_CVTPD2PS_256( vbi, pfx, delta ); 24668 goto decode_success; 24669 } 24670 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */ 24671 if (haveF2no66noF3(pfx)) { 24672 UChar modrm = getUChar(delta); 24673 UInt rV = getVexNvvvv(pfx); 24674 UInt rD = gregOfRexRM(pfx, modrm); 24675 IRTemp f64lo = newTemp(Ity_F64); 24676 IRTemp rmode = newTemp(Ity_I32); 24677 assign( rmode, get_sse_roundingmode() ); 24678 if (epartIsReg(modrm)) { 24679 UInt rS = 
eregOfRexRM(pfx,modrm); 24680 assign(f64lo, getXMMRegLane64F(rS, 0)); 24681 delta += 1; 24682 DIP("vcvtsd2ss %s,%s,%s\n", 24683 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 24684 } else { 24685 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24686 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) ); 24687 delta += alen; 24688 DIP("vcvtsd2ss %s,%s,%s\n", 24689 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24690 } 24691 putXMMRegLane32F( rD, 0, 24692 binop( Iop_F64toF32, mkexpr(rmode), 24693 mkexpr(f64lo)) ); 24694 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24695 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24696 putYMMRegLane128( rD, 1, mkV128(0) ); 24697 *uses_vvvv = True; 24698 goto decode_success; 24699 } 24700 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */ 24701 if (haveF3no66noF2(pfx)) { 24702 UChar modrm = getUChar(delta); 24703 UInt rV = getVexNvvvv(pfx); 24704 UInt rD = gregOfRexRM(pfx, modrm); 24705 IRTemp f32lo = newTemp(Ity_F32); 24706 if (epartIsReg(modrm)) { 24707 UInt rS = eregOfRexRM(pfx,modrm); 24708 assign(f32lo, getXMMRegLane32F(rS, 0)); 24709 delta += 1; 24710 DIP("vcvtss2sd %s,%s,%s\n", 24711 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 24712 } else { 24713 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24714 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 24715 delta += alen; 24716 DIP("vcvtss2sd %s,%s,%s\n", 24717 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24718 } 24719 putXMMRegLane64F( rD, 0, 24720 unop( Iop_F32toF64, mkexpr(f32lo)) ); 24721 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24722 putYMMRegLane128( rD, 1, mkV128(0) ); 24723 *uses_vvvv = True; 24724 goto decode_success; 24725 } 24726 break; 24727 24728 case 0x5B: 24729 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */ 24730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24731 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 24732 True/*isAvx*/, False/*!r2zero*/ ); 24733 goto decode_success; 24734 } 24735 /* VCVTPS2DQ 
ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */ 24736 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24737 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 24738 False/*!r2zero*/ ); 24739 goto decode_success; 24740 } 24741 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */ 24742 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 24743 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 24744 True/*isAvx*/, True/*r2zero*/ ); 24745 goto decode_success; 24746 } 24747 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */ 24748 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 24749 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 24750 True/*r2zero*/ ); 24751 goto decode_success; 24752 } 24753 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */ 24754 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24755 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ ); 24756 goto decode_success; 24757 } 24758 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */ 24759 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24760 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta ); 24761 goto decode_success; 24762 } 24763 break; 24764 24765 case 0x5C: 24766 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */ 24767 if (haveF2no66noF3(pfx)) { 24768 delta = dis_AVX128_E_V_to_G_lo64( 24769 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 ); 24770 goto decode_success; 24771 } 24772 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */ 24773 if (haveF3no66noF2(pfx)) { 24774 delta = dis_AVX128_E_V_to_G_lo32( 24775 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 ); 24776 goto decode_success; 24777 } 24778 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */ 24779 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24780 delta = dis_AVX128_E_V_to_G( 24781 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 ); 24782 goto decode_success; 24783 } 24784 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */ 24785 if 
(haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24786 delta = dis_AVX256_E_V_to_G( 24787 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 ); 24788 goto decode_success; 24789 } 24790 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */ 24791 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24792 delta = dis_AVX128_E_V_to_G( 24793 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 ); 24794 goto decode_success; 24795 } 24796 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */ 24797 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24798 delta = dis_AVX256_E_V_to_G( 24799 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 ); 24800 goto decode_success; 24801 } 24802 break; 24803 24804 case 0x5D: 24805 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */ 24806 if (haveF2no66noF3(pfx)) { 24807 delta = dis_AVX128_E_V_to_G_lo64( 24808 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 ); 24809 goto decode_success; 24810 } 24811 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */ 24812 if (haveF3no66noF2(pfx)) { 24813 delta = dis_AVX128_E_V_to_G_lo32( 24814 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 ); 24815 goto decode_success; 24816 } 24817 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */ 24818 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24819 delta = dis_AVX128_E_V_to_G( 24820 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 ); 24821 goto decode_success; 24822 } 24823 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */ 24824 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24825 delta = dis_AVX256_E_V_to_G( 24826 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 ); 24827 goto decode_success; 24828 } 24829 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */ 24830 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24831 delta = dis_AVX128_E_V_to_G( 24832 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 ); 24833 goto decode_success; 24834 } 24835 
/* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */ 24836 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24837 delta = dis_AVX256_E_V_to_G( 24838 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 ); 24839 goto decode_success; 24840 } 24841 break; 24842 24843 case 0x5E: 24844 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */ 24845 if (haveF2no66noF3(pfx)) { 24846 delta = dis_AVX128_E_V_to_G_lo64( 24847 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 ); 24848 goto decode_success; 24849 } 24850 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */ 24851 if (haveF3no66noF2(pfx)) { 24852 delta = dis_AVX128_E_V_to_G_lo32( 24853 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 ); 24854 goto decode_success; 24855 } 24856 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */ 24857 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24858 delta = dis_AVX128_E_V_to_G( 24859 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 ); 24860 goto decode_success; 24861 } 24862 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */ 24863 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24864 delta = dis_AVX256_E_V_to_G( 24865 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 ); 24866 goto decode_success; 24867 } 24868 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */ 24869 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24870 delta = dis_AVX128_E_V_to_G( 24871 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 ); 24872 goto decode_success; 24873 } 24874 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */ 24875 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24876 delta = dis_AVX256_E_V_to_G( 24877 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 ); 24878 goto decode_success; 24879 } 24880 break; 24881 24882 case 0x5F: 24883 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */ 24884 if (haveF2no66noF3(pfx)) { 24885 delta = dis_AVX128_E_V_to_G_lo64( 24886 
uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 ); 24887 goto decode_success; 24888 } 24889 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */ 24890 if (haveF3no66noF2(pfx)) { 24891 delta = dis_AVX128_E_V_to_G_lo32( 24892 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 ); 24893 goto decode_success; 24894 } 24895 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */ 24896 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24897 delta = dis_AVX128_E_V_to_G( 24898 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 ); 24899 goto decode_success; 24900 } 24901 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */ 24902 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24903 delta = dis_AVX256_E_V_to_G( 24904 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 ); 24905 goto decode_success; 24906 } 24907 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */ 24908 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24909 delta = dis_AVX128_E_V_to_G( 24910 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 ); 24911 goto decode_success; 24912 } 24913 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */ 24914 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24915 delta = dis_AVX256_E_V_to_G( 24916 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 ); 24917 goto decode_success; 24918 } 24919 break; 24920 24921 case 0x60: 24922 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 24923 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */ 24924 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24925 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24926 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 24927 Iop_InterleaveLO8x16, NULL, 24928 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24929 goto decode_success; 24930 } 24931 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 24932 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */ 24933 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24934 delta = 
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24935 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 24936 math_VPUNPCKLBW_YMM ); 24937 goto decode_success; 24938 } 24939 break; 24940 24941 case 0x61: 24942 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 24943 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */ 24944 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24945 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24946 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 24947 Iop_InterleaveLO16x8, NULL, 24948 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24949 goto decode_success; 24950 } 24951 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 24952 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */ 24953 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24954 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24955 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 24956 math_VPUNPCKLWD_YMM ); 24957 goto decode_success; 24958 } 24959 break; 24960 24961 case 0x62: 24962 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 24963 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */ 24964 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24965 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24966 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 24967 Iop_InterleaveLO32x4, NULL, 24968 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24969 goto decode_success; 24970 } 24971 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 24972 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */ 24973 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24974 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24975 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 24976 math_VPUNPCKLDQ_YMM ); 24977 goto decode_success; 24978 } 24979 break; 24980 24981 case 0x63: 24982 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 24983 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */ 24984 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24985 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 
24986 uses_vvvv, vbi, pfx, delta, "vpacksswb", 24987 Iop_QNarrowBin16Sto8Sx16, NULL, 24988 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24989 goto decode_success; 24990 } 24991 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 24992 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */ 24993 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24994 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24995 uses_vvvv, vbi, pfx, delta, "vpacksswb", 24996 math_VPACKSSWB_YMM ); 24997 goto decode_success; 24998 } 24999 break; 25000 25001 case 0x64: 25002 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 25003 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */ 25004 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25005 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25006 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 ); 25007 goto decode_success; 25008 } 25009 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 25010 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */ 25011 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25012 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25013 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 ); 25014 goto decode_success; 25015 } 25016 break; 25017 25018 case 0x65: 25019 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 25020 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */ 25021 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25022 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25023 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 ); 25024 goto decode_success; 25025 } 25026 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 25027 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */ 25028 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25029 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25030 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 ); 25031 goto decode_success; 25032 } 25033 break; 25034 25035 case 0x66: 25036 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 25037 /* VPCMPGTD 
= VEX.NDS.128.66.0F.WIG 66 /r */ 25038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25039 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25040 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 ); 25041 goto decode_success; 25042 } 25043 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 25044 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */ 25045 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25046 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25047 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 ); 25048 goto decode_success; 25049 } 25050 break; 25051 25052 case 0x67: 25053 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25054 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */ 25055 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25056 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25057 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25058 Iop_QNarrowBin16Sto8Ux16, NULL, 25059 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25060 goto decode_success; 25061 } 25062 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25063 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */ 25064 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25065 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25066 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25067 math_VPACKUSWB_YMM ); 25068 goto decode_success; 25069 } 25070 break; 25071 25072 case 0x68: 25073 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25074 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */ 25075 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25076 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25077 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25078 Iop_InterleaveHI8x16, NULL, 25079 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25080 goto decode_success; 25081 } 25082 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25083 /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */ 25084 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25085 delta = 
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25086 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25087 math_VPUNPCKHBW_YMM ); 25088 goto decode_success; 25089 } 25090 break; 25091 25092 case 0x69: 25093 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25094 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */ 25095 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25096 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25097 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25098 Iop_InterleaveHI16x8, NULL, 25099 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25100 goto decode_success; 25101 } 25102 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25103 /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */ 25104 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25105 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25106 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25107 math_VPUNPCKHWD_YMM ); 25108 goto decode_success; 25109 } 25110 break; 25111 25112 case 0x6A: 25113 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25114 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */ 25115 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25116 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25117 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25118 Iop_InterleaveHI32x4, NULL, 25119 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25120 goto decode_success; 25121 } 25122 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25123 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */ 25124 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25125 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25126 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25127 math_VPUNPCKHDQ_YMM ); 25128 goto decode_success; 25129 } 25130 break; 25131 25132 case 0x6B: 25133 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25134 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */ 25135 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25136 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25137 
uses_vvvv, vbi, pfx, delta, "vpackssdw", 25138 Iop_QNarrowBin32Sto16Sx8, NULL, 25139 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25140 goto decode_success; 25141 } 25142 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25143 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */ 25144 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25145 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25146 uses_vvvv, vbi, pfx, delta, "vpackssdw", 25147 math_VPACKSSDW_YMM ); 25148 goto decode_success; 25149 } 25150 break; 25151 25152 case 0x6C: 25153 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 25154 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */ 25155 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25156 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25157 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25158 Iop_InterleaveLO64x2, NULL, 25159 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25160 goto decode_success; 25161 } 25162 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 25163 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */ 25164 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25165 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25166 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25167 math_VPUNPCKLQDQ_YMM ); 25168 goto decode_success; 25169 } 25170 break; 25171 25172 case 0x6D: 25173 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25174 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */ 25175 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25176 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25177 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25178 Iop_InterleaveHI64x2, NULL, 25179 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25180 goto decode_success; 25181 } 25182 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25183 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */ 25184 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25185 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25186 
uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25187 math_VPUNPCKHQDQ_YMM ); 25188 goto decode_success; 25189 } 25190 break; 25191 25192 case 0x6E: 25193 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */ 25194 if (have66noF2noF3(pfx) 25195 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25196 vassert(sz == 2); /* even tho we are transferring 4, not 2. */ 25197 UChar modrm = getUChar(delta); 25198 if (epartIsReg(modrm)) { 25199 delta += 1; 25200 putYMMRegLoAndZU( 25201 gregOfRexRM(pfx,modrm), 25202 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 25203 ); 25204 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 25205 nameXMMReg(gregOfRexRM(pfx,modrm))); 25206 } else { 25207 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25208 delta += alen; 25209 putYMMRegLoAndZU( 25210 gregOfRexRM(pfx,modrm), 25211 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr))) 25212 ); 25213 DIP("vmovd %s, %s\n", dis_buf, 25214 nameXMMReg(gregOfRexRM(pfx,modrm))); 25215 } 25216 goto decode_success; 25217 } 25218 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */ 25219 if (have66noF2noF3(pfx) 25220 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25221 vassert(sz == 2); /* even tho we are transferring 8, not 2. 
*/ 25222 UChar modrm = getUChar(delta); 25223 if (epartIsReg(modrm)) { 25224 delta += 1; 25225 putYMMRegLoAndZU( 25226 gregOfRexRM(pfx,modrm), 25227 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 25228 ); 25229 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 25230 nameXMMReg(gregOfRexRM(pfx,modrm))); 25231 } else { 25232 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25233 delta += alen; 25234 putYMMRegLoAndZU( 25235 gregOfRexRM(pfx,modrm), 25236 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr))) 25237 ); 25238 DIP("vmovq %s, %s\n", dis_buf, 25239 nameXMMReg(gregOfRexRM(pfx,modrm))); 25240 } 25241 goto decode_success; 25242 } 25243 break; 25244 25245 case 0x6F: 25246 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */ 25247 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */ 25248 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25249 && 1==getVexL(pfx)/*256*/) { 25250 UChar modrm = getUChar(delta); 25251 UInt rD = gregOfRexRM(pfx, modrm); 25252 IRTemp tD = newTemp(Ity_V256); 25253 Bool isA = have66noF2noF3(pfx); 25254 HChar ch = isA ? 
'a' : 'u'; 25255 if (epartIsReg(modrm)) { 25256 UInt rS = eregOfRexRM(pfx, modrm); 25257 delta += 1; 25258 assign(tD, getYMMReg(rS)); 25259 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 25260 } else { 25261 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25262 delta += alen; 25263 if (isA) 25264 gen_SEGV_if_not_32_aligned(addr); 25265 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 25266 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD)); 25267 } 25268 putYMMReg(rD, mkexpr(tD)); 25269 goto decode_success; 25270 } 25271 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */ 25272 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */ 25273 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25274 && 0==getVexL(pfx)/*128*/) { 25275 UChar modrm = getUChar(delta); 25276 UInt rD = gregOfRexRM(pfx, modrm); 25277 IRTemp tD = newTemp(Ity_V128); 25278 Bool isA = have66noF2noF3(pfx); 25279 HChar ch = isA ? 'a' : 'u'; 25280 if (epartIsReg(modrm)) { 25281 UInt rS = eregOfRexRM(pfx, modrm); 25282 delta += 1; 25283 assign(tD, getXMMReg(rS)); 25284 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 25285 } else { 25286 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25287 delta += alen; 25288 if (isA) 25289 gen_SEGV_if_not_16_aligned(addr); 25290 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 25291 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD)); 25292 } 25293 putYMMRegLoAndZU(rD, mkexpr(tD)); 25294 goto decode_success; 25295 } 25296 break; 25297 25298 case 0x70: 25299 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */ 25300 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25301 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/); 25302 goto decode_success; 25303 } 25304 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */ 25305 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25306 delta = dis_PSHUFD_32x8( vbi, pfx, delta); 25307 goto decode_success; 25308 } 25309 /* VPSHUFLW imm8, xmm2/m128, xmm1 = 
VEX.128.F2.0F.WIG 70 /r ib */ 25310 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25311 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25312 True/*isAvx*/, False/*!xIsH*/ ); 25313 goto decode_success; 25314 } 25315 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */ 25316 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25317 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ ); 25318 goto decode_success; 25319 } 25320 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */ 25321 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 25322 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25323 True/*isAvx*/, True/*xIsH*/ ); 25324 goto decode_success; 25325 } 25326 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */ 25327 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 25328 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ ); 25329 goto decode_success; 25330 } 25331 break; 25332 25333 case 0x71: 25334 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */ 25335 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */ 25336 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */ 25337 if (have66noF2noF3(pfx) 25338 && 0==getVexL(pfx)/*128*/ 25339 && epartIsReg(getUChar(delta))) { 25340 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25341 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25342 "vpsrlw", Iop_ShrN16x8 ); 25343 *uses_vvvv = True; 25344 goto decode_success; 25345 } 25346 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25347 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25348 "vpsraw", Iop_SarN16x8 ); 25349 *uses_vvvv = True; 25350 goto decode_success; 25351 } 25352 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25353 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25354 "vpsllw", Iop_ShlN16x8 ); 25355 *uses_vvvv = True; 25356 goto decode_success; 25357 } 25358 /* else fall through */ 25359 } 25360 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */ 25361 /* VPSRAW imm8, 
ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */ 25362 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */ 25363 if (have66noF2noF3(pfx) 25364 && 1==getVexL(pfx)/*256*/ 25365 && epartIsReg(getUChar(delta))) { 25366 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25367 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25368 "vpsrlw", Iop_ShrN16x16 ); 25369 *uses_vvvv = True; 25370 goto decode_success; 25371 } 25372 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25373 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25374 "vpsraw", Iop_SarN16x16 ); 25375 *uses_vvvv = True; 25376 goto decode_success; 25377 } 25378 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25379 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25380 "vpsllw", Iop_ShlN16x16 ); 25381 *uses_vvvv = True; 25382 goto decode_success; 25383 } 25384 /* else fall through */ 25385 } 25386 break; 25387 25388 case 0x72: 25389 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */ 25390 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */ 25391 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */ 25392 if (have66noF2noF3(pfx) 25393 && 0==getVexL(pfx)/*128*/ 25394 && epartIsReg(getUChar(delta))) { 25395 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25396 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25397 "vpsrld", Iop_ShrN32x4 ); 25398 *uses_vvvv = True; 25399 goto decode_success; 25400 } 25401 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25402 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25403 "vpsrad", Iop_SarN32x4 ); 25404 *uses_vvvv = True; 25405 goto decode_success; 25406 } 25407 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25408 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25409 "vpslld", Iop_ShlN32x4 ); 25410 *uses_vvvv = True; 25411 goto decode_success; 25412 } 25413 /* else fall through */ 25414 } 25415 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */ 25416 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */ 25417 /* VPSLLD 
imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */ 25418 if (have66noF2noF3(pfx) 25419 && 1==getVexL(pfx)/*256*/ 25420 && epartIsReg(getUChar(delta))) { 25421 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25422 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25423 "vpsrld", Iop_ShrN32x8 ); 25424 *uses_vvvv = True; 25425 goto decode_success; 25426 } 25427 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25428 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25429 "vpsrad", Iop_SarN32x8 ); 25430 *uses_vvvv = True; 25431 goto decode_success; 25432 } 25433 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25434 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25435 "vpslld", Iop_ShlN32x8 ); 25436 *uses_vvvv = True; 25437 goto decode_success; 25438 } 25439 /* else fall through */ 25440 } 25441 break; 25442 25443 case 0x73: 25444 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */ 25445 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */ 25446 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */ 25447 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */ 25448 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 25449 && epartIsReg(getUChar(delta))) { 25450 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25451 Int rD = getVexNvvvv(pfx); 25452 IRTemp vecS = newTemp(Ity_V128); 25453 if (gregLO3ofRM(getUChar(delta)) == 3) { 25454 Int imm = (Int)getUChar(delta+1); 25455 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25456 delta += 2; 25457 assign( vecS, getXMMReg(rS) ); 25458 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm ))); 25459 *uses_vvvv = True; 25460 goto decode_success; 25461 } 25462 if (gregLO3ofRM(getUChar(delta)) == 7) { 25463 Int imm = (Int)getUChar(delta+1); 25464 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25465 delta += 2; 25466 assign( vecS, getXMMReg(rS) ); 25467 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm ))); 25468 *uses_vvvv = True; 25469 goto decode_success; 25470 } 25471 
if (gregLO3ofRM(getUChar(delta)) == 2) { 25472 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25473 "vpsrlq", Iop_ShrN64x2 ); 25474 *uses_vvvv = True; 25475 goto decode_success; 25476 } 25477 if (gregLO3ofRM(getUChar(delta)) == 6) { 25478 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25479 "vpsllq", Iop_ShlN64x2 ); 25480 *uses_vvvv = True; 25481 goto decode_success; 25482 } 25483 /* else fall through */ 25484 } 25485 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */ 25486 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */ 25487 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */ 25488 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */ 25489 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 25490 && epartIsReg(getUChar(delta))) { 25491 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25492 Int rD = getVexNvvvv(pfx); 25493 if (gregLO3ofRM(getUChar(delta)) == 3) { 25494 IRTemp vecS0 = newTemp(Ity_V128); 25495 IRTemp vecS1 = newTemp(Ity_V128); 25496 Int imm = (Int)getUChar(delta+1); 25497 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25498 delta += 2; 25499 assign( vecS0, getYMMRegLane128(rS, 0)); 25500 assign( vecS1, getYMMRegLane128(rS, 1)); 25501 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm ))); 25502 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm ))); 25503 *uses_vvvv = True; 25504 goto decode_success; 25505 } 25506 if (gregLO3ofRM(getUChar(delta)) == 7) { 25507 IRTemp vecS0 = newTemp(Ity_V128); 25508 IRTemp vecS1 = newTemp(Ity_V128); 25509 Int imm = (Int)getUChar(delta+1); 25510 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25511 delta += 2; 25512 assign( vecS0, getYMMRegLane128(rS, 0)); 25513 assign( vecS1, getYMMRegLane128(rS, 1)); 25514 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm ))); 25515 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm ))); 25516 *uses_vvvv = True; 25517 goto decode_success; 25518 } 25519 if (gregLO3ofRM(getUChar(delta)) == 
2) { 25520 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25521 "vpsrlq", Iop_ShrN64x4 ); 25522 *uses_vvvv = True; 25523 goto decode_success; 25524 } 25525 if (gregLO3ofRM(getUChar(delta)) == 6) { 25526 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25527 "vpsllq", Iop_ShlN64x4 ); 25528 *uses_vvvv = True; 25529 goto decode_success; 25530 } 25531 /* else fall through */ 25532 } 25533 break; 25534 25535 case 0x74: 25536 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 25537 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */ 25538 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25539 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25540 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 ); 25541 goto decode_success; 25542 } 25543 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 25544 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */ 25545 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25546 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25547 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 ); 25548 goto decode_success; 25549 } 25550 break; 25551 25552 case 0x75: 25553 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 25554 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */ 25555 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25556 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25557 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 ); 25558 goto decode_success; 25559 } 25560 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 25561 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */ 25562 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25563 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25564 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 ); 25565 goto decode_success; 25566 } 25567 break; 25568 25569 case 0x76: 25570 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 25571 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */ 25572 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25573 delta = 
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25574 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 ); 25575 goto decode_success; 25576 } 25577 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 25578 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */ 25579 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25580 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25581 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 ); 25582 goto decode_success; 25583 } 25584 break; 25585 25586 case 0x77: 25587 /* VZEROUPPER = VEX.128.0F.WIG 77 */ 25588 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25589 Int i; 25590 IRTemp zero128 = newTemp(Ity_V128); 25591 assign(zero128, mkV128(0)); 25592 for (i = 0; i < 16; i++) { 25593 putYMMRegLane128(i, 1, mkexpr(zero128)); 25594 } 25595 DIP("vzeroupper\n"); 25596 goto decode_success; 25597 } 25598 /* VZEROALL = VEX.256.0F.WIG 77 */ 25599 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25600 Int i; 25601 IRTemp zero128 = newTemp(Ity_V128); 25602 assign(zero128, mkV128(0)); 25603 for (i = 0; i < 16; i++) { 25604 putYMMRegLoAndZU(i, mkexpr(zero128)); 25605 } 25606 DIP("vzeroall\n"); 25607 goto decode_success; 25608 } 25609 break; 25610 25611 case 0x7C: 25612 case 0x7D: 25613 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */ 25614 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */ 25615 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25616 IRTemp sV = newTemp(Ity_V128); 25617 IRTemp dV = newTemp(Ity_V128); 25618 Bool isAdd = opc == 0x7C; 25619 const HChar* str = isAdd ? 
"add" : "sub"; 25620 UChar modrm = getUChar(delta); 25621 UInt rG = gregOfRexRM(pfx,modrm); 25622 UInt rV = getVexNvvvv(pfx); 25623 if (epartIsReg(modrm)) { 25624 UInt rE = eregOfRexRM(pfx,modrm); 25625 assign( sV, getXMMReg(rE) ); 25626 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 25627 nameXMMReg(rV), nameXMMReg(rG)); 25628 delta += 1; 25629 } else { 25630 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25631 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 25632 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25633 nameXMMReg(rV), nameXMMReg(rG)); 25634 delta += alen; 25635 } 25636 assign( dV, getXMMReg(rV) ); 25637 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) ); 25638 *uses_vvvv = True; 25639 goto decode_success; 25640 } 25641 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */ 25642 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */ 25643 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25644 IRTemp sV = newTemp(Ity_V256); 25645 IRTemp dV = newTemp(Ity_V256); 25646 IRTemp s1, s0, d1, d0; 25647 Bool isAdd = opc == 0x7C; 25648 const HChar* str = isAdd ? 
"add" : "sub"; 25649 UChar modrm = getUChar(delta); 25650 UInt rG = gregOfRexRM(pfx,modrm); 25651 UInt rV = getVexNvvvv(pfx); 25652 s1 = s0 = d1 = d0 = IRTemp_INVALID; 25653 if (epartIsReg(modrm)) { 25654 UInt rE = eregOfRexRM(pfx,modrm); 25655 assign( sV, getYMMReg(rE) ); 25656 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 25657 nameYMMReg(rV), nameYMMReg(rG)); 25658 delta += 1; 25659 } else { 25660 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25661 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 25662 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25663 nameYMMReg(rV), nameYMMReg(rG)); 25664 delta += alen; 25665 } 25666 assign( dV, getYMMReg(rV) ); 25667 breakupV256toV128s( dV, &d1, &d0 ); 25668 breakupV256toV128s( sV, &s1, &s0 ); 25669 putYMMReg( rG, binop(Iop_V128HLtoV256, 25670 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ), 25671 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) ); 25672 *uses_vvvv = True; 25673 goto decode_success; 25674 } 25675 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */ 25676 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */ 25677 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25678 IRTemp sV = newTemp(Ity_V128); 25679 IRTemp dV = newTemp(Ity_V128); 25680 Bool isAdd = opc == 0x7C; 25681 const HChar* str = isAdd ? 
"add" : "sub"; 25682 UChar modrm = getUChar(delta); 25683 UInt rG = gregOfRexRM(pfx,modrm); 25684 UInt rV = getVexNvvvv(pfx); 25685 if (epartIsReg(modrm)) { 25686 UInt rE = eregOfRexRM(pfx,modrm); 25687 assign( sV, getXMMReg(rE) ); 25688 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 25689 nameXMMReg(rV), nameXMMReg(rG)); 25690 delta += 1; 25691 } else { 25692 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25693 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 25694 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25695 nameXMMReg(rV), nameXMMReg(rG)); 25696 delta += alen; 25697 } 25698 assign( dV, getXMMReg(rV) ); 25699 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) ); 25700 *uses_vvvv = True; 25701 goto decode_success; 25702 } 25703 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */ 25704 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */ 25705 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25706 IRTemp sV = newTemp(Ity_V256); 25707 IRTemp dV = newTemp(Ity_V256); 25708 IRTemp s1, s0, d1, d0; 25709 Bool isAdd = opc == 0x7C; 25710 const HChar* str = isAdd ? 
"add" : "sub"; 25711 UChar modrm = getUChar(delta); 25712 UInt rG = gregOfRexRM(pfx,modrm); 25713 UInt rV = getVexNvvvv(pfx); 25714 s1 = s0 = d1 = d0 = IRTemp_INVALID; 25715 if (epartIsReg(modrm)) { 25716 UInt rE = eregOfRexRM(pfx,modrm); 25717 assign( sV, getYMMReg(rE) ); 25718 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 25719 nameYMMReg(rV), nameYMMReg(rG)); 25720 delta += 1; 25721 } else { 25722 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25723 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 25724 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25725 nameYMMReg(rV), nameYMMReg(rG)); 25726 delta += alen; 25727 } 25728 assign( dV, getYMMReg(rV) ); 25729 breakupV256toV128s( dV, &d1, &d0 ); 25730 breakupV256toV128s( sV, &s1, &s0 ); 25731 putYMMReg( rG, binop(Iop_V128HLtoV256, 25732 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ), 25733 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) ); 25734 *uses_vvvv = True; 25735 goto decode_success; 25736 } 25737 break; 25738 25739 case 0x7E: 25740 /* Note the Intel docs don't make sense for this. I think they 25741 are wrong. They seem to imply it is a store when in fact I 25742 think it is a load. Also it's unclear whether this is W0, W1 25743 or WIG. */ 25744 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */ 25745 if (haveF3no66noF2(pfx) 25746 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25747 vassert(sz == 4); /* even tho we are transferring 8, not 4. 
*/ 25748 UChar modrm = getUChar(delta); 25749 UInt rG = gregOfRexRM(pfx,modrm); 25750 if (epartIsReg(modrm)) { 25751 UInt rE = eregOfRexRM(pfx,modrm); 25752 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 )); 25753 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 25754 delta += 1; 25755 } else { 25756 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25757 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 25758 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 25759 delta += alen; 25760 } 25761 /* zero bits 255:64 */ 25762 putXMMRegLane64( rG, 1, mkU64(0) ); 25763 putYMMRegLane128( rG, 1, mkV128(0) ); 25764 goto decode_success; 25765 } 25766 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */ 25767 /* Moves from G to E, so is a store-form insn */ 25768 /* Intel docs list this in the VMOVD entry for some reason. */ 25769 if (have66noF2noF3(pfx) 25770 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25771 UChar modrm = getUChar(delta); 25772 UInt rG = gregOfRexRM(pfx,modrm); 25773 if (epartIsReg(modrm)) { 25774 UInt rE = eregOfRexRM(pfx,modrm); 25775 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE)); 25776 putIReg64(rE, getXMMRegLane64(rG, 0)); 25777 delta += 1; 25778 } else { 25779 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25780 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) ); 25781 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 25782 delta += alen; 25783 } 25784 goto decode_success; 25785 } 25786 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */ 25787 /* Moves from G to E, so is a store-form insn */ 25788 if (have66noF2noF3(pfx) 25789 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25790 UChar modrm = getUChar(delta); 25791 UInt rG = gregOfRexRM(pfx,modrm); 25792 if (epartIsReg(modrm)) { 25793 UInt rE = eregOfRexRM(pfx,modrm); 25794 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE)); 25795 putIReg32(rE, getXMMRegLane32(rG, 0)); 25796 delta += 1; 25797 } else { 25798 addr = disAMode ( &alen, 
vbi, pfx, delta, dis_buf, 0 ); 25799 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) ); 25800 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG)); 25801 delta += alen; 25802 } 25803 goto decode_success; 25804 } 25805 break; 25806 25807 case 0x7F: 25808 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */ 25809 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */ 25810 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25811 && 1==getVexL(pfx)/*256*/) { 25812 UChar modrm = getUChar(delta); 25813 UInt rS = gregOfRexRM(pfx, modrm); 25814 IRTemp tS = newTemp(Ity_V256); 25815 Bool isA = have66noF2noF3(pfx); 25816 HChar ch = isA ? 'a' : 'u'; 25817 assign(tS, getYMMReg(rS)); 25818 if (epartIsReg(modrm)) { 25819 UInt rD = eregOfRexRM(pfx, modrm); 25820 delta += 1; 25821 putYMMReg(rD, mkexpr(tS)); 25822 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 25823 } else { 25824 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25825 delta += alen; 25826 if (isA) 25827 gen_SEGV_if_not_32_aligned(addr); 25828 storeLE(mkexpr(addr), mkexpr(tS)); 25829 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf); 25830 } 25831 goto decode_success; 25832 } 25833 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */ 25834 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */ 25835 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25836 && 0==getVexL(pfx)/*128*/) { 25837 UChar modrm = getUChar(delta); 25838 UInt rS = gregOfRexRM(pfx, modrm); 25839 IRTemp tS = newTemp(Ity_V128); 25840 Bool isA = have66noF2noF3(pfx); 25841 HChar ch = isA ? 
'a' : 'u'; 25842 assign(tS, getXMMReg(rS)); 25843 if (epartIsReg(modrm)) { 25844 UInt rD = eregOfRexRM(pfx, modrm); 25845 delta += 1; 25846 putYMMRegLoAndZU(rD, mkexpr(tS)); 25847 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 25848 } else { 25849 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25850 delta += alen; 25851 if (isA) 25852 gen_SEGV_if_not_16_aligned(addr); 25853 storeLE(mkexpr(addr), mkexpr(tS)); 25854 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf); 25855 } 25856 goto decode_success; 25857 } 25858 break; 25859 25860 case 0xAE: 25861 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */ 25862 if (haveNo66noF2noF3(pfx) 25863 && 0==getVexL(pfx)/*LZ*/ 25864 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 25865 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 25866 && sz == 4) { 25867 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/); 25868 goto decode_success; 25869 } 25870 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */ 25871 if (haveNo66noF2noF3(pfx) 25872 && 0==getVexL(pfx)/*LZ*/ 25873 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 25874 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 25875 && sz == 4) { 25876 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/); 25877 goto decode_success; 25878 } 25879 break; 25880 25881 case 0xC2: 25882 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */ 25883 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */ 25884 if (haveF2no66noF3(pfx)) { 25885 Long delta0 = delta; 25886 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25887 "vcmpsd", False/*!all_lanes*/, 25888 8/*sz*/); 25889 if (delta > delta0) goto decode_success; 25890 /* else fall through -- decoding has failed */ 25891 } 25892 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */ 25893 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */ 25894 if (haveF3no66noF2(pfx)) { 25895 Long delta0 = delta; 25896 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25897 "vcmpss", 
False/*!all_lanes*/, 25898 4/*sz*/); 25899 if (delta > delta0) goto decode_success; 25900 /* else fall through -- decoding has failed */ 25901 } 25902 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 25903 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */ 25904 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25905 Long delta0 = delta; 25906 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25907 "vcmppd", True/*all_lanes*/, 25908 8/*sz*/); 25909 if (delta > delta0) goto decode_success; 25910 /* else fall through -- decoding has failed */ 25911 } 25912 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 25913 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */ 25914 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25915 Long delta0 = delta; 25916 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25917 "vcmppd", 8/*sz*/); 25918 if (delta > delta0) goto decode_success; 25919 /* else fall through -- decoding has failed */ 25920 } 25921 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 25922 /* = VEX.NDS.128.0F.WIG C2 /r ib */ 25923 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25924 Long delta0 = delta; 25925 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25926 "vcmpps", True/*all_lanes*/, 25927 4/*sz*/); 25928 if (delta > delta0) goto decode_success; 25929 /* else fall through -- decoding has failed */ 25930 } 25931 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 25932 /* = VEX.NDS.256.0F.WIG C2 /r ib */ 25933 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25934 Long delta0 = delta; 25935 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25936 "vcmpps", 4/*sz*/); 25937 if (delta > delta0) goto decode_success; 25938 /* else fall through -- decoding has failed */ 25939 } 25940 break; 25941 25942 case 0xC4: 25943 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */ 25944 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25945 UChar modrm = getUChar(delta); 25946 UInt rG = gregOfRexRM(pfx, 
modrm); 25947 UInt rV = getVexNvvvv(pfx); 25948 Int imm8; 25949 IRTemp new16 = newTemp(Ity_I16); 25950 25951 if ( epartIsReg( modrm ) ) { 25952 imm8 = (Int)(getUChar(delta+1) & 7); 25953 assign( new16, unop(Iop_32to16, 25954 getIReg32(eregOfRexRM(pfx,modrm))) ); 25955 delta += 1+1; 25956 DIP( "vpinsrw $%d,%s,%s\n", imm8, 25957 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) ); 25958 } else { 25959 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25960 imm8 = (Int)(getUChar(delta+alen) & 7); 25961 assign( new16, loadLE( Ity_I16, mkexpr(addr) )); 25962 delta += alen+1; 25963 DIP( "vpinsrw $%d,%s,%s\n", 25964 imm8, dis_buf, nameXMMReg(rG) ); 25965 } 25966 25967 IRTemp src_vec = newTemp(Ity_V128); 25968 assign(src_vec, getXMMReg( rV )); 25969 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 ); 25970 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 25971 *uses_vvvv = True; 25972 goto decode_success; 25973 } 25974 break; 25975 25976 case 0xC5: 25977 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */ 25978 if (have66noF2noF3(pfx) 25979 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25980 Long delta0 = delta; 25981 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 25982 True/*isAvx*/ ); 25983 if (delta > delta0) goto decode_success; 25984 /* else fall through -- decoding has failed */ 25985 } 25986 break; 25987 25988 case 0xC6: 25989 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 25990 /* = VEX.NDS.128.0F.WIG C6 /r ib */ 25991 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25992 Int imm8 = 0; 25993 IRTemp eV = newTemp(Ity_V128); 25994 IRTemp vV = newTemp(Ity_V128); 25995 UInt modrm = getUChar(delta); 25996 UInt rG = gregOfRexRM(pfx,modrm); 25997 UInt rV = getVexNvvvv(pfx); 25998 assign( vV, getXMMReg(rV) ); 25999 if (epartIsReg(modrm)) { 26000 UInt rE = eregOfRexRM(pfx,modrm); 26001 assign( eV, getXMMReg(rE) ); 26002 imm8 = (Int)getUChar(delta+1); 26003 delta += 1+1; 26004 DIP("vshufps $%d,%s,%s,%s\n", 26005 imm8, nameXMMReg(rE), 
nameXMMReg(rV), nameXMMReg(rG)); 26006 } else { 26007 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26008 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 26009 imm8 = (Int)getUChar(delta+alen); 26010 delta += 1+alen; 26011 DIP("vshufps $%d,%s,%s,%s\n", 26012 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 26013 } 26014 IRTemp res = math_SHUFPS_128( eV, vV, imm8 ); 26015 putYMMRegLoAndZU( rG, mkexpr(res) ); 26016 *uses_vvvv = True; 26017 goto decode_success; 26018 } 26019 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 26020 /* = VEX.NDS.256.0F.WIG C6 /r ib */ 26021 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26022 Int imm8 = 0; 26023 IRTemp eV = newTemp(Ity_V256); 26024 IRTemp vV = newTemp(Ity_V256); 26025 UInt modrm = getUChar(delta); 26026 UInt rG = gregOfRexRM(pfx,modrm); 26027 UInt rV = getVexNvvvv(pfx); 26028 assign( vV, getYMMReg(rV) ); 26029 if (epartIsReg(modrm)) { 26030 UInt rE = eregOfRexRM(pfx,modrm); 26031 assign( eV, getYMMReg(rE) ); 26032 imm8 = (Int)getUChar(delta+1); 26033 delta += 1+1; 26034 DIP("vshufps $%d,%s,%s,%s\n", 26035 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 26036 } else { 26037 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26038 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 26039 imm8 = (Int)getUChar(delta+alen); 26040 delta += 1+alen; 26041 DIP("vshufps $%d,%s,%s,%s\n", 26042 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 26043 } 26044 IRTemp res = math_SHUFPS_256( eV, vV, imm8 ); 26045 putYMMReg( rG, mkexpr(res) ); 26046 *uses_vvvv = True; 26047 goto decode_success; 26048 } 26049 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 26050 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */ 26051 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26052 Int imm8 = 0; 26053 IRTemp eV = newTemp(Ity_V128); 26054 IRTemp vV = newTemp(Ity_V128); 26055 UInt modrm = getUChar(delta); 26056 UInt rG = gregOfRexRM(pfx,modrm); 26057 UInt rV = getVexNvvvv(pfx); 26058 assign( vV, getXMMReg(rV) ); 26059 if (epartIsReg(modrm)) { 
26060 UInt rE = eregOfRexRM(pfx,modrm); 26061 assign( eV, getXMMReg(rE) ); 26062 imm8 = (Int)getUChar(delta+1); 26063 delta += 1+1; 26064 DIP("vshufpd $%d,%s,%s,%s\n", 26065 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 26066 } else { 26067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26068 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 26069 imm8 = (Int)getUChar(delta+alen); 26070 delta += 1+alen; 26071 DIP("vshufpd $%d,%s,%s,%s\n", 26072 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 26073 } 26074 IRTemp res = math_SHUFPD_128( eV, vV, imm8 ); 26075 putYMMRegLoAndZU( rG, mkexpr(res) ); 26076 *uses_vvvv = True; 26077 goto decode_success; 26078 } 26079 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 26080 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */ 26081 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26082 Int imm8 = 0; 26083 IRTemp eV = newTemp(Ity_V256); 26084 IRTemp vV = newTemp(Ity_V256); 26085 UInt modrm = getUChar(delta); 26086 UInt rG = gregOfRexRM(pfx,modrm); 26087 UInt rV = getVexNvvvv(pfx); 26088 assign( vV, getYMMReg(rV) ); 26089 if (epartIsReg(modrm)) { 26090 UInt rE = eregOfRexRM(pfx,modrm); 26091 assign( eV, getYMMReg(rE) ); 26092 imm8 = (Int)getUChar(delta+1); 26093 delta += 1+1; 26094 DIP("vshufpd $%d,%s,%s,%s\n", 26095 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 26096 } else { 26097 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26098 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 26099 imm8 = (Int)getUChar(delta+alen); 26100 delta += 1+alen; 26101 DIP("vshufpd $%d,%s,%s,%s\n", 26102 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 26103 } 26104 IRTemp res = math_SHUFPD_256( eV, vV, imm8 ); 26105 putYMMReg( rG, mkexpr(res) ); 26106 *uses_vvvv = True; 26107 goto decode_success; 26108 } 26109 break; 26110 26111 case 0xD0: 26112 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */ 26113 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26114 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 
26115 uses_vvvv, vbi, pfx, delta, 26116 "vaddsubpd", math_ADDSUBPD_128 ); 26117 goto decode_success; 26118 } 26119 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */ 26120 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26121 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26122 uses_vvvv, vbi, pfx, delta, 26123 "vaddsubpd", math_ADDSUBPD_256 ); 26124 goto decode_success; 26125 } 26126 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */ 26127 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26128 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26129 uses_vvvv, vbi, pfx, delta, 26130 "vaddsubps", math_ADDSUBPS_128 ); 26131 goto decode_success; 26132 } 26133 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */ 26134 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26135 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26136 uses_vvvv, vbi, pfx, delta, 26137 "vaddsubps", math_ADDSUBPS_256 ); 26138 goto decode_success; 26139 } 26140 break; 26141 26142 case 0xD1: 26143 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */ 26144 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26145 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26146 "vpsrlw", Iop_ShrN16x8 ); 26147 *uses_vvvv = True; 26148 goto decode_success; 26149 26150 } 26151 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */ 26152 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26153 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26154 "vpsrlw", Iop_ShrN16x16 ); 26155 *uses_vvvv = True; 26156 goto decode_success; 26157 26158 } 26159 break; 26160 26161 case 0xD2: 26162 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */ 26163 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26164 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26165 "vpsrld", Iop_ShrN32x4 ); 26166 *uses_vvvv = True; 26167 goto decode_success; 26168 } 26169 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */ 26170 if 
(have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26171 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26172 "vpsrld", Iop_ShrN32x8 ); 26173 *uses_vvvv = True; 26174 goto decode_success; 26175 } 26176 break; 26177 26178 case 0xD3: 26179 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */ 26180 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26181 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26182 "vpsrlq", Iop_ShrN64x2 ); 26183 *uses_vvvv = True; 26184 goto decode_success; 26185 } 26186 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */ 26187 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26188 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26189 "vpsrlq", Iop_ShrN64x4 ); 26190 *uses_vvvv = True; 26191 goto decode_success; 26192 } 26193 break; 26194 26195 case 0xD4: 26196 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26197 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */ 26198 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26199 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26200 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 ); 26201 goto decode_success; 26202 } 26203 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26204 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */ 26205 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26206 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26207 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 ); 26208 goto decode_success; 26209 } 26210 break; 26211 26212 case 0xD5: 26213 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */ 26214 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26215 delta = dis_AVX128_E_V_to_G( 26216 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 ); 26217 goto decode_success; 26218 } 26219 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */ 26220 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26221 delta = dis_AVX256_E_V_to_G( 26222 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 ); 26223 goto decode_success; 26224 } 26225 
break; 26226 26227 case 0xD6: 26228 /* I can't even find any Intel docs for this one. */ 26229 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half 26230 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0 26231 (WIG, maybe?) */ 26232 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 26233 && 0==getRexW(pfx)/*this might be redundant, dunno*/) { 26234 UChar modrm = getUChar(delta); 26235 UInt rG = gregOfRexRM(pfx,modrm); 26236 if (epartIsReg(modrm)) { 26237 /* fall through, awaiting test case */ 26238 /* dst: lo half copied, hi half zeroed */ 26239 } else { 26240 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26241 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 )); 26242 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf ); 26243 delta += alen; 26244 goto decode_success; 26245 } 26246 } 26247 break; 26248 26249 case 0xD7: 26250 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */ 26251 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26252 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ ); 26253 goto decode_success; 26254 } 26255 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */ 26256 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26257 delta = dis_PMOVMSKB_256( vbi, pfx, delta ); 26258 goto decode_success; 26259 } 26260 break; 26261 26262 case 0xD8: 26263 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */ 26264 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26265 delta = dis_AVX128_E_V_to_G( 26266 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 ); 26267 goto decode_success; 26268 } 26269 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */ 26270 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26271 delta = dis_AVX256_E_V_to_G( 26272 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 ); 26273 goto decode_success; 26274 } 26275 break; 26276 26277 case 0xD9: 26278 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */ 26279 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 26280 delta = dis_AVX128_E_V_to_G( 26281 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 ); 26282 goto decode_success; 26283 } 26284 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */ 26285 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26286 delta = dis_AVX256_E_V_to_G( 26287 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 ); 26288 goto decode_success; 26289 } 26290 break; 26291 26292 case 0xDA: 26293 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */ 26294 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26295 delta = dis_AVX128_E_V_to_G( 26296 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 ); 26297 goto decode_success; 26298 } 26299 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */ 26300 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26301 delta = dis_AVX256_E_V_to_G( 26302 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 ); 26303 goto decode_success; 26304 } 26305 break; 26306 26307 case 0xDB: 26308 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26309 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */ 26310 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26311 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26312 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 ); 26313 goto decode_success; 26314 } 26315 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26316 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */ 26317 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26318 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26319 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 ); 26320 goto decode_success; 26321 } 26322 break; 26323 26324 case 0xDC: 26325 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */ 26326 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26327 delta = dis_AVX128_E_V_to_G( 26328 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 ); 26329 goto decode_success; 26330 } 26331 /* VPADDUSB ymm3/m256, 
ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */ 26332 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26333 delta = dis_AVX256_E_V_to_G( 26334 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 ); 26335 goto decode_success; 26336 } 26337 break; 26338 26339 case 0xDD: 26340 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */ 26341 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26342 delta = dis_AVX128_E_V_to_G( 26343 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 ); 26344 goto decode_success; 26345 } 26346 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */ 26347 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26348 delta = dis_AVX256_E_V_to_G( 26349 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 ); 26350 goto decode_success; 26351 } 26352 break; 26353 26354 case 0xDE: 26355 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */ 26356 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26357 delta = dis_AVX128_E_V_to_G( 26358 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 ); 26359 goto decode_success; 26360 } 26361 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */ 26362 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26363 delta = dis_AVX256_E_V_to_G( 26364 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 ); 26365 goto decode_success; 26366 } 26367 break; 26368 26369 case 0xDF: 26370 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */ 26371 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */ 26372 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26373 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 26374 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128, 26375 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26376 goto decode_success; 26377 } 26378 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) 
*/ 26379 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */ 26380 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26381 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 26382 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256, 26383 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26384 goto decode_success; 26385 } 26386 break; 26387 26388 case 0xE0: 26389 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */ 26390 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26391 delta = dis_AVX128_E_V_to_G( 26392 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 ); 26393 goto decode_success; 26394 } 26395 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */ 26396 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26397 delta = dis_AVX256_E_V_to_G( 26398 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 ); 26399 goto decode_success; 26400 } 26401 break; 26402 26403 case 0xE1: 26404 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */ 26405 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26406 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26407 "vpsraw", Iop_SarN16x8 ); 26408 *uses_vvvv = True; 26409 goto decode_success; 26410 } 26411 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */ 26412 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26413 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26414 "vpsraw", Iop_SarN16x16 ); 26415 *uses_vvvv = True; 26416 goto decode_success; 26417 } 26418 break; 26419 26420 case 0xE2: 26421 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */ 26422 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26423 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26424 "vpsrad", Iop_SarN32x4 ); 26425 *uses_vvvv = True; 26426 goto decode_success; 26427 } 26428 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */ 26429 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26430 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26431 "vpsrad", Iop_SarN32x8 
); 26432 *uses_vvvv = True; 26433 goto decode_success; 26434 } 26435 break; 26436 26437 case 0xE3: 26438 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */ 26439 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26440 delta = dis_AVX128_E_V_to_G( 26441 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 ); 26442 goto decode_success; 26443 } 26444 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */ 26445 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26446 delta = dis_AVX256_E_V_to_G( 26447 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 ); 26448 goto decode_success; 26449 } 26450 break; 26451 26452 case 0xE4: 26453 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */ 26454 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26455 delta = dis_AVX128_E_V_to_G( 26456 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 ); 26457 goto decode_success; 26458 } 26459 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */ 26460 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26461 delta = dis_AVX256_E_V_to_G( 26462 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 ); 26463 goto decode_success; 26464 } 26465 break; 26466 26467 case 0xE5: 26468 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */ 26469 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26470 delta = dis_AVX128_E_V_to_G( 26471 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 ); 26472 goto decode_success; 26473 } 26474 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */ 26475 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26476 delta = dis_AVX256_E_V_to_G( 26477 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 ); 26478 goto decode_success; 26479 } 26480 break; 26481 26482 case 0xE6: 26483 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */ 26484 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 26485 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/); 26486 goto 
decode_success; 26487 } 26488 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */ 26489 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 26490 delta = dis_CVTDQ2PD_256(vbi, pfx, delta); 26491 goto decode_success; 26492 } 26493 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */ 26494 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26495 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26496 True/*r2zero*/); 26497 goto decode_success; 26498 } 26499 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */ 26500 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26501 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/); 26502 goto decode_success; 26503 } 26504 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */ 26505 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26506 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26507 False/*!r2zero*/); 26508 goto decode_success; 26509 } 26510 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */ 26511 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26512 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/); 26513 goto decode_success; 26514 } 26515 break; 26516 26517 case 0xE7: 26518 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */ 26519 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26520 UChar modrm = getUChar(delta); 26521 UInt rG = gregOfRexRM(pfx,modrm); 26522 if (!epartIsReg(modrm)) { 26523 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26524 gen_SEGV_if_not_16_aligned( addr ); 26525 storeLE( mkexpr(addr), getXMMReg(rG) ); 26526 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG)); 26527 delta += alen; 26528 goto decode_success; 26529 } 26530 /* else fall through */ 26531 } 26532 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */ 26533 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26534 UChar modrm = getUChar(delta); 26535 UInt rG = gregOfRexRM(pfx,modrm); 26536 if (!epartIsReg(modrm)) { 26537 addr = disAMode ( &alen, vbi, 
pfx, delta, dis_buf, 0 ); 26538 gen_SEGV_if_not_32_aligned( addr ); 26539 storeLE( mkexpr(addr), getYMMReg(rG) ); 26540 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG)); 26541 delta += alen; 26542 goto decode_success; 26543 } 26544 /* else fall through */ 26545 } 26546 break; 26547 26548 case 0xE8: 26549 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */ 26550 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26551 delta = dis_AVX128_E_V_to_G( 26552 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 ); 26553 goto decode_success; 26554 } 26555 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */ 26556 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26557 delta = dis_AVX256_E_V_to_G( 26558 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 ); 26559 goto decode_success; 26560 } 26561 break; 26562 26563 case 0xE9: 26564 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */ 26565 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26566 delta = dis_AVX128_E_V_to_G( 26567 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 ); 26568 goto decode_success; 26569 } 26570 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */ 26571 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26572 delta = dis_AVX256_E_V_to_G( 26573 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 ); 26574 goto decode_success; 26575 } 26576 break; 26577 26578 case 0xEA: 26579 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 26580 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */ 26581 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26582 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26583 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 ); 26584 goto decode_success; 26585 } 26586 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 26587 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */ 26588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26589 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26590 uses_vvvv, vbi, 
pfx, delta, "vpminsw", Iop_Min16Sx16 ); 26591 goto decode_success; 26592 } 26593 break; 26594 26595 case 0xEB: 26596 /* VPOR r/m, rV, r ::: r = rV | r/m */ 26597 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */ 26598 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26599 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26600 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 ); 26601 goto decode_success; 26602 } 26603 /* VPOR r/m, rV, r ::: r = rV | r/m */ 26604 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */ 26605 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26606 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26607 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 ); 26608 goto decode_success; 26609 } 26610 break; 26611 26612 case 0xEC: 26613 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */ 26614 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26615 delta = dis_AVX128_E_V_to_G( 26616 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 ); 26617 goto decode_success; 26618 } 26619 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */ 26620 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26621 delta = dis_AVX256_E_V_to_G( 26622 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 ); 26623 goto decode_success; 26624 } 26625 break; 26626 26627 case 0xED: 26628 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */ 26629 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26630 delta = dis_AVX128_E_V_to_G( 26631 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 ); 26632 goto decode_success; 26633 } 26634 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */ 26635 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26636 delta = dis_AVX256_E_V_to_G( 26637 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 ); 26638 goto decode_success; 26639 } 26640 break; 26641 26642 case 0xEE: 26643 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 26644 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */ 26645 if 
(have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26646 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26647 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 ); 26648 goto decode_success; 26649 } 26650 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 26651 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */ 26652 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26653 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26654 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 ); 26655 goto decode_success; 26656 } 26657 break; 26658 26659 case 0xEF: 26660 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 26661 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */ 26662 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26663 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26664 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 ); 26665 goto decode_success; 26666 } 26667 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 26668 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */ 26669 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26670 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26671 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 ); 26672 goto decode_success; 26673 } 26674 break; 26675 26676 case 0xF0: 26677 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */ 26678 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26679 UChar modrm = getUChar(delta); 26680 UInt rD = gregOfRexRM(pfx, modrm); 26681 IRTemp tD = newTemp(Ity_V256); 26682 if (epartIsReg(modrm)) break; 26683 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26684 delta += alen; 26685 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 26686 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD)); 26687 putYMMReg(rD, mkexpr(tD)); 26688 goto decode_success; 26689 } 26690 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */ 26691 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26692 UChar modrm = getUChar(delta); 26693 UInt rD = gregOfRexRM(pfx, modrm); 26694 IRTemp tD = newTemp(Ity_V128); 26695 if (epartIsReg(modrm)) break; 26696 addr 
= disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26697 delta += alen; 26698 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 26699 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD)); 26700 putYMMRegLoAndZU(rD, mkexpr(tD)); 26701 goto decode_success; 26702 } 26703 break; 26704 26705 case 0xF1: 26706 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */ 26707 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26708 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26709 "vpsllw", Iop_ShlN16x8 ); 26710 *uses_vvvv = True; 26711 goto decode_success; 26712 26713 } 26714 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */ 26715 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26716 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26717 "vpsllw", Iop_ShlN16x16 ); 26718 *uses_vvvv = True; 26719 goto decode_success; 26720 26721 } 26722 break; 26723 26724 case 0xF2: 26725 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */ 26726 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26727 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26728 "vpslld", Iop_ShlN32x4 ); 26729 *uses_vvvv = True; 26730 goto decode_success; 26731 } 26732 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */ 26733 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26734 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26735 "vpslld", Iop_ShlN32x8 ); 26736 *uses_vvvv = True; 26737 goto decode_success; 26738 } 26739 break; 26740 26741 case 0xF3: 26742 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */ 26743 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26744 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26745 "vpsllq", Iop_ShlN64x2 ); 26746 *uses_vvvv = True; 26747 goto decode_success; 26748 } 26749 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */ 26750 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26751 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26752 "vpsllq", Iop_ShlN64x4 ); 26753 *uses_vvvv = True; 26754 
goto decode_success; 26755 } 26756 break; 26757 26758 case 0xF4: 26759 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */ 26760 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26761 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26762 uses_vvvv, vbi, pfx, delta, 26763 "vpmuludq", math_PMULUDQ_128 ); 26764 goto decode_success; 26765 } 26766 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */ 26767 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26768 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26769 uses_vvvv, vbi, pfx, delta, 26770 "vpmuludq", math_PMULUDQ_256 ); 26771 goto decode_success; 26772 } 26773 break; 26774 26775 case 0xF5: 26776 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */ 26777 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26778 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26779 uses_vvvv, vbi, pfx, delta, 26780 "vpmaddwd", math_PMADDWD_128 ); 26781 goto decode_success; 26782 } 26783 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */ 26784 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26785 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26786 uses_vvvv, vbi, pfx, delta, 26787 "vpmaddwd", math_PMADDWD_256 ); 26788 goto decode_success; 26789 } 26790 break; 26791 26792 case 0xF6: 26793 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */ 26794 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26795 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26796 uses_vvvv, vbi, pfx, delta, 26797 "vpsadbw", math_PSADBW_128 ); 26798 goto decode_success; 26799 } 26800 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */ 26801 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26802 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26803 uses_vvvv, vbi, pfx, delta, 26804 "vpsadbw", math_PSADBW_256 ); 26805 goto decode_success; 26806 } 26807 break; 26808 26809 case 0xF7: 26810 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 
/r */ 26811 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 26812 && epartIsReg(getUChar(delta))) { 26813 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ ); 26814 goto decode_success; 26815 } 26816 break; 26817 26818 case 0xF8: 26819 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 26820 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */ 26821 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26822 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26823 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 ); 26824 goto decode_success; 26825 } 26826 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 26827 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */ 26828 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26829 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26830 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 ); 26831 goto decode_success; 26832 } 26833 break; 26834 26835 case 0xF9: 26836 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 26837 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */ 26838 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26839 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26840 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 ); 26841 goto decode_success; 26842 } 26843 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 26844 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */ 26845 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26846 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26847 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 ); 26848 goto decode_success; 26849 } 26850 break; 26851 26852 case 0xFA: 26853 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 26854 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */ 26855 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26856 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26857 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 ); 26858 goto decode_success; 26859 } 26860 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 26861 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */ 26862 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26863 
delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26864 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 ); 26865 goto decode_success; 26866 } 26867 break; 26868 26869 case 0xFB: 26870 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 26871 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */ 26872 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26873 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26874 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 ); 26875 goto decode_success; 26876 } 26877 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 26878 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */ 26879 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26880 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26881 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 ); 26882 goto decode_success; 26883 } 26884 break; 26885 26886 case 0xFC: 26887 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 26888 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */ 26889 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26890 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26891 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 ); 26892 goto decode_success; 26893 } 26894 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 26895 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */ 26896 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26897 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26898 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 ); 26899 goto decode_success; 26900 } 26901 break; 26902 26903 case 0xFD: 26904 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 26905 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */ 26906 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26907 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26908 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 ); 26909 goto decode_success; 26910 } 26911 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 26912 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */ 26913 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26914 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26915 
uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 ); 26916 goto decode_success; 26917 } 26918 break; 26919 26920 case 0xFE: 26921 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 26922 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */ 26923 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26924 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26925 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 ); 26926 goto decode_success; 26927 } 26928 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 26929 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */ 26930 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26931 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26932 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 ); 26933 goto decode_success; 26934 } 26935 break; 26936 26937 default: 26938 break; 26939 26940 } 26941 26942 //decode_failure: 26943 return deltaIN; 26944 26945 decode_success: 26946 return delta; 26947 } 26948 26949 26950 /*------------------------------------------------------------*/ 26951 /*--- ---*/ 26952 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/ 26953 /*--- ---*/ 26954 /*------------------------------------------------------------*/ 26955 26956 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 26957 { 26958 /* In the control vector, zero out all but the bottom two bits of 26959 each 32-bit lane. */ 26960 IRExpr* cv1 = binop(Iop_ShrN32x4, 26961 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)), 26962 mkU8(30)); 26963 /* And use the resulting cleaned-up control vector as steering 26964 in a Perm operation. 
 */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
   return res;
}

/* Variable-control VPERMILPS, 256-bit form: split both the data and
   the control vector into 128-bit halves and steer each half
   independently through the 128-bit helper, then reassemble. */
static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}

/* Variable-control VPERMILPD, 128-bit form.  Each 64-bit result lane
   is either the hi or the lo 64-bit data lane, chosen by bit 1 of the
   corresponding 64-bit control lane (hence the Shr64-by-1 followed by
   a truncation to 1 bit). */
static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* No cleverness here .. */
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV128to64s( dataV, &dHi, &dLo );
   breakupV128to64s( ctrlV, &cHi, &cLo );
   /* rHi = ctrl.hi[1] ? data.hi : data.lo */
   IRExpr* rHi
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   /* rLo = ctrl.lo[1] ? data.hi : data.lo */
   IRExpr* rLo
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, rHi, rLo));
   return res;
}

/* Variable-control VPERMILPD, 256-bit form: as for the PS case, do the
   two 128-bit halves independently and glue the results together. */
static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}

/* VPERMD: full-width 32-bit-lane permute of dataV under ctrlV.  NB:
   unlike the PERMILP helpers above, the control vector is the FIRST
   argument here. */
static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
{
   /* In the control vector, zero out all but the bottom three bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x8,
                       binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
                       mkU8(29));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
   return res;
}

/* Common decoder for the VEX shift-by-register forms (presumably the
   BMI2 SARX/SHLX/SHRX family -- TODO confirm at the call sites):
      G = E shifted by (V & (opsize_in_bits - 1))
   where E is reg-or-mem, the shift amount comes from the vvvv
   register, and op8 is the 8-bit base IROp widened to the operand
   size.  Operand size is 8 bytes with REX.W, else 4.  Returns the
   updated instruction offset; sets *uses_vvvv. */
static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
                         const VexAbiInfo* vbi, Prefix pfx, Long delta,
                         const HChar* opname, IROp op8 )
{
   HChar  dis_buf[50];
   Int    alen;
   Int    size = getRexW(pfx) ? 8 : 4;
   IRType ty   = szToITy(size);
   IRTemp src  = newTemp(ty);
   IRTemp amt  = newTemp(ty);
   UChar  rm   = getUChar(delta);

   /* Shift amount lives in the vvvv-encoded register. */
   assign( amt, getIRegV(size,pfx) );
   if (epartIsReg(rm)) {
      assign( src, getIRegE(size,pfx,rm) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
          nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
      delta++;
   } else {
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( src, loadLE(ty, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
          nameIRegG(size,pfx,rm));
      delta += alen;
   }

   /* G = src SHIFTOP (amt masked to the operand width - 1). */
   putIRegG( size, pfx, rm,
             binop(mkSizedOp(ty,op8), mkexpr(src),
                   narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
                                          mkU(ty,8*size-1)))) );
   /* Flags aren't modified. */
   *uses_vvvv = True;
   return delta;
}


static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   UInt   rV    = getVexNvvvv(pfx);
   /* Scalar forms are the odd opcodes with low nibble above 7. */
   Bool   scalar = (opc & 0xF) > 7 && (opc & 1);
   IRType ty    = getRexW(pfx) ? Ity_F64 : Ity_F32;
   IRType vty   = scalar ? ty : getVexL(pfx) ?
Ity_V256 : Ity_V128; 27074 IRTemp vX = newTemp(vty); 27075 IRTemp vY = newTemp(vty); 27076 IRTemp vZ = newTemp(vty); 27077 IRExpr *x[8], *y[8], *z[8]; 27078 IRTemp addr = IRTemp_INVALID; 27079 HChar dis_buf[50]; 27080 Int alen = 0; 27081 const HChar *name; 27082 const HChar *suffix; 27083 const HChar *order; 27084 Bool negateRes = False; 27085 Bool negateZeven = False; 27086 Bool negateZodd = False; 27087 Int i, j; 27088 Int count; 27089 static IROp ops[] = { Iop_V256to64_0, Iop_V256to64_1, 27090 Iop_V256to64_2, Iop_V256to64_3, 27091 Iop_V128to64, Iop_V128HIto64 }; 27092 27093 switch (opc & 0xF) { 27094 case 0x6: 27095 name = "addsub"; 27096 negateZeven = True; 27097 break; 27098 case 0x7: 27099 name = "subadd"; 27100 negateZodd = True; 27101 break; 27102 case 0x8: 27103 case 0x9: 27104 name = "add"; 27105 break; 27106 case 0xA: 27107 case 0xB: 27108 name = "sub"; 27109 negateZeven = True; 27110 negateZodd = True; 27111 break; 27112 case 0xC: 27113 case 0xD: 27114 name = "add"; 27115 negateRes = True; 27116 negateZeven = True; 27117 negateZodd = True; 27118 break; 27119 case 0xE: 27120 case 0xF: 27121 name = "sub"; 27122 negateRes = True; 27123 break; 27124 default: 27125 vpanic("dis_FMA(amd64)"); 27126 break; 27127 } 27128 switch (opc & 0xF0) { 27129 case 0x90: order = "132"; break; 27130 case 0xA0: order = "213"; break; 27131 case 0xB0: order = "231"; break; 27132 default: vpanic("dis_FMA(amd64)"); break; 27133 } 27134 if (scalar) 27135 suffix = ty == Ity_F64 ? "sd" : "ss"; 27136 else 27137 suffix = ty == Ity_F64 ? "pd" : "ps"; 27138 27139 if (scalar) { 27140 assign( vX, ty == Ity_F64 27141 ? getXMMRegLane64F(rG, 0) : getXMMRegLane32F(rG, 0) ); 27142 assign( vZ, ty == Ity_F64 27143 ? getXMMRegLane64F(rV, 0) : getXMMRegLane32F(rV, 0) ); 27144 } else { 27145 assign( vX, vty == Ity_V256 ? getYMMReg(rG) : getXMMReg(rG) ); 27146 assign( vZ, vty == Ity_V256 ? 
getYMMReg(rV) : getXMMReg(rV) ); 27147 } 27148 27149 if (epartIsReg(modrm)) { 27150 UInt rE = eregOfRexRM(pfx, modrm); 27151 delta += 1; 27152 if (scalar) 27153 assign( vY, ty == Ity_F64 27154 ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 27155 else 27156 assign( vY, vty == Ity_V256 ? getYMMReg(rE) : getXMMReg(rE) ); 27157 if (vty == Ity_V256) { 27158 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27159 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV), 27160 nameYMMReg(rG)); 27161 } else { 27162 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27163 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV), 27164 nameXMMReg(rG)); 27165 } 27166 } else { 27167 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27168 delta += alen; 27169 assign(vY, loadLE(vty, mkexpr(addr))); 27170 if (vty == Ity_V256) { 27171 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27172 name, order, suffix, dis_buf, nameYMMReg(rV), 27173 nameYMMReg(rG)); 27174 } else { 27175 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27176 name, order, suffix, dis_buf, nameXMMReg(rV), 27177 nameXMMReg(rG)); 27178 } 27179 } 27180 27181 /* vX/vY/vZ now in 132 order. If it is different order, swap the 27182 arguments. */ 27183 if ((opc & 0xF0) != 0x90) { 27184 IRTemp tem = vX; 27185 if ((opc & 0xF0) == 0xA0) { 27186 vX = vZ; 27187 vZ = vY; 27188 vY = tem; 27189 } else { 27190 vX = vZ; 27191 vZ = tem; 27192 } 27193 } 27194 27195 if (scalar) { 27196 count = 1; 27197 x[0] = mkexpr(vX); 27198 y[0] = mkexpr(vY); 27199 z[0] = mkexpr(vZ); 27200 } else if (ty == Ity_F32) { 27201 count = vty == Ity_V256 ? 8 : 4; 27202 j = vty == Ity_V256 ? 
0 : 4; 27203 for (i = 0; i < count; i += 2) { 27204 IRTemp tem = newTemp(Ity_I64); 27205 assign(tem, unop(ops[i / 2 + j], mkexpr(vX))); 27206 x[i] = unop(Iop_64to32, mkexpr(tem)); 27207 x[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27208 tem = newTemp(Ity_I64); 27209 assign(tem, unop(ops[i / 2 + j], mkexpr(vY))); 27210 y[i] = unop(Iop_64to32, mkexpr(tem)); 27211 y[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27212 tem = newTemp(Ity_I64); 27213 assign(tem, unop(ops[i / 2 + j], mkexpr(vZ))); 27214 z[i] = unop(Iop_64to32, mkexpr(tem)); 27215 z[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27216 } 27217 } else { 27218 count = vty == Ity_V256 ? 4 : 2; 27219 j = vty == Ity_V256 ? 0 : 4; 27220 for (i = 0; i < count; i++) { 27221 x[i] = unop(ops[i + j], mkexpr(vX)); 27222 y[i] = unop(ops[i + j], mkexpr(vY)); 27223 z[i] = unop(ops[i + j], mkexpr(vZ)); 27224 } 27225 } 27226 if (!scalar) 27227 for (i = 0; i < count; i++) { 27228 IROp op = ty == Ity_F64 27229 ? Iop_ReinterpI64asF64 : Iop_ReinterpI32asF32; 27230 x[i] = unop(op, x[i]); 27231 y[i] = unop(op, y[i]); 27232 z[i] = unop(op, z[i]); 27233 } 27234 for (i = 0; i < count; i++) { 27235 if ((i & 1) ? negateZodd : negateZeven) 27236 z[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, z[i]); 27237 x[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32, 27238 get_FAKE_roundingmode(), x[i], y[i], z[i]); 27239 if (negateRes) 27240 x[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, x[i]); 27241 if (ty == Ity_F64) 27242 putYMMRegLane64F( rG, i, x[i] ); 27243 else 27244 putYMMRegLane32F( rG, i, x[i] ); 27245 } 27246 if (vty != Ity_V256) 27247 putYMMRegLane128( rG, 1, mkV128(0) ); 27248 27249 return delta; 27250 } 27251 27252 27253 /* Masked load or masked store. 
*/ 27254 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi, 27255 Prefix pfx, Long delta, 27256 const HChar* opname, Bool isYMM, IRType ty, 27257 Bool isLoad ) 27258 { 27259 HChar dis_buf[50]; 27260 Int alen, i; 27261 IRTemp addr; 27262 UChar modrm = getUChar(delta); 27263 UInt rG = gregOfRexRM(pfx,modrm); 27264 UInt rV = getVexNvvvv(pfx); 27265 27266 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27267 delta += alen; 27268 27269 /**/ if (isLoad && isYMM) { 27270 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 27271 } 27272 else if (isLoad && !isYMM) { 27273 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 27274 } 27275 27276 else if (!isLoad && isYMM) { 27277 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf ); 27278 } 27279 else { 27280 vassert(!isLoad && !isYMM); 27281 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf ); 27282 } 27283 27284 vassert(ty == Ity_I32 || ty == Ity_I64); 27285 Bool laneIs32 = ty == Ity_I32; 27286 27287 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2); 27288 27289 for (i = 0; i < nLanes; i++) { 27290 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63); 27291 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1); 27292 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64; 27293 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64; 27294 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i ); 27295 27296 IRTemp cond = newTemp(Ity_I1); 27297 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one)); 27298 27299 IRTemp data = newTemp(ty); 27300 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), 27301 mkU64(i * (laneIs32 ? 4 : 8))); 27302 if (isLoad) { 27303 stmt( 27304 IRStmt_LoadG( 27305 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64, 27306 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond) 27307 )); 27308 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) ); 27309 } else { 27310 assign(data, (laneIs32 ? 
getYMMRegLane32 : getYMMRegLane64)( rG, i )); 27311 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) ); 27312 } 27313 } 27314 27315 if (isLoad && !isYMM) 27316 putYMMRegLane128( rG, 1, mkV128(0) ); 27317 27318 *uses_vvvv = True; 27319 return delta; 27320 } 27321 27322 27323 /* Gather. */ 27324 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi, 27325 Prefix pfx, Long delta, 27326 const HChar* opname, Bool isYMM, 27327 Bool isVM64x, IRType ty ) 27328 { 27329 HChar dis_buf[50]; 27330 Int alen, i, vscale, count1, count2; 27331 IRTemp addr; 27332 UChar modrm = getUChar(delta); 27333 UInt rG = gregOfRexRM(pfx,modrm); 27334 UInt rV = getVexNvvvv(pfx); 27335 UInt rI; 27336 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128; 27337 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128; 27338 IRTemp cond; 27339 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI, 27340 idxTy, &vscale ); 27341 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV) 27342 return delta; 27343 if (dstTy == Ity_V256) { 27344 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) ); 27345 } else { 27346 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) ); 27347 } 27348 delta += alen; 27349 27350 if (ty == Ity_I32) { 27351 count1 = isYMM ? 8 : 4; 27352 count2 = isVM64x ? count1 / 2 : count1; 27353 } else { 27354 count1 = count2 = isYMM ? 4 : 2; 27355 } 27356 27357 /* First update the mask register to copies of the sign bit. */ 27358 if (ty == Ity_I32) { 27359 if (isYMM) 27360 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) ); 27361 else 27362 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) ); 27363 } else { 27364 for (i = 0; i < count1; i++) { 27365 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ), 27366 mkU8(63)) ); 27367 } 27368 } 27369 27370 /* Next gather the individual elements. 
If any fault occurs, the 27371 corresponding mask element will be set and the loop stops. */ 27372 for (i = 0; i < count2; i++) { 27373 IRExpr *expr, *addr_expr; 27374 cond = newTemp(Ity_I1); 27375 assign( cond, 27376 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S, 27377 ty == Ity_I32 ? getYMMRegLane32( rV, i ) 27378 : getYMMRegLane64( rV, i ), 27379 mkU(ty, 0)) ); 27380 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i ) 27381 : getYMMRegLane64( rG, i ); 27382 addr_expr = isVM64x ? getYMMRegLane64( rI, i ) 27383 : unop(Iop_32Sto64, getYMMRegLane32( rI, i )); 27384 switch (vscale) { 27385 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break; 27386 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break; 27387 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break; 27388 default: break; 27389 } 27390 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr); 27391 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr); 27392 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP)); 27393 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr); 27394 if (ty == Ity_I32) { 27395 putYMMRegLane32( rG, i, expr ); 27396 putYMMRegLane32( rV, i, mkU32(0) ); 27397 } else { 27398 putYMMRegLane64( rG, i, expr); 27399 putYMMRegLane64( rV, i, mkU64(0) ); 27400 } 27401 } 27402 27403 if (!isYMM || (ty == Ity_I32 && isVM64x)) { 27404 if (ty == Ity_I64 || isYMM) 27405 putYMMRegLane128( rV, 1, mkV128(0) ); 27406 else if (ty == Ity_I32 && count2 == 2) { 27407 putYMMRegLane64( rV, 1, mkU64(0) ); 27408 putYMMRegLane64( rG, 1, mkU64(0) ); 27409 } 27410 putYMMRegLane128( rG, 1, mkV128(0) ); 27411 } 27412 27413 *uses_vvvv = True; 27414 return delta; 27415 } 27416 27417 27418 __attribute__((noinline)) 27419 static 27420 Long dis_ESC_0F38__VEX ( 27421 /*MB_OUT*/DisResult* dres, 27422 /*OUT*/ Bool* uses_vvvv, 27423 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 27424 Bool resteerCisOk, 27425 void* callback_opaque, 27426 const VexArchInfo* archinfo, 27427 
const VexAbiInfo* vbi, 27428 Prefix pfx, Int sz, Long deltaIN 27429 ) 27430 { 27431 IRTemp addr = IRTemp_INVALID; 27432 Int alen = 0; 27433 HChar dis_buf[50]; 27434 Long delta = deltaIN; 27435 UChar opc = getUChar(delta); 27436 delta++; 27437 *uses_vvvv = False; 27438 27439 switch (opc) { 27440 27441 case 0x00: 27442 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27443 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */ 27444 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27445 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27446 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM ); 27447 goto decode_success; 27448 } 27449 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27450 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */ 27451 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27452 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27453 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM ); 27454 goto decode_success; 27455 } 27456 break; 27457 27458 case 0x01: 27459 case 0x02: 27460 case 0x03: 27461 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */ 27462 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */ 27463 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */ 27464 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27465 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27466 *uses_vvvv = True; 27467 goto decode_success; 27468 } 27469 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */ 27470 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */ 27471 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */ 27472 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27473 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27474 *uses_vvvv = True; 27475 goto decode_success; 27476 } 27477 break; 27478 27479 case 0x04: 27480 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */ 27481 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 
27482 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27483 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27484 math_PMADDUBSW_128 ); 27485 goto decode_success; 27486 } 27487 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */ 27488 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27489 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27490 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27491 math_PMADDUBSW_256 ); 27492 goto decode_success; 27493 } 27494 break; 27495 27496 case 0x05: 27497 case 0x06: 27498 case 0x07: 27499 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */ 27500 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */ 27501 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */ 27502 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27503 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27504 *uses_vvvv = True; 27505 goto decode_success; 27506 } 27507 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */ 27508 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */ 27509 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */ 27510 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27511 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27512 *uses_vvvv = True; 27513 goto decode_success; 27514 } 27515 break; 27516 27517 case 0x08: 27518 case 0x09: 27519 case 0x0A: 27520 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */ 27521 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */ 27522 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */ 27523 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27524 IRTemp sV = newTemp(Ity_V128); 27525 IRTemp dV = newTemp(Ity_V128); 27526 IRTemp sHi, sLo, dHi, dLo; 27527 sHi = sLo = dHi = dLo = IRTemp_INVALID; 27528 HChar ch = '?'; 27529 Int laneszB = 0; 27530 UChar modrm = getUChar(delta); 27531 UInt rG = gregOfRexRM(pfx,modrm); 27532 UInt rV = 
getVexNvvvv(pfx); 27533 27534 switch (opc) { 27535 case 0x08: laneszB = 1; ch = 'b'; break; 27536 case 0x09: laneszB = 2; ch = 'w'; break; 27537 case 0x0A: laneszB = 4; ch = 'd'; break; 27538 default: vassert(0); 27539 } 27540 27541 assign( dV, getXMMReg(rV) ); 27542 27543 if (epartIsReg(modrm)) { 27544 UInt rE = eregOfRexRM(pfx,modrm); 27545 assign( sV, getXMMReg(rE) ); 27546 delta += 1; 27547 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE), 27548 nameXMMReg(rV), nameXMMReg(rG)); 27549 } else { 27550 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27551 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 27552 delta += alen; 27553 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 27554 nameXMMReg(rV), nameXMMReg(rG)); 27555 } 27556 27557 breakupV128to64s( dV, &dHi, &dLo ); 27558 breakupV128to64s( sV, &sHi, &sLo ); 27559 27560 putYMMRegLoAndZU( 27561 rG, 27562 binop(Iop_64HLtoV128, 27563 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 27564 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 27565 ) 27566 ); 27567 *uses_vvvv = True; 27568 goto decode_success; 27569 } 27570 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */ 27571 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */ 27572 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */ 27573 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27574 IRTemp sV = newTemp(Ity_V256); 27575 IRTemp dV = newTemp(Ity_V256); 27576 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 27577 s3 = s2 = s1 = s0 = IRTemp_INVALID; 27578 d3 = d2 = d1 = d0 = IRTemp_INVALID; 27579 UChar ch = '?'; 27580 Int laneszB = 0; 27581 UChar modrm = getUChar(delta); 27582 UInt rG = gregOfRexRM(pfx,modrm); 27583 UInt rV = getVexNvvvv(pfx); 27584 27585 switch (opc) { 27586 case 0x08: laneszB = 1; ch = 'b'; break; 27587 case 0x09: laneszB = 2; ch = 'w'; break; 27588 case 0x0A: laneszB = 4; ch = 'd'; break; 27589 default: vassert(0); 27590 } 27591 27592 assign( dV, getYMMReg(rV) ); 27593 27594 if 
(epartIsReg(modrm)) { 27595 UInt rE = eregOfRexRM(pfx,modrm); 27596 assign( sV, getYMMReg(rE) ); 27597 delta += 1; 27598 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE), 27599 nameYMMReg(rV), nameYMMReg(rG)); 27600 } else { 27601 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27602 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 27603 delta += alen; 27604 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 27605 nameYMMReg(rV), nameYMMReg(rG)); 27606 } 27607 27608 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 27609 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 27610 27611 putYMMReg( 27612 rG, 27613 binop( Iop_V128HLtoV256, 27614 binop(Iop_64HLtoV128, 27615 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ), 27616 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB ) 27617 ), 27618 binop(Iop_64HLtoV128, 27619 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ), 27620 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB ) 27621 ) 27622 ) 27623 ); 27624 *uses_vvvv = True; 27625 goto decode_success; 27626 } 27627 break; 27628 27629 case 0x0B: 27630 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */ 27631 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27632 IRTemp sV = newTemp(Ity_V128); 27633 IRTemp dV = newTemp(Ity_V128); 27634 IRTemp sHi, sLo, dHi, dLo; 27635 sHi = sLo = dHi = dLo = IRTemp_INVALID; 27636 UChar modrm = getUChar(delta); 27637 UInt rG = gregOfRexRM(pfx,modrm); 27638 UInt rV = getVexNvvvv(pfx); 27639 27640 assign( dV, getXMMReg(rV) ); 27641 27642 if (epartIsReg(modrm)) { 27643 UInt rE = eregOfRexRM(pfx,modrm); 27644 assign( sV, getXMMReg(rE) ); 27645 delta += 1; 27646 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE), 27647 nameXMMReg(rV), nameXMMReg(rG)); 27648 } else { 27649 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27650 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 27651 delta += alen; 27652 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 27653 nameXMMReg(rV), nameXMMReg(rG)); 27654 } 27655 27656 breakupV128to64s( dV, &dHi, &dLo ); 27657 
breakupV128to64s( sV, &sHi, &sLo ); 27658 27659 putYMMRegLoAndZU( 27660 rG, 27661 binop(Iop_64HLtoV128, 27662 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 27663 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 27664 ) 27665 ); 27666 *uses_vvvv = True; 27667 goto decode_success; 27668 } 27669 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */ 27670 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27671 IRTemp sV = newTemp(Ity_V256); 27672 IRTemp dV = newTemp(Ity_V256); 27673 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 27674 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 27675 UChar modrm = getUChar(delta); 27676 UInt rG = gregOfRexRM(pfx,modrm); 27677 UInt rV = getVexNvvvv(pfx); 27678 27679 assign( dV, getYMMReg(rV) ); 27680 27681 if (epartIsReg(modrm)) { 27682 UInt rE = eregOfRexRM(pfx,modrm); 27683 assign( sV, getYMMReg(rE) ); 27684 delta += 1; 27685 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE), 27686 nameYMMReg(rV), nameYMMReg(rG)); 27687 } else { 27688 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27689 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 27690 delta += alen; 27691 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 27692 nameYMMReg(rV), nameYMMReg(rG)); 27693 } 27694 27695 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 27696 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 27697 27698 putYMMReg( 27699 rG, 27700 binop(Iop_V128HLtoV256, 27701 binop(Iop_64HLtoV128, 27702 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ), 27703 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ), 27704 binop(Iop_64HLtoV128, 27705 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ), 27706 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) ) 27707 ) 27708 ); 27709 *uses_vvvv = True; 27710 goto decode_success; 27711 } 27712 break; 27713 27714 case 0x0C: 27715 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */ 27716 if (have66noF2noF3(pfx) 27717 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 27718 UChar modrm = getUChar(delta); 27719 UInt rG = 
gregOfRexRM(pfx, modrm); 27720 UInt rV = getVexNvvvv(pfx); 27721 IRTemp ctrlV = newTemp(Ity_V128); 27722 if (epartIsReg(modrm)) { 27723 UInt rE = eregOfRexRM(pfx, modrm); 27724 delta += 1; 27725 DIP("vpermilps %s,%s,%s\n", 27726 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 27727 assign(ctrlV, getXMMReg(rE)); 27728 } else { 27729 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27730 delta += alen; 27731 DIP("vpermilps %s,%s,%s\n", 27732 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 27733 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 27734 } 27735 IRTemp dataV = newTemp(Ity_V128); 27736 assign(dataV, getXMMReg(rV)); 27737 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV); 27738 putYMMRegLoAndZU(rG, mkexpr(resV)); 27739 *uses_vvvv = True; 27740 goto decode_success; 27741 } 27742 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */ 27743 if (have66noF2noF3(pfx) 27744 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27745 UChar modrm = getUChar(delta); 27746 UInt rG = gregOfRexRM(pfx, modrm); 27747 UInt rV = getVexNvvvv(pfx); 27748 IRTemp ctrlV = newTemp(Ity_V256); 27749 if (epartIsReg(modrm)) { 27750 UInt rE = eregOfRexRM(pfx, modrm); 27751 delta += 1; 27752 DIP("vpermilps %s,%s,%s\n", 27753 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 27754 assign(ctrlV, getYMMReg(rE)); 27755 } else { 27756 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27757 delta += alen; 27758 DIP("vpermilps %s,%s,%s\n", 27759 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 27760 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 27761 } 27762 IRTemp dataV = newTemp(Ity_V256); 27763 assign(dataV, getYMMReg(rV)); 27764 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV); 27765 putYMMReg(rG, mkexpr(resV)); 27766 *uses_vvvv = True; 27767 goto decode_success; 27768 } 27769 break; 27770 27771 case 0x0D: 27772 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */ 27773 if (have66noF2noF3(pfx) 27774 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 27775 
UChar modrm = getUChar(delta); 27776 UInt rG = gregOfRexRM(pfx, modrm); 27777 UInt rV = getVexNvvvv(pfx); 27778 IRTemp ctrlV = newTemp(Ity_V128); 27779 if (epartIsReg(modrm)) { 27780 UInt rE = eregOfRexRM(pfx, modrm); 27781 delta += 1; 27782 DIP("vpermilpd %s,%s,%s\n", 27783 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 27784 assign(ctrlV, getXMMReg(rE)); 27785 } else { 27786 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27787 delta += alen; 27788 DIP("vpermilpd %s,%s,%s\n", 27789 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 27790 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 27791 } 27792 IRTemp dataV = newTemp(Ity_V128); 27793 assign(dataV, getXMMReg(rV)); 27794 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV); 27795 putYMMRegLoAndZU(rG, mkexpr(resV)); 27796 *uses_vvvv = True; 27797 goto decode_success; 27798 } 27799 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */ 27800 if (have66noF2noF3(pfx) 27801 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27802 UChar modrm = getUChar(delta); 27803 UInt rG = gregOfRexRM(pfx, modrm); 27804 UInt rV = getVexNvvvv(pfx); 27805 IRTemp ctrlV = newTemp(Ity_V256); 27806 if (epartIsReg(modrm)) { 27807 UInt rE = eregOfRexRM(pfx, modrm); 27808 delta += 1; 27809 DIP("vpermilpd %s,%s,%s\n", 27810 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 27811 assign(ctrlV, getYMMReg(rE)); 27812 } else { 27813 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27814 delta += alen; 27815 DIP("vpermilpd %s,%s,%s\n", 27816 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 27817 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 27818 } 27819 IRTemp dataV = newTemp(Ity_V256); 27820 assign(dataV, getYMMReg(rV)); 27821 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV); 27822 putYMMReg(rG, mkexpr(resV)); 27823 *uses_vvvv = True; 27824 goto decode_success; 27825 } 27826 break; 27827 27828 case 0x0E: 27829 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */ 27830 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 
27831 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 ); 27832 goto decode_success; 27833 } 27834 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */ 27835 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27836 delta = dis_xTESTy_256( vbi, pfx, delta, 32 ); 27837 goto decode_success; 27838 } 27839 break; 27840 27841 case 0x0F: 27842 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */ 27843 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27844 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 ); 27845 goto decode_success; 27846 } 27847 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */ 27848 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27849 delta = dis_xTESTy_256( vbi, pfx, delta, 64 ); 27850 goto decode_success; 27851 } 27852 break; 27853 27854 case 0x16: 27855 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */ 27856 if (have66noF2noF3(pfx) 27857 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27858 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27859 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD ); 27860 goto decode_success; 27861 } 27862 break; 27863 27864 case 0x17: 27865 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */ 27866 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27867 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 ); 27868 goto decode_success; 27869 } 27870 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */ 27871 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27872 delta = dis_xTESTy_256( vbi, pfx, delta, 0 ); 27873 goto decode_success; 27874 } 27875 break; 27876 27877 case 0x18: 27878 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 27879 if (have66noF2noF3(pfx) 27880 && 0==getVexL(pfx)/*128*/ 27881 && !epartIsReg(getUChar(delta))) { 27882 UChar modrm = getUChar(delta); 27883 UInt rG = gregOfRexRM(pfx, modrm); 27884 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27885 delta += alen; 27886 DIP("vbroadcastss 
%s,%s\n", dis_buf, nameXMMReg(rG)); 27887 IRTemp t32 = newTemp(Ity_I32); 27888 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 27889 IRTemp t64 = newTemp(Ity_I64); 27890 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27891 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 27892 putYMMRegLoAndZU(rG, res); 27893 goto decode_success; 27894 } 27895 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 27896 if (have66noF2noF3(pfx) 27897 && 1==getVexL(pfx)/*256*/ 27898 && !epartIsReg(getUChar(delta))) { 27899 UChar modrm = getUChar(delta); 27900 UInt rG = gregOfRexRM(pfx, modrm); 27901 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27902 delta += alen; 27903 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG)); 27904 IRTemp t32 = newTemp(Ity_I32); 27905 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 27906 IRTemp t64 = newTemp(Ity_I64); 27907 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27908 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27909 mkexpr(t64), mkexpr(t64)); 27910 putYMMReg(rG, res); 27911 goto decode_success; 27912 } 27913 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 27914 if (have66noF2noF3(pfx) 27915 && 0==getVexL(pfx)/*128*/ 27916 && epartIsReg(getUChar(delta))) { 27917 UChar modrm = getUChar(delta); 27918 UInt rG = gregOfRexRM(pfx, modrm); 27919 UInt rE = eregOfRexRM(pfx, modrm); 27920 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 27921 IRTemp t32 = newTemp(Ity_I32); 27922 assign(t32, getXMMRegLane32(rE, 0)); 27923 IRTemp t64 = newTemp(Ity_I64); 27924 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27925 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 27926 putYMMRegLoAndZU(rG, res); 27927 delta++; 27928 goto decode_success; 27929 } 27930 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 27931 if (have66noF2noF3(pfx) 27932 && 1==getVexL(pfx)/*256*/ 27933 && epartIsReg(getUChar(delta))) { 27934 UChar modrm = getUChar(delta); 
27935 UInt rG = gregOfRexRM(pfx, modrm); 27936 UInt rE = eregOfRexRM(pfx, modrm); 27937 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 27938 IRTemp t32 = newTemp(Ity_I32); 27939 assign(t32, getXMMRegLane32(rE, 0)); 27940 IRTemp t64 = newTemp(Ity_I64); 27941 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27942 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27943 mkexpr(t64), mkexpr(t64)); 27944 putYMMReg(rG, res); 27945 delta++; 27946 goto decode_success; 27947 } 27948 break; 27949 27950 case 0x19: 27951 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 27952 if (have66noF2noF3(pfx) 27953 && 1==getVexL(pfx)/*256*/ 27954 && !epartIsReg(getUChar(delta))) { 27955 UChar modrm = getUChar(delta); 27956 UInt rG = gregOfRexRM(pfx, modrm); 27957 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27958 delta += alen; 27959 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG)); 27960 IRTemp t64 = newTemp(Ity_I64); 27961 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 27962 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27963 mkexpr(t64), mkexpr(t64)); 27964 putYMMReg(rG, res); 27965 goto decode_success; 27966 } 27967 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 27968 if (have66noF2noF3(pfx) 27969 && 1==getVexL(pfx)/*256*/ 27970 && epartIsReg(getUChar(delta))) { 27971 UChar modrm = getUChar(delta); 27972 UInt rG = gregOfRexRM(pfx, modrm); 27973 UInt rE = eregOfRexRM(pfx, modrm); 27974 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 27975 IRTemp t64 = newTemp(Ity_I64); 27976 assign(t64, getXMMRegLane64(rE, 0)); 27977 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27978 mkexpr(t64), mkexpr(t64)); 27979 putYMMReg(rG, res); 27980 delta++; 27981 goto decode_success; 27982 } 27983 break; 27984 27985 case 0x1A: 27986 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */ 27987 if (have66noF2noF3(pfx) 27988 && 1==getVexL(pfx)/*256*/ 27989 && 
!epartIsReg(getUChar(delta))) { 27990 UChar modrm = getUChar(delta); 27991 UInt rG = gregOfRexRM(pfx, modrm); 27992 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27993 delta += alen; 27994 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG)); 27995 IRTemp t128 = newTemp(Ity_V128); 27996 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 27997 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 27998 goto decode_success; 27999 } 28000 break; 28001 28002 case 0x1C: 28003 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */ 28004 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28005 delta = dis_AVX128_E_to_G_unary( 28006 uses_vvvv, vbi, pfx, delta, 28007 "vpabsb", math_PABS_XMM_pap1 ); 28008 goto decode_success; 28009 } 28010 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */ 28011 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28012 delta = dis_AVX256_E_to_G_unary( 28013 uses_vvvv, vbi, pfx, delta, 28014 "vpabsb", math_PABS_YMM_pap1 ); 28015 goto decode_success; 28016 } 28017 break; 28018 28019 case 0x1D: 28020 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */ 28021 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28022 delta = dis_AVX128_E_to_G_unary( 28023 uses_vvvv, vbi, pfx, delta, 28024 "vpabsw", math_PABS_XMM_pap2 ); 28025 goto decode_success; 28026 } 28027 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */ 28028 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28029 delta = dis_AVX256_E_to_G_unary( 28030 uses_vvvv, vbi, pfx, delta, 28031 "vpabsw", math_PABS_YMM_pap2 ); 28032 goto decode_success; 28033 } 28034 break; 28035 28036 case 0x1E: 28037 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */ 28038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28039 delta = dis_AVX128_E_to_G_unary( 28040 uses_vvvv, vbi, pfx, delta, 28041 "vpabsd", math_PABS_XMM_pap4 ); 28042 goto decode_success; 28043 } 28044 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */ 28045 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 28046 delta = dis_AVX256_E_to_G_unary( 28047 uses_vvvv, vbi, pfx, delta, 28048 "vpabsd", math_PABS_YMM_pap4 ); 28049 goto decode_success; 28050 } 28051 break; 28052 28053 case 0x20: 28054 /* VPMOVSXBW xmm2/m64, xmm1 */ 28055 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */ 28056 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28057 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 28058 True/*isAvx*/, False/*!xIsZ*/ ); 28059 goto decode_success; 28060 } 28061 /* VPMOVSXBW xmm2/m128, ymm1 */ 28062 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */ 28063 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28064 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28065 goto decode_success; 28066 } 28067 break; 28068 28069 case 0x21: 28070 /* VPMOVSXBD xmm2/m32, xmm1 */ 28071 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */ 28072 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28073 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28074 True/*isAvx*/, False/*!xIsZ*/ ); 28075 goto decode_success; 28076 } 28077 /* VPMOVSXBD xmm2/m64, ymm1 */ 28078 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */ 28079 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28080 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28081 goto decode_success; 28082 } 28083 break; 28084 28085 case 0x22: 28086 /* VPMOVSXBQ xmm2/m16, xmm1 */ 28087 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */ 28088 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28089 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28090 goto decode_success; 28091 } 28092 /* VPMOVSXBQ xmm2/m32, ymm1 */ 28093 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */ 28094 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28095 delta = dis_PMOVSXBQ_256( vbi, pfx, delta ); 28096 goto decode_success; 28097 } 28098 break; 28099 28100 case 0x23: 28101 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */ 28102 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28103 delta = dis_PMOVxXWD_128( vbi, pfx, 
delta, 28104 True/*isAvx*/, False/*!xIsZ*/ ); 28105 goto decode_success; 28106 } 28107 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */ 28108 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28109 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28110 goto decode_success; 28111 } 28112 break; 28113 28114 case 0x24: 28115 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */ 28116 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28117 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28118 goto decode_success; 28119 } 28120 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */ 28121 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28122 delta = dis_PMOVSXWQ_256( vbi, pfx, delta ); 28123 goto decode_success; 28124 } 28125 break; 28126 28127 case 0x25: 28128 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */ 28129 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28130 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28131 True/*isAvx*/, False/*!xIsZ*/ ); 28132 goto decode_success; 28133 } 28134 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */ 28135 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28136 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28137 goto decode_success; 28138 } 28139 break; 28140 28141 case 0x28: 28142 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */ 28143 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28144 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 28145 uses_vvvv, vbi, pfx, delta, 28146 "vpmuldq", math_PMULDQ_128 ); 28147 goto decode_success; 28148 } 28149 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */ 28150 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28151 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28152 uses_vvvv, vbi, pfx, delta, 28153 "vpmuldq", math_PMULDQ_256 ); 28154 goto decode_success; 28155 } 28156 break; 28157 28158 case 0x29: 28159 /* VPCMPEQQ r/m, rV, r ::: r = rV 
`eq-by-64s` r/m */ 28160 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */ 28161 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28162 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28163 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 ); 28164 goto decode_success; 28165 } 28166 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */ 28167 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */ 28168 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28169 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28170 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 ); 28171 goto decode_success; 28172 } 28173 break; 28174 28175 case 0x2A: 28176 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */ 28177 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28178 && !epartIsReg(getUChar(delta))) { 28179 UChar modrm = getUChar(delta); 28180 UInt rD = gregOfRexRM(pfx, modrm); 28181 IRTemp tD = newTemp(Ity_V128); 28182 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28183 delta += alen; 28184 gen_SEGV_if_not_16_aligned(addr); 28185 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 28186 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD)); 28187 putYMMRegLoAndZU(rD, mkexpr(tD)); 28188 goto decode_success; 28189 } 28190 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */ 28191 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28192 && !epartIsReg(getUChar(delta))) { 28193 UChar modrm = getUChar(delta); 28194 UInt rD = gregOfRexRM(pfx, modrm); 28195 IRTemp tD = newTemp(Ity_V256); 28196 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28197 delta += alen; 28198 gen_SEGV_if_not_32_aligned(addr); 28199 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 28200 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD)); 28201 putYMMReg(rD, mkexpr(tD)); 28202 goto decode_success; 28203 } 28204 break; 28205 28206 case 0x2B: 28207 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28208 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */ 28209 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 28210 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 28211 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28212 Iop_QNarrowBin32Sto16Ux8, NULL, 28213 False/*!invertLeftArg*/, True/*swapArgs*/ ); 28214 goto decode_success; 28215 } 28216 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28217 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */ 28218 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28219 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28220 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28221 math_VPACKUSDW_YMM ); 28222 goto decode_success; 28223 } 28224 break; 28225 28226 case 0x2C: 28227 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */ 28228 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28229 && 0==getRexW(pfx)/*W0*/ 28230 && !epartIsReg(getUChar(delta))) { 28231 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28232 /*!isYMM*/False, Ity_I32, /*isLoad*/True ); 28233 goto decode_success; 28234 } 28235 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */ 28236 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28237 && 0==getRexW(pfx)/*W0*/ 28238 && !epartIsReg(getUChar(delta))) { 28239 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28240 /*isYMM*/True, Ity_I32, /*isLoad*/True ); 28241 goto decode_success; 28242 } 28243 break; 28244 28245 case 0x2D: 28246 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */ 28247 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28248 && 0==getRexW(pfx)/*W0*/ 28249 && !epartIsReg(getUChar(delta))) { 28250 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28251 /*!isYMM*/False, Ity_I64, /*isLoad*/True ); 28252 goto decode_success; 28253 } 28254 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */ 28255 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28256 && 0==getRexW(pfx)/*W0*/ 28257 && !epartIsReg(getUChar(delta))) { 28258 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 
28259 /*isYMM*/True, Ity_I64, /*isLoad*/True ); 28260 goto decode_success; 28261 } 28262 break; 28263 28264 case 0x2E: 28265 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */ 28266 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28267 && 0==getRexW(pfx)/*W0*/ 28268 && !epartIsReg(getUChar(delta))) { 28269 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28270 /*!isYMM*/False, Ity_I32, /*!isLoad*/False ); 28271 goto decode_success; 28272 } 28273 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */ 28274 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28275 && 0==getRexW(pfx)/*W0*/ 28276 && !epartIsReg(getUChar(delta))) { 28277 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28278 /*isYMM*/True, Ity_I32, /*!isLoad*/False ); 28279 goto decode_success; 28280 } 28281 break; 28282 28283 case 0x2F: 28284 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */ 28285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28286 && 0==getRexW(pfx)/*W0*/ 28287 && !epartIsReg(getUChar(delta))) { 28288 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28289 /*!isYMM*/False, Ity_I64, /*!isLoad*/False ); 28290 goto decode_success; 28291 } 28292 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */ 28293 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28294 && 0==getRexW(pfx)/*W0*/ 28295 && !epartIsReg(getUChar(delta))) { 28296 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28297 /*isYMM*/True, Ity_I64, /*!isLoad*/False ); 28298 goto decode_success; 28299 } 28300 break; 28301 28302 case 0x30: 28303 /* VPMOVZXBW xmm2/m64, xmm1 */ 28304 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */ 28305 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28306 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 28307 True/*isAvx*/, True/*xIsZ*/ ); 28308 goto decode_success; 28309 } 28310 /* VPMOVZXBW xmm2/m128, ymm1 */ 28311 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */ 28312 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 28313 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ ); 28314 goto decode_success; 28315 } 28316 break; 28317 28318 case 0x31: 28319 /* VPMOVZXBD xmm2/m32, xmm1 */ 28320 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */ 28321 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28322 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28323 True/*isAvx*/, True/*xIsZ*/ ); 28324 goto decode_success; 28325 } 28326 /* VPMOVZXBD xmm2/m64, ymm1 */ 28327 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */ 28328 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28329 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28330 goto decode_success; 28331 } 28332 break; 28333 28334 case 0x32: 28335 /* VPMOVZXBQ xmm2/m16, xmm1 */ 28336 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */ 28337 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28338 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28339 goto decode_success; 28340 } 28341 /* VPMOVZXBQ xmm2/m32, ymm1 */ 28342 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */ 28343 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28344 delta = dis_PMOVZXBQ_256( vbi, pfx, delta ); 28345 goto decode_success; 28346 } 28347 break; 28348 28349 case 0x33: 28350 /* VPMOVZXWD xmm2/m64, xmm1 */ 28351 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */ 28352 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28353 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 28354 True/*isAvx*/, True/*xIsZ*/ ); 28355 goto decode_success; 28356 } 28357 /* VPMOVZXWD xmm2/m128, ymm1 */ 28358 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */ 28359 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28360 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28361 goto decode_success; 28362 } 28363 break; 28364 28365 case 0x34: 28366 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */ 28367 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28368 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28369 goto decode_success; 28370 
} 28371 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */ 28372 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28373 delta = dis_PMOVZXWQ_256( vbi, pfx, delta ); 28374 goto decode_success; 28375 } 28376 break; 28377 28378 case 0x35: 28379 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */ 28380 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28381 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28382 True/*isAvx*/, True/*xIsZ*/ ); 28383 goto decode_success; 28384 } 28385 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */ 28386 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28387 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ ); 28388 goto decode_success; 28389 } 28390 break; 28391 28392 case 0x36: 28393 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */ 28394 if (have66noF2noF3(pfx) 28395 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28396 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28397 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD ); 28398 goto decode_success; 28399 } 28400 break; 28401 28402 case 0x37: 28403 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28404 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */ 28405 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28406 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28407 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 ); 28408 goto decode_success; 28409 } 28410 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28411 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */ 28412 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28413 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28414 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 ); 28415 goto decode_success; 28416 } 28417 break; 28418 28419 case 0x38: 28420 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28421 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */ 28422 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28423 delta = 
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28424 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 ); 28425 goto decode_success; 28426 } 28427 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28428 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */ 28429 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28430 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28431 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 ); 28432 goto decode_success; 28433 } 28434 break; 28435 28436 case 0x39: 28437 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28438 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */ 28439 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28440 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28441 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 ); 28442 goto decode_success; 28443 } 28444 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28445 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */ 28446 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28447 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28448 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 ); 28449 goto decode_success; 28450 } 28451 break; 28452 28453 case 0x3A: 28454 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28455 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */ 28456 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28457 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28458 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 ); 28459 goto decode_success; 28460 } 28461 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28462 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */ 28463 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28464 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28465 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 ); 28466 goto decode_success; 28467 } 28468 break; 28469 28470 case 0x3B: 28471 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28472 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */ 
28473 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28474 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28475 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 ); 28476 goto decode_success; 28477 } 28478 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28479 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */ 28480 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28481 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28482 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 ); 28483 goto decode_success; 28484 } 28485 break; 28486 28487 case 0x3C: 28488 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28489 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */ 28490 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28491 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28492 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 ); 28493 goto decode_success; 28494 } 28495 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28496 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */ 28497 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28498 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28499 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 ); 28500 goto decode_success; 28501 } 28502 break; 28503 28504 case 0x3D: 28505 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28506 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */ 28507 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28508 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28509 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 ); 28510 goto decode_success; 28511 } 28512 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28513 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */ 28514 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28515 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28516 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 ); 28517 goto decode_success; 28518 } 28519 break; 28520 28521 case 0x3E: 28522 /* VPMAXUW r/m, rV, r ::: r = 
max-unsigned-16s(rV, r/m) */ 28523 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */ 28524 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28525 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28526 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 ); 28527 goto decode_success; 28528 } 28529 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */ 28530 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */ 28531 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28532 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28533 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 ); 28534 goto decode_success; 28535 } 28536 break; 28537 28538 case 0x3F: 28539 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 28540 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */ 28541 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28542 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28543 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 ); 28544 goto decode_success; 28545 } 28546 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 28547 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */ 28548 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28549 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28550 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 ); 28551 goto decode_success; 28552 } 28553 break; 28554 28555 case 0x40: 28556 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 28557 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */ 28558 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28559 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28560 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 ); 28561 goto decode_success; 28562 } 28563 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 28564 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */ 28565 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28566 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28567 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 ); 28568 goto decode_success; 28569 } 28570 break; 
28571 28572 case 0x41: 28573 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */ 28574 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28575 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ ); 28576 goto decode_success; 28577 } 28578 break; 28579 28580 case 0x45: 28581 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */ 28582 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */ 28583 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28584 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd", 28585 Iop_Shr32, 1==getVexL(pfx) ); 28586 *uses_vvvv = True; 28587 goto decode_success; 28588 } 28589 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */ 28590 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */ 28591 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 28592 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq", 28593 Iop_Shr64, 1==getVexL(pfx) ); 28594 *uses_vvvv = True; 28595 goto decode_success; 28596 } 28597 break; 28598 28599 case 0x46: 28600 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */ 28601 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */ 28602 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28603 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd", 28604 Iop_Sar32, 1==getVexL(pfx) ); 28605 *uses_vvvv = True; 28606 goto decode_success; 28607 } 28608 break; 28609 28610 case 0x47: 28611 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */ 28612 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */ 28613 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28614 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd", 28615 Iop_Shl32, 1==getVexL(pfx) ); 28616 *uses_vvvv = True; 28617 goto decode_success; 28618 } 28619 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */ 28620 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */ 28621 if 
(have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 28622 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq", 28623 Iop_Shl64, 1==getVexL(pfx) ); 28624 *uses_vvvv = True; 28625 goto decode_success; 28626 } 28627 break; 28628 28629 case 0x58: 28630 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */ 28631 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28632 && 0==getRexW(pfx)/*W0*/) { 28633 UChar modrm = getUChar(delta); 28634 UInt rG = gregOfRexRM(pfx, modrm); 28635 IRTemp t32 = newTemp(Ity_I32); 28636 if (epartIsReg(modrm)) { 28637 UInt rE = eregOfRexRM(pfx, modrm); 28638 delta++; 28639 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28640 assign(t32, getXMMRegLane32(rE, 0)); 28641 } else { 28642 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28643 delta += alen; 28644 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG)); 28645 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28646 } 28647 IRTemp t64 = newTemp(Ity_I64); 28648 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28649 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28650 putYMMRegLoAndZU(rG, res); 28651 goto decode_success; 28652 } 28653 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */ 28654 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28655 && 0==getRexW(pfx)/*W0*/) { 28656 UChar modrm = getUChar(delta); 28657 UInt rG = gregOfRexRM(pfx, modrm); 28658 IRTemp t32 = newTemp(Ity_I32); 28659 if (epartIsReg(modrm)) { 28660 UInt rE = eregOfRexRM(pfx, modrm); 28661 delta++; 28662 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28663 assign(t32, getXMMRegLane32(rE, 0)); 28664 } else { 28665 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28666 delta += alen; 28667 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG)); 28668 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28669 } 28670 IRTemp t64 = newTemp(Ity_I64); 28671 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28672 IRExpr* res = 
IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28673 mkexpr(t64), mkexpr(t64)); 28674 putYMMReg(rG, res); 28675 goto decode_success; 28676 } 28677 break; 28678 28679 case 0x59: 28680 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */ 28681 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28682 && 0==getRexW(pfx)/*W0*/) { 28683 UChar modrm = getUChar(delta); 28684 UInt rG = gregOfRexRM(pfx, modrm); 28685 IRTemp t64 = newTemp(Ity_I64); 28686 if (epartIsReg(modrm)) { 28687 UInt rE = eregOfRexRM(pfx, modrm); 28688 delta++; 28689 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28690 assign(t64, getXMMRegLane64(rE, 0)); 28691 } else { 28692 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28693 delta += alen; 28694 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG)); 28695 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 28696 } 28697 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28698 putYMMRegLoAndZU(rG, res); 28699 goto decode_success; 28700 } 28701 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */ 28702 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28703 && 0==getRexW(pfx)/*W0*/) { 28704 UChar modrm = getUChar(delta); 28705 UInt rG = gregOfRexRM(pfx, modrm); 28706 IRTemp t64 = newTemp(Ity_I64); 28707 if (epartIsReg(modrm)) { 28708 UInt rE = eregOfRexRM(pfx, modrm); 28709 delta++; 28710 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28711 assign(t64, getXMMRegLane64(rE, 0)); 28712 } else { 28713 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28714 delta += alen; 28715 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG)); 28716 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 28717 } 28718 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28719 mkexpr(t64), mkexpr(t64)); 28720 putYMMReg(rG, res); 28721 goto decode_success; 28722 } 28723 break; 28724 28725 case 0x5A: 28726 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */ 28727 if (have66noF2noF3(pfx) 28728 && 
1==getVexL(pfx)/*256*/ 28729 && !epartIsReg(getUChar(delta))) { 28730 UChar modrm = getUChar(delta); 28731 UInt rG = gregOfRexRM(pfx, modrm); 28732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28733 delta += alen; 28734 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG)); 28735 IRTemp t128 = newTemp(Ity_V128); 28736 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 28737 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 28738 goto decode_success; 28739 } 28740 break; 28741 28742 case 0x78: 28743 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */ 28744 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28745 && 0==getRexW(pfx)/*W0*/) { 28746 UChar modrm = getUChar(delta); 28747 UInt rG = gregOfRexRM(pfx, modrm); 28748 IRTemp t8 = newTemp(Ity_I8); 28749 if (epartIsReg(modrm)) { 28750 UInt rE = eregOfRexRM(pfx, modrm); 28751 delta++; 28752 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28753 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0))); 28754 } else { 28755 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28756 delta += alen; 28757 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG)); 28758 assign(t8, loadLE(Ity_I8, mkexpr(addr))); 28759 } 28760 IRTemp t16 = newTemp(Ity_I16); 28761 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8))); 28762 IRTemp t32 = newTemp(Ity_I32); 28763 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 28764 IRTemp t64 = newTemp(Ity_I64); 28765 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28766 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28767 putYMMRegLoAndZU(rG, res); 28768 goto decode_success; 28769 } 28770 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */ 28771 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28772 && 0==getRexW(pfx)/*W0*/) { 28773 UChar modrm = getUChar(delta); 28774 UInt rG = gregOfRexRM(pfx, modrm); 28775 IRTemp t8 = newTemp(Ity_I8); 28776 if (epartIsReg(modrm)) { 28777 UInt rE = eregOfRexRM(pfx, 
modrm); 28778 delta++; 28779 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28780 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0))); 28781 } else { 28782 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28783 delta += alen; 28784 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG)); 28785 assign(t8, loadLE(Ity_I8, mkexpr(addr))); 28786 } 28787 IRTemp t16 = newTemp(Ity_I16); 28788 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8))); 28789 IRTemp t32 = newTemp(Ity_I32); 28790 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 28791 IRTemp t64 = newTemp(Ity_I64); 28792 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28793 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28794 mkexpr(t64), mkexpr(t64)); 28795 putYMMReg(rG, res); 28796 goto decode_success; 28797 } 28798 break; 28799 28800 case 0x79: 28801 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */ 28802 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28803 && 0==getRexW(pfx)/*W0*/) { 28804 UChar modrm = getUChar(delta); 28805 UInt rG = gregOfRexRM(pfx, modrm); 28806 IRTemp t16 = newTemp(Ity_I16); 28807 if (epartIsReg(modrm)) { 28808 UInt rE = eregOfRexRM(pfx, modrm); 28809 delta++; 28810 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28811 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0))); 28812 } else { 28813 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28814 delta += alen; 28815 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG)); 28816 assign(t16, loadLE(Ity_I16, mkexpr(addr))); 28817 } 28818 IRTemp t32 = newTemp(Ity_I32); 28819 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 28820 IRTemp t64 = newTemp(Ity_I64); 28821 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28822 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28823 putYMMRegLoAndZU(rG, res); 28824 goto decode_success; 28825 } 28826 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */ 28827 if 
(have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28828 && 0==getRexW(pfx)/*W0*/) { 28829 UChar modrm = getUChar(delta); 28830 UInt rG = gregOfRexRM(pfx, modrm); 28831 IRTemp t16 = newTemp(Ity_I16); 28832 if (epartIsReg(modrm)) { 28833 UInt rE = eregOfRexRM(pfx, modrm); 28834 delta++; 28835 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28836 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0))); 28837 } else { 28838 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28839 delta += alen; 28840 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG)); 28841 assign(t16, loadLE(Ity_I16, mkexpr(addr))); 28842 } 28843 IRTemp t32 = newTemp(Ity_I32); 28844 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 28845 IRTemp t64 = newTemp(Ity_I64); 28846 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28847 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28848 mkexpr(t64), mkexpr(t64)); 28849 putYMMReg(rG, res); 28850 goto decode_success; 28851 } 28852 break; 28853 28854 case 0x8C: 28855 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */ 28856 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28857 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28858 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 28859 /*!isYMM*/False, Ity_I32, /*isLoad*/True ); 28860 goto decode_success; 28861 } 28862 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */ 28863 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28864 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28865 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 28866 /*isYMM*/True, Ity_I32, /*isLoad*/True ); 28867 goto decode_success; 28868 } 28869 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */ 28870 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28871 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28872 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28873 
/*!isYMM*/False, Ity_I64, /*isLoad*/True ); 28874 goto decode_success; 28875 } 28876 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */ 28877 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28878 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28879 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28880 /*isYMM*/True, Ity_I64, /*isLoad*/True ); 28881 goto decode_success; 28882 } 28883 break; 28884 28885 case 0x8E: 28886 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */ 28887 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28888 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28889 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 28890 /*!isYMM*/False, Ity_I32, /*!isLoad*/False ); 28891 goto decode_success; 28892 } 28893 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */ 28894 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28895 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28896 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 28897 /*isYMM*/True, Ity_I32, /*!isLoad*/False ); 28898 goto decode_success; 28899 } 28900 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */ 28901 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28902 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28903 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28904 /*!isYMM*/False, Ity_I64, /*!isLoad*/False ); 28905 goto decode_success; 28906 } 28907 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */ 28908 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28909 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28910 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28911 /*isYMM*/True, Ity_I64, /*!isLoad*/False ); 28912 goto decode_success; 28913 } 28914 break; 28915 28916 case 0x90: 28917 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */ 28918 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/ 28919 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28920 Long delta0 = delta; 28921 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 28922 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 28923 if (delta != delta0) 28924 goto decode_success; 28925 } 28926 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */ 28927 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28928 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28929 Long delta0 = delta; 28930 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 28931 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 28932 if (delta != delta0) 28933 goto decode_success; 28934 } 28935 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */ 28936 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28937 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28938 Long delta0 = delta; 28939 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 28940 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 28941 if (delta != delta0) 28942 goto decode_success; 28943 } 28944 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */ 28945 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28946 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28947 Long delta0 = delta; 28948 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 28949 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 28950 if (delta != delta0) 28951 goto decode_success; 28952 } 28953 break; 28954 28955 case 0x91: 28956 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */ 28957 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28958 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28959 Long delta0 = delta; 28960 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 28961 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 28962 if (delta != delta0) 28963 goto decode_success; 28964 } 28965 /* VPGATHERQD xmm2, vm64y, xmm1 = 
VEX.DDS.256.66.0F38.W0 91 /r */ 28966 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28967 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28968 Long delta0 = delta; 28969 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 28970 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 28971 if (delta != delta0) 28972 goto decode_success; 28973 } 28974 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */ 28975 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28976 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28977 Long delta0 = delta; 28978 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 28979 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 ); 28980 if (delta != delta0) 28981 goto decode_success; 28982 } 28983 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */ 28984 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28985 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28986 Long delta0 = delta; 28987 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 28988 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 28989 if (delta != delta0) 28990 goto decode_success; 28991 } 28992 break; 28993 28994 case 0x92: 28995 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */ 28996 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28997 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28998 Long delta0 = delta; 28999 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 29000 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 29001 if (delta != delta0) 29002 goto decode_success; 29003 } 29004 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */ 29005 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29006 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29007 Long delta0 = delta; 29008 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 29009 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 29010 if (delta != delta0) 29011 goto 
decode_success; 29012 } 29013 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */ 29014 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29015 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29016 Long delta0 = delta; 29017 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 29018 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 29019 if (delta != delta0) 29020 goto decode_success; 29021 } 29022 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */ 29023 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29024 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29025 Long delta0 = delta; 29026 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 29027 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 29028 if (delta != delta0) 29029 goto decode_success; 29030 } 29031 break; 29032 29033 case 0x93: 29034 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */ 29035 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29036 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29037 Long delta0 = delta; 29038 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 29039 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 29040 if (delta != delta0) 29041 goto decode_success; 29042 } 29043 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */ 29044 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29045 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29046 Long delta0 = delta; 29047 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 29048 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 29049 if (delta != delta0) 29050 goto decode_success; 29051 } 29052 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */ 29053 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29054 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29055 Long delta0 = delta; 29056 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 29057 /*!isYMM*/False, 
/*isVM64x*/True, Ity_I64 ); 29058 if (delta != delta0) 29059 goto decode_success; 29060 } 29061 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */ 29062 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29063 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29064 Long delta0 = delta; 29065 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 29066 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 29067 if (delta != delta0) 29068 goto decode_success; 29069 } 29070 break; 29071 29072 case 0x96 ... 0x9F: 29073 case 0xA6 ... 0xAF: 29074 case 0xB6 ... 0xBF: 29075 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */ 29076 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */ 29077 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */ 29078 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */ 29079 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */ 29080 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */ 29081 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */ 29082 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */ 29083 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */ 29084 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */ 29085 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */ 29086 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */ 29087 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */ 29088 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */ 29089 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */ 29090 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */ 29091 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */ 29092 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */ 
29093 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */ 29094 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */ 29095 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */ 29096 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */ 29097 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */ 29098 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */ 29099 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */ 29100 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */ 29101 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */ 29102 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */ 29103 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */ 29104 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */ 29105 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */ 29106 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */ 29107 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */ 29108 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */ 29109 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */ 29110 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */ 29111 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */ 29112 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */ 29113 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */ 29114 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */ 29115 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */ 29116 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */ 29117 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */ 29118 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = 
VEX.DDS.256.66.0F38.W1 A8 /r */ 29119 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */ 29120 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */ 29121 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */ 29122 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */ 29123 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */ 29124 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */ 29125 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */ 29126 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */ 29127 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */ 29128 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */ 29129 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */ 29130 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */ 29131 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */ 29132 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */ 29133 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */ 29134 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */ 29135 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */ 29136 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */ 29137 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */ 29138 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */ 29139 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */ 29140 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */ 29141 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */ 29142 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */ 29143 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */ 29144 /* VFMSUBADD231PS 
ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */ 29145 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */ 29146 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */ 29147 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */ 29148 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */ 29149 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */ 29150 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */ 29151 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */ 29152 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */ 29153 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */ 29154 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */ 29155 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */ 29156 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */ 29157 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */ 29158 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */ 29159 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */ 29160 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */ 29161 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */ 29162 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */ 29163 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */ 29164 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */ 29165 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */ 29166 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */ 29167 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */ 29168 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */ 29169 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */ 29170 /* 
VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */ 29171 if (have66noF2noF3(pfx)) { 29172 delta = dis_FMA( vbi, pfx, delta, opc ); 29173 *uses_vvvv = True; 29174 goto decode_success; 29175 } 29176 break; 29177 29178 case 0xDB: 29179 case 0xDC: 29180 case 0xDD: 29181 case 0xDE: 29182 case 0xDF: 29183 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */ 29184 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */ 29185 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */ 29186 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */ 29187 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */ 29188 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29189 delta = dis_AESx( vbi, pfx, delta, True/*!isAvx*/, opc ); 29190 if (opc != 0xDB) *uses_vvvv = True; 29191 goto decode_success; 29192 } 29193 break; 29194 29195 case 0xF2: 29196 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */ 29197 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */ 29198 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29199 Int size = getRexW(pfx) ? 
8 : 4; 29200 IRType ty = szToITy(size); 29201 IRTemp dst = newTemp(ty); 29202 IRTemp src1 = newTemp(ty); 29203 IRTemp src2 = newTemp(ty); 29204 UChar rm = getUChar(delta); 29205 29206 assign( src1, getIRegV(size,pfx) ); 29207 if (epartIsReg(rm)) { 29208 assign( src2, getIRegE(size,pfx,rm) ); 29209 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm), 29210 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29211 delta++; 29212 } else { 29213 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29214 assign( src2, loadLE(ty, mkexpr(addr)) ); 29215 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29216 nameIRegG(size,pfx,rm)); 29217 delta += alen; 29218 } 29219 29220 assign( dst, binop( mkSizedOp(ty,Iop_And8), 29221 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ), 29222 mkexpr(src2) ) ); 29223 putIRegG( size, pfx, rm, mkexpr(dst) ); 29224 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29225 ? AMD64G_CC_OP_ANDN64 29226 : AMD64G_CC_OP_ANDN32)) ); 29227 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29228 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 29229 *uses_vvvv = True; 29230 goto decode_success; 29231 } 29232 break; 29233 29234 case 0xF3: 29235 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */ 29236 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */ 29237 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29238 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) { 29239 Int size = getRexW(pfx) ? 
8 : 4; 29240 IRType ty = szToITy(size); 29241 IRTemp src = newTemp(ty); 29242 IRTemp dst = newTemp(ty); 29243 UChar rm = getUChar(delta); 29244 29245 if (epartIsReg(rm)) { 29246 assign( src, getIRegE(size,pfx,rm) ); 29247 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm), 29248 nameIRegV(size,pfx)); 29249 delta++; 29250 } else { 29251 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29252 assign( src, loadLE(ty, mkexpr(addr)) ); 29253 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29254 delta += alen; 29255 } 29256 29257 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29258 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0), 29259 mkexpr(src)), mkexpr(src)) ); 29260 putIRegV( size, pfx, mkexpr(dst) ); 29261 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29262 ? AMD64G_CC_OP_BLSI64 29263 : AMD64G_CC_OP_BLSI32)) ); 29264 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29265 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29266 *uses_vvvv = True; 29267 goto decode_success; 29268 } 29269 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */ 29270 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */ 29271 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29272 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) { 29273 Int size = getRexW(pfx) ? 
8 : 4; 29274 IRType ty = szToITy(size); 29275 IRTemp src = newTemp(ty); 29276 IRTemp dst = newTemp(ty); 29277 UChar rm = getUChar(delta); 29278 29279 if (epartIsReg(rm)) { 29280 assign( src, getIRegE(size,pfx,rm) ); 29281 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm), 29282 nameIRegV(size,pfx)); 29283 delta++; 29284 } else { 29285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29286 assign( src, loadLE(ty, mkexpr(addr)) ); 29287 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29288 delta += alen; 29289 } 29290 29291 assign( dst, binop(mkSizedOp(ty,Iop_Xor8), 29292 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29293 mkU(ty, 1)), mkexpr(src)) ); 29294 putIRegV( size, pfx, mkexpr(dst) ); 29295 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29296 ? AMD64G_CC_OP_BLSMSK64 29297 : AMD64G_CC_OP_BLSMSK32)) ); 29298 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29299 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29300 *uses_vvvv = True; 29301 goto decode_success; 29302 } 29303 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */ 29304 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */ 29305 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29306 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) { 29307 Int size = getRexW(pfx) ? 
8 : 4; 29308 IRType ty = szToITy(size); 29309 IRTemp src = newTemp(ty); 29310 IRTemp dst = newTemp(ty); 29311 UChar rm = getUChar(delta); 29312 29313 if (epartIsReg(rm)) { 29314 assign( src, getIRegE(size,pfx,rm) ); 29315 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm), 29316 nameIRegV(size,pfx)); 29317 delta++; 29318 } else { 29319 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29320 assign( src, loadLE(ty, mkexpr(addr)) ); 29321 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29322 delta += alen; 29323 } 29324 29325 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29326 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29327 mkU(ty, 1)), mkexpr(src)) ); 29328 putIRegV( size, pfx, mkexpr(dst) ); 29329 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29330 ? AMD64G_CC_OP_BLSR64 29331 : AMD64G_CC_OP_BLSR32)) ); 29332 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29333 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29334 *uses_vvvv = True; 29335 goto decode_success; 29336 } 29337 break; 29338 29339 case 0xF5: 29340 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */ 29341 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */ 29342 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29343 Int size = getRexW(pfx) ? 
8 : 4; 29344 IRType ty = szToITy(size); 29345 IRTemp dst = newTemp(ty); 29346 IRTemp src1 = newTemp(ty); 29347 IRTemp src2 = newTemp(ty); 29348 IRTemp start = newTemp(Ity_I8); 29349 IRTemp cond = newTemp(Ity_I1); 29350 UChar rm = getUChar(delta); 29351 29352 assign( src2, getIRegV(size,pfx) ); 29353 if (epartIsReg(rm)) { 29354 assign( src1, getIRegE(size,pfx,rm) ); 29355 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), 29356 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 29357 delta++; 29358 } else { 29359 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29360 assign( src1, loadLE(ty, mkexpr(addr)) ); 29361 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 29362 nameIRegG(size,pfx,rm)); 29363 delta += alen; 29364 } 29365 29366 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) ); 29367 assign( cond, binop(Iop_CmpLT32U, 29368 unop(Iop_8Uto32, mkexpr(start)), 29369 mkU32(8*size)) ); 29370 /* if (start < opsize) { 29371 if (start == 0) 29372 dst = 0; 29373 else 29374 dst = (src1 << (opsize-start)) u>> (opsize-start); 29375 } else { 29376 dst = src1; 29377 } */ 29378 assign( dst, 29379 IRExpr_ITE( 29380 mkexpr(cond), 29381 IRExpr_ITE( 29382 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)), 29383 mkU(ty, 0), 29384 binop( 29385 mkSizedOp(ty,Iop_Shr8), 29386 binop( 29387 mkSizedOp(ty,Iop_Shl8), 29388 mkexpr(src1), 29389 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29390 ), 29391 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29392 ) 29393 ), 29394 mkexpr(src1) 29395 ) 29396 ); 29397 putIRegG( size, pfx, rm, mkexpr(dst) ); 29398 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29399 ? 
AMD64G_CC_OP_BLSR64 29400 : AMD64G_CC_OP_BLSR32)) ); 29401 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29402 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) ); 29403 *uses_vvvv = True; 29404 goto decode_success; 29405 } 29406 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */ 29407 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */ 29408 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29409 Int size = getRexW(pfx) ? 8 : 4; 29410 IRType ty = szToITy(size); 29411 IRTemp src = newTemp(ty); 29412 IRTemp mask = newTemp(ty); 29413 UChar rm = getUChar(delta); 29414 29415 assign( src, getIRegV(size,pfx) ); 29416 if (epartIsReg(rm)) { 29417 assign( mask, getIRegE(size,pfx,rm) ); 29418 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm), 29419 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29420 delta++; 29421 } else { 29422 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29423 assign( mask, loadLE(ty, mkexpr(addr)) ); 29424 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29425 nameIRegG(size,pfx,rm)); 29426 delta += alen; 29427 } 29428 29429 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)), 29430 widenUto64(mkexpr(mask)) ); 29431 putIRegG( size, pfx, rm, 29432 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29433 "amd64g_calculate_pdep", 29434 &amd64g_calculate_pdep, args)) ); 29435 *uses_vvvv = True; 29436 /* Flags aren't modified. */ 29437 goto decode_success; 29438 } 29439 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */ 29440 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */ 29441 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29442 Int size = getRexW(pfx) ? 
8 : 4; 29443 IRType ty = szToITy(size); 29444 IRTemp src = newTemp(ty); 29445 IRTemp mask = newTemp(ty); 29446 UChar rm = getUChar(delta); 29447 29448 assign( src, getIRegV(size,pfx) ); 29449 if (epartIsReg(rm)) { 29450 assign( mask, getIRegE(size,pfx,rm) ); 29451 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm), 29452 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29453 delta++; 29454 } else { 29455 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29456 assign( mask, loadLE(ty, mkexpr(addr)) ); 29457 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29458 nameIRegG(size,pfx,rm)); 29459 delta += alen; 29460 } 29461 29462 /* First mask off bits not set in mask, they are ignored 29463 and it should be fine if they contain undefined values. */ 29464 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8), 29465 mkexpr(src), mkexpr(mask)); 29466 IRExpr** args = mkIRExprVec_2( widenUto64(masked), 29467 widenUto64(mkexpr(mask)) ); 29468 putIRegG( size, pfx, rm, 29469 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29470 "amd64g_calculate_pext", 29471 &amd64g_calculate_pext, args)) ); 29472 *uses_vvvv = True; 29473 /* Flags aren't modified. */ 29474 goto decode_success; 29475 } 29476 break; 29477 29478 case 0xF6: 29479 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */ 29480 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */ 29481 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29482 Int size = getRexW(pfx) ? 8 : 4; 29483 IRType ty = szToITy(size); 29484 IRTemp src1 = newTemp(ty); 29485 IRTemp src2 = newTemp(ty); 29486 IRTemp res = newTemp(size == 8 ? 
Ity_I128 : Ity_I64); 29487 UChar rm = getUChar(delta); 29488 29489 assign( src1, getIRegRDX(size) ); 29490 if (epartIsReg(rm)) { 29491 assign( src2, getIRegE(size,pfx,rm) ); 29492 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm), 29493 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29494 delta++; 29495 } else { 29496 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29497 assign( src2, loadLE(ty, mkexpr(addr)) ); 29498 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29499 nameIRegG(size,pfx,rm)); 29500 delta += alen; 29501 } 29502 29503 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32, 29504 mkexpr(src1), mkexpr(src2)) ); 29505 putIRegV( size, pfx, 29506 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) ); 29507 putIRegG( size, pfx, rm, 29508 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32, 29509 mkexpr(res)) ); 29510 *uses_vvvv = True; 29511 /* Flags aren't modified. */ 29512 goto decode_success; 29513 } 29514 break; 29515 29516 case 0xF7: 29517 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */ 29518 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */ 29519 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29520 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 ); 29521 goto decode_success; 29522 } 29523 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */ 29524 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */ 29525 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29526 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 ); 29527 goto decode_success; 29528 } 29529 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */ 29530 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */ 29531 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29532 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 ); 29533 goto decode_success; 29534 } 29535 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */ 29536 /* BEXTR r64b, 
r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */ 29537 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29538 Int size = getRexW(pfx) ? 8 : 4; 29539 IRType ty = szToITy(size); 29540 IRTemp dst = newTemp(ty); 29541 IRTemp src1 = newTemp(ty); 29542 IRTemp src2 = newTemp(ty); 29543 IRTemp stle = newTemp(Ity_I16); 29544 IRTemp start = newTemp(Ity_I8); 29545 IRTemp len = newTemp(Ity_I8); 29546 UChar rm = getUChar(delta); 29547 29548 assign( src2, getIRegV(size,pfx) ); 29549 if (epartIsReg(rm)) { 29550 assign( src1, getIRegE(size,pfx,rm) ); 29551 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), 29552 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 29553 delta++; 29554 } else { 29555 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29556 assign( src1, loadLE(ty, mkexpr(addr)) ); 29557 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 29558 nameIRegG(size,pfx,rm)); 29559 delta += alen; 29560 } 29561 29562 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) ); 29563 assign( start, unop( Iop_16to8, mkexpr(stle) ) ); 29564 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) ); 29565 /* if (start+len < opsize) { 29566 if (len != 0) 29567 dst = (src1 << (opsize-start-len)) u>> (opsize-len); 29568 else 29569 dst = 0; 29570 } else { 29571 if (start < opsize) 29572 dst = src1 u>> start; 29573 else 29574 dst = 0; 29575 } */ 29576 assign( dst, 29577 IRExpr_ITE( 29578 binop(Iop_CmpLT32U, 29579 binop(Iop_Add32, 29580 unop(Iop_8Uto32, mkexpr(start)), 29581 unop(Iop_8Uto32, mkexpr(len))), 29582 mkU32(8*size)), 29583 IRExpr_ITE( 29584 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)), 29585 mkU(ty, 0), 29586 binop(mkSizedOp(ty,Iop_Shr8), 29587 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1), 29588 binop(Iop_Sub8, 29589 binop(Iop_Sub8, mkU8(8*size), 29590 mkexpr(start)), 29591 mkexpr(len))), 29592 binop(Iop_Sub8, mkU8(8*size), 29593 mkexpr(len))) 29594 ), 29595 IRExpr_ITE( 29596 binop(Iop_CmpLT32U, 29597 unop(Iop_8Uto32, mkexpr(start)), 29598 mkU32(8*size)), 29599 
binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1), 29600 mkexpr(start)), 29601 mkU(ty, 0) 29602 ) 29603 ) 29604 ); 29605 putIRegG( size, pfx, rm, mkexpr(dst) ); 29606 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29607 ? AMD64G_CC_OP_ANDN64 29608 : AMD64G_CC_OP_ANDN32)) ); 29609 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29610 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 29611 *uses_vvvv = True; 29612 goto decode_success; 29613 } 29614 break; 29615 29616 default: 29617 break; 29618 29619 } 29620 29621 //decode_failure: 29622 return deltaIN; 29623 29624 decode_success: 29625 return delta; 29626 } 29627 29628 29629 /*------------------------------------------------------------*/ 29630 /*--- ---*/ 29631 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/ 29632 /*--- ---*/ 29633 /*------------------------------------------------------------*/ 29634 29635 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 ) 29636 { 29637 vassert(imm8 < 256); 29638 IRTemp s3, s2, s1, s0; 29639 s3 = s2 = s1 = s0 = IRTemp_INVALID; 29640 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 29641 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \ 29642 : ((_nn)==2) ? 
s2 : s3) 29643 IRTemp res = newTemp(Ity_V128); 29644 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3), 29645 SEL((imm8 >> 4) & 3), 29646 SEL((imm8 >> 2) & 3), 29647 SEL((imm8 >> 0) & 3) )); 29648 # undef SEL 29649 return res; 29650 } 29651 29652 __attribute__((noinline)) 29653 static 29654 Long dis_ESC_0F3A__VEX ( 29655 /*MB_OUT*/DisResult* dres, 29656 /*OUT*/ Bool* uses_vvvv, 29657 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 29658 Bool resteerCisOk, 29659 void* callback_opaque, 29660 const VexArchInfo* archinfo, 29661 const VexAbiInfo* vbi, 29662 Prefix pfx, Int sz, Long deltaIN 29663 ) 29664 { 29665 IRTemp addr = IRTemp_INVALID; 29666 Int alen = 0; 29667 HChar dis_buf[50]; 29668 Long delta = deltaIN; 29669 UChar opc = getUChar(delta); 29670 delta++; 29671 *uses_vvvv = False; 29672 29673 switch (opc) { 29674 29675 case 0x00: 29676 case 0x01: 29677 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */ 29678 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */ 29679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29680 && 1==getRexW(pfx)/*W1*/) { 29681 UChar modrm = getUChar(delta); 29682 UInt imm8 = 0; 29683 UInt rG = gregOfRexRM(pfx, modrm); 29684 IRTemp sV = newTemp(Ity_V256); 29685 const HChar *name = opc == 0 ? 
"vpermq" : "vpermpd"; 29686 if (epartIsReg(modrm)) { 29687 UInt rE = eregOfRexRM(pfx, modrm); 29688 delta += 1; 29689 imm8 = getUChar(delta); 29690 DIP("%s $%u,%s,%s\n", 29691 name, imm8, nameYMMReg(rE), nameYMMReg(rG)); 29692 assign(sV, getYMMReg(rE)); 29693 } else { 29694 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29695 delta += alen; 29696 imm8 = getUChar(delta); 29697 DIP("%s $%u,%s,%s\n", 29698 name, imm8, dis_buf, nameYMMReg(rG)); 29699 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29700 } 29701 delta++; 29702 IRTemp s[4]; 29703 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 29704 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]); 29705 IRTemp dV = newTemp(Ity_V256); 29706 assign(dV, IRExpr_Qop(Iop_64x4toV256, 29707 mkexpr(s[(imm8 >> 6) & 3]), 29708 mkexpr(s[(imm8 >> 4) & 3]), 29709 mkexpr(s[(imm8 >> 2) & 3]), 29710 mkexpr(s[(imm8 >> 0) & 3]))); 29711 putYMMReg(rG, mkexpr(dV)); 29712 goto decode_success; 29713 } 29714 break; 29715 29716 case 0x02: 29717 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */ 29718 if (have66noF2noF3(pfx) 29719 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 29720 UChar modrm = getUChar(delta); 29721 UInt imm8 = 0; 29722 UInt rG = gregOfRexRM(pfx, modrm); 29723 UInt rV = getVexNvvvv(pfx); 29724 IRTemp sV = newTemp(Ity_V128); 29725 IRTemp dV = newTemp(Ity_V128); 29726 UInt i; 29727 IRTemp s[4], d[4]; 29728 assign(sV, getXMMReg(rV)); 29729 if (epartIsReg(modrm)) { 29730 UInt rE = eregOfRexRM(pfx, modrm); 29731 delta += 1; 29732 imm8 = getUChar(delta); 29733 DIP("vpblendd $%u,%s,%s,%s\n", 29734 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 29735 assign(dV, getXMMReg(rE)); 29736 } else { 29737 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29738 delta += alen; 29739 imm8 = getUChar(delta); 29740 DIP("vpblendd $%u,%s,%s,%s\n", 29741 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 29742 assign(dV, loadLE(Ity_V128, mkexpr(addr))); 29743 } 29744 delta++; 29745 for (i = 0; i < 4; i++) { 
29746 s[i] = IRTemp_INVALID; 29747 d[i] = IRTemp_INVALID; 29748 } 29749 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] ); 29750 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] ); 29751 for (i = 0; i < 4; i++) 29752 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i])); 29753 putYMMRegLane128(rG, 1, mkV128(0)); 29754 *uses_vvvv = True; 29755 goto decode_success; 29756 } 29757 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */ 29758 if (have66noF2noF3(pfx) 29759 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 29760 UChar modrm = getUChar(delta); 29761 UInt imm8 = 0; 29762 UInt rG = gregOfRexRM(pfx, modrm); 29763 UInt rV = getVexNvvvv(pfx); 29764 IRTemp sV = newTemp(Ity_V256); 29765 IRTemp dV = newTemp(Ity_V256); 29766 UInt i; 29767 IRTemp s[8], d[8]; 29768 assign(sV, getYMMReg(rV)); 29769 if (epartIsReg(modrm)) { 29770 UInt rE = eregOfRexRM(pfx, modrm); 29771 delta += 1; 29772 imm8 = getUChar(delta); 29773 DIP("vpblendd $%u,%s,%s,%s\n", 29774 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 29775 assign(dV, getYMMReg(rE)); 29776 } else { 29777 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29778 delta += alen; 29779 imm8 = getUChar(delta); 29780 DIP("vpblendd $%u,%s,%s,%s\n", 29781 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 29782 assign(dV, loadLE(Ity_V256, mkexpr(addr))); 29783 } 29784 delta++; 29785 for (i = 0; i < 8; i++) { 29786 s[i] = IRTemp_INVALID; 29787 d[i] = IRTemp_INVALID; 29788 } 29789 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 29790 &s[3], &s[2], &s[1], &s[0] ); 29791 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4], 29792 &d[3], &d[2], &d[1], &d[0] ); 29793 for (i = 0; i < 8; i++) 29794 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? 
d[i] : s[i])); 29795 *uses_vvvv = True; 29796 goto decode_success; 29797 } 29798 break; 29799 29800 case 0x04: 29801 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */ 29802 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29803 UChar modrm = getUChar(delta); 29804 UInt imm8 = 0; 29805 UInt rG = gregOfRexRM(pfx, modrm); 29806 IRTemp sV = newTemp(Ity_V256); 29807 if (epartIsReg(modrm)) { 29808 UInt rE = eregOfRexRM(pfx, modrm); 29809 delta += 1; 29810 imm8 = getUChar(delta); 29811 DIP("vpermilps $%u,%s,%s\n", 29812 imm8, nameYMMReg(rE), nameYMMReg(rG)); 29813 assign(sV, getYMMReg(rE)); 29814 } else { 29815 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29816 delta += alen; 29817 imm8 = getUChar(delta); 29818 DIP("vpermilps $%u,%s,%s\n", 29819 imm8, dis_buf, nameYMMReg(rG)); 29820 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29821 } 29822 delta++; 29823 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 29824 breakupV256toV128s( sV, &sVhi, &sVlo ); 29825 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 ); 29826 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 ); 29827 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo)); 29828 putYMMReg(rG, res); 29829 goto decode_success; 29830 } 29831 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */ 29832 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29833 UChar modrm = getUChar(delta); 29834 UInt imm8 = 0; 29835 UInt rG = gregOfRexRM(pfx, modrm); 29836 IRTemp sV = newTemp(Ity_V128); 29837 if (epartIsReg(modrm)) { 29838 UInt rE = eregOfRexRM(pfx, modrm); 29839 delta += 1; 29840 imm8 = getUChar(delta); 29841 DIP("vpermilps $%u,%s,%s\n", 29842 imm8, nameXMMReg(rE), nameXMMReg(rG)); 29843 assign(sV, getXMMReg(rE)); 29844 } else { 29845 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29846 delta += alen; 29847 imm8 = getUChar(delta); 29848 DIP("vpermilps $%u,%s,%s\n", 29849 imm8, dis_buf, nameXMMReg(rG)); 29850 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 29851 } 
29852 delta++; 29853 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) ); 29854 goto decode_success; 29855 } 29856 break; 29857 29858 case 0x05: 29859 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */ 29860 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29861 UChar modrm = getUChar(delta); 29862 UInt imm8 = 0; 29863 UInt rG = gregOfRexRM(pfx, modrm); 29864 IRTemp sV = newTemp(Ity_V128); 29865 if (epartIsReg(modrm)) { 29866 UInt rE = eregOfRexRM(pfx, modrm); 29867 delta += 1; 29868 imm8 = getUChar(delta); 29869 DIP("vpermilpd $%u,%s,%s\n", 29870 imm8, nameXMMReg(rE), nameXMMReg(rG)); 29871 assign(sV, getXMMReg(rE)); 29872 } else { 29873 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29874 delta += alen; 29875 imm8 = getUChar(delta); 29876 DIP("vpermilpd $%u,%s,%s\n", 29877 imm8, dis_buf, nameXMMReg(rG)); 29878 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 29879 } 29880 delta++; 29881 IRTemp s1 = newTemp(Ity_I64); 29882 IRTemp s0 = newTemp(Ity_I64); 29883 assign(s1, unop(Iop_V128HIto64, mkexpr(sV))); 29884 assign(s0, unop(Iop_V128to64, mkexpr(sV))); 29885 IRTemp dV = newTemp(Ity_V128); 29886 assign(dV, binop(Iop_64HLtoV128, 29887 mkexpr((imm8 & (1<<1)) ? s1 : s0), 29888 mkexpr((imm8 & (1<<0)) ? 
s1 : s0))); 29889 putYMMRegLoAndZU(rG, mkexpr(dV)); 29890 goto decode_success; 29891 } 29892 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */ 29893 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29894 UChar modrm = getUChar(delta); 29895 UInt imm8 = 0; 29896 UInt rG = gregOfRexRM(pfx, modrm); 29897 IRTemp sV = newTemp(Ity_V256); 29898 if (epartIsReg(modrm)) { 29899 UInt rE = eregOfRexRM(pfx, modrm); 29900 delta += 1; 29901 imm8 = getUChar(delta); 29902 DIP("vpermilpd $%u,%s,%s\n", 29903 imm8, nameYMMReg(rE), nameYMMReg(rG)); 29904 assign(sV, getYMMReg(rE)); 29905 } else { 29906 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29907 delta += alen; 29908 imm8 = getUChar(delta); 29909 DIP("vpermilpd $%u,%s,%s\n", 29910 imm8, dis_buf, nameYMMReg(rG)); 29911 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29912 } 29913 delta++; 29914 IRTemp s3, s2, s1, s0; 29915 s3 = s2 = s1 = s0 = IRTemp_INVALID; 29916 breakupV256to64s(sV, &s3, &s2, &s1, &s0); 29917 IRTemp dV = newTemp(Ity_V256); 29918 assign(dV, IRExpr_Qop(Iop_64x4toV256, 29919 mkexpr((imm8 & (1<<3)) ? s3 : s2), 29920 mkexpr((imm8 & (1<<2)) ? s3 : s2), 29921 mkexpr((imm8 & (1<<1)) ? s1 : s0), 29922 mkexpr((imm8 & (1<<0)) ? 
s1 : s0))); 29923 putYMMReg(rG, mkexpr(dV)); 29924 goto decode_success; 29925 } 29926 break; 29927 29928 case 0x06: 29929 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */ 29930 if (have66noF2noF3(pfx) 29931 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 29932 UChar modrm = getUChar(delta); 29933 UInt imm8 = 0; 29934 UInt rG = gregOfRexRM(pfx, modrm); 29935 UInt rV = getVexNvvvv(pfx); 29936 IRTemp s00 = newTemp(Ity_V128); 29937 IRTemp s01 = newTemp(Ity_V128); 29938 IRTemp s10 = newTemp(Ity_V128); 29939 IRTemp s11 = newTemp(Ity_V128); 29940 assign(s00, getYMMRegLane128(rV, 0)); 29941 assign(s01, getYMMRegLane128(rV, 1)); 29942 if (epartIsReg(modrm)) { 29943 UInt rE = eregOfRexRM(pfx, modrm); 29944 delta += 1; 29945 imm8 = getUChar(delta); 29946 DIP("vperm2f128 $%u,%s,%s,%s\n", 29947 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 29948 assign(s10, getYMMRegLane128(rE, 0)); 29949 assign(s11, getYMMRegLane128(rE, 1)); 29950 } else { 29951 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29952 delta += alen; 29953 imm8 = getUChar(delta); 29954 DIP("vperm2f128 $%u,%s,%s,%s\n", 29955 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 29956 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 29957 mkexpr(addr), mkU64(0)))); 29958 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 29959 mkexpr(addr), mkU64(16)))); 29960 } 29961 delta++; 29962 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 29963 : ((_nn)==2) ? 
s10 : s11) 29964 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 29965 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 29966 # undef SEL 29967 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 29968 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 29969 *uses_vvvv = True; 29970 goto decode_success; 29971 } 29972 break; 29973 29974 case 0x08: 29975 /* VROUNDPS imm8, xmm2/m128, xmm1 */ 29976 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */ 29977 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29978 UChar modrm = getUChar(delta); 29979 UInt rG = gregOfRexRM(pfx, modrm); 29980 IRTemp src = newTemp(Ity_V128); 29981 IRTemp s0 = IRTemp_INVALID; 29982 IRTemp s1 = IRTemp_INVALID; 29983 IRTemp s2 = IRTemp_INVALID; 29984 IRTemp s3 = IRTemp_INVALID; 29985 IRTemp rm = newTemp(Ity_I32); 29986 Int imm = 0; 29987 29988 modrm = getUChar(delta); 29989 29990 if (epartIsReg(modrm)) { 29991 UInt rE = eregOfRexRM(pfx, modrm); 29992 assign( src, getXMMReg( rE ) ); 29993 imm = getUChar(delta+1); 29994 if (imm & ~15) break; 29995 delta += 1+1; 29996 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 29997 } else { 29998 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29999 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 30000 imm = getUChar(delta+alen); 30001 if (imm & ~15) break; 30002 delta += alen+1; 30003 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 30004 } 30005 30006 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30007 that encoding is the same as the encoding for IRRoundingMode, 30008 we can use that value directly in the IR as a rounding 30009 mode. */ 30010 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30011 30012 breakupV128to32s( src, &s3, &s2, &s1, &s0 ); 30013 putYMMRegLane128( rG, 1, mkV128(0) ); 30014 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 30015 unop(Iop_ReinterpI32asF32, mkexpr(s))) 30016 putYMMRegLane32F( rG, 3, CVT(s3) ); 30017 putYMMRegLane32F( rG, 2, CVT(s2) ); 30018 putYMMRegLane32F( rG, 1, CVT(s1) ); 30019 putYMMRegLane32F( rG, 0, CVT(s0) ); 30020 # undef CVT 30021 goto decode_success; 30022 } 30023 /* VROUNDPS imm8, ymm2/m256, ymm1 */ 30024 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */ 30025 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30026 UChar modrm = getUChar(delta); 30027 UInt rG = gregOfRexRM(pfx, modrm); 30028 IRTemp src = newTemp(Ity_V256); 30029 IRTemp s0 = IRTemp_INVALID; 30030 IRTemp s1 = IRTemp_INVALID; 30031 IRTemp s2 = IRTemp_INVALID; 30032 IRTemp s3 = IRTemp_INVALID; 30033 IRTemp s4 = IRTemp_INVALID; 30034 IRTemp s5 = IRTemp_INVALID; 30035 IRTemp s6 = IRTemp_INVALID; 30036 IRTemp s7 = IRTemp_INVALID; 30037 IRTemp rm = newTemp(Ity_I32); 30038 Int imm = 0; 30039 30040 modrm = getUChar(delta); 30041 30042 if (epartIsReg(modrm)) { 30043 UInt rE = eregOfRexRM(pfx, modrm); 30044 assign( src, getYMMReg( rE ) ); 30045 imm = getUChar(delta+1); 30046 if (imm & ~15) break; 30047 delta += 1+1; 30048 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 30049 } else { 30050 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30051 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 30052 imm = getUChar(delta+alen); 30053 if (imm & ~15) break; 30054 delta += alen+1; 30055 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 30056 } 30057 30058 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30059 that encoding is the same as the encoding for IRRoundingMode, 30060 we can use that value directly in the IR as a rounding 30061 mode. */ 30062 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30063 30064 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 30065 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 30066 unop(Iop_ReinterpI32asF32, mkexpr(s))) 30067 putYMMRegLane32F( rG, 7, CVT(s7) ); 30068 putYMMRegLane32F( rG, 6, CVT(s6) ); 30069 putYMMRegLane32F( rG, 5, CVT(s5) ); 30070 putYMMRegLane32F( rG, 4, CVT(s4) ); 30071 putYMMRegLane32F( rG, 3, CVT(s3) ); 30072 putYMMRegLane32F( rG, 2, CVT(s2) ); 30073 putYMMRegLane32F( rG, 1, CVT(s1) ); 30074 putYMMRegLane32F( rG, 0, CVT(s0) ); 30075 # undef CVT 30076 goto decode_success; 30077 } 30078 30079 case 0x09: 30080 /* VROUNDPD imm8, xmm2/m128, xmm1 */ 30081 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */ 30082 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30083 UChar modrm = getUChar(delta); 30084 UInt rG = gregOfRexRM(pfx, modrm); 30085 IRTemp src = newTemp(Ity_V128); 30086 IRTemp s0 = IRTemp_INVALID; 30087 IRTemp s1 = IRTemp_INVALID; 30088 IRTemp rm = newTemp(Ity_I32); 30089 Int imm = 0; 30090 30091 modrm = getUChar(delta); 30092 30093 if (epartIsReg(modrm)) { 30094 UInt rE = eregOfRexRM(pfx, modrm); 30095 assign( src, getXMMReg( rE ) ); 30096 imm = getUChar(delta+1); 30097 if (imm & ~15) break; 30098 delta += 1+1; 30099 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 30100 } else { 30101 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30102 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 30103 imm = getUChar(delta+alen); 30104 if (imm & ~15) break; 30105 delta += alen+1; 30106 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 30107 } 30108 30109 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30110 that encoding is the same as the encoding for IRRoundingMode, 30111 we can use that value directly in the IR as a rounding 30112 mode. */ 30113 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30114 30115 breakupV128to64s( src, &s1, &s0 ); 30116 putYMMRegLane128( rG, 1, mkV128(0) ); 30117 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 30118 unop(Iop_ReinterpI64asF64, mkexpr(s))) 30119 putYMMRegLane64F( rG, 1, CVT(s1) ); 30120 putYMMRegLane64F( rG, 0, CVT(s0) ); 30121 # undef CVT 30122 goto decode_success; 30123 } 30124 /* VROUNDPD imm8, ymm2/m256, ymm1 */ 30125 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */ 30126 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30127 UChar modrm = getUChar(delta); 30128 UInt rG = gregOfRexRM(pfx, modrm); 30129 IRTemp src = newTemp(Ity_V256); 30130 IRTemp s0 = IRTemp_INVALID; 30131 IRTemp s1 = IRTemp_INVALID; 30132 IRTemp s2 = IRTemp_INVALID; 30133 IRTemp s3 = IRTemp_INVALID; 30134 IRTemp rm = newTemp(Ity_I32); 30135 Int imm = 0; 30136 30137 modrm = getUChar(delta); 30138 30139 if (epartIsReg(modrm)) { 30140 UInt rE = eregOfRexRM(pfx, modrm); 30141 assign( src, getYMMReg( rE ) ); 30142 imm = getUChar(delta+1); 30143 if (imm & ~15) break; 30144 delta += 1+1; 30145 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 30146 } else { 30147 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30148 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 30149 imm = getUChar(delta+alen); 30150 if (imm & ~15) break; 30151 delta += alen+1; 30152 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 30153 } 30154 30155 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30156 that encoding is the same as the encoding for IRRoundingMode, 30157 we can use that value directly in the IR as a rounding 30158 mode. */ 30159 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30160 30161 breakupV256to64s( src, &s3, &s2, &s1, &s0 ); 30162 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 30163 unop(Iop_ReinterpI64asF64, mkexpr(s))) 30164 putYMMRegLane64F( rG, 3, CVT(s3) ); 30165 putYMMRegLane64F( rG, 2, CVT(s2) ); 30166 putYMMRegLane64F( rG, 1, CVT(s1) ); 30167 putYMMRegLane64F( rG, 0, CVT(s0) ); 30168 # undef CVT 30169 goto decode_success; 30170 } 30171 30172 case 0x0A: 30173 case 0x0B: 30174 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */ 30175 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */ 30176 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */ 30177 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */ 30178 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30179 UChar modrm = getUChar(delta); 30180 UInt rG = gregOfRexRM(pfx, modrm); 30181 UInt rV = getVexNvvvv(pfx); 30182 Bool isD = opc == 0x0B; 30183 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 30184 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 30185 Int imm = 0; 30186 30187 if (epartIsReg(modrm)) { 30188 UInt rE = eregOfRexRM(pfx, modrm); 30189 assign( src, 30190 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 30191 imm = getUChar(delta+1); 30192 if (imm & ~15) break; 30193 delta += 1+1; 30194 DIP( "vrounds%c $%d,%s,%s,%s\n", 30195 isD ? 'd' : 's', 30196 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) ); 30197 } else { 30198 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30199 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 30200 imm = getUChar(delta+alen); 30201 if (imm & ~15) break; 30202 delta += alen+1; 30203 DIP( "vrounds%c $%d,%s,%s,%s\n", 30204 isD ? 'd' : 's', 30205 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) ); 30206 } 30207 30208 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30209 that encoding is the same as the encoding for IRRoundingMode, 30210 we can use that value directly in the IR as a rounding 30211 mode. */ 30212 assign(res, binop(isD ? 
Iop_RoundF64toInt : Iop_RoundF32toInt, 30213 (imm & 4) ? get_sse_roundingmode() 30214 : mkU32(imm & 3), 30215 mkexpr(src)) ); 30216 30217 if (isD) 30218 putXMMRegLane64F( rG, 0, mkexpr(res) ); 30219 else { 30220 putXMMRegLane32F( rG, 0, mkexpr(res) ); 30221 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) ); 30222 } 30223 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) ); 30224 putYMMRegLane128( rG, 1, mkV128(0) ); 30225 *uses_vvvv = True; 30226 goto decode_success; 30227 } 30228 break; 30229 30230 case 0x0C: 30231 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */ 30232 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */ 30233 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30234 UChar modrm = getUChar(delta); 30235 UInt imm8; 30236 UInt rG = gregOfRexRM(pfx, modrm); 30237 UInt rV = getVexNvvvv(pfx); 30238 IRTemp sV = newTemp(Ity_V256); 30239 IRTemp sE = newTemp(Ity_V256); 30240 assign ( sV, getYMMReg(rV) ); 30241 if (epartIsReg(modrm)) { 30242 UInt rE = eregOfRexRM(pfx, modrm); 30243 delta += 1; 30244 imm8 = getUChar(delta); 30245 DIP("vblendps $%u,%s,%s,%s\n", 30246 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30247 assign(sE, getYMMReg(rE)); 30248 } else { 30249 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30250 delta += alen; 30251 imm8 = getUChar(delta); 30252 DIP("vblendps $%u,%s,%s,%s\n", 30253 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30254 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30255 } 30256 delta++; 30257 putYMMReg( rG, 30258 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) ); 30259 *uses_vvvv = True; 30260 goto decode_success; 30261 } 30262 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */ 30263 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */ 30264 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30265 UChar modrm = getUChar(delta); 30266 UInt imm8; 30267 UInt rG = gregOfRexRM(pfx, modrm); 30268 UInt rV = getVexNvvvv(pfx); 30269 IRTemp sV = newTemp(Ity_V128); 30270 IRTemp sE = newTemp(Ity_V128); 30271 assign ( sV, getXMMReg(rV) ); 
30272 if (epartIsReg(modrm)) { 30273 UInt rE = eregOfRexRM(pfx, modrm); 30274 delta += 1; 30275 imm8 = getUChar(delta); 30276 DIP("vblendps $%u,%s,%s,%s\n", 30277 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30278 assign(sE, getXMMReg(rE)); 30279 } else { 30280 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30281 delta += alen; 30282 imm8 = getUChar(delta); 30283 DIP("vblendps $%u,%s,%s,%s\n", 30284 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30285 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30286 } 30287 delta++; 30288 putYMMRegLoAndZU( rG, 30289 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) ); 30290 *uses_vvvv = True; 30291 goto decode_success; 30292 } 30293 break; 30294 30295 case 0x0D: 30296 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */ 30297 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */ 30298 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30299 UChar modrm = getUChar(delta); 30300 UInt imm8; 30301 UInt rG = gregOfRexRM(pfx, modrm); 30302 UInt rV = getVexNvvvv(pfx); 30303 IRTemp sV = newTemp(Ity_V256); 30304 IRTemp sE = newTemp(Ity_V256); 30305 assign ( sV, getYMMReg(rV) ); 30306 if (epartIsReg(modrm)) { 30307 UInt rE = eregOfRexRM(pfx, modrm); 30308 delta += 1; 30309 imm8 = getUChar(delta); 30310 DIP("vblendpd $%u,%s,%s,%s\n", 30311 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30312 assign(sE, getYMMReg(rE)); 30313 } else { 30314 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30315 delta += alen; 30316 imm8 = getUChar(delta); 30317 DIP("vblendpd $%u,%s,%s,%s\n", 30318 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30319 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30320 } 30321 delta++; 30322 putYMMReg( rG, 30323 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) ); 30324 *uses_vvvv = True; 30325 goto decode_success; 30326 } 30327 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */ 30328 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */ 30329 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30330 UChar modrm = getUChar(delta); 
30331 UInt imm8; 30332 UInt rG = gregOfRexRM(pfx, modrm); 30333 UInt rV = getVexNvvvv(pfx); 30334 IRTemp sV = newTemp(Ity_V128); 30335 IRTemp sE = newTemp(Ity_V128); 30336 assign ( sV, getXMMReg(rV) ); 30337 if (epartIsReg(modrm)) { 30338 UInt rE = eregOfRexRM(pfx, modrm); 30339 delta += 1; 30340 imm8 = getUChar(delta); 30341 DIP("vblendpd $%u,%s,%s,%s\n", 30342 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30343 assign(sE, getXMMReg(rE)); 30344 } else { 30345 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30346 delta += alen; 30347 imm8 = getUChar(delta); 30348 DIP("vblendpd $%u,%s,%s,%s\n", 30349 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30350 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30351 } 30352 delta++; 30353 putYMMRegLoAndZU( rG, 30354 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) ); 30355 *uses_vvvv = True; 30356 goto decode_success; 30357 } 30358 break; 30359 30360 case 0x0E: 30361 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */ 30362 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */ 30363 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30364 UChar modrm = getUChar(delta); 30365 UInt imm8; 30366 UInt rG = gregOfRexRM(pfx, modrm); 30367 UInt rV = getVexNvvvv(pfx); 30368 IRTemp sV = newTemp(Ity_V128); 30369 IRTemp sE = newTemp(Ity_V128); 30370 assign ( sV, getXMMReg(rV) ); 30371 if (epartIsReg(modrm)) { 30372 UInt rE = eregOfRexRM(pfx, modrm); 30373 delta += 1; 30374 imm8 = getUChar(delta); 30375 DIP("vpblendw $%u,%s,%s,%s\n", 30376 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30377 assign(sE, getXMMReg(rE)); 30378 } else { 30379 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30380 delta += alen; 30381 imm8 = getUChar(delta); 30382 DIP("vpblendw $%u,%s,%s,%s\n", 30383 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30384 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30385 } 30386 delta++; 30387 putYMMRegLoAndZU( rG, 30388 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) ); 30389 *uses_vvvv = True; 30390 goto decode_success; 
30391 } 30392 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */ 30393 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */ 30394 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30395 UChar modrm = getUChar(delta); 30396 UInt imm8; 30397 UInt rG = gregOfRexRM(pfx, modrm); 30398 UInt rV = getVexNvvvv(pfx); 30399 IRTemp sV = newTemp(Ity_V256); 30400 IRTemp sE = newTemp(Ity_V256); 30401 IRTemp sVhi, sVlo, sEhi, sElo; 30402 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID; 30403 assign ( sV, getYMMReg(rV) ); 30404 if (epartIsReg(modrm)) { 30405 UInt rE = eregOfRexRM(pfx, modrm); 30406 delta += 1; 30407 imm8 = getUChar(delta); 30408 DIP("vpblendw $%u,%s,%s,%s\n", 30409 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30410 assign(sE, getYMMReg(rE)); 30411 } else { 30412 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30413 delta += alen; 30414 imm8 = getUChar(delta); 30415 DIP("vpblendw $%u,%s,%s,%s\n", 30416 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30417 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30418 } 30419 delta++; 30420 breakupV256toV128s( sV, &sVhi, &sVlo ); 30421 breakupV256toV128s( sE, &sEhi, &sElo ); 30422 putYMMReg( rG, binop( Iop_V128HLtoV256, 30423 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ), 30424 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) ); 30425 *uses_vvvv = True; 30426 goto decode_success; 30427 } 30428 break; 30429 30430 case 0x0F: 30431 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */ 30432 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */ 30433 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30434 UChar modrm = getUChar(delta); 30435 UInt rG = gregOfRexRM(pfx, modrm); 30436 UInt rV = getVexNvvvv(pfx); 30437 IRTemp sV = newTemp(Ity_V128); 30438 IRTemp dV = newTemp(Ity_V128); 30439 UInt imm8; 30440 30441 assign( dV, getXMMReg(rV) ); 30442 30443 if ( epartIsReg( modrm ) ) { 30444 UInt rE = eregOfRexRM(pfx, modrm); 30445 assign( sV, getXMMReg(rE) ); 30446 imm8 = getUChar(delta+1); 30447 delta += 1+1; 30448 DIP("vpalignr $%d,%s,%s,%s\n", 
imm8, nameXMMReg(rE), 30449 nameXMMReg(rV), nameXMMReg(rG)); 30450 } else { 30451 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30452 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 30453 imm8 = getUChar(delta+alen); 30454 delta += alen+1; 30455 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf, 30456 nameXMMReg(rV), nameXMMReg(rG)); 30457 } 30458 30459 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 ); 30460 putYMMRegLoAndZU( rG, mkexpr(res) ); 30461 *uses_vvvv = True; 30462 goto decode_success; 30463 } 30464 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */ 30465 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */ 30466 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30467 UChar modrm = getUChar(delta); 30468 UInt rG = gregOfRexRM(pfx, modrm); 30469 UInt rV = getVexNvvvv(pfx); 30470 IRTemp sV = newTemp(Ity_V256); 30471 IRTemp dV = newTemp(Ity_V256); 30472 IRTemp sHi, sLo, dHi, dLo; 30473 sHi = sLo = dHi = dLo = IRTemp_INVALID; 30474 UInt imm8; 30475 30476 assign( dV, getYMMReg(rV) ); 30477 30478 if ( epartIsReg( modrm ) ) { 30479 UInt rE = eregOfRexRM(pfx, modrm); 30480 assign( sV, getYMMReg(rE) ); 30481 imm8 = getUChar(delta+1); 30482 delta += 1+1; 30483 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameYMMReg(rE), 30484 nameYMMReg(rV), nameYMMReg(rG)); 30485 } else { 30486 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30487 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 30488 imm8 = getUChar(delta+alen); 30489 delta += alen+1; 30490 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf, 30491 nameYMMReg(rV), nameYMMReg(rG)); 30492 } 30493 30494 breakupV256toV128s( dV, &dHi, &dLo ); 30495 breakupV256toV128s( sV, &sHi, &sLo ); 30496 putYMMReg( rG, binop( Iop_V128HLtoV256, 30497 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ), 30498 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) ) 30499 ); 30500 *uses_vvvv = True; 30501 goto decode_success; 30502 } 30503 break; 30504 30505 case 0x14: 30506 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */ 30507 if (have66noF2noF3(pfx) 
30508 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30509 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 30510 goto decode_success; 30511 } 30512 break; 30513 30514 case 0x15: 30515 /* VPEXTRW imm8, reg/m16, xmm2 */ 30516 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */ 30517 if (have66noF2noF3(pfx) 30518 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30519 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ ); 30520 goto decode_success; 30521 } 30522 break; 30523 30524 case 0x16: 30525 /* VPEXTRD imm8, r32/m32, xmm2 */ 30526 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */ 30527 if (have66noF2noF3(pfx) 30528 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30529 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ ); 30530 goto decode_success; 30531 } 30532 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */ 30533 if (have66noF2noF3(pfx) 30534 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 30535 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ ); 30536 goto decode_success; 30537 } 30538 break; 30539 30540 case 0x17: 30541 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */ 30542 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30543 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ ); 30544 goto decode_success; 30545 } 30546 break; 30547 30548 case 0x18: 30549 /* VINSERTF128 r/m, rV, rD 30550 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 30551 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */ 30552 if (have66noF2noF3(pfx) 30553 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30554 UChar modrm = getUChar(delta); 30555 UInt ib = 0; 30556 UInt rG = gregOfRexRM(pfx, modrm); 30557 UInt rV = getVexNvvvv(pfx); 30558 IRTemp t128 = newTemp(Ity_V128); 30559 if (epartIsReg(modrm)) { 30560 UInt rE = eregOfRexRM(pfx, modrm); 30561 delta += 1; 30562 assign(t128, getXMMReg(rE)); 30563 ib = getUChar(delta); 30564 DIP("vinsertf128 $%u,%s,%s,%s\n", 30565 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30566 } 
else { 30567 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30568 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 30569 delta += alen; 30570 ib = getUChar(delta); 30571 DIP("vinsertf128 $%u,%s,%s,%s\n", 30572 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30573 } 30574 delta++; 30575 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 30576 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 30577 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 30578 *uses_vvvv = True; 30579 goto decode_success; 30580 } 30581 break; 30582 30583 case 0x19: 30584 /* VEXTRACTF128 $lane_no, rS, r/m 30585 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 30586 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */ 30587 if (have66noF2noF3(pfx) 30588 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30589 UChar modrm = getUChar(delta); 30590 UInt ib = 0; 30591 UInt rS = gregOfRexRM(pfx, modrm); 30592 IRTemp t128 = newTemp(Ity_V128); 30593 if (epartIsReg(modrm)) { 30594 UInt rD = eregOfRexRM(pfx, modrm); 30595 delta += 1; 30596 ib = getUChar(delta); 30597 assign(t128, getYMMRegLane128(rS, ib & 1)); 30598 putYMMRegLoAndZU(rD, mkexpr(t128)); 30599 DIP("vextractf128 $%u,%s,%s\n", 30600 ib, nameXMMReg(rS), nameYMMReg(rD)); 30601 } else { 30602 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30603 delta += alen; 30604 ib = getUChar(delta); 30605 assign(t128, getYMMRegLane128(rS, ib & 1)); 30606 storeLE(mkexpr(addr), mkexpr(t128)); 30607 DIP("vextractf128 $%u,%s,%s\n", 30608 ib, nameYMMReg(rS), dis_buf); 30609 } 30610 delta++; 30611 /* doesn't use vvvv */ 30612 goto decode_success; 30613 } 30614 break; 30615 30616 case 0x20: 30617 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */ 30618 if (have66noF2noF3(pfx) 30619 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30620 UChar modrm = getUChar(delta); 30621 UInt rG = gregOfRexRM(pfx, modrm); 30622 UInt rV = getVexNvvvv(pfx); 30623 Int imm8; 30624 IRTemp src_u8 = newTemp(Ity_I8); 30625 30626 if ( epartIsReg( modrm ) ) { 30627 
UInt rE = eregOfRexRM(pfx,modrm); 30628 imm8 = (Int)(getUChar(delta+1) & 15); 30629 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) ); 30630 delta += 1+1; 30631 DIP( "vpinsrb $%d,%s,%s,%s\n", 30632 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30633 } else { 30634 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30635 imm8 = (Int)(getUChar(delta+alen) & 15); 30636 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) ); 30637 delta += alen+1; 30638 DIP( "vpinsrb $%d,%s,%s,%s\n", 30639 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30640 } 30641 30642 IRTemp src_vec = newTemp(Ity_V128); 30643 assign(src_vec, getXMMReg( rV )); 30644 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 ); 30645 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 30646 *uses_vvvv = True; 30647 goto decode_success; 30648 } 30649 break; 30650 30651 case 0x21: 30652 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1 30653 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */ 30654 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30655 UChar modrm = getUChar(delta); 30656 UInt rG = gregOfRexRM(pfx, modrm); 30657 UInt rV = getVexNvvvv(pfx); 30658 UInt imm8; 30659 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 30660 const IRTemp inval = IRTemp_INVALID; 30661 30662 if ( epartIsReg( modrm ) ) { 30663 UInt rE = eregOfRexRM(pfx, modrm); 30664 IRTemp vE = newTemp(Ity_V128); 30665 assign( vE, getXMMReg(rE) ); 30666 IRTemp dsE[4] = { inval, inval, inval, inval }; 30667 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 30668 imm8 = getUChar(delta+1); 30669 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 30670 delta += 1+1; 30671 DIP( "insertps $%u, %s,%s\n", 30672 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 30673 } else { 30674 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30675 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 30676 imm8 = getUChar(delta+alen); 30677 delta += alen+1; 30678 DIP( "insertps $%u, %s,%s\n", 30679 imm8, dis_buf, nameXMMReg(rG) ); 30680 } 30681 30682 IRTemp 
vV = newTemp(Ity_V128); 30683 assign( vV, getXMMReg(rV) ); 30684 30685 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) ); 30686 *uses_vvvv = True; 30687 goto decode_success; 30688 } 30689 break; 30690 30691 case 0x22: 30692 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */ 30693 if (have66noF2noF3(pfx) 30694 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30695 UChar modrm = getUChar(delta); 30696 UInt rG = gregOfRexRM(pfx, modrm); 30697 UInt rV = getVexNvvvv(pfx); 30698 Int imm8_10; 30699 IRTemp src_u32 = newTemp(Ity_I32); 30700 30701 if ( epartIsReg( modrm ) ) { 30702 UInt rE = eregOfRexRM(pfx,modrm); 30703 imm8_10 = (Int)(getUChar(delta+1) & 3); 30704 assign( src_u32, getIReg32( rE ) ); 30705 delta += 1+1; 30706 DIP( "vpinsrd $%d,%s,%s,%s\n", 30707 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30708 } else { 30709 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30710 imm8_10 = (Int)(getUChar(delta+alen) & 3); 30711 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 30712 delta += alen+1; 30713 DIP( "vpinsrd $%d,%s,%s,%s\n", 30714 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30715 } 30716 30717 IRTemp src_vec = newTemp(Ity_V128); 30718 assign(src_vec, getXMMReg( rV )); 30719 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 30720 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 30721 *uses_vvvv = True; 30722 goto decode_success; 30723 } 30724 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */ 30725 if (have66noF2noF3(pfx) 30726 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 30727 UChar modrm = getUChar(delta); 30728 UInt rG = gregOfRexRM(pfx, modrm); 30729 UInt rV = getVexNvvvv(pfx); 30730 Int imm8_0; 30731 IRTemp src_u64 = newTemp(Ity_I64); 30732 30733 if ( epartIsReg( modrm ) ) { 30734 UInt rE = eregOfRexRM(pfx,modrm); 30735 imm8_0 = (Int)(getUChar(delta+1) & 1); 30736 assign( src_u64, getIReg64( rE ) ); 30737 delta += 1+1; 30738 DIP( "vpinsrq $%d,%s,%s,%s\n", 30739 
imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30740 } else { 30741 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30742 imm8_0 = (Int)(getUChar(delta+alen) & 1); 30743 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 30744 delta += alen+1; 30745 DIP( "vpinsrd $%d,%s,%s,%s\n", 30746 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30747 } 30748 30749 IRTemp src_vec = newTemp(Ity_V128); 30750 assign(src_vec, getXMMReg( rV )); 30751 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 30752 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 30753 *uses_vvvv = True; 30754 goto decode_success; 30755 } 30756 break; 30757 30758 case 0x38: 30759 /* VINSERTI128 r/m, rV, rD 30760 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 30761 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */ 30762 if (have66noF2noF3(pfx) 30763 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30764 UChar modrm = getUChar(delta); 30765 UInt ib = 0; 30766 UInt rG = gregOfRexRM(pfx, modrm); 30767 UInt rV = getVexNvvvv(pfx); 30768 IRTemp t128 = newTemp(Ity_V128); 30769 if (epartIsReg(modrm)) { 30770 UInt rE = eregOfRexRM(pfx, modrm); 30771 delta += 1; 30772 assign(t128, getXMMReg(rE)); 30773 ib = getUChar(delta); 30774 DIP("vinserti128 $%u,%s,%s,%s\n", 30775 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30776 } else { 30777 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30778 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 30779 delta += alen; 30780 ib = getUChar(delta); 30781 DIP("vinserti128 $%u,%s,%s,%s\n", 30782 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30783 } 30784 delta++; 30785 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 30786 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 30787 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 30788 *uses_vvvv = True; 30789 goto decode_success; 30790 } 30791 break; 30792 30793 case 0x39: 30794 /* VEXTRACTI128 $lane_no, rS, r/m 30795 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 30796 /* VEXTRACTI128 = 
VEX.256.66.0F3A.W0 39 /r ib */ 30797 if (have66noF2noF3(pfx) 30798 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30799 UChar modrm = getUChar(delta); 30800 UInt ib = 0; 30801 UInt rS = gregOfRexRM(pfx, modrm); 30802 IRTemp t128 = newTemp(Ity_V128); 30803 if (epartIsReg(modrm)) { 30804 UInt rD = eregOfRexRM(pfx, modrm); 30805 delta += 1; 30806 ib = getUChar(delta); 30807 assign(t128, getYMMRegLane128(rS, ib & 1)); 30808 putYMMRegLoAndZU(rD, mkexpr(t128)); 30809 DIP("vextracti128 $%u,%s,%s\n", 30810 ib, nameXMMReg(rS), nameYMMReg(rD)); 30811 } else { 30812 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30813 delta += alen; 30814 ib = getUChar(delta); 30815 assign(t128, getYMMRegLane128(rS, ib & 1)); 30816 storeLE(mkexpr(addr), mkexpr(t128)); 30817 DIP("vextracti128 $%u,%s,%s\n", 30818 ib, nameYMMReg(rS), dis_buf); 30819 } 30820 delta++; 30821 /* doesn't use vvvv */ 30822 goto decode_success; 30823 } 30824 break; 30825 30826 case 0x40: 30827 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */ 30828 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30829 UChar modrm = getUChar(delta); 30830 UInt rG = gregOfRexRM(pfx, modrm); 30831 UInt rV = getVexNvvvv(pfx); 30832 IRTemp dst_vec = newTemp(Ity_V128); 30833 Int imm8; 30834 if (epartIsReg( modrm )) { 30835 UInt rE = eregOfRexRM(pfx,modrm); 30836 imm8 = (Int)getUChar(delta+1); 30837 assign( dst_vec, getXMMReg( rE ) ); 30838 delta += 1+1; 30839 DIP( "vdpps $%d,%s,%s,%s\n", 30840 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30841 } else { 30842 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30843 imm8 = (Int)getUChar(delta+alen); 30844 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 30845 delta += alen+1; 30846 DIP( "vdpps $%d,%s,%s,%s\n", 30847 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30848 } 30849 30850 IRTemp src_vec = newTemp(Ity_V128); 30851 assign(src_vec, getXMMReg( rV )); 30852 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 ); 30853 
putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
      /* (comment fixed: the 256-bit form takes a ymm/m256 source, as the
         Ity_V256 load below shows; it previously said "m128".) */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp dst_vec = newTemp(Ity_V256);
         Int    imm8;
         if (epartIsReg( modrm )) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( dst_vec, getYMMReg( rE ) );
            delta += 1+1;
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)getUChar(delta+alen);
            assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
            delta += alen+1;
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V256);
         assign(src_vec, getYMMReg( rV ));
         /* Split both operands into 128-bit halves and do the dot
            product per lane; the same imm8 is applied to each lane. */
         IRTemp s0, s1, d0, d1;
         s0 = s1 = d0 = d1 = IRTemp_INVALID;
         breakupV256toV128s( dst_vec, &d1, &d0 );
         breakupV256toV128s( src_vec, &s1, &s0 );
         putYMMReg( rG, binop( Iop_V128HLtoV256,
                               mkexpr( math_DPPS_128(s1, d1, imm8) ),
                               mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x41:
      /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp dst_vec = newTemp(Ity_V128);
         Int    imm8;
         if (epartIsReg( modrm )) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( dst_vec, getXMMReg( rE ) );
            delta += 1+1;
            DIP( "vdppd $%d,%s,%s,%s\n",
                 imm8, 
nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30909 } else { 30910 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30911 imm8 = (Int)getUChar(delta+alen); 30912 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 30913 delta += alen+1; 30914 DIP( "vdppd $%d,%s,%s,%s\n", 30915 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30916 } 30917 30918 IRTemp src_vec = newTemp(Ity_V128); 30919 assign(src_vec, getXMMReg( rV )); 30920 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 ); 30921 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 30922 *uses_vvvv = True; 30923 goto decode_success; 30924 } 30925 break; 30926 30927 case 0x42: 30928 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */ 30929 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */ 30930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30931 UChar modrm = getUChar(delta); 30932 Int imm8; 30933 IRTemp src_vec = newTemp(Ity_V128); 30934 IRTemp dst_vec = newTemp(Ity_V128); 30935 UInt rG = gregOfRexRM(pfx, modrm); 30936 UInt rV = getVexNvvvv(pfx); 30937 30938 assign( dst_vec, getXMMReg(rV) ); 30939 30940 if ( epartIsReg( modrm ) ) { 30941 UInt rE = eregOfRexRM(pfx, modrm); 30942 30943 imm8 = (Int)getUChar(delta+1); 30944 assign( src_vec, getXMMReg(rE) ); 30945 delta += 1+1; 30946 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30947 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30948 } else { 30949 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 30950 1/* imm8 is 1 byte after the amode */ ); 30951 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 30952 imm8 = (Int)getUChar(delta+alen); 30953 delta += alen+1; 30954 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30955 dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30956 } 30957 30958 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec, 30959 src_vec, imm8) ) ); 30960 *uses_vvvv = True; 30961 goto decode_success; 30962 } 30963 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */ 30964 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */ 30965 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 30966 UChar modrm = getUChar(delta); 30967 Int imm8; 30968 IRTemp src_vec = newTemp(Ity_V256); 30969 IRTemp dst_vec = newTemp(Ity_V256); 30970 UInt rG = gregOfRexRM(pfx, modrm); 30971 UInt rV = getVexNvvvv(pfx); 30972 IRTemp sHi, sLo, dHi, dLo; 30973 sHi = sLo = dHi = dLo = IRTemp_INVALID; 30974 30975 assign( dst_vec, getYMMReg(rV) ); 30976 30977 if ( epartIsReg( modrm ) ) { 30978 UInt rE = eregOfRexRM(pfx, modrm); 30979 30980 imm8 = (Int)getUChar(delta+1); 30981 assign( src_vec, getYMMReg(rE) ); 30982 delta += 1+1; 30983 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30984 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) ); 30985 } else { 30986 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 30987 1/* imm8 is 1 byte after the amode */ ); 30988 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) ); 30989 imm8 = (Int)getUChar(delta+alen); 30990 delta += alen+1; 30991 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30992 dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 30993 } 30994 30995 breakupV256toV128s( dst_vec, &dHi, &dLo ); 30996 breakupV256toV128s( src_vec, &sHi, &sLo ); 30997 putYMMReg( rG, binop( Iop_V128HLtoV256, 30998 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ), 30999 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) ); 31000 *uses_vvvv = True; 31001 goto decode_success; 31002 } 31003 break; 31004 31005 case 0x44: 31006 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */ 31007 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */ 31008 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 31009 * Carry-less multiplication of selected XMM quadwords into XMM 31010 * registers (a.k.a multiplication of polynomials over GF(2)) 31011 */ 31012 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31013 UChar modrm = getUChar(delta); 31014 Int imm8; 31015 IRTemp sV = newTemp(Ity_V128); 31016 IRTemp dV = newTemp(Ity_V128); 31017 UInt rG = gregOfRexRM(pfx, modrm); 31018 UInt rV = getVexNvvvv(pfx); 31019 31020 assign( dV, getXMMReg(rV) ); 31021 31022 if ( epartIsReg( modrm ) ) 
{ 31023 UInt rE = eregOfRexRM(pfx, modrm); 31024 imm8 = (Int)getUChar(delta+1); 31025 assign( sV, getXMMReg(rE) ); 31026 delta += 1+1; 31027 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8, 31028 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31029 } else { 31030 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 31031 1/* imm8 is 1 byte after the amode */ ); 31032 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) ); 31033 imm8 = (Int)getUChar(delta+alen); 31034 delta += alen+1; 31035 DIP( "vpclmulqdq $%d, %s,%s,%s\n", 31036 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31037 } 31038 31039 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) ); 31040 *uses_vvvv = True; 31041 goto decode_success; 31042 } 31043 break; 31044 31045 case 0x46: 31046 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */ 31047 if (have66noF2noF3(pfx) 31048 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31049 UChar modrm = getUChar(delta); 31050 UInt imm8 = 0; 31051 UInt rG = gregOfRexRM(pfx, modrm); 31052 UInt rV = getVexNvvvv(pfx); 31053 IRTemp s00 = newTemp(Ity_V128); 31054 IRTemp s01 = newTemp(Ity_V128); 31055 IRTemp s10 = newTemp(Ity_V128); 31056 IRTemp s11 = newTemp(Ity_V128); 31057 assign(s00, getYMMRegLane128(rV, 0)); 31058 assign(s01, getYMMRegLane128(rV, 1)); 31059 if (epartIsReg(modrm)) { 31060 UInt rE = eregOfRexRM(pfx, modrm); 31061 delta += 1; 31062 imm8 = getUChar(delta); 31063 DIP("vperm2i128 $%u,%s,%s,%s\n", 31064 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 31065 assign(s10, getYMMRegLane128(rE, 0)); 31066 assign(s11, getYMMRegLane128(rE, 1)); 31067 } else { 31068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31069 delta += alen; 31070 imm8 = getUChar(delta); 31071 DIP("vperm2i128 $%u,%s,%s,%s\n", 31072 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 31073 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 31074 mkexpr(addr), mkU64(0)))); 31075 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 31076 mkexpr(addr), mkU64(16)))); 31077 } 
31078 delta++; 31079 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 31080 : ((_nn)==2) ? s10 : s11) 31081 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 31082 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 31083 # undef SEL 31084 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 31085 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 31086 *uses_vvvv = True; 31087 goto decode_success; 31088 } 31089 break; 31090 31091 case 0x4A: 31092 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4 31093 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31094 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */ 31095 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31096 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31097 "vblendvps", 4, Iop_SarN32x4 ); 31098 *uses_vvvv = True; 31099 goto decode_success; 31100 } 31101 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4 31102 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31103 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */ 31104 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31105 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31106 "vblendvps", 4, Iop_SarN32x4 ); 31107 *uses_vvvv = True; 31108 goto decode_success; 31109 } 31110 break; 31111 31112 case 0x4B: 31113 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4 31114 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31115 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */ 31116 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31117 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31118 "vblendvpd", 8, Iop_SarN64x2 ); 31119 *uses_vvvv = True; 31120 goto decode_success; 31121 } 31122 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4 31123 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31124 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */ 31125 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31126 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31127 "vblendvpd", 8, Iop_SarN64x2 ); 31128 *uses_vvvv = True; 31129 goto decode_success; 31130 } 
31131 break; 31132 31133 case 0x4C: 31134 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4 31135 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31136 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */ 31137 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31138 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31139 "vpblendvb", 1, Iop_SarN8x16 ); 31140 *uses_vvvv = True; 31141 goto decode_success; 31142 } 31143 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4 31144 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31145 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */ 31146 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31147 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31148 "vpblendvb", 1, Iop_SarN8x16 ); 31149 *uses_vvvv = True; 31150 goto decode_success; 31151 } 31152 break; 31153 31154 case 0x60: 31155 case 0x61: 31156 case 0x62: 31157 case 0x63: 31158 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1 31159 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1 31160 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1 31161 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1 31162 (selected special cases that actually occur in glibc, 31163 not by any means a complete implementation.) 
31164 */ 31165 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31166 Long delta0 = delta; 31167 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc ); 31168 if (delta > delta0) goto decode_success; 31169 /* else fall though; dis_PCMPxSTRx failed to decode it */ 31170 } 31171 break; 31172 31173 case 0xDF: 31174 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */ 31175 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31176 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*!isAvx*/ ); 31177 goto decode_success; 31178 } 31179 break; 31180 31181 case 0xF0: 31182 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */ 31183 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */ 31184 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 31185 Int size = getRexW(pfx) ? 8 : 4; 31186 IRType ty = szToITy(size); 31187 IRTemp src = newTemp(ty); 31188 UChar rm = getUChar(delta); 31189 UChar imm8; 31190 31191 if (epartIsReg(rm)) { 31192 imm8 = getUChar(delta+1); 31193 assign( src, getIRegE(size,pfx,rm) ); 31194 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm), 31195 nameIRegG(size,pfx,rm)); 31196 delta += 2; 31197 } else { 31198 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 31199 imm8 = getUChar(delta+alen); 31200 assign( src, loadLE(ty, mkexpr(addr)) ); 31201 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm)); 31202 delta += alen + 1; 31203 } 31204 imm8 &= 8*size-1; 31205 31206 /* dst = (src >>u imm8) | (src << (size-imm8)) */ 31207 putIRegG( size, pfx, rm, 31208 imm8 == 0 ? mkexpr(src) 31209 : binop( mkSizedOp(ty,Iop_Or8), 31210 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src), 31211 mkU8(imm8) ), 31212 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src), 31213 mkU8(8*size-imm8) ) ) ); 31214 /* Flags aren't modified. 
*/ 31215 goto decode_success; 31216 } 31217 break; 31218 31219 default: 31220 break; 31221 31222 } 31223 31224 //decode_failure: 31225 return deltaIN; 31226 31227 decode_success: 31228 return delta; 31229 } 31230 31231 31232 /*------------------------------------------------------------*/ 31233 /*--- ---*/ 31234 /*--- Disassemble a single instruction ---*/ 31235 /*--- ---*/ 31236 /*------------------------------------------------------------*/ 31237 31238 /* Disassemble a single instruction into IR. The instruction is 31239 located in host memory at &guest_code[delta]. */ 31240 31241 static 31242 DisResult disInstr_AMD64_WRK ( 31243 /*OUT*/Bool* expect_CAS, 31244 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 31245 Bool resteerCisOk, 31246 void* callback_opaque, 31247 Long delta64, 31248 const VexArchInfo* archinfo, 31249 const VexAbiInfo* vbi, 31250 Bool sigill_diag 31251 ) 31252 { 31253 IRTemp t1, t2; 31254 UChar pre; 31255 Int n, n_prefixes; 31256 DisResult dres; 31257 31258 /* The running delta */ 31259 Long delta = delta64; 31260 31261 /* Holds eip at the start of the insn, so that we can print 31262 consistent error messages for unimplemented insns. */ 31263 Long delta_start = delta; 31264 31265 /* sz denotes the nominal data-op size of the insn; we change it to 31266 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of 31267 conflict REX.W takes precedence. */ 31268 Int sz = 4; 31269 31270 /* pfx holds the summary of prefixes. */ 31271 Prefix pfx = PFX_EMPTY; 31272 31273 /* Holds the computed opcode-escape indication. */ 31274 Escape esc = ESC_NONE; 31275 31276 /* Set result defaults. 
*/ 31277 dres.whatNext = Dis_Continue; 31278 dres.len = 0; 31279 dres.continueAt = 0; 31280 dres.jk_StopHere = Ijk_INVALID; 31281 *expect_CAS = False; 31282 31283 vassert(guest_RIP_next_assumed == 0); 31284 vassert(guest_RIP_next_mustcheck == False); 31285 31286 t1 = t2 = IRTemp_INVALID; 31287 31288 DIP("\t0x%llx: ", guest_RIP_bbstart+delta); 31289 31290 /* Spot "Special" instructions (see comment at top of file). */ 31291 { 31292 const UChar* code = guest_code + delta; 31293 /* Spot the 16-byte preamble: 31294 48C1C703 rolq $3, %rdi 31295 48C1C70D rolq $13, %rdi 31296 48C1C73D rolq $61, %rdi 31297 48C1C733 rolq $51, %rdi 31298 */ 31299 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 31300 && code[ 3] == 0x03 && 31301 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 31302 && code[ 7] == 0x0D && 31303 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 31304 && code[11] == 0x3D && 31305 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 31306 && code[15] == 0x33) { 31307 /* Got a "Special" instruction preamble. Which one is it? 
*/ 31308 if (code[16] == 0x48 && code[17] == 0x87 31309 && code[18] == 0xDB /* xchgq %rbx,%rbx */) { 31310 /* %RDX = client_request ( %RAX ) */ 31311 DIP("%%rdx = client_request ( %%rax )\n"); 31312 delta += 19; 31313 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta); 31314 vassert(dres.whatNext == Dis_StopHere); 31315 goto decode_success; 31316 } 31317 else 31318 if (code[16] == 0x48 && code[17] == 0x87 31319 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { 31320 /* %RAX = guest_NRADDR */ 31321 DIP("%%rax = guest_NRADDR\n"); 31322 delta += 19; 31323 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 31324 goto decode_success; 31325 } 31326 else 31327 if (code[16] == 0x48 && code[17] == 0x87 31328 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { 31329 /* call-noredir *%RAX */ 31330 DIP("call-noredir *%%rax\n"); 31331 delta += 19; 31332 t1 = newTemp(Ity_I64); 31333 assign(t1, getIRegRAX(8)); 31334 t2 = newTemp(Ity_I64); 31335 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 31336 putIReg64(R_RSP, mkexpr(t2)); 31337 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); 31338 jmp_treg(&dres, Ijk_NoRedir, t1); 31339 vassert(dres.whatNext == Dis_StopHere); 31340 goto decode_success; 31341 } 31342 else 31343 if (code[16] == 0x48 && code[17] == 0x87 31344 && code[18] == 0xff /* xchgq %rdi,%rdi */) { 31345 /* IR injection */ 31346 DIP("IR injection\n"); 31347 vex_inject_ir(irsb, Iend_LE); 31348 31349 // Invalidate the current insn. The reason is that the IRop we're 31350 // injecting here can change. In which case the translation has to 31351 // be redone. For ease of handling, we simply invalidate all the 31352 // time. 
31353 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr))); 31354 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19))); 31355 31356 delta += 19; 31357 31358 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) ); 31359 dres.whatNext = Dis_StopHere; 31360 dres.jk_StopHere = Ijk_InvalICache; 31361 goto decode_success; 31362 } 31363 /* We don't know what it is. */ 31364 goto decode_failure; 31365 /*NOTREACHED*/ 31366 } 31367 } 31368 31369 /* Eat prefixes, summarising the result in pfx and sz, and rejecting 31370 as many invalid combinations as possible. */ 31371 n_prefixes = 0; 31372 while (True) { 31373 if (n_prefixes > 7) goto decode_failure; 31374 pre = getUChar(delta); 31375 switch (pre) { 31376 case 0x66: pfx |= PFX_66; break; 31377 case 0x67: pfx |= PFX_ASO; break; 31378 case 0xF2: pfx |= PFX_F2; break; 31379 case 0xF3: pfx |= PFX_F3; break; 31380 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; 31381 case 0x2E: pfx |= PFX_CS; break; 31382 case 0x3E: pfx |= PFX_DS; break; 31383 case 0x26: pfx |= PFX_ES; break; 31384 case 0x64: pfx |= PFX_FS; break; 31385 case 0x65: pfx |= PFX_GS; break; 31386 case 0x36: pfx |= PFX_SS; break; 31387 case 0x40 ... 0x4F: 31388 pfx |= PFX_REX; 31389 if (pre & (1<<3)) pfx |= PFX_REXW; 31390 if (pre & (1<<2)) pfx |= PFX_REXR; 31391 if (pre & (1<<1)) pfx |= PFX_REXX; 31392 if (pre & (1<<0)) pfx |= PFX_REXB; 31393 break; 31394 default: 31395 goto not_a_legacy_prefix; 31396 } 31397 n_prefixes++; 31398 delta++; 31399 } 31400 31401 not_a_legacy_prefix: 31402 /* We've used up all the non-VEX prefixes. Parse and validate a 31403 VEX prefix if that's appropriate. */ 31404 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) { 31405 /* Used temporarily for holding VEX prefixes. 
*/ 31406 UChar vex0 = getUChar(delta); 31407 if (vex0 == 0xC4) { 31408 /* 3-byte VEX */ 31409 UChar vex1 = getUChar(delta+1); 31410 UChar vex2 = getUChar(delta+2); 31411 delta += 3; 31412 pfx |= PFX_VEX; 31413 /* Snarf contents of byte 1 */ 31414 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 31415 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX; 31416 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB; 31417 /* m-mmmm */ 31418 switch (vex1 & 0x1F) { 31419 case 1: esc = ESC_0F; break; 31420 case 2: esc = ESC_0F38; break; 31421 case 3: esc = ESC_0F3A; break; 31422 /* Any other m-mmmm field will #UD */ 31423 default: goto decode_failure; 31424 } 31425 /* Snarf contents of byte 2 */ 31426 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0; 31427 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3; 31428 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2; 31429 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1; 31430 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0; 31431 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0; 31432 /* pp */ 31433 switch (vex2 & 3) { 31434 case 0: break; 31435 case 1: pfx |= PFX_66; break; 31436 case 2: pfx |= PFX_F3; break; 31437 case 3: pfx |= PFX_F2; break; 31438 default: vassert(0); 31439 } 31440 } 31441 else if (vex0 == 0xC5) { 31442 /* 2-byte VEX */ 31443 UChar vex1 = getUChar(delta+1); 31444 delta += 2; 31445 pfx |= PFX_VEX; 31446 /* Snarf contents of byte 1 */ 31447 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 31448 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3; 31449 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2; 31450 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1; 31451 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0; 31452 /* L */ pfx |= (vex1 & (1<<2)) ? 
PFX_VEXL : 0; 31453 /* pp */ 31454 switch (vex1 & 3) { 31455 case 0: break; 31456 case 1: pfx |= PFX_66; break; 31457 case 2: pfx |= PFX_F3; break; 31458 case 3: pfx |= PFX_F2; break; 31459 default: vassert(0); 31460 } 31461 /* implied: */ 31462 esc = ESC_0F; 31463 } 31464 /* Can't have both VEX and REX */ 31465 if ((pfx & PFX_VEX) && (pfx & PFX_REX)) 31466 goto decode_failure; /* can't have both */ 31467 } 31468 31469 /* Dump invalid combinations */ 31470 n = 0; 31471 if (pfx & PFX_F2) n++; 31472 if (pfx & PFX_F3) n++; 31473 if (n > 1) 31474 goto decode_failure; /* can't have both */ 31475 31476 n = 0; 31477 if (pfx & PFX_CS) n++; 31478 if (pfx & PFX_DS) n++; 31479 if (pfx & PFX_ES) n++; 31480 if (pfx & PFX_FS) n++; 31481 if (pfx & PFX_GS) n++; 31482 if (pfx & PFX_SS) n++; 31483 if (n > 1) 31484 goto decode_failure; /* multiple seg overrides == illegal */ 31485 31486 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' 31487 that we should accept it. */ 31488 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const) 31489 goto decode_failure; 31490 31491 /* Ditto for %gs prefixes. */ 31492 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const) 31493 goto decode_failure; 31494 31495 /* Set up sz. */ 31496 sz = 4; 31497 if (pfx & PFX_66) sz = 2; 31498 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; 31499 31500 /* Now we should be looking at the primary opcode byte or the 31501 leading escapes. Check that any LOCK prefix is actually 31502 allowed. */ 31503 if (haveLOCK(pfx)) { 31504 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) { 31505 DIP("lock "); 31506 } else { 31507 *expect_CAS = False; 31508 goto decode_failure; 31509 } 31510 } 31511 31512 /* Eat up opcode escape bytes, until we're really looking at the 31513 primary opcode byte. But only if there's no VEX present. 
*/ 31514 if (!(pfx & PFX_VEX)) { 31515 vassert(esc == ESC_NONE); 31516 pre = getUChar(delta); 31517 if (pre == 0x0F) { 31518 delta++; 31519 pre = getUChar(delta); 31520 switch (pre) { 31521 case 0x38: esc = ESC_0F38; delta++; break; 31522 case 0x3A: esc = ESC_0F3A; delta++; break; 31523 default: esc = ESC_0F; break; 31524 } 31525 } 31526 } 31527 31528 /* So now we're really really looking at the primary opcode 31529 byte. */ 31530 Long delta_at_primary_opcode = delta; 31531 31532 if (!(pfx & PFX_VEX)) { 31533 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE 31534 instructions preserve the upper 128 bits of YMM registers; 31535 iow we can simply ignore the presence of the upper halves of 31536 these registers. */ 31537 switch (esc) { 31538 case ESC_NONE: 31539 delta = dis_ESC_NONE( &dres, expect_CAS, 31540 resteerOkFn, resteerCisOk, callback_opaque, 31541 archinfo, vbi, pfx, sz, delta ); 31542 break; 31543 case ESC_0F: 31544 delta = dis_ESC_0F ( &dres, expect_CAS, 31545 resteerOkFn, resteerCisOk, callback_opaque, 31546 archinfo, vbi, pfx, sz, delta ); 31547 break; 31548 case ESC_0F38: 31549 delta = dis_ESC_0F38( &dres, 31550 resteerOkFn, resteerCisOk, callback_opaque, 31551 archinfo, vbi, pfx, sz, delta ); 31552 break; 31553 case ESC_0F3A: 31554 delta = dis_ESC_0F3A( &dres, 31555 resteerOkFn, resteerCisOk, callback_opaque, 31556 archinfo, vbi, pfx, sz, delta ); 31557 break; 31558 default: 31559 vassert(0); 31560 } 31561 } else { 31562 /* VEX prefixed instruction */ 31563 /* Sloppy Intel wording: "An instruction encoded with a VEX.128 31564 prefix that loads a YMM register operand ..." zeroes out bits 31565 128 and above of the register. 
*/ 31566 Bool uses_vvvv = False; 31567 switch (esc) { 31568 case ESC_0F: 31569 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv, 31570 resteerOkFn, resteerCisOk, 31571 callback_opaque, 31572 archinfo, vbi, pfx, sz, delta ); 31573 break; 31574 case ESC_0F38: 31575 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv, 31576 resteerOkFn, resteerCisOk, 31577 callback_opaque, 31578 archinfo, vbi, pfx, sz, delta ); 31579 break; 31580 case ESC_0F3A: 31581 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv, 31582 resteerOkFn, resteerCisOk, 31583 callback_opaque, 31584 archinfo, vbi, pfx, sz, delta ); 31585 break; 31586 case ESC_NONE: 31587 /* The presence of a VEX prefix, by Intel definition, 31588 always implies at least an 0F escape. */ 31589 goto decode_failure; 31590 default: 31591 vassert(0); 31592 } 31593 /* If the insn doesn't use VEX.vvvv then it must be all ones. 31594 Check this. */ 31595 if (!uses_vvvv) { 31596 if (getVexNvvvv(pfx) != 0) 31597 goto decode_failure; 31598 } 31599 } 31600 31601 vassert(delta - delta_at_primary_opcode >= 0); 31602 vassert(delta - delta_at_primary_opcode < 16/*let's say*/); 31603 31604 /* Use delta == delta_at_primary_opcode to denote decode failure. 31605 This implies that any successful decode must use at least one 31606 byte up. */ 31607 if (delta == delta_at_primary_opcode) 31608 goto decode_failure; 31609 else 31610 goto decode_success; /* \o/ */ 31611 31612 #if 0 /* XYZZY */ 31613 31614 /* ---------------------------------------------------- */ 31615 /* --- The SSE/SSE2 decoder. --- */ 31616 /* ---------------------------------------------------- */ 31617 31618 /* What did I do to deserve SSE ? Perhaps I was really bad in a 31619 previous life? */ 31620 31621 /* Note, this doesn't handle SSE3 right now. All amd64s support 31622 SSE2 as a minimum so there is no point distinguishing SSE1 vs 31623 SSE2. 
*/ 31624 31625 insn = &guest_code[delta]; 31626 31627 /* FXSAVE is spuriously at the start here only because it is 31628 thusly placed in guest-x86/toIR.c. */ 31629 31630 /* ------ SSE decoder main ------ */ 31631 31632 /* ---------------------------------------------------- */ 31633 /* --- end of the SSE decoder. --- */ 31634 /* ---------------------------------------------------- */ 31635 31636 /* ---------------------------------------------------- */ 31637 /* --- start of the SSE2 decoder. --- */ 31638 /* ---------------------------------------------------- */ 31639 31640 /* ---------------------------------------------------- */ 31641 /* --- end of the SSE/SSE2 decoder. --- */ 31642 /* ---------------------------------------------------- */ 31643 31644 /* ---------------------------------------------------- */ 31645 /* --- start of the SSE3 decoder. --- */ 31646 /* ---------------------------------------------------- */ 31647 31648 /* ---------------------------------------------------- */ 31649 /* --- end of the SSE3 decoder. --- */ 31650 /* ---------------------------------------------------- */ 31651 31652 /* ---------------------------------------------------- */ 31653 /* --- start of the SSSE3 decoder. --- */ 31654 /* ---------------------------------------------------- */ 31655 31656 /* ---------------------------------------------------- */ 31657 /* --- end of the SSSE3 decoder. --- */ 31658 /* ---------------------------------------------------- */ 31659 31660 /* ---------------------------------------------------- */ 31661 /* --- start of the SSE4 decoder --- */ 31662 /* ---------------------------------------------------- */ 31663 31664 /* ---------------------------------------------------- */ 31665 /* --- end of the SSE4 decoder --- */ 31666 /* ---------------------------------------------------- */ 31667 31668 /*after_sse_decoders:*/ 31669 31670 /* Get the primary opcode. 
*/ 31671 opc = getUChar(delta); delta++; 31672 31673 /* We get here if the current insn isn't SSE, or this CPU doesn't 31674 support SSE. */ 31675 31676 switch (opc) { 31677 31678 /* ------------------------ Control flow --------------- */ 31679 31680 /* ------------------------ CWD/CDQ -------------------- */ 31681 31682 /* ------------------------ FPU ops -------------------- */ 31683 31684 /* ------------------------ INT ------------------------ */ 31685 31686 case 0xCD: { /* INT imm8 */ 31687 IRJumpKind jk = Ijk_Boring; 31688 if (have66orF2orF3(pfx)) goto decode_failure; 31689 d64 = getUChar(delta); delta++; 31690 switch (d64) { 31691 case 32: jk = Ijk_Sys_int32; break; 31692 default: goto decode_failure; 31693 } 31694 guest_RIP_next_mustcheck = True; 31695 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 31696 jmp_lit(jk, guest_RIP_next_assumed); 31697 /* It's important that all ArchRegs carry their up-to-date value 31698 at this point. So we declare an end-of-block here, which 31699 forces any TempRegs caching ArchRegs to be flushed. 
*/ 31700 vassert(dres.whatNext == Dis_StopHere); 31701 DIP("int $0x%02x\n", (UInt)d64); 31702 break; 31703 } 31704 31705 /* ------------------------ Jcond, byte offset --------- */ 31706 31707 /* ------------------------ IMUL ----------------------- */ 31708 31709 /* ------------------------ MOV ------------------------ */ 31710 31711 /* ------------------------ MOVx ------------------------ */ 31712 31713 /* ------------------------ opl imm, A ----------------- */ 31714 31715 /* ------------------------ opl Ev, Gv ----------------- */ 31716 31717 /* ------------------------ opl Gv, Ev ----------------- */ 31718 31719 /* ------------------------ POP ------------------------ */ 31720 31721 /* ------------------------ PUSH ----------------------- */ 31722 31723 /* ------ AE: SCAS variants ------ */ 31724 31725 /* ------ A6, A7: CMPS variants ------ */ 31726 31727 /* ------ AA, AB: STOS variants ------ */ 31728 31729 /* ------ A4, A5: MOVS variants ------ */ 31730 31731 /* ------------------------ XCHG ----------------------- */ 31732 31733 /* ------------------------ IN / OUT ----------------------- */ 31734 31735 /* ------------------------ (Grp1 extensions) ---------- */ 31736 31737 /* ------------------------ (Grp2 extensions) ---------- */ 31738 31739 /* ------------------------ (Grp3 extensions) ---------- */ 31740 31741 /* ------------------------ (Grp4 extensions) ---------- */ 31742 31743 /* ------------------------ (Grp5 extensions) ---------- */ 31744 31745 /* ------------------------ Escapes to 2-byte opcodes -- */ 31746 31747 case 0x0F: { 31748 opc = getUChar(delta); delta++; 31749 switch (opc) { 31750 31751 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ 31752 31753 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ 31754 31755 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ 31756 31757 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ 31758 31759 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ 31760 31761 /* =-=-=-=-=-=-=-=-=- 
CMPXCHG -=-=-=-=-=-=-=-=-=-= */ 31762 31763 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ 31764 31765 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ 31766 31767 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ 31768 31769 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ 31770 31771 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ 31772 31773 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */ 31774 31775 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ 31776 31777 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ 31778 31779 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ 31780 31781 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */ 31782 31783 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ 31784 31785 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */ 31786 31787 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ 31788 31789 default: 31790 goto decode_failure; 31791 } /* switch (opc) for the 2-byte opcodes */ 31792 goto decode_success; 31793 } /* case 0x0F: of primary opcode */ 31794 31795 /* ------------------------ ??? ------------------------ */ 31796 #endif /* XYZZY */ 31797 31798 //default: 31799 decode_failure: 31800 /* All decode failures end up here. */ 31801 if (sigill_diag) { 31802 vex_printf("vex amd64->IR: unhandled instruction bytes: " 31803 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", 31804 (Int)getUChar(delta_start+0), 31805 (Int)getUChar(delta_start+1), 31806 (Int)getUChar(delta_start+2), 31807 (Int)getUChar(delta_start+3), 31808 (Int)getUChar(delta_start+4), 31809 (Int)getUChar(delta_start+5), 31810 (Int)getUChar(delta_start+6), 31811 (Int)getUChar(delta_start+7) ); 31812 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n", 31813 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx), 31814 getRexX(pfx), getRexB(pfx)); 31815 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n", 31816 haveVEX(pfx) ? 
1 : 0, getVexL(pfx), 31817 getVexNvvvv(pfx), 31818 esc==ESC_NONE ? "NONE" : 31819 esc==ESC_0F ? "0F" : 31820 esc==ESC_0F38 ? "0F38" : 31821 esc==ESC_0F3A ? "0F3A" : "???"); 31822 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n", 31823 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0, 31824 haveF3(pfx) ? 1 : 0); 31825 } 31826 31827 /* Tell the dispatcher that this insn cannot be decoded, and so has 31828 not been executed, and (is currently) the next to be executed. 31829 RIP should be up-to-date since it made so at the start of each 31830 insn, but nevertheless be paranoid and update it again right 31831 now. */ 31832 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 31833 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr); 31834 vassert(dres.whatNext == Dis_StopHere); 31835 dres.len = 0; 31836 /* We also need to say that a CAS is not expected now, regardless 31837 of what it might have been set to at the start of the function, 31838 since the IR that we've emitted just above (to synthesis a 31839 SIGILL) does not involve any CAS, and presumably no other IR has 31840 been emitted for this (non-decoded) insn. */ 31841 *expect_CAS = False; 31842 return dres; 31843 31844 // } /* switch (opc) for the main (primary) opcode switch. */ 31845 31846 decode_success: 31847 /* All decode successes end up here. 
*/ 31848 switch (dres.whatNext) { 31849 case Dis_Continue: 31850 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) ); 31851 break; 31852 case Dis_ResteerU: 31853 case Dis_ResteerC: 31854 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) ); 31855 break; 31856 case Dis_StopHere: 31857 break; 31858 default: 31859 vassert(0); 31860 } 31861 31862 DIP("\n"); 31863 dres.len = toUInt(delta - delta_start); 31864 return dres; 31865 } 31866 31867 #undef DIP 31868 #undef DIS 31869 31870 31871 /*------------------------------------------------------------*/ 31872 /*--- Top-level fn ---*/ 31873 /*------------------------------------------------------------*/ 31874 31875 /* Disassemble a single instruction into IR. The instruction 31876 is located in host memory at &guest_code[delta]. */ 31877 31878 DisResult disInstr_AMD64 ( IRSB* irsb_IN, 31879 Bool (*resteerOkFn) ( void*, Addr ), 31880 Bool resteerCisOk, 31881 void* callback_opaque, 31882 const UChar* guest_code_IN, 31883 Long delta, 31884 Addr guest_IP, 31885 VexArch guest_arch, 31886 const VexArchInfo* archinfo, 31887 const VexAbiInfo* abiinfo, 31888 VexEndness host_endness_IN, 31889 Bool sigill_diag_IN ) 31890 { 31891 Int i, x1, x2; 31892 Bool expect_CAS, has_CAS; 31893 DisResult dres; 31894 31895 /* Set globals (see top of this file) */ 31896 vassert(guest_arch == VexArchAMD64); 31897 guest_code = guest_code_IN; 31898 irsb = irsb_IN; 31899 host_endness = host_endness_IN; 31900 guest_RIP_curr_instr = guest_IP; 31901 guest_RIP_bbstart = guest_IP - delta; 31902 31903 /* We'll consult these after doing disInstr_AMD64_WRK. 
*/ 31904 guest_RIP_next_assumed = 0; 31905 guest_RIP_next_mustcheck = False; 31906 31907 x1 = irsb_IN->stmts_used; 31908 expect_CAS = False; 31909 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, 31910 resteerCisOk, 31911 callback_opaque, 31912 delta, archinfo, abiinfo, sigill_diag_IN ); 31913 x2 = irsb_IN->stmts_used; 31914 vassert(x2 >= x1); 31915 31916 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it 31917 got it right. Failure of this assertion is serious and denotes 31918 a bug in disInstr. */ 31919 if (guest_RIP_next_mustcheck 31920 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) { 31921 vex_printf("\n"); 31922 vex_printf("assumed next %%rip = 0x%llx\n", 31923 guest_RIP_next_assumed ); 31924 vex_printf(" actual next %%rip = 0x%llx\n", 31925 guest_RIP_curr_instr + dres.len ); 31926 vpanic("disInstr_AMD64: disInstr miscalculated next %rip"); 31927 } 31928 31929 /* See comment at the top of disInstr_AMD64_WRK for meaning of 31930 expect_CAS. Here, we (sanity-)check for the presence/absence of 31931 IRCAS as directed by the returned expect_CAS value. */ 31932 has_CAS = False; 31933 for (i = x1; i < x2; i++) { 31934 if (irsb_IN->stmts[i]->tag == Ist_CAS) 31935 has_CAS = True; 31936 } 31937 31938 if (expect_CAS != has_CAS) { 31939 /* inconsistency detected. re-disassemble the instruction so as 31940 to generate a useful error message; then assert. */ 31941 vex_traceflags |= VEX_TRACE_FE; 31942 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, 31943 resteerCisOk, 31944 callback_opaque, 31945 delta, archinfo, abiinfo, sigill_diag_IN ); 31946 for (i = x1; i < x2; i++) { 31947 vex_printf("\t\t"); 31948 ppIRStmt(irsb_IN->stmts[i]); 31949 vex_printf("\n"); 31950 } 31951 /* Failure of this assertion is serious and denotes a bug in 31952 disInstr. 
*/ 31953 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling"); 31954 } 31955 31956 return dres; 31957 } 31958 31959 31960 /*------------------------------------------------------------*/ 31961 /*--- Unused stuff ---*/ 31962 /*------------------------------------------------------------*/ 31963 31964 // A potentially more Memcheck-friendly version of gen_LZCNT, if 31965 // this should ever be needed. 31966 // 31967 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 31968 //{ 31969 // /* Scheme is simple: propagate the most significant 1-bit into all 31970 // lower positions in the word. This gives a word of the form 31971 // 0---01---1. Now invert it, giving a word of the form 31972 // 1---10---0, then do a population-count idiom (to count the 1s, 31973 // which is the number of leading zeroes, or the word size if the 31974 // original word was 0. 31975 // */ 31976 // Int i; 31977 // IRTemp t[7]; 31978 // for (i = 0; i < 7; i++) { 31979 // t[i] = newTemp(ty); 31980 // } 31981 // if (ty == Ity_I64) { 31982 // assign(t[0], binop(Iop_Or64, mkexpr(src), 31983 // binop(Iop_Shr64, mkexpr(src), mkU8(1)))); 31984 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]), 31985 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2)))); 31986 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]), 31987 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4)))); 31988 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]), 31989 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8)))); 31990 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]), 31991 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16)))); 31992 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]), 31993 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32)))); 31994 // assign(t[6], unop(Iop_Not64, mkexpr(t[5]))); 31995 // return gen_POPCOUNT(ty, t[6]); 31996 // } 31997 // if (ty == Ity_I32) { 31998 // assign(t[0], binop(Iop_Or32, mkexpr(src), 31999 // binop(Iop_Shr32, mkexpr(src), mkU8(1)))); 32000 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]), 32001 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2)))); 32002 // 
//      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
//                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
//                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
//                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
//      return gen_POPCOUNT(ty, t[5]);
//   }
//   if (ty == Ity_I16) {
//      assign(t[0], binop(Iop_Or16, mkexpr(src),
//                         binop(Iop_Shr16, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
//                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
//                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
//                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
//      return gen_POPCOUNT(ty, t[4]);
//   }
//   vassert(0);
//}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/